-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* be sent to MDS. */
LLIF_SOM_DIRTY = (1 << 3),
/* File is contented */
- LLIF_CONTENDED = (1 << 4),
+ LLIF_CONTENDED = (1 << 4),
/* Truncate uses server lock for this file */
- LLIF_SRVLOCK = (1 << 5)
+ LLIF_SRVLOCK = (1 << 5),
};
struct ll_inode_info {
- int lli_inode_magic;
- cfs_semaphore_t lli_size_sem; /* protect open and change size */
- void *lli_size_sem_owner;
- cfs_semaphore_t lli_write_sem;
- cfs_rw_semaphore_t lli_trunc_sem;
- char *lli_symlink_name;
- __u64 lli_maxbytes;
- __u64 lli_ioepoch;
- unsigned long lli_flags;
-
- /* this lock protects posix_acl, pending_write_llaps, mmap_cnt */
- cfs_spinlock_t lli_lock;
- cfs_list_t lli_close_list;
- /* handle is to be sent to MDS later on done_writing and setattr.
- * Open handle data are needed for the recovery to reconstruct
- * the inode state on the MDS. XXX: recovery is not ready yet. */
- struct obd_client_handle *lli_pending_och;
-
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
- int lli_write_rc;
+ __u32 lli_inode_magic;
+ __u32 lli_flags;
+ __u64 lli_ioepoch;
- struct posix_acl *lli_posix_acl;
+ cfs_spinlock_t lli_lock;
+ struct posix_acl *lli_posix_acl;
- /* remote permission hash */
- cfs_hlist_head_t *lli_remote_perms;
- unsigned long lli_rmtperm_utime;
- cfs_semaphore_t lli_rmtperm_sem;
-
- cfs_list_t lli_dead_list;
-
- cfs_semaphore_t lli_och_sem; /* Protects access to och pointers
- and their usage counters, also
- atomicity of check-update of
- lli_smd */
- /* We need all three because every inode may be opened in different
- modes */
- struct obd_client_handle *lli_mds_read_och;
- __u64 lli_open_fd_read_count;
- struct obd_client_handle *lli_mds_write_och;
- __u64 lli_open_fd_write_count;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_exec_count;
-
- struct inode lli_vfs_inode;
+ cfs_hlist_head_t *lli_remote_perms;
+ cfs_mutex_t lli_rmtperm_mutex;
/* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
+ struct lu_fid lli_fid;
/* Parent fid for accessing default stripe data on parent directory
* for allocating OST objects after a mknod() and later open-by-FID. */
- struct lu_fid lli_pfid;
- struct lov_stripe_md *lli_smd;
+ struct lu_fid lli_pfid;
- /* fid capability */
+ cfs_list_t lli_close_list;
+ cfs_list_t lli_oss_capas;
/* open count currently used by capability only, indicate whether
* capability needs renewal */
- cfs_atomic_t lli_open_count;
- struct obd_capa *lli_mds_capa;
- cfs_list_t lli_oss_capas;
-
- /* metadata statahead */
- /* protect statahead stuff: lli_opendir_pid, lli_opendir_key, lli_sai,
- * and so on. */
- cfs_spinlock_t lli_sa_lock;
- /*
- * "opendir_pid" is the token when lookup/revalid -- I am the owner of
- * dir statahead.
- */
- pid_t lli_opendir_pid;
- /*
- * since parent-child threads can share the same @file struct,
- * "opendir_key" is the token when dir close for case of parent exit
- * before child -- it is me should cleanup the dir readahead. */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- __u64 lli_sa_pos;
- struct cl_object *lli_clob;
+ cfs_atomic_t lli_open_count;
+ struct obd_capa *lli_mds_capa;
+ cfs_time_t lli_rmtperm_time;
+
+ /* handle is to be sent to MDS later on done_writing and setattr.
+ * Open handle data are needed for the recovery to reconstruct
+ * the inode state on the MDS. XXX: recovery is not ready yet. */
+ struct obd_client_handle *lli_pending_och;
+
+ /* We need all three because every inode may be opened in different
+ * modes */
+ struct obd_client_handle *lli_mds_read_och;
+ struct obd_client_handle *lli_mds_write_och;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_read_count;
+ __u64 lli_open_fd_write_count;
+ __u64 lli_open_fd_exec_count;
+ /* Protects access to och pointers and their usage counters, also
+ * atomicity of check-update of lli_smd */
+ cfs_mutex_t lli_och_mutex;
+
+ struct inode lli_vfs_inode;
+
/* the most recent timestamps obtained from mds */
- struct ost_lvb lli_lvb;
- /**
- * serialize normal readdir and statahead-readdir
+ struct ost_lvb lli_lvb;
+ cfs_spinlock_t lli_agl_lock;
+
+ /* Try to keep the d::member and f::member layouts aligned. Before
+ * using these members, first determine whether this is a directory. */
+ union {
+ /* for directory */
+ struct {
+ /* serialize normal readdir and statahead-readdir. */
+ cfs_mutex_t d_readdir_mutex;
+
+ /* metadata statahead */
+ /* since parent-child threads can share the same @file
+ * struct, "opendir_key" is the token used at dir close
+ * when the parent exits before the child -- in that case
+ * it is this thread's job to clean up the dir readahead. */
+ void *d_opendir_key;
+ struct ll_statahead_info *d_sai;
+ __u64 d_sa_pos;
+ struct posix_acl *d_def_acl;
+ /* protect statahead stuff. */
+ cfs_spinlock_t d_sa_lock;
+ /* "opendir_pid" is the token when lookup/revalid
+ * -- I am the owner of dir statahead. */
+ pid_t d_opendir_pid;
+ } d;
+
+#define lli_readdir_mutex u.d.d_readdir_mutex
+#define lli_opendir_key u.d.d_opendir_key
+#define lli_sai u.d.d_sai
+#define lli_sa_pos u.d.d_sa_pos
+#define lli_def_acl u.d.d_def_acl
+#define lli_sa_lock u.d.d_sa_lock
+#define lli_opendir_pid u.d.d_opendir_pid
+
+ /* for non-directory */
+ struct {
+ cfs_semaphore_t f_size_sem;
+ void *f_size_sem_owner;
+ char *f_symlink_name;
+ __u64 f_maxbytes;
+ /*
+ * cfs_rw_semaphore_t {
+ * signed long count; // align u.d.d_def_acl
+ * cfs_spinlock_t wait_lock; // align u.d.d_sa_lock
+ * struct list_head wait_list;
+ * }
+ */
+ cfs_rw_semaphore_t f_trunc_sem;
+ cfs_mutex_t f_write_mutex;
+
+ /* for writepage() only to communicate to fsync */
+ int f_async_rc;
+ int f_write_rc;
+
+ cfs_rw_semaphore_t f_glimpse_sem;
+ cfs_time_t f_glimpse_time;
+ cfs_list_t f_agl_list;
+ __u64 f_agl_index;
+ /*
+ * Whenever a process tries to read/write the file, the
+ * jobid of the process will be saved here, and it will
+ * be packed into the write RPC when flushed later.
+ *
+ * So the read/write statistics for the jobid will not be
+ * accurate if the file is shared by different jobs.
+ */
+ char f_jobid[JOBSTATS_JOBID_SIZE];
+ } f;
+
+#define lli_size_sem u.f.f_size_sem
+#define lli_size_sem_owner u.f.f_size_sem_owner
+#define lli_symlink_name u.f.f_symlink_name
+#define lli_maxbytes u.f.f_maxbytes
+#define lli_trunc_sem u.f.f_trunc_sem
+#define lli_write_mutex u.f.f_write_mutex
+#define lli_async_rc u.f.f_async_rc
+#define lli_write_rc u.f.f_write_rc
+#define lli_glimpse_sem u.f.f_glimpse_sem
+#define lli_glimpse_time u.f.f_glimpse_time
+#define lli_agl_list u.f.f_agl_list
+#define lli_agl_index u.f.f_agl_index
+#define lli_jobid u.f.f_jobid
+
+ } u;
+
+ /* XXX: For the following frequently used members, although they may
+ * only be meaningful for non-directory objects, it would waste
+ * time to check whether the object is a directory before using
+ * them. On the other hand, since sizeof(f) > sizeof(d) currently,
+ * moving those members into u.f would not reduce the
+ * "ll_inode_info" size anyway. So keep them outside.
+ *
+ * In the future, if more directory-only members are added,
+ * some of the following members can be moved into u.f.
*/
- cfs_semaphore_t lli_readdir_sem;
+ struct lov_stripe_md *lli_smd;
+ struct cl_object *lli_clob;
};
/*
#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
+#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
+#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
unsigned int ll_offset_process_count;
struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
unsigned int ll_rw_offset_entry_count;
- enum stats_track_type ll_stats_track_type;
int ll_stats_track_id;
+ enum stats_track_type ll_stats_track_type;
int ll_rw_stats_on;
/* metadata stat-ahead */
* count */
atomic_t ll_sa_wrong; /* statahead thread stopped for
* low hit ratio */
+ atomic_t ll_agl_total; /* AGL thread started count */
dev_t ll_sdev_orig; /* save s_dev before assign for
* clustred nfs */
int ll_som_update(struct inode *inode, struct md_op_data *op_data);
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
__u64 ioepoch, int sync);
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
struct md_open_data **mod);
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
struct lustre_handle *fh);
struct lookup_intent *it, struct kstat *stat);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
struct ll_file_data *ll_file_data_get(void);
-#ifndef HAVE_INODE_PERMISION_2ARGS
-int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+#ifdef HAVE_GENERIC_PERMISSION_4ARGS
+int ll_inode_permission(struct inode *inode, int mask, unsigned int flags);
#else
+# ifndef HAVE_INODE_PERMISION_2ARGS
+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+# else
int ll_inode_permission(struct inode *inode, int mask);
+# endif
#endif
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
int flags, struct lov_user_md *lum,
int set_default);
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
int *lmm_size, struct ptlrpc_request **request);
+#ifdef HAVE_FILE_FSYNC_4ARGS
+int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
+#elif defined(HAVE_FILE_FSYNC_2ARGS)
+int ll_fsync(struct file *file, int data);
+#else
int ll_fsync(struct file *file, struct dentry *dentry, int data);
+#endif
int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
int num_bytes);
int ll_merge_lvb(struct inode *inode);
void ll_kill_super(struct super_block *sb);
struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr);
int ll_setattr(struct dentry *de, struct iattr *attr);
#ifndef HAVE_STATFS_DENTRY_PARAM
int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
const char *name, int namelen,
int mode, __u32 opc, void *data);
void ll_finish_md_op_data(struct md_op_data *op_data);
+int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
/* llite/llite_nfs.c */
extern struct export_operations lustre_export_operations;
__u64 end, ldlm_mode_t mode);
void policy_from_vma(ldlm_policy_data_t *policy,
struct vm_area_struct *vma, unsigned long addr, size_t count);
-struct vm_area_struct *our_vma(unsigned long addr, size_t count);
+struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
+ size_t count);
+
+static inline void ll_invalidate_page(struct page *vmpage)
+{
+ struct address_space *mapping = vmpage->mapping;
+ loff_t offset = vmpage->index << PAGE_CACHE_SHIFT;
+
+ LASSERT(PageLocked(vmpage));
+ if (mapping == NULL)
+ return;
+
+ ll_teardown_mmaps(mapping, offset, offset + CFS_PAGE_SIZE);
+ truncate_complete_page(mapping, vmpage);
+}
#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
/**
* Common IO arguments for various VFS I/O interfaces.
*/
-
int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
-int cl_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma);
void ll_io_init(struct cl_io *io, const struct file *file, int write);
unsigned int sai_skip_hidden;/* skipped hidden dentry count */
unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
* hidden entries */
- sai_in_readpage:1;/* statahead is in readdir()*/
+ sai_in_readpage:1,/* statahead is in readdir()*/
+ sai_agl_valid:1;/* AGL is valid for the dir */
cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
struct ptlrpc_thread sai_thread; /* stat-ahead thread */
+ struct ptlrpc_thread sai_agl_thread; /* AGL thread */
cfs_list_t sai_entries_sent; /* entries sent out */
cfs_list_t sai_entries_received; /* entries returned */
cfs_list_t sai_entries_stated; /* entries stated */
+ cfs_list_t sai_entries_agl; /* AGL entries to be sent */
cfs_list_t sai_cache[LL_SA_CACHE_SIZE];
cfs_spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
cfs_atomic_t sai_cache_count; /* entry count in cache */
int only_unplug);
void ll_stop_statahead(struct inode *dir, void *key);
+static inline int ll_glimpse_size(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
+
+ cfs_down_read(&lli->lli_glimpse_sem);
+ rc = cl_glimpse_size(inode);
+ lli->lli_glimpse_time = cfs_time_current();
+ cfs_up_read(&lli->lli_glimpse_sem);
+ return rc;
+}
+
static inline void
ll_statahead_mark(struct inode *dir, struct dentry *dentry)
{
struct ll_dentry_data *ldd;
if (ll_i2sbi(dir)->ll_sa_max == 0)
- return -ENOTSUPP;
+ return -EAGAIN;
lli = ll_i2info(dir);
/* not the same process, don't statahead */
#define cl_inode_mode(inode) ((inode)->i_mode)
#define cl_i2sbi ll_i2sbi
+static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
+ const struct iattr *attr)
+{
+ LASSERT(attr->ia_valid & ATTR_FILE);
+ return LUSTRE_FPRIVATE(attr->ia_file);
+}
+
static inline void cl_isize_lock(struct inode *inode, int lsmlock)
{
ll_inode_size_lock(inode, lsmlock);
struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt);
+int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end);
+
/** direct write pages */
struct ll_dio_pages {
/** page array to be written. we don't support
static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
int rc)
{
- int opc = (crt == CIT_READ) ? LPROC_LL_OSC_READ :
+ int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
LPROC_LL_OSC_WRITE;
ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
cfs_spin_lock(&ll_lookup_lock);
spin_lock(&dcache_lock);
}
- lock_dentry(dentry);
- __d_drop(dentry);
- unlock_dentry(dentry);
- d_rehash_cond(dentry, 0);
+ if (d_unhashed(dentry))
+ d_rehash_cond(dentry, 0);
if (!locked) {
spin_unlock(&dcache_lock);
cfs_spin_unlock(&ll_lookup_lock);