struct cl_page *ccc_vmpage_page_transient(cfs_page_t *vmpage);
int ccc_object_invariant(const struct cl_object *obj);
-int cl_inode_init(struct inode *inode, struct lustre_md *md);
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md);
void cl_inode_fini(struct inode *inode);
int cl_local_size(struct inode *inode);
}
/**
- * Initializes or updates CLIO part when new meta-data arrives from the
- * server.
+ * Initialize or update CLIO structures for regular files when new
+ * meta-data arrives from the server.
*
- * - allocates cl_object if necessary,
- * - updated layout, if object was already here.
+ * \param inode regular file inode
+ * \param md new file metadata from MDS
+ * - allocates cl_object if necessary,
+ * - updates the layout, if the object was already here.
*/
-int cl_inode_init(struct inode *inode, struct lustre_md *md)
+int cl_file_inode_init(struct inode *inode, struct lustre_md *md)
{
struct lu_env *env;
struct cl_inode_info *lli;
int refcheck;
LASSERT(md->body->valid & OBD_MD_FLID);
-
- if (!S_ISREG(cl_inode_mode(inode)))
- return 0;
+ LASSERT(S_ISREG(cl_inode_mode(inode)));
env = cl_env_get(&refcheck);
if (IS_ERR(env))
int cl_sb_init(struct llu_sb_info *sbi);
int cl_sb_fini(struct llu_sb_info *sbi);
-int cl_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
void llu_io_init(struct cl_io *io, struct inode *inode, int write);
if (lsm != NULL) {
if (lli->lli_smd == NULL) {
- cl_inode_init(inode, md);
+ cl_file_inode_init(inode, md);
lli->lli_smd = lsm;
lli->lli_maxbytes = lsm->lsm_maxbytes;
if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
/* The last ref on @file, maybe not the the owner pid of statahead.
* Different processes can open the same dir, "ll_opendir_key" means:
* it is me that should stop the statahead thread. */
- if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
+ if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
+ lli->lli_opendir_pid != 0)
ll_stop_statahead(inode, lli->lli_opendir_key);
if (inode->i_sb->s_root == file->f_dentry) {
RETURN(0);
}
- if (lsm)
- lov_test_and_clear_async_rc(lsm);
- lli->lli_async_rc = 0;
+ if (!S_ISDIR(inode->i_mode)) {
+ if (lsm)
+ lov_test_and_clear_async_rc(lsm);
+ lli->lli_async_rc = 0;
+ }
rc = ll_md_close(sbi->ll_md_exp, inode, file);
fd->fd_file = file;
if (S_ISDIR(inode->i_mode)) {
cfs_spin_lock(&lli->lli_sa_lock);
- if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0 &&
- lli->lli_sai == NULL) {
+ if (lli->lli_opendir_key == NULL && lli->lli_sai == NULL &&
+ lli->lli_opendir_pid == 0) {
lli->lli_opendir_key = fd;
lli->lli_opendir_pid = cfs_curproc_pid();
opendir_set = 1;
struct lov_stripe_md *lsm = lli->lli_smd;
int rc, err;
+ LASSERT(!S_ISDIR(inode->i_mode));
+
/* the application should know write failure already. */
if (lli->lli_write_rc)
return 0;
/* catch async errors that were recorded back when async writeback
* failed for pages in this mapping. */
- err = lli->lli_async_rc;
- lli->lli_async_rc = 0;
- if (rc == 0)
- rc = err;
- if (lsm) {
- err = lov_test_and_clear_async_rc(lsm);
+ if (!S_ISDIR(inode->i_mode)) {
+ err = lli->lli_async_rc;
+ lli->lli_async_rc = 0;
if (rc == 0)
rc = err;
+ if (lsm) {
+ err = lov_test_and_clear_async_rc(lsm);
+ if (rc == 0)
+ rc = err;
+ }
}
oc = ll_mdscapa_get(inode);
rc = err;
OBDO_FREE(oinfo->oi_oa);
OBD_FREE_PTR(oinfo);
- lli->lli_write_rc = err < 0 ? : 0;
+ lli->lli_write_rc = rc < 0 ? rc : 0;
}
RETURN(rc);
struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CWARN("ino %lu/%u(flags %lu) som valid it just after "
- "recovery\n",
- inode->i_ino, inode->i_generation,
- lli->lli_flags);
+ CWARN("ino %lu/%u(flags %u) som valid it just after "
+ "recovery\n",
+ inode->i_ino, inode->i_generation,
+ lli->lli_flags);
/* DONE_WRITING is allowed and inode has no dirty page. */
cfs_spin_lock(&lcq->lcq_lock);
op_data->op_flags |= MF_SOM_CHANGE;
/* Check if Size-on-MDS attributes are valid. */
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %lu) som valid it just after "
+ CERROR("ino %lu/%u(flags %u) som valid it just after "
"recovery\n", inode->i_ino, inode->i_generation,
lli->lli_flags);
LASSERT(op_data != NULL);
if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
- CERROR("ino %lu/%u(flags %lu) som valid it just after "
+ CERROR("ino %lu/%u(flags %u) som valid it just after "
"recovery\n", inode->i_ino, inode->i_generation,
lli->lli_flags);
* be sent to MDS. */
LLIF_SOM_DIRTY = (1 << 3),
/* File is contented */
- LLIF_CONTENDED = (1 << 4),
+ LLIF_CONTENDED = (1 << 4),
/* Truncate uses server lock for this file */
- LLIF_SRVLOCK = (1 << 5)
+ LLIF_SRVLOCK = (1 << 5),
};
struct ll_inode_info {
- int lli_inode_magic;
- cfs_semaphore_t lli_size_sem; /* protect open and change size */
- void *lli_size_sem_owner;
- cfs_semaphore_t lli_write_sem;
- cfs_rw_semaphore_t lli_trunc_sem;
- char *lli_symlink_name;
- __u64 lli_maxbytes;
- __u64 lli_ioepoch;
- unsigned long lli_flags;
-
- /* this lock protects posix_acl, pending_write_llaps, mmap_cnt */
- cfs_spinlock_t lli_lock;
- cfs_list_t lli_close_list;
- /* handle is to be sent to MDS later on done_writing and setattr.
- * Open handle data are needed for the recovery to reconstruct
- * the inode state on the MDS. XXX: recovery is not ready yet. */
- struct obd_client_handle *lli_pending_och;
-
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
- int lli_write_rc;
+ __u32 lli_inode_magic;
+ __u32 lli_flags;
+ __u64 lli_ioepoch;
- struct posix_acl *lli_posix_acl;
-
- /* remote permission hash */
- cfs_hlist_head_t *lli_remote_perms;
- unsigned long lli_rmtperm_utime;
- cfs_semaphore_t lli_rmtperm_sem;
-
- cfs_list_t lli_dead_list;
-
- cfs_semaphore_t lli_och_sem; /* Protects access to och pointers
- and their usage counters, also
- atomicity of check-update of
- lli_smd */
- /* We need all three because every inode may be opened in different
- modes */
- struct obd_client_handle *lli_mds_read_och;
- __u64 lli_open_fd_read_count;
- struct obd_client_handle *lli_mds_write_och;
- __u64 lli_open_fd_write_count;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_exec_count;
+ cfs_spinlock_t lli_lock;
+ struct posix_acl *lli_posix_acl;
- struct inode lli_vfs_inode;
+ cfs_hlist_head_t *lli_remote_perms;
+ cfs_semaphore_t lli_rmtperm_sem;
/* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
+ struct lu_fid lli_fid;
/* Parent fid for accessing default stripe data on parent directory
* for allocating OST objects after a mknod() and later open-by-FID. */
- struct lu_fid lli_pfid;
- struct lov_stripe_md *lli_smd;
+ struct lu_fid lli_pfid;
- /* fid capability */
+ cfs_list_t lli_close_list;
+ cfs_list_t lli_oss_capas;
/* open count currently used by capability only, indicate whether
* capability needs renewal */
- cfs_atomic_t lli_open_count;
- struct obd_capa *lli_mds_capa;
- cfs_list_t lli_oss_capas;
-
- /* metadata statahead */
- /* protect statahead stuff: lli_opendir_pid, lli_opendir_key, lli_sai,
- * and so on. */
- cfs_spinlock_t lli_sa_lock;
- /*
- * "opendir_pid" is the token when lookup/revalid -- I am the owner of
- * dir statahead.
- */
- pid_t lli_opendir_pid;
- /*
- * since parent-child threads can share the same @file struct,
- * "opendir_key" is the token when dir close for case of parent exit
- * before child -- it is me should cleanup the dir readahead. */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- __u64 lli_sa_pos;
- struct cl_object *lli_clob;
+ cfs_atomic_t lli_open_count;
+ struct obd_capa *lli_mds_capa;
+ cfs_time_t lli_rmtperm_time;
+
+ /* handle is to be sent to MDS later on done_writing and setattr.
+ * Open handle data are needed for the recovery to reconstruct
+ * the inode state on the MDS. XXX: recovery is not ready yet. */
+ struct obd_client_handle *lli_pending_och;
+
+ /* We need all three because every inode may be opened in different
+ * modes */
+ struct obd_client_handle *lli_mds_read_och;
+ struct obd_client_handle *lli_mds_write_och;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_read_count;
+ __u64 lli_open_fd_write_count;
+ __u64 lli_open_fd_exec_count;
+ /* Protects access to och pointers and their usage counters, also
+ * atomicity of check-update of lli_smd */
+ cfs_semaphore_t lli_och_sem;
+
+ struct inode lli_vfs_inode;
+
/* the most recent timestamps obtained from mds */
- struct ost_lvb lli_lvb;
- /**
- * serialize normal readdir and statahead-readdir
+ struct ost_lvb lli_lvb;
+
+ /* Try to keep the d::member and f::member layouts aligned. Before
+ * using these members, make clear whether the inode is a directory
+ * or not. */
+ union {
+ /* for directory */
+ struct {
+ /* serialize normal readdir and statahead-readdir. */
+ cfs_semaphore_t d_readdir_sem;
+
+ /* metadata statahead */
+ /* since parent-child threads can share the same @file
+ * struct, "opendir_key" is the token taken at dir close
+ * when the parent exits before the child -- it is me that
+ * should clean up the dir readahead. */
+ void *d_opendir_key;
+ struct ll_statahead_info *d_sai;
+ __u64 d_sa_pos;
+ struct posix_acl *d_def_acl;
+ /* protect statahead stuff. */
+ cfs_spinlock_t d_sa_lock;
+ /* "opendir_pid" is the token taken during lookup/revalidate
+ * -- I am the owner of dir statahead. */
+ pid_t d_opendir_pid;
+ } d;
+
+#define lli_readdir_sem u.d.d_readdir_sem
+#define lli_opendir_key u.d.d_opendir_key
+#define lli_sai u.d.d_sai
+#define lli_sa_pos u.d.d_sa_pos
+#define lli_def_acl u.d.d_def_acl
+#define lli_sa_lock u.d.d_sa_lock
+#define lli_opendir_pid u.d.d_opendir_pid
+
+ /* for non-directory */
+ struct {
+ cfs_semaphore_t f_size_sem;
+ void *f_size_sem_owner;
+ char *f_symlink_name;
+ __u64 f_maxbytes;
+ /*
+ * cfs_rw_semaphore_t {
+ * signed long count; // align u.d.d_def_acl
+ * cfs_spinlock_t wait_lock; // align u.d.d_sa_lock
+ * struct list_head wait_list;
+ * }
+ */
+ cfs_rw_semaphore_t f_trunc_sem;
+ cfs_semaphore_t f_write_sem;
+
+ /* for writepage() only to communicate to fsync */
+ int f_async_rc;
+ int f_write_rc;
+ } f;
+
+#define lli_size_sem u.f.f_size_sem
+#define lli_size_sem_owner u.f.f_size_sem_owner
+#define lli_symlink_name u.f.f_symlink_name
+#define lli_maxbytes u.f.f_maxbytes
+#define lli_trunc_sem u.f.f_trunc_sem
+#define lli_write_sem u.f.f_write_sem
+#define lli_async_rc u.f.f_async_rc
+#define lli_write_rc u.f.f_write_rc
+
+ } u;
+
+ /* XXX: Although the following frequently used members may only be
+ * meaningful for non-directory objects, it is somewhat time-wasting
+ * to check whether the object is a directory before using them. On
+ * the other hand, since currently sizeof(f) > sizeof(d), moving
+ * these members into u.f cannot reduce the "ll_inode_info" size
+ * anyway. So keep them outside.
+ *
+ * In the future, if more members are added only for directories,
+ * some of the following members can be moved into u.f.
*/
- cfs_semaphore_t lli_readdir_sem;
+ struct lov_stripe_md *lli_smd;
+ struct cl_object *lli_clob;
};
/*
/**
* Common IO arguments for various VFS I/O interfaces.
*/
-
int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
-int cl_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma);
void ll_io_init(struct cl_io *io, const struct file *file, int write);
void ll_lli_init(struct ll_inode_info *lli)
{
lli->lli_inode_magic = LLI_INODE_MAGIC;
- cfs_sema_init(&lli->lli_size_sem, 1);
- cfs_sema_init(&lli->lli_write_sem, 1);
- cfs_init_rwsem(&lli->lli_trunc_sem);
lli->lli_flags = 0;
- lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
+ lli->lli_ioepoch = 0;
cfs_spin_lock_init(&lli->lli_lock);
- CFS_INIT_LIST_HEAD(&lli->lli_close_list);
- lli->lli_inode_magic = LLI_INODE_MAGIC;
- cfs_sema_init(&lli->lli_och_sem, 1);
- lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
- lli->lli_mds_exec_och = NULL;
- lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
- lli->lli_open_fd_exec_count = 0;
- CFS_INIT_LIST_HEAD(&lli->lli_dead_list);
+ lli->lli_posix_acl = NULL;
lli->lli_remote_perms = NULL;
- lli->lli_rmtperm_utime = 0;
cfs_sema_init(&lli->lli_rmtperm_sem, 1);
+ /* Do not set lli_fid, it has been initialized already. */
+ fid_zero(&lli->lli_pfid);
+ CFS_INIT_LIST_HEAD(&lli->lli_close_list);
CFS_INIT_LIST_HEAD(&lli->lli_oss_capas);
- cfs_spin_lock_init(&lli->lli_sa_lock);
+ cfs_atomic_set(&lli->lli_open_count, 0);
+ lli->lli_mds_capa = NULL;
+ lli->lli_rmtperm_time = 0;
+ lli->lli_pending_och = NULL;
+ lli->lli_mds_read_och = NULL;
+ lli->lli_mds_write_och = NULL;
+ lli->lli_mds_exec_och = NULL;
+ lli->lli_open_fd_read_count = 0;
+ lli->lli_open_fd_write_count = 0;
+ lli->lli_open_fd_exec_count = 0;
+ cfs_sema_init(&lli->lli_och_sem, 1);
+ lli->lli_smd = NULL;
lli->lli_clob = NULL;
- cfs_sema_init(&lli->lli_readdir_sem, 1);
- fid_zero(&lli->lli_pfid);
+
+ LASSERT(lli->lli_vfs_inode.i_mode != 0);
+ if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
+ cfs_sema_init(&lli->lli_readdir_sem, 1);
+ lli->lli_opendir_key = NULL;
+ lli->lli_sai = NULL;
+ lli->lli_sa_pos = 0;
+ lli->lli_def_acl = NULL;
+ cfs_spin_lock_init(&lli->lli_sa_lock);
+ lli->lli_opendir_pid = 0;
+ } else {
+ cfs_sema_init(&lli->lli_size_sem, 1);
+ lli->lli_size_sem_owner = NULL;
+ lli->lli_symlink_name = NULL;
+ lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
+ cfs_init_rwsem(&lli->lli_trunc_sem);
+ cfs_sema_init(&lli->lli_write_sem, 1);
+ lli->lli_async_rc = 0;
+ lli->lli_write_rc = 0;
+ }
}
static inline int ll_bdi_register(struct backing_dev_info *bdi)
if (S_ISDIR(inode->i_mode)) {
/* these should have been cleared in ll_file_release */
- LASSERT(lli->lli_sai == NULL);
LASSERT(lli->lli_opendir_key == NULL);
+ LASSERT(lli->lli_sai == NULL);
LASSERT(lli->lli_opendir_pid == 0);
}
if (lli->lli_mds_read_och)
ll_md_real_close(inode, FMODE_READ);
- if (lli->lli_symlink_name) {
+ if (S_ISLNK(inode->i_mode) && lli->lli_symlink_name) {
OBD_FREE(lli->lli_symlink_name,
strlen(lli->lli_symlink_name) + 1);
lli->lli_symlink_name = NULL;
UNLOCK_INODE_MUTEX(inode);
if (ia_valid & ATTR_SIZE)
UP_WRITE_I_ALLOC_SEM(inode);
- cfs_down_write(&lli->lli_trunc_sem);
+ if (!S_ISDIR(inode->i_mode))
+ cfs_down_write(&lli->lli_trunc_sem);
LOCK_INODE_MUTEX(inode);
if (ia_valid & ATTR_SIZE)
DOWN_WRITE_I_ALLOC_SEM(inode);
rc1 = ll_setattr_done_writing(inode, op_data, mod);
ll_finish_md_op_data(op_data);
}
- cfs_up_write(&lli->lli_trunc_sem);
+ if (!S_ISDIR(inode->i_mode))
+ cfs_up_write(&lli->lli_trunc_sem);
return rc ? rc : rc1;
}
struct ll_inode_info *lli;
struct lov_stripe_md *lsm;
+ LASSERT(!S_ISDIR(inode->i_mode));
+
lli = ll_i2info(inode);
LASSERT(lli->lli_size_sem_owner != current);
cfs_down(&lli->lli_size_sem);
LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
if (lsm != NULL) {
+ LASSERT(S_ISREG(inode->i_mode));
+
cfs_down(&lli->lli_och_sem);
if (lli->lli_smd == NULL) {
if (lsm->lsm_magic != LOV_MAGIC_V1 &&
}
CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n",
lsm, inode->i_ino, inode->i_generation, inode);
- /* cl_inode_init must go before lli_smd or a race is
- * possible where client thinks the file has stripes,
+ /* cl_file_inode_init must go before lli_smd or a race
+ * is possible where client thinks the file has stripes,
* but lov raid0 is not setup yet and parallel e.g.
* glimpse would try to use uninitialized lov */
- cl_inode_init(inode, md);
+ cl_file_inode_init(inode, md);
cfs_spin_lock(&lli->lli_lock);
lli->lli_smd = lsm;
cfs_spin_unlock(&lli->lli_lock);
if (lli->lli_flags & (LLIF_DONE_WRITING |
LLIF_EPOCH_PENDING |
LLIF_SOM_DIRTY)) {
- CERROR("ino %lu flags %lu still has "
+ CERROR("ino %lu flags %u still has "
"size authority! do not trust "
"the size got from MDS\n",
inode->i_ino, lli->lli_flags);
CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
PFID(&lli->lli_fid), inode);
- ll_lli_init(lli);
-
LASSERT(!lli->lli_smd);
/* Core attributes from the MDS first. This is a new inode, and
}
lli->lli_fid = body->fid1;
+ if (unlikely(!(body->valid & OBD_MD_FLTYPE))) {
+ CERROR("Can not initialize inode "DFID" without object type: "
+ "valid = "LPX64"\n", PFID(&lli->lli_fid), body->valid);
+ return -EINVAL;
+ }
+
+ inode->i_mode = (inode->i_mode & ~S_IFMT) | (body->mode & S_IFMT);
+ if (unlikely(inode->i_mode == 0)) {
+ CERROR("Invalid inode "DFID" type\n", PFID(&lli->lli_fid));
+ return -EINVAL;
+ }
+
+ ll_lli_init(lli);
+
return 0;
}
ll_read_inode2(inode, md);
if (S_ISREG(inode->i_mode) &&
ll_i2info(inode)->lli_clob == NULL)
- rc = cl_inode_init(inode, md);
+ rc = cl_file_inode_init(inode, md);
if (rc != 0) {
md->lsm = NULL;
make_bad_inode(inode);
} else
unlock_new_inode(inode);
} else if (!(inode->i_state & (I_FREEING | I_CLEAR)))
- ll_update_inode(inode, md);
+ ll_update_inode(inode, md);
CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n",
inode, PFID(&md->body->fid1));
}
lrp->lrp_fsgid = perm->rp_fsgid;
cfs_hlist_add_head(&lrp->lrp_list, head);
}
- lli->lli_rmtperm_utime = jiffies;
+ lli->lli_rmtperm_time = cfs_time_current();
cfs_spin_unlock(&lli->lli_lock);
CDEBUG(D_SEC, "new remote perm@%p: %u/%u/%u/%u - %#x\n",
struct ptlrpc_request *req = NULL;
struct mdt_remote_perm *perm;
struct obd_capa *oc;
- unsigned long utime;
+ cfs_time_t save;
int i = 0, rc;
ENTRY;
do {
- utime = lli->lli_rmtperm_utime;
+ save = lli->lli_rmtperm_time;
rc = do_check_remote_perm(lli, mask);
if (!rc || (rc != -ENOENT && i))
break;
cfs_down(&lli->lli_rmtperm_sem);
/* check again */
- if (utime != lli->lli_rmtperm_utime) {
+ if (save != lli->lli_rmtperm_time) {
rc = do_check_remote_perm(lli, mask);
if (!rc || (rc != -ENOENT && i)) {
cfs_up(&lli->lli_rmtperm_sem);
return NULL;
inode_init_once(&lli->lli_vfs_inode);
- ll_lli_init(lli);
-
return &lli->lli_vfs_inode;
}
GOTO(out, rc);
if (S_ISREG(inode->i_mode)) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_stripe_md *lsm = NULL;
- lsm = lli->lli_smd;
- if (lsm == NULL)
+ if (ll_i2info(inode)->lli_smd == NULL)
rc2 = -1;
} else if (S_ISDIR(inode->i_mode)) {
rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);