bool cl_is_released;
};
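+/*
+ * Opcodes for cl_object_inode_ops(): ask the VVP layer to take or
+ * release the inode lock (i_rwsem) or the inode size lock
+ * (lli_size_mutex).
+ */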
+enum coo_inode_opc {
+ COIO_INODE_LOCK,
+ COIO_INODE_UNLOCK,
+ COIO_SIZE_LOCK,
+ COIO_SIZE_UNLOCK,
+};
+
/**
* Operations implemented for each cl object layer.
*
int (*coo_object_flush)(const struct lu_env *env,
struct cl_object *obj,
struct ldlm_lock *lock);
+ /**
+ * Operate on the inode. Used by LOV to lock/unlock the inode at the
+ * VVP layer.
+ */
+ int (*coo_inode_ops)(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data);
};
/**
loff_t cl_object_maxbytes(struct cl_object *obj);
int cl_object_flush(const struct lu_env *env, struct cl_object *obj,
struct ldlm_lock *lock);
+int cl_object_inode_ops(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data);
/**
loff_t ret = -EINVAL;
ENTRY;
- inode_lock(inode);
+ ll_inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
GOTO(out, ret);
out:
- inode_unlock(inode);
+ ll_inode_unlock(inode);
return ret;
}
op_data->op_open_handle = och->och_open_handle;
if (och->och_flags & FMODE_WRITE &&
- test_and_clear_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags))
+ test_and_clear_bit(LLIF_DATA_MODIFIED,
+ &ll_i2info(inode)->lli_flags))
/* For HSM: if inode data has been modified, pack it so that
* MDT can set data dirty flag in the archive. */
op_data->op_bias |= MDS_DATA_MODIFIED;
RETURN(0);
if (unlikely(lock_inode))
- inode_lock(inode);
+ ll_inode_lock(inode);
result = __generic_file_write_iter(iocb, iter);
if (unlikely(lock_inode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
/* If the page is not already dirty, ll_tiny_write_begin returns
* -ENODATA. We continue on to normal write.
if (enckey == 0 || nameenc == 0)
continue;
- inode_lock(parent);
+ ll_inode_lock(parent);
de = lookup_one_len(p, de_parent, len);
- inode_unlock(parent);
+ ll_inode_unlock(parent);
if (IS_ERR_OR_NULL(de) || !de->d_inode) {
dput(de_parent);
rc = -ENODATA;
ATTR_ATIME | ATTR_ATIME_SET;
inode_lock(inode);
-
+ /* inode lock owner set in ll_setattr_raw() */
rc = ll_setattr_raw(file_dentry(file), attr, 0, true);
if (rc == -ENODATA)
rc = 0;
-
inode_unlock(inode);
out:
RETURN(-EINVAL);
inode_lock(inode);
+ /* inode lock owner set in ll_setattr_raw() */
rc = ll_setattr_raw(file_dentry(file), &ia, OP_XVALID_CTIME_SET,
false);
inode_unlock(inode);
/* apply child dentry if name is valid */
name_len = strnlen(lu_project.project_name, NAME_MAX);
if (name_len > 0 && name_len <= NAME_MAX) {
- inode_lock(inode);
+ ll_inode_lock(inode);
child_dentry = lookup_one_len(lu_project.project_name,
dentry, name_len);
- inode_unlock(inode);
+ ll_inode_unlock(inode);
if (IS_ERR(child_dentry)) {
rc = PTR_ERR(child_dentry);
goto out;
if (IS_ERR(op_data))
GOTO(out_iput, rc = PTR_ERR(op_data));
- inode_lock(child_inode);
+ ll_inode_lock(child_inode);
op_data->op_fid3 = *ll_inode2fid(child_inode);
if (!fid_is_sane(&op_data->op_fid3)) {
CERROR("%s: migrate %s, but FID "DFID" is insane\n",
if (!rc)
clear_nlink(child_inode);
out_unlock:
- inode_unlock(child_inode);
+ ll_inode_unlock(child_inode);
ll_finish_md_op_data(op_data);
out_iput:
iput(child_inode);
/* for non-directory */
struct {
struct mutex lli_size_mutex;
- char *lli_symlink_name;
+ struct task_struct *lli_size_lock_owner;
+ char *lli_symlink_name;
struct ll_trunc_sem lli_trunc_sem;
struct range_lock_tree lli_write_tree;
struct mutex lli_setattr_mutex;
struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */
struct list_head lli_lccs; /* list of ll_cl_context */
seqlock_t lli_page_inv_lock;
+
+ struct task_struct *lli_inode_lock_owner;
};
#ifndef HAVE_USER_NAMESPACE_ARG
spin_unlock(&lli->lli_layout_lock);
}
-enum ll_file_flags {
+enum ll_inode_flags {
/* File data is modified. */
LLIF_DATA_MODIFIED = 0,
/* File is being restored */
LLIF_UPDATE_ATIME = 4,
/* foreign file/dir can be unlinked unconditionally */
LLIF_FOREIGN_REMOVABLE = 5,
+ /* flag bit 6 is unused for now */
/* Xattr cache is filled */
LLIF_XATTR_CACHE_FILLED = 7,
return ll_i2info(inode)->lli_pcc_inode;
}
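+
+/*
+ * lli_inode_lock_owner records which task holds this inode's i_rwsem so
+ * that lower layers (see vvp_prune() and vvp_inode_ops()) can detect
+ * whether the current task already owns the lock and avoid
+ * self-deadlock.
+ */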
+static inline void ll_set_inode_lock_owner(struct inode *inode)
+{
+ ll_i2info(inode)->lli_inode_lock_owner = current;
+}
+
+static inline void ll_clear_inode_lock_owner(struct inode *inode)
+{
+ ll_i2info(inode)->lli_inode_lock_owner = NULL;
+}
+
+static inline struct task_struct *ll_get_inode_lock_owner(struct inode *inode)
+{
+ return ll_i2info(inode)->lli_inode_lock_owner;
+}
+
+/* lock the inode and set the inode lock owner */
+static inline void ll_inode_lock(struct inode *inode)
+{
+ inode_lock(inode);
+ ll_set_inode_lock_owner(inode);
+}
+
+/* clear the inode lock owner and unlock the inode */
+static inline void ll_inode_unlock(struct inode *inode)
+{
+ ll_clear_inode_lock_owner(inode);
+ inode_unlock(inode);
+}
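+
+/*
+ * VFS entry points where i_rwsem is already held by the caller only
+ * record ownership with ll_set_inode_lock_owner()/
+ * ll_clear_inode_lock_owner(); paths that take the lock themselves use
+ * ll_inode_lock()/ll_inode_unlock().
+ */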
+
/* default to use at least 16M for fast read if possible */
#define RA_REMAIN_WINDOW_MIN MiB_TO_PAGES(16UL)
void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags);
void ll_update_dir_depth_dmv(struct inode *dir, struct dentry *de);
int ll_read_inode2(struct inode *inode, void *opaque);
-void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io);
+void ll_truncate_inode_pages_final(struct inode *inode);
void ll_delete_inode(struct inode *inode);
int ll_iocontrol(struct inode *inode, struct file *file,
unsigned int cmd, void __user *uarg);
/* ll_cl_context initialize */
INIT_LIST_HEAD(&lli->lli_lccs);
seqlock_init(&lli->lli_page_inv_lock);
+ lli->lli_inode_lock_owner = NULL;
}
#define MAX_STRING_SIZE 128
* cache is not cleared yet. */
op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
if (S_ISREG(inode->i_mode))
- inode_lock(inode);
+ ll_inode_lock(inode);
rc = simple_setattr(&init_user_ns, dentry, &op_data->op_attr);
if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
op_data->op_attr.ia_valid = ia_valid;
rc = ll_update_inode(inode, &md);
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, "
"valid %x, hsm_import %d\n",
ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid),
hsm_import);
if (attr->ia_valid & ATTR_SIZE) {
- /* Check new size against VFS/VM file size limit and rlimit */
- rc = inode_newsize_ok(inode, attr->ia_size);
- if (rc)
- RETURN(rc);
-
- /* The maximum Lustre file size is variable, based on the
- * OST maximum object size and number of stripes. This
- * needs another check in addition to the VFS check above. */
- if (attr->ia_size > ll_file_maxbytes(inode)) {
+ /* Check new size against VFS/VM file size limit and rlimit */
+ rc = inode_newsize_ok(inode, attr->ia_size);
+ if (rc)
+ GOTO(clear, rc);
+
+ /* The maximum Lustre file size is variable, based on the
+ * OST maximum object size and number of stripes. This
+ * needs another check in addition to the VFS check above. */
+ if (attr->ia_size > ll_file_maxbytes(inode)) {
CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n",
- PFID(&lli->lli_fid), attr->ia_size,
- ll_file_maxbytes(inode));
- RETURN(-EFBIG);
- }
+ PFID(&lli->lli_fid), attr->ia_size,
+ ll_file_maxbytes(inode));
+ GOTO(clear, rc = -EFBIG);
+ }
- attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
- }
+ attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ }
/* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
if (attr->ia_valid & TIMES_SET_FLAGS) {
if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
!capable(CAP_FOWNER))
- RETURN(-EPERM);
+ GOTO(clear, rc = -EPERM);
}
/* We mark all of the fields "set" so MDS/OST does not re-set them */
if (!(attr->ia_valid & ATTR_MTIME_SET) &&
(attr->ia_valid & ATTR_MTIME)) {
attr->ia_mtime = current_time(inode);
- attr->ia_valid |= ATTR_MTIME_SET;
- }
+ attr->ia_valid |= ATTR_MTIME_SET;
+ }
if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
CDEBUG(D_INODE, "setting mtime %lld, ctime %lld, now = %lld\n",
ktime_get_real_seconds());
if (S_ISREG(inode->i_mode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
/* We always do an MDS RPC, even if we're only changing the size;
* only the MDS knows whether truncate() should fail with -ETXTBUSY */
ll_finish_md_op_data(op_data);
if (S_ISREG(inode->i_mode)) {
- inode_lock(inode);
+ ll_inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
/* Once we've got the i_mutex, it's safe to set the S_NOSEC
ll_stats_ops_tally(ll_i2sbi(inode), attr->ia_valid & ATTR_SIZE ?
LPROC_LL_TRUNC : LPROC_LL_SETATTR,
ktime_us_delta(ktime_get(), kstart));
+clear:
+ ll_clear_inode_lock_owner(inode);
RETURN(rc);
}
lli = ll_i2info(inode);
mutex_lock(&lli->lli_size_mutex);
+ lli->lli_size_lock_owner = current;
}
void ll_inode_size_unlock(struct inode *inode)
struct ll_inode_info *lli;
lli = ll_i2info(inode);
+ lli->lli_size_lock_owner = NULL;
mutex_unlock(&lli->lli_size_mutex);
}
lli->lli_inherit_depth);
}
-void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io)
+void ll_truncate_inode_pages_final(struct inode *inode)
{
struct address_space *mapping = &inode->i_data;
unsigned long nrpages;
unsigned long flags;
- LASSERTF(io == NULL || inode_is_locked(inode), "io %p (type %d)\n",
- io, io ? io->ci_type : 0);
+ LASSERTF((inode->i_state & I_FREEING) || inode_is_locked(inode),
+ DFID ":inode %p state %#lx, lli_flags %#lx\n",
+ PFID(ll_inode2fid(inode)), inode, inode->i_state,
+ ll_i2info(inode)->lli_flags);
truncate_inode_pages_final(mapping);
} /* Workaround end */
LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu "
- "io %p (io_type %d), "
+ "state %#lx, lli_flags %#lx, "
"see https://jira.whamcloud.com/browse/LU-118\n",
ll_i2sbi(inode)->ll_fsname,
PFID(ll_inode2fid(inode)), inode, nrpages,
- io, io ? io->ci_type : 0);
+ inode->i_state, ll_i2info(inode)->lli_flags);
}
int ll_read_inode2(struct inode *inode, void *opaque)
CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1);
}
- ll_truncate_inode_pages_final(inode, NULL);
+ ll_truncate_inode_pages_final(inode);
ll_clear_inode(inode);
clear_inode(inode);
if (IS_ERR(op_data))
GOTO(out, rc = PTR_ERR(op_data));
- inode_lock(dir);
+ ll_inode_lock(dir);
#ifdef HAVE_DIR_CONTEXT
rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx, NULL);
#else
rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir,
NULL);
#endif
- inode_unlock(dir);
+ ll_inode_unlock(dir);
ll_finish_md_op_data(op_data);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
unsigned int flags)
{
struct lookup_intent *itp, it = { .it_op = IT_GETATTR };
- struct dentry *de;
+ struct dentry *de = NULL;
+
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(parent);
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u\n",
dentry, PFID(ll_inode2fid(parent)), parent, flags);
if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) &&
(inode_permission(&init_user_ns,
parent, MAY_WRITE | MAY_EXEC) == 0))
- return NULL;
+ goto clear;
if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE))
itp = NULL;
if (itp != NULL)
ll_intent_release(itp);
+clear:
+ ll_clear_inode_lock_owner(parent);
+
return de;
}
int rc = 0;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE,
"VFS Op:name=%pd, dir="DFID"(%p), file %p, open_flags %x, mode %x opened %d\n",
dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
* Either way it's a valid race to just return -ENOENT here.
*/
if (!(open_flags & O_CREAT))
- return -ENOENT;
+ GOTO(clear, rc = -ENOENT);
/* Otherwise we just unhash it to be rehashed afresh via
* lookup if necessary
OBD_ALLOC(it, sizeof(*it));
if (!it)
- RETURN(-ENOMEM);
+ GOTO(clear, rc = -ENOMEM);
it->it_op = IT_OPEN;
if (open_flags & O_CREAT) {
out_release:
ll_intent_release(it);
OBD_FREE(it, sizeof(*it));
+clear:
+ ll_clear_inode_lock_owner(dir);
RETURN(rc);
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n",
dchild, PFID(ll_inode2fid(dir)), dir, mode, rdev);
if (!err)
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
RETURN(err);
}
ktime_t kstart = ktime_get();
int rc;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE, cfs_fail_val);
CDEBUG(D_VFSTRACE,
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
+
return rc;
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), target=%.*s\n",
dchild, PFID(ll_inode2fid(dir)), dir, 3000, oldpath);
err = llcrypt_prepare_symlink(dir, oldpath, len, dir->i_sb->s_blocksize,
&disk_link);
if (err)
- RETURN(err);
+ GOTO(out, err);
err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO,
(__u64)&disk_link, LUSTRE_OPC_SYMLINK);
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK,
ktime_us_delta(ktime_get(), kstart));
+out:
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
int err;
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(src);
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE,
"VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n",
PFID(ll_inode2fid(src)), src,
err = llcrypt_prepare_link(old_dentry, dir, new_dentry);
if (err)
- RETURN(err);
+ GOTO(clear, err);
op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len,
0, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(clear, err = PTR_ERR(op_data));
err = md_link(sbi->ll_md_exp, op_data, &request);
ll_finish_md_op_data(op_data);
EXIT;
out:
ptlrpc_req_finished(request);
+clear:
+ ll_clear_inode_lock_owner(src);
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
int err;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(dir);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR,
ktime_us_delta(ktime_get(), kstart));
+ ll_clear_inode_lock_owner(dir);
+
RETURN(err);
}
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(dir);
+ ll_set_inode_lock_owner(dchild->d_inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
if (unlikely(d_mountpoint(dchild)))
- RETURN(-EBUSY);
+ GOTO(out, rc = -EBUSY);
/* some foreign dir may not be allowed to be removed */
if (!ll_foreign_is_removable(dchild, false))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len,
S_IFDIR, LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(out, rc = PTR_ERR(op_data));
if (dchild->d_inode != NULL)
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
}
ptlrpc_req_finished(request);
+out:
+ ll_clear_inode_lock_owner(dir);
+ ll_clear_inode_lock_owner(dchild->d_inode);
RETURN(rc);
}
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(dir);
+ ll_set_inode_lock_owner(dchild->d_inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n",
dchild, PFID(ll_inode2fid(dir)), dir);
* just check it as vfs_unlink does.
*/
if (unlikely(d_mountpoint(dchild)))
- RETURN(-EBUSY);
+ GOTO(clear, rc = -EBUSY);
/* some foreign file/dir may not be allowed to be unlinked */
if (!ll_foreign_is_removable(dchild, false))
- RETURN(-EPERM);
+ GOTO(clear, rc = -EPERM);
op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(clear, rc = PTR_ERR(op_data));
op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
/* notify lower layer if inode has dirty pages */
if (!rc)
ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK,
ktime_us_delta(ktime_get(), kstart));
+clear:
+ ll_clear_inode_lock_owner(dir);
+ ll_clear_inode_lock_owner(dchild->d_inode);
RETURN(rc);
}
int err;
ENTRY;
+ /* VFS has locked the inodes before calling this */
+ ll_set_inode_lock_owner(src);
+ ll_set_inode_lock_owner(tgt);
+ if (tgt_dchild->d_inode)
+ ll_set_inode_lock_owner(tgt_dchild->d_inode);
+
#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
if (flags)
- return -EINVAL;
+ GOTO(out, err = -EINVAL);
#endif
CDEBUG(D_VFSTRACE,
tgt_dchild, PFID(ll_inode2fid(tgt)), tgt);
if (unlikely(d_mountpoint(src_dchild) || d_mountpoint(tgt_dchild)))
- RETURN(-EBUSY);
+ GOTO(out, err = -EBUSY);
#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS)
err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, flags);
err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, 0);
#endif
if (err)
- RETURN(err);
+ GOTO(out, err);
/* we prevent an encrypted file from being renamed
* into an unencrypted dir
*/
if (IS_ENCRYPTED(src) && !IS_ENCRYPTED(tgt))
- RETURN(-EXDEV);
+ GOTO(out, err = -EXDEV);
if (src_dchild->d_inode)
mode = src_dchild->d_inode->i_mode;
op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, mode,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
+ GOTO(out, err = PTR_ERR(op_data));
/* If the client is using a subdir mount and does a rename to what it
* sees as /.fscrypt, interpret it as the .fscrypt dir at fs root.
err = ll_setup_filename(src, &src_dchild->d_name, 1, &foldname, NULL);
if (err)
- RETURN(err);
+ GOTO(out, err);
err = ll_setup_filename(tgt, &tgt_dchild->d_name, 1, &fnewname, NULL);
if (err) {
llcrypt_free_filename(&foldname);
- RETURN(err);
+ GOTO(out, err);
}
err = md_rename(sbi->ll_md_exp, op_data,
foldname.disk_name.name, foldname.disk_name.len,
ll_stats_ops_tally(sbi, LPROC_LL_RENAME,
ktime_us_delta(ktime_get(), kstart));
}
-
+out:
+ ll_clear_inode_lock_owner(src);
+ ll_clear_inode_lock_owner(tgt);
+ if (tgt_dchild->d_inode)
+ ll_clear_inode_lock_owner(tgt_dchild->d_inode);
RETURN(err);
}
iter = *vio->vui_iter;
if (unlikely(lock_inode))
- inode_lock(inode);
+ ll_inode_lock(inode);
result = __generic_file_write_iter(vio->vui_iocb, &iter);
if (unlikely(lock_inode))
- inode_unlock(inode);
+ ll_inode_unlock(inode);
written = result;
if (result > 0)
struct inode *inode = vvp_object_inode(io->ci_obj);
__u64 start = io->u.ci_lseek.ls_start;
- inode_lock(inode);
+ ll_inode_lock(inode);
inode_dio_wait(inode);
/* At the moment we have DLM lock so just update inode
if (io->u.ci_lseek.ls_result > i_size_read(inode))
io->u.ci_lseek.ls_result = -ENXIO;
- inode_unlock(inode);
+ ll_inode_unlock(inode);
}
static const struct cl_io_operations vvp_io_ops = {
static int vvp_prune(const struct lu_env *env, struct cl_object *obj)
{
- struct cl_io *io = vvp_env_io(env)->vui_cl.cis_io;
struct inode *inode = vvp_object_inode(obj);
int rc;
ENTRY;
RETURN(rc);
}
- if (io != NULL)
- inode_lock(inode);
+ if (ll_get_inode_lock_owner(inode) != current)
+ /* ask LOV to take the inode lock, then lo_type_guard */
+ RETURN(-EAGAIN);
- ll_truncate_inode_pages_final(inode, io);
- mapping_clear_exiting(inode->i_mapping);
+ LASSERTF(inode_is_locked(inode), DFID ":inode %p lli_flags %#lx\n",
+ PFID(lu_object_fid(&obj->co_lu)), inode,
+ ll_i2info(inode)->lli_flags);
- if (io != NULL)
- inode_unlock(inode);
+ ll_truncate_inode_pages_final(inode);
+ mapping_clear_exiting(inode->i_mapping);
RETURN(0);
}
memcpy(attr->cra_jobid, &lli->lli_jobid, sizeof(attr->cra_jobid));
}
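+/*
+ * Take or release the inode lock or the inode size lock for a lower
+ * layer: returns -EALREADY if the current task already owns the
+ * requested lock (so the caller must not release it later), and
+ * -ENOLCK if an unlock is requested by a task that is not the owner.
+ */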
+static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj,
+ enum coo_inode_opc opc, void *data)
+{
+ struct inode *inode = vvp_object_inode(obj);
+ int rc = 0;
+
+ ENTRY;
+ switch (opc) {
+ case COIO_INODE_LOCK:
+ if (ll_get_inode_lock_owner(inode) != current)
+ ll_inode_lock(inode);
+ else
+ rc = -EALREADY;
+ break;
+ case COIO_INODE_UNLOCK:
+ if (ll_get_inode_lock_owner(inode) == current)
+ ll_inode_unlock(inode);
+ else
+ rc = -ENOLCK;
+ break;
+ case COIO_SIZE_LOCK:
+ if (ll_i2info(inode)->lli_size_lock_owner != current)
+ ll_inode_size_lock(inode);
+ else
+ rc = -EALREADY;
+ break;
+ case COIO_SIZE_UNLOCK:
+ if (ll_i2info(inode)->lli_size_lock_owner == current)
+ ll_inode_size_unlock(inode);
+ else
+ rc = -ENOLCK;
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ RETURN(rc);
+}
+
static const struct cl_object_operations vvp_ops = {
.coo_page_init = vvp_page_init,
.coo_io_init = vvp_io_init,
.coo_conf_set = vvp_conf_set,
.coo_prune = vvp_prune,
.coo_glimpse = vvp_object_glimpse,
- .coo_req_attr_set = vvp_req_attr_set
+ .coo_req_attr_set = vvp_req_attr_set,
+ .coo_inode_ops = vvp_inode_ops,
};
static int vvp_object_init0(const struct lu_env *env,
int rc;
ENTRY;
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
/* When setxattr() is called with a size of 0 the value is
* unconditionally replaced by "". When removexattr() is
* called we get a NULL value and XATTR_REPLACE for flags. */
/* FIXME: enable IMA when the conditions are ready */
if (handler->flags == XATTR_SECURITY_T &&
(!strcmp(name, "ima") || !strcmp(name, "evm")))
- RETURN(-EOPNOTSUPP);
+ GOTO(out, rc = -EOPNOTSUPP);
rc = xattr_type_filter(sbi, handler);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
if ((handler->flags == XATTR_ACL_ACCESS_T ||
handler->flags == XATTR_ACL_DEFAULT_T) &&
!inode_owner_or_capable(mnt_userns, inode))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
/* b10667: ignore lustre special xattr for now */
if (!strcmp(name, "hsm") ||
((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) ||
(handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov"))))
- RETURN(0);
+ GOTO(out, rc = 0);
rc = ll_security_secctx_name_filter(sbi, handler->flags, name);
if (rc)
- RETURN(rc);
+ GOTO(out, rc);
/*
* In user.* namespace, only regular files and directories can have
*/
if (handler->flags == XATTR_USER_T) {
if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
}
/* This check is required for compatibility with 2.14, in which
* context is set directly in the create request.
*/
if (handler->flags == XATTR_SECURITY_T && strcmp(name, "c") == 0)
- RETURN(-EPERM);
+ GOTO(out, rc = -EPERM);
fullname = kasprintf(GFP_KERNEL, "%s%s", xattr_prefix(handler), name);
if (!fullname)
- RETURN(-ENOMEM);
+ GOTO(out, rc = -ENOMEM);
rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname,
pv, size, flags, ll_i2suppgid(inode), &req);
LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n");
clear_bit(LL_SBI_USER_XATTR, sbi->ll_flags);
}
- RETURN(rc);
+ GOTO(out, rc);
}
ll_i2info(inode)->lli_synced_to_mds = false;
ll_stats_ops_tally(ll_i2sbi(inode), valid == OBD_MD_FLXATTRRM ?
LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR,
ktime_us_delta(ktime_get(), kstart));
+out:
+ ll_clear_inode_lock_owner(inode);
- RETURN(0);
+ RETURN(rc);
}
static int get_hsm_state(struct inode *inode, u32 *hus_states)
ktime_t kstart = ktime_get();
int op_type = flags == XATTR_REPLACE ? LPROC_LL_REMOVEXATTR :
LPROC_LL_SETXATTR;
- int rc;
+ int rc = 0;
LASSERT(inode);
LASSERT(name);
+ /* VFS has locked the inode before calling this */
+ ll_set_inode_lock_owner(inode);
+
CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n",
PFID(ll_inode2fid(inode)), inode, name);
size);
ll_stats_ops_tally(ll_i2sbi(inode), op_type,
ktime_us_delta(ktime_get(), kstart));
- return rc;
+ goto out;
} else if (!strcmp(name, "lma") || !strcmp(name, "link")) {
ll_stats_ops_tally(ll_i2sbi(inode), op_type,
ktime_us_delta(ktime_get(), kstart));
- return 0;
+ goto out;
}
if (strncmp(name, "lov.", 4) == 0 &&
le32_to_cpu(LOV_MAGIC_MASK)) == le32_to_cpu(LOV_MAGIC_MAGIC))
lustre_swab_lov_user_md((struct lov_user_md *)value, 0);
- return ll_xattr_set_common(handler, mnt_userns, dentry, inode, name,
- value, size, flags);
+ rc = ll_xattr_set_common(handler, mnt_userns, dentry, inode, name,
+ value, size, flags);
+out:
+ ll_clear_inode_lock_owner(inode);
+
+ return rc;
}
int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer,
LASSERT(r0->lo_sub[idx] == NULL);
}
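+/*
+ * Delete a raid0 layout entry. Returns 0 or the error from
+ * cl_object_prune(); in particular vvp_prune() returns -EAGAIN when
+ * the inode lock must be taken first, see lov_conf_set().
+ */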
-static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
+static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
struct lov_layout_entry *lle)
{
struct lov_layout_raid0 *r0 = &lle->lle_raid0;
+ int rc;
ENTRY;
struct lovsub_object *los = r0->lo_sub[i];
if (los != NULL) {
- cl_object_prune(env, &los->lso_cl);
+ rc = cl_object_prune(env, &los->lso_cl);
+ if (rc)
+ RETURN(rc);
/*
* If top-level object is to be evicted from
* the cache, so are its sub-objects.
}
}
- EXIT;
+ RETURN(0);
}
static void lov_fini_raid0(const struct lu_env *env,
union lov_layout_state *state)
{
struct lov_layout_entry *entry;
+ int rc;
ENTRY;
if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme))
continue;
- lov_delete_raid0(env, lov, entry);
+ rc = lov_delete_raid0(env, lov, entry);
+ if (rc)
+ RETURN(rc);
}
RETURN(0);
struct lov_stripe_md *lsm = NULL;
struct lov_object *lov = cl2lov(obj);
int result = 0;
+ struct cl_object *top = cl_object_top(obj);
+ bool unlock_inode = false;
+ bool lock_inode_size = false;
ENTRY;
if (conf->coc_opc == OBJECT_CONF_SET &&
GOTO(out_lsm, result = 0);
}
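+ /*
+ * lov_layout_change() returns -EAGAIN (from vvp_prune()) when this
+ * task does not own the inode lock; in that case drop the lov conf
+ * lock, take the inode (and size) locks in order, and retry.
+ */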
+retry:
lov_conf_lock(lov);
if (conf->coc_opc == OBJECT_CONF_WAIT) {
if (test_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags) &&
}
result = lov_layout_change(env, lov, lsm, conf);
- if (result)
+ if (result) {
+ if (result == -EAGAIN) {
+ /*
+ * We need to unlock the lov conf and take the inode lock.
+ * It's possible we have already taken the inode's size
+ * mutex, so keep that lock order, lest a deadlock
+ * happen:
+ * inode lock (ll_inode_lock())
+ * inode size lock (ll_inode_size_lock())
+ * lov conf lock (lov_conf_lock())
+ *
+ * e.g.
+ * vfs_setxattr inode locked
+ * ll_lov_setstripe_ea_info inode size locked
+ * ll_prep_inode
+ * ll_file_inode_init
+ * cl_conf_set
+ * lov_conf_set lov conf locked
+ */
+ lov_conf_unlock(lov);
+ if (cl_object_inode_ops(
+ env, top, COIO_SIZE_UNLOCK, NULL) == 0)
+ lock_inode_size = true;
+
+ /* take lock in order */
+ if (cl_object_inode_ops(
+ env, top, COIO_INODE_LOCK, NULL) == 0)
+ unlock_inode = true;
+ if (lock_inode_size)
+ cl_object_inode_ops(
+ env, top, COIO_SIZE_LOCK, NULL);
+ goto retry;
+ }
set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
- else
+ } else {
clear_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
+ }
EXIT;
out:
lov_conf_unlock(lov);
+ if (unlock_inode)
+ cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL);
out_lsm:
lov_lsm_put(lsm);
CDEBUG(D_INODE, DFID" lo_layout_invalid=%u\n",
}
EXPORT_SYMBOL(cl_object_flush);
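+/**
+ * Pass an inode operation down the cl object layer stack.
+ *
+ * Calls ->coo_inode_ops() on each layer that implements it, from top
+ * to bottom, stopping at the first non-zero return. Only the VVP
+ * layer provides an implementation (vvp_inode_ops()).
+ */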
+int cl_object_inode_ops(const struct lu_env *env, struct cl_object *top,
+ enum coo_inode_opc opc, void *data)
+{
+ struct cl_object *obj;
+ int rc = 0;
+
+ ENTRY;
+
+ cl_object_for_each(obj, top) {
+ if (obj->co_ops->coo_inode_ops) {
+ rc = obj->co_ops->coo_inode_ops(env, obj, opc, data);
+ if (rc)
+ break;
+ }
+ }
+ RETURN(rc);
+}
+EXPORT_SYMBOL(cl_object_inode_ops);
+
/**
* Helper function removing all object locks, and marking object for
* deletion. All object pages must have been deleted at this point.