From 1a084b1378ac37fb4269ac2e6dcac5c172fa00d2 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 4 May 2023 10:39:29 +0800 Subject: [PATCH] LU-16637 llite: call truncate_inode_pages() in inode lock In some cases vvp_prune()->truncate_inode_pages() is called without an IO context, so we need to protect it with the inode lock as well. So we add ll_inode_info::lli_inode_lock_owner and set it according to the VFS locking rules (Documentation/filesystems/Locking or Documentation/filesystems/locking.rst); before calling truncate_inode_pages(), we lock the inode if it is not already locked by the VFS. Lustre-commit: 51d62f2122fee14fbb3ff8333b5a830e1181e4e5 Lustre-change: https://review.whamcloud.com/50857 Fixes: ef9be34478 ("LU-16637 llite: call truncate_inode_pages() under inode lock") Signed-off-by: Bobi Jam Change-Id: I84d7d999a49325810062a9a7337e184d35467820 Reviewed-by: Neil Brown Reviewed-by: Patrick Farrell Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51643 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/cl_object.h | 14 ++++++ lustre/llite/dir.c | 4 +- lustre/llite/file.c | 35 +++++++------- lustre/llite/llite_internal.h | 63 +++++++++++++++----------- lustre/llite/llite_lib.c | 78 ++++++++++++++++++-------------- lustre/llite/llite_mmap.c | 2 +- lustre/llite/llite_nfs.c | 4 +- lustre/llite/namei.c | 103 ++++++++++++++++++++++++++++++++++-------- lustre/llite/pcc.c | 2 +- lustre/llite/statahead.c | 2 +- lustre/llite/vvp_io.c | 16 +++---- lustre/llite/vvp_object.c | 58 ++++++++++++++++++++---- lustre/llite/xattr.c | 44 +++++++++++------- lustre/llite/xattr_cache.c | 12 ++--- lustre/lov/lov_object.c | 55 ++++++++++++++++++++-- lustre/obdclass/cl_object.c | 19 ++++++++ 16 files changed, 364 insertions(+), 147 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index a589192..9633d23 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -303,6 +303,13 @@ struct
cl_layout { bool cl_is_rdonly; }; +enum coo_inode_opc { + COIO_INODE_LOCK, + COIO_INODE_UNLOCK, + COIO_SIZE_LOCK, + COIO_SIZE_UNLOCK, +}; + /** * Operations implemented for each cl object layer. * @@ -430,6 +437,11 @@ struct cl_object_operations { int (*coo_object_flush)(const struct lu_env *env, struct cl_object *obj, struct ldlm_lock *lock); + /** + * operate upon inode. Used in LOV to lock/unlock inode from vvp layer. + */ + int (*coo_inode_ops)(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data); }; /** @@ -2242,6 +2254,8 @@ int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, loff_t cl_object_maxbytes(struct cl_object *obj); int cl_object_flush(const struct lu_env *env, struct cl_object *obj, struct ldlm_lock *lock); +int cl_object_inode_ops(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data); /** diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index dba2477..a29e24c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -2513,7 +2513,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) loff_t ret = -EINVAL; ENTRY; - inode_lock(inode); + ll_inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -2551,7 +2551,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) GOTO(out, ret); out: - inode_unlock(inode); + ll_inode_unlock(inode); return ret; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 114b407..c182fcf 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -127,12 +127,13 @@ static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, op_data->op_xvalid |= OP_XVALID_CTIME_SET; op_data->op_attr_blocks = inode->i_blocks; op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) op_data->op_attr_flags |= LUSTRE_PROJINHERIT_FL; 
op_data->op_open_handle = och->och_open_handle; if (och->och_flags & FMODE_WRITE && - ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED)) + test_and_clear_bit(LLIF_DATA_MODIFIED, + &ll_i2info(inode)->lli_flags)) /* For HSM: if inode data has been modified, pack it so that * MDT can set data dirty flag in the archive. */ op_data->op_bias |= MDS_DATA_MODIFIED; @@ -1470,7 +1471,7 @@ static int ll_merge_attr_nolock(const struct lu_env *env, struct inode *inode) * POSIX. Solving this problem needs to send an RPC to MDT for each * read, this will hurt performance. */ - if (ll_file_test_and_clear_flag(lli, LLIF_UPDATE_ATIME) || + if (test_and_clear_bit(LLIF_UPDATE_ATIME, &lli->lli_flags) || inode->i_atime.tv_sec < lli->lli_atime) inode->i_atime.tv_sec = lli->lli_atime; @@ -2175,11 +2176,11 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) RETURN(0); if (unlikely(lock_inode)) - inode_lock(inode); + ll_inode_lock(inode); result = __generic_file_write_iter(iocb, iter); if (unlikely(lock_inode)) - inode_unlock(inode); + ll_inode_unlock(inode); /* If the page is not already dirty, ll_tiny_write_begin returns * -ENODATA. We continue on to normal write. 
@@ -2191,7 +2192,7 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) ll_heat_add(inode, CIT_WRITE, result); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES, result); - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); } CDEBUG(D_VFSTRACE, "result: %zu, original count %zu\n", result, count); @@ -3044,9 +3045,9 @@ lookup: if (enckey == 0 || nameenc == 0) continue; - inode_lock(parent); + ll_inode_lock(parent); de = lookup_one_len(p, de_parent, len); - inode_unlock(parent); + ll_inode_unlock(parent); if (IS_ERR_OR_NULL(de) || !de->d_inode) { dput(de_parent); rc = -ENODATA; @@ -3462,11 +3463,10 @@ static int ll_hsm_import(struct inode *inode, struct file *file, ATTR_ATIME | ATTR_ATIME_SET; inode_lock(inode); - + /* inode lock owner set in ll_setattr_raw()*/ rc = ll_setattr_raw(file_dentry(file), attr, 0, true); if (rc == -ENODATA) rc = 0; - inode_unlock(inode); out: @@ -3515,6 +3515,7 @@ static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu) RETURN(-EINVAL); inode_lock(inode); + /* inode lock owner set in ll_setattr_raw()*/ rc = ll_setattr_raw(file_dentry(file), &ia, OP_XVALID_CTIME_SET, false); inode_unlock(inode); @@ -3772,7 +3773,7 @@ int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd, RETURN(-EFAULT); fsxattr.fsx_xflags = ll_inode_flags_to_xflags(inode->i_flags); - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) fsxattr.fsx_xflags |= FS_XFLAG_PROJINHERIT; fsxattr.fsx_projid = ll_i2info(inode)->lli_projid; if (copy_to_user((struct fsxattr __user *)arg, @@ -3805,7 +3806,7 @@ int ll_ioctl_check_project(struct inode *inode, __u32 xflags, if (ll_i2info(inode)->lli_projid != projid) return -EINVAL; - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) { + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) { if (!(xflags & 
FS_XFLAG_PROJINHERIT)) return -EINVAL; } else { @@ -3904,10 +3905,10 @@ int ll_ioctl_project(struct file *file, unsigned int cmd, /* apply child dentry if name is valid */ name_len = strnlen(lu_project.project_name, NAME_MAX); if (name_len > 0 && name_len <= NAME_MAX) { - inode_lock(inode); + ll_inode_lock(inode); child_dentry = lookup_one_len(lu_project.project_name, dentry, name_len); - inode_unlock(inode); + ll_inode_unlock(inode); if (IS_ERR(child_dentry)) { rc = PTR_ERR(child_dentry); goto out; @@ -5239,7 +5240,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, if (IS_ERR(op_data)) GOTO(out_iput, rc = PTR_ERR(op_data)); - inode_lock(child_inode); + ll_inode_lock(child_inode); op_data->op_fid3 = *ll_inode2fid(child_inode); if (!fid_is_sane(&op_data->op_fid3)) { CERROR("%s: migrate %s, but FID "DFID" is insane\n", @@ -5320,7 +5321,7 @@ out_close: if (!rc) clear_nlink(child_inode); out_unlock: - inode_unlock(child_inode); + ll_inode_unlock(child_inode); ll_finish_md_op_data(op_data); out_iput: iput(child_inode); @@ -5602,7 +5603,7 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, * restore the MDT holds the layout lock so the glimpse will * block up to the end of restore (getattr will block) */ - if (!ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { + if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) { rc = ll_glimpse_size(inode); if (rc < 0) RETURN(rc); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 6849434..f16206b 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -195,7 +195,8 @@ struct ll_inode_info { /* for non-directory */ struct { struct mutex lli_size_mutex; - char *lli_symlink_name; + struct task_struct *lli_size_lock_owner; + char *lli_symlink_name; struct ll_trunc_sem lli_trunc_sem; struct range_lock_tree lli_write_tree; struct mutex lli_setattr_mutex; @@ -289,6 +290,8 @@ struct ll_inode_info { struct list_head 
lli_xattrs; /* ll_xattr_entry->xe_list */ struct list_head lli_lccs; /* list of ll_cl_context */ seqlock_t lli_page_inv_lock; + + struct task_struct *lli_inode_lock_owner; }; #ifndef HAVE_USER_NAMESPACE_ARG @@ -432,7 +435,7 @@ static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen) spin_unlock(&lli->lli_layout_lock); } -enum ll_file_flags { +enum ll_inode_flags { /* File data is modified. */ LLIF_DATA_MODIFIED = 0, /* File is being restored */ @@ -443,35 +446,12 @@ enum ll_file_flags { LLIF_PROJECT_INHERIT = 3, /* update atime from MDS even if it's older than local inode atime. */ LLIF_UPDATE_ATIME = 4, + /* 6 is not used for now */ /* Xattr cache is filled */ LLIF_XATTR_CACHE_FILLED = 7, }; -static inline void ll_file_set_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - set_bit(flag, &lli->lli_flags); -} - -static inline void ll_file_clear_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - clear_bit(flag, &lli->lli_flags); -} - -static inline bool ll_file_test_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - return test_bit(flag, &lli->lli_flags); -} - -static inline bool ll_file_test_and_clear_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - return test_and_clear_bit(flag, &lli->lli_flags); -} - int ll_xattr_cache_destroy(struct inode *inode); int ll_xattr_cache_empty(struct inode *inode); @@ -608,6 +588,35 @@ static inline struct pcc_inode *ll_i2pcci(struct inode *inode) return ll_i2info(inode)->lli_pcc_inode; } +static inline void ll_set_inode_lock_owner(struct inode *inode) +{ + ll_i2info(inode)->lli_inode_lock_owner = current; +} + +static inline void ll_clear_inode_lock_owner(struct inode *inode) +{ + ll_i2info(inode)->lli_inode_lock_owner = NULL; +} + +static inline struct task_struct *ll_get_inode_lock_owner(struct inode *inode) +{ + return ll_i2info(inode)->lli_inode_lock_owner; +} + +/* lock inode and set inode lock owener */ +static inline void ll_inode_lock(struct inode 
*inode) +{ + inode_lock(inode); + ll_set_inode_lock_owner(inode); +} + +/* clear inode lock owner and unlock it */ +static inline void ll_inode_unlock(struct inode *inode) +{ + ll_clear_inode_lock_owner(inode); + inode_unlock(inode); +} + /* default to use at least 16M for fast read if possible */ #define RA_REMAIN_WINDOW_MIN MiB_TO_PAGES(16UL) @@ -1377,7 +1386,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md); void ll_update_inode_flags(struct inode *inode, int ext_flags); void ll_update_dir_depth(struct inode *dir, struct inode *inode); int ll_read_inode2(struct inode *inode, void *opaque); -void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io); +void ll_truncate_inode_pages_final(struct inode *inode); void ll_delete_inode(struct inode *inode); int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f3140c5..2c244d1 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1138,6 +1138,7 @@ void ll_lli_init(struct ll_inode_info *lli) /* ll_cl_context initialize */ INIT_LIST_HEAD(&lli->lli_lccs); seqlock_init(&lli->lli_page_inv_lock); + lli->lli_inode_lock_owner = NULL; } #define MAX_STRING_SIZE 128 @@ -1854,10 +1855,10 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) * cache is not cleared yet. 
*/ op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE); if (S_ISREG(inode->i_mode)) - inode_lock(inode); + ll_inode_lock(inode); rc = simple_setattr(&init_user_ns, dentry, &op_data->op_attr); if (S_ISREG(inode->i_mode)) - inode_unlock(inode); + ll_inode_unlock(inode); op_data->op_attr.ia_valid = ia_valid; rc = ll_update_inode(inode, &md); @@ -2087,6 +2088,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, " "valid %x, hsm_import %d\n", ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid), @@ -2094,29 +2098,29 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, hsm_import); if (attr->ia_valid & ATTR_SIZE) { - /* Check new size against VFS/VM file size limit and rlimit */ - rc = inode_newsize_ok(inode, attr->ia_size); - if (rc) - RETURN(rc); - - /* The maximum Lustre file size is variable, based on the - * OST maximum object size and number of stripes. This - * needs another check in addition to the VFS check above. */ - if (attr->ia_size > ll_file_maxbytes(inode)) { + /* Check new size against VFS/VM file size limit and rlimit */ + rc = inode_newsize_ok(inode, attr->ia_size); + if (rc) + GOTO(clear, rc); + + /* The maximum Lustre file size is variable, based on the + * OST maximum object size and number of stripes. This + * needs another check in addition to the VFS check above. 
*/ + if (attr->ia_size > ll_file_maxbytes(inode)) { CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n", - PFID(&lli->lli_fid), attr->ia_size, - ll_file_maxbytes(inode)); - RETURN(-EFBIG); - } + PFID(&lli->lli_fid), attr->ia_size, + ll_file_maxbytes(inode)); + GOTO(clear, rc = -EFBIG); + } - attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; - } + attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; + } /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */ if (attr->ia_valid & TIMES_SET_FLAGS) { if ((!uid_eq(current_fsuid(), inode->i_uid)) && !cfs_capable(CFS_CAP_FOWNER)) - RETURN(-EPERM); + GOTO(clear, rc = -EPERM); } /* We mark all of the fields "set" so MDS/OST does not re-set them */ @@ -2133,8 +2137,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, if (!(attr->ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { attr->ia_mtime = current_time(inode); - attr->ia_valid |= ATTR_MTIME_SET; - } + attr->ia_valid |= ATTR_MTIME_SET; + } if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) CDEBUG(D_INODE, "setting mtime %lld, ctime %lld, now = %lld\n", @@ -2142,7 +2146,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, ktime_get_real_seconds()); if (S_ISREG(inode->i_mode)) - inode_unlock(inode); + ll_inode_unlock(inode); /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -2157,7 +2161,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, */ xvalid |= OP_XVALID_OWNEROVERRIDE; op_data->op_bias |= MDS_DATA_MODIFIED; - ll_file_clear_flag(lli, LLIF_DATA_MODIFIED); + clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); } if (attr->ia_valid & ATTR_FILE) { @@ -2291,7 +2295,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()). * This way we can save an RPC for common open + trunc * operation. 
*/ - if (ll_file_test_and_clear_flag(lli, LLIF_DATA_MODIFIED)) { + if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) { struct hsm_state_set hss = { .hss_valid = HSS_SETMASK, .hss_setmask = HS_DIRTY, @@ -2315,7 +2319,7 @@ out: ll_finish_md_op_data(op_data); if (S_ISREG(inode->i_mode)) { - inode_lock(inode); + ll_inode_lock(inode); if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); /* Once we've got the i_mutex, it's safe to set the S_NOSEC @@ -2330,6 +2334,8 @@ out: ll_stats_ops_tally(ll_i2sbi(inode), attr->ia_valid & ATTR_SIZE ? LPROC_LL_TRUNC : LPROC_LL_SETATTR, ktime_us_delta(ktime_get(), kstart)); +clear: + ll_clear_inode_lock_owner(inode); RETURN(rc); } @@ -2524,6 +2530,7 @@ void ll_inode_size_lock(struct inode *inode) lli = ll_i2info(inode); mutex_lock(&lli->lli_size_mutex); + lli->lli_size_lock_owner = current; } void ll_inode_size_unlock(struct inode *inode) @@ -2531,6 +2538,7 @@ void ll_inode_size_unlock(struct inode *inode) struct ll_inode_info *lli; lli = ll_i2info(inode); + lli->lli_size_lock_owner = NULL; mutex_unlock(&lli->lli_size_mutex); } @@ -2550,9 +2558,9 @@ void ll_update_inode_flags(struct inode *inode, int ext_flags) ext_flags |= ll_inode_to_ext_flags(inode->i_flags) & LUSTRE_ENCRYPT_FL; inode->i_flags = ll_ext_to_inode_flags(ext_flags); if (ext_flags & LUSTRE_PROJINHERIT_FL) - ll_file_set_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT); + set_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags); else - ll_file_clear_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT); + clear_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags); } int ll_update_inode(struct inode *inode, struct lustre_md *md) @@ -2679,9 +2687,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) * glimpsing updated attrs */ if (body->mbo_t_state & MS_RESTORE) - ll_file_set_flag(lli, LLIF_FILE_RESTORING); + set_bit(LLIF_FILE_RESTORING, &lli->lli_flags); else - ll_file_clear_flag(lli, LLIF_FILE_RESTORING); + 
clear_bit(LLIF_FILE_RESTORING, &lli->lli_flags); } return 0; @@ -2745,14 +2753,16 @@ void ll_update_dir_depth(struct inode *dir, struct inode *inode) PFID(&lli->lli_fid), lli->lli_dir_depth, lli->lli_inherit_depth); } -void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io) +void ll_truncate_inode_pages_final(struct inode *inode) { struct address_space *mapping = &inode->i_data; unsigned long nrpages; unsigned long flags; - LASSERTF(io == NULL || inode_is_locked(inode), "io %p (type %d)\n", - io, io ? io->ci_type : 0); + LASSERTF((inode->i_state & I_FREEING) || inode_is_locked(inode), + DFID ":inode %p state %#lx, lli_flags %#lx\n", + PFID(ll_inode2fid(inode)), inode, inode->i_state, + ll_i2info(inode)->lli_flags); truncate_inode_pages_final(mapping); @@ -2771,11 +2781,11 @@ void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io) } /* Workaround end */ LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu " - "io %p (io_type %d), " + "state %#lx, lli_flags %#lx, " "see https://jira.whamcloud.com/browse/LU-118\n", ll_i2sbi(inode)->ll_fsname, PFID(ll_inode2fid(inode)), inode, nrpages, - io, io ? 
io->ci_type : 0); + inode->i_state, ll_i2info(inode)->lli_flags); } int ll_read_inode2(struct inode *inode, void *opaque) @@ -2849,7 +2859,7 @@ void ll_delete_inode(struct inode *inode) CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1); } - ll_truncate_inode_pages_final(inode, NULL); + ll_truncate_inode_pages_final(inode); ll_clear_inode(inode); clear_inode(inode); diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index aba6d7b..449d8d5 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -224,7 +224,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, } if (result == 0) - ll_file_set_flag(lli, LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); } EXIT; diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 2d955f5..caa5a35 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -291,13 +291,13 @@ static int ll_get_name(struct dentry *dentry, char *name, struct dentry *child) if (IS_ERR(op_data)) GOTO(out, rc = PTR_ERR(op_data)); - inode_lock(dir); + ll_inode_lock(dir); #ifdef HAVE_DIR_CONTEXT rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx); #else rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir); #endif - inode_unlock(dir); + ll_inode_unlock(dir); ll_finish_md_op_data(op_data); if (!rc && !lgd.lgd_found) rc = -ENOENT; diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 8d2d34d..41a1112 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -328,7 +328,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel) lli = ll_i2info(inode); if (bits & MDS_INODELOCK_UPDATE) - ll_file_set_flag(lli, LLIF_UPDATE_ATIME); + set_bit(LLIF_UPDATE_ATIME, &lli->lli_flags); if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, " @@ -1207,7 +1207,10 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, unsigned int flags) { struct 
lookup_intent *itp, it = { .it_op = IT_GETATTR }; - struct dentry *de; + struct dentry *de = NULL; + + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(parent); CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u\n", dentry, PFID(ll_inode2fid(parent)), parent, flags); @@ -1220,7 +1223,7 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) && (inode_permission(&init_user_ns, parent, MAY_WRITE | MAY_EXEC) == 0)) - return NULL; + goto clear; if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) itp = NULL; @@ -1232,6 +1235,9 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, if (itp != NULL) ll_intent_release(itp); +clear: + ll_clear_inode_lock_owner(parent); + return de; } @@ -1277,6 +1283,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, int rc = 0; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), file %p, open_flags %x, mode %x opened %d\n", dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, @@ -1292,7 +1301,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, * Either way it's a valid race to just return -ENOENT here. 
*/ if (!(open_flags & O_CREAT)) - return -ENOENT; + GOTO(clear, rc = -ENOENT); /* Otherwise we just unhash it to be rehashed afresh via * lookup if necessary @@ -1302,7 +1311,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, OBD_ALLOC(it, sizeof(*it)); if (!it) - RETURN(-ENOMEM); + GOTO(clear, rc = -ENOMEM); it->it_op = IT_OPEN; if (open_flags & O_CREAT) { @@ -1464,6 +1473,8 @@ out_release: ll_intent_release(it); out_free: OBD_FREE(it, sizeof(*it)); +clear: + ll_clear_inode_lock_owner(dir); RETURN(rc); } @@ -1880,6 +1891,9 @@ static int ll_mknod(struct user_namespace *mnt_userns, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n", dchild, PFID(ll_inode2fid(dir)), dir, mode, rdev); @@ -1908,6 +1922,7 @@ static int ll_mknod(struct user_namespace *mnt_userns, struct inode *dir, if (!err) ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); RETURN(err); } @@ -1922,6 +1937,9 @@ static int ll_create_nd(struct user_namespace *mnt_userns, ktime_t kstart = ktime_get(); int rc; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE, cfs_fail_val); CDEBUG(D_VFSTRACE, @@ -1939,6 +1957,8 @@ static int ll_create_nd(struct user_namespace *mnt_userns, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); + return rc; } @@ -1951,13 +1971,16 @@ static int ll_symlink(struct user_namespace *mnt_userns, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), target=%.*s\n", dchild, PFID(ll_inode2fid(dir)), dir, 3000, oldpath); err = llcrypt_prepare_symlink(dir, oldpath, len, 
dir->i_sb->s_blocksize, &disk_link); if (err) - RETURN(err); + GOTO(out, err); err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO, (__u64)&disk_link, LUSTRE_OPC_SYMLINK); @@ -1969,6 +1992,9 @@ static int ll_symlink(struct user_namespace *mnt_userns, struct inode *dir, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, ktime_us_delta(ktime_get(), kstart)); +out: + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -1984,6 +2010,10 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, int err; ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(src); + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n", PFID(ll_inode2fid(src)), src, @@ -1991,12 +2021,12 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, err = llcrypt_prepare_link(old_dentry, dir, new_dentry); if (err) - RETURN(err); + GOTO(clear, err); op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(clear, err = PTR_ERR(op_data)); err = md_link(sbi->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -2009,6 +2039,10 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, EXIT; out: ptlrpc_req_finished(request); +clear: + ll_clear_inode_lock_owner(src); + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -2019,6 +2053,9 @@ static int ll_mkdir(struct user_namespace *mnt_userns, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); @@ -2032,6 +2069,8 @@ static int ll_mkdir(struct user_namespace *mnt_userns, struct inode *dir, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -2045,16 +2084,20 @@ static int 
ll_rmdir(struct inode *dir, struct dentry *dchild) ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(dir); + ll_set_inode_lock_owner(dchild->d_inode); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); if (unlikely(d_mountpoint(dchild))) - RETURN(-EBUSY); + GOTO(out, rc = -EBUSY); op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(out, rc = PTR_ERR(op_data)); if (dchild->d_inode != NULL) op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); @@ -2084,6 +2127,9 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild) } ptlrpc_req_finished(request); +out: + ll_clear_inode_lock_owner(dir); + ll_clear_inode_lock_owner(dchild->d_inode); RETURN(rc); } @@ -2130,6 +2176,10 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild) ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(dir); + ll_set_inode_lock_owner(dchild->d_inode); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); @@ -2138,12 +2188,12 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild) * just check it as vfs_unlink does. 
*/ if (unlikely(d_mountpoint(dchild))) - RETURN(-EBUSY); + GOTO(clear, rc = -EBUSY); op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(clear, rc = PTR_ERR(op_data)); op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); /* notify lower layer if inode has dirty pages */ @@ -2176,6 +2226,9 @@ out: if (!rc) ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, ktime_us_delta(ktime_get(), kstart)); +clear: + ll_clear_inode_lock_owner(dir); + ll_clear_inode_lock_owner(dchild->d_inode); RETURN(rc); } @@ -2196,9 +2249,15 @@ static int ll_rename(struct user_namespace *mnt_userns, int err; ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(src); + ll_set_inode_lock_owner(tgt); + if (tgt_dchild->d_inode) + ll_set_inode_lock_owner(tgt_dchild->d_inode); + #if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS) if (flags) - return -EINVAL; + GOTO(out, err = -EINVAL); #endif CDEBUG(D_VFSTRACE, @@ -2207,7 +2266,7 @@ static int ll_rename(struct user_namespace *mnt_userns, tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); if (unlikely(d_mountpoint(src_dchild) || d_mountpoint(tgt_dchild))) - RETURN(-EBUSY); + GOTO(out, err = -EBUSY); #if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS) err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, flags); @@ -2215,12 +2274,12 @@ static int ll_rename(struct user_namespace *mnt_userns, err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, 0); #endif if (err) - RETURN(err); + GOTO(out, err); /* we prevent an encrypted file from being renamed * into an unencrypted dir */ if (IS_ENCRYPTED(src) && !IS_ENCRYPTED(tgt)) - RETURN(-EXDEV); + GOTO(out, err = -EXDEV); if (src_dchild->d_inode) mode = src_dchild->d_inode->i_mode; @@ -2231,7 +2290,7 @@ static int ll_rename(struct user_namespace *mnt_userns, op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, mode, 
LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(out, err = PTR_ERR(op_data)); /* If the client is using a subdir mount and does a rename to what it * sees as /.fscrypt, interpret it as the .fscrypt dir at fs root. @@ -2251,11 +2310,11 @@ static int ll_rename(struct user_namespace *mnt_userns, err = ll_setup_filename(src, &src_dchild->d_name, 1, &foldname, NULL); if (err) - RETURN(err); + GOTO(out, err); err = ll_setup_filename(tgt, &tgt_dchild->d_name, 1, &fnewname, NULL); if (err) { llcrypt_free_filename(&foldname); - RETURN(err); + GOTO(out, err); } err = md_rename(sbi->ll_md_exp, op_data, foldname.disk_name.name, foldname.disk_name.len, @@ -2276,7 +2335,11 @@ static int ll_rename(struct user_namespace *mnt_userns, ll_stats_ops_tally(sbi, LPROC_LL_RENAME, ktime_us_delta(ktime_get(), kstart)); } - +out: + ll_clear_inode_lock_owner(src); + ll_clear_inode_lock_owner(tgt); + if (tgt_dchild->d_inode) + ll_clear_inode_lock_owner(tgt_dchild->d_inode); RETURN(err); } diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index 20bdac5..1af4937 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -2820,7 +2820,7 @@ int pcc_inode_getattr(struct inode *inode, u32 request_mask, GOTO(out, rc); ll_inode_size_lock(inode); - if (ll_file_test_and_clear_flag(lli, LLIF_UPDATE_ATIME) || + if (test_and_clear_bit(LLIF_UPDATE_ATIME, &lli->lli_flags) || inode->i_atime.tv_sec < lli->lli_atime) inode->i_atime.tv_sec = lli->lli_atime; diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 3794dba..ac952a2 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -584,7 +584,7 @@ static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai) * the MDT holds the layout lock so the glimpse will block up to the * end of restore (statahead/agl will block) */ - if (ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { + if (test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) { lli->lli_agl_index = 0; iput(inode); RETURN_EXIT; 
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 2616e6f..124c356 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -360,8 +360,8 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* today successful restore is the only possible * case */ /* restore was done, clear restoring state */ - ll_file_clear_flag(ll_i2info(vvp_object_inode(obj)), - LLIF_FILE_RESTORING); + clear_bit(LLIF_FILE_RESTORING, + &ll_i2info(vvp_object_inode(obj))->lli_flags); } GOTO(out, 0); } @@ -812,7 +812,7 @@ static void vvp_io_setattr_fini(const struct lu_env *env, if (restore_needed && !ios->cis_io->ci_restore_needed) { /* restore finished, set data modified flag for HSM */ - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); } } @@ -1365,10 +1365,10 @@ static int vvp_io_write_start(const struct lu_env *env, iter = *vio->vui_iter; if (unlikely(lock_inode)) - inode_lock(inode); + ll_inode_lock(inode); result = __generic_file_write_iter(vio->vui_iocb, &iter); if (unlikely(lock_inode)) - inode_unlock(inode); + ll_inode_unlock(inode); written = result; if (result > 0) @@ -1417,7 +1417,7 @@ static int vvp_io_write_start(const struct lu_env *env, vio->vui_iocb->ki_pos = pos + io->ci_nob - nob; } if (result > 0 || result == -EIOCBQUEUED) { - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); if (result != -EIOCBQUEUED && result < cnt) io->ci_continue = 0; @@ -1767,7 +1767,7 @@ static int vvp_io_lseek_start(const struct lu_env *env, struct inode *inode = vvp_object_inode(io->ci_obj); __u64 start = io->u.ci_lseek.ls_start; - inode_lock(inode); + ll_inode_lock(inode); inode_dio_wait(inode); /* At the moment we have DLM lock so just update inode @@ -1790,7 +1790,7 @@ static void vvp_io_lseek_end(const struct lu_env *env, if (io->u.ci_lseek.ls_result > i_size_read(inode)) io->u.ci_lseek.ls_result = -ENXIO; - 
inode_unlock(inode); + ll_inode_unlock(inode); } static const struct cl_io_operations vvp_io_ops = { diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c index efd64b3..0f26b5d 100644 --- a/lustre/llite/vvp_object.c +++ b/lustre/llite/vvp_object.c @@ -157,7 +157,6 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, static int vvp_prune(const struct lu_env *env, struct cl_object *obj) { - struct cl_io *io = vvp_env_io(env)->vui_cl.cis_io; struct inode *inode = vvp_object_inode(obj); int rc; ENTRY; @@ -169,14 +168,16 @@ static int vvp_prune(const struct lu_env *env, struct cl_object *obj) RETURN(rc); } - if (io != NULL) - inode_lock(inode); + if (ll_get_inode_lock_owner(inode) != current) + /* ask LOV get inode lock then lo_type_guard */ + RETURN(-EAGAIN); - ll_truncate_inode_pages_final(inode, io); - clear_bit(AS_EXITING, &inode->i_mapping->flags); + LASSERTF(inode_is_locked(inode), DFID ":inode %p lli_flags %#lx\n", + PFID(lu_object_fid(&obj->co_lu)), inode, + ll_i2info(inode)->lli_flags); - if (io != NULL) - inode_unlock(inode); + ll_truncate_inode_pages_final(inode); + clear_bit(AS_EXITING, &inode->i_mapping->flags); RETURN(0); } @@ -227,6 +228,46 @@ static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, sizeof(attr->cra_jobid)); } +static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data) +{ + struct inode *inode = vvp_object_inode(obj); + int rc = 0; + + ENTRY; + switch (opc) { + case COIO_INODE_LOCK: + if (ll_get_inode_lock_owner(inode) != current) + ll_inode_lock(inode); + else + rc = -EALREADY; + break; + case COIO_INODE_UNLOCK: + if (ll_get_inode_lock_owner(inode) == current) + ll_inode_unlock(inode); + else + rc = -ENOLCK; + break; + case COIO_SIZE_LOCK: + if (ll_i2info(inode)->lli_size_lock_owner != current) + ll_inode_size_lock(inode); + else + rc = -EALREADY; + break; + case COIO_SIZE_UNLOCK: + if (ll_i2info(inode)->lli_size_lock_owner == 
current) + ll_inode_size_unlock(inode); + else + rc = -ENOLCK; + break; + default: + rc = -EINVAL; + break; + } + + RETURN(rc); +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_io_init = vvp_io_init, @@ -235,7 +276,8 @@ static const struct cl_object_operations vvp_ops = { .coo_conf_set = vvp_conf_set, .coo_prune = vvp_prune, .coo_glimpse = vvp_object_glimpse, - .coo_req_attr_set = vvp_req_attr_set + .coo_req_attr_set = vvp_req_attr_set, + .coo_inode_ops = vvp_inode_ops, }; static int vvp_object_init0(const struct lu_env *env, diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 3ca94ad..d8bbd7b 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -112,6 +112,9 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, int rc; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + /* When setxattr() is called with a size of 0 the value is * unconditionally replaced by "". When removexattr() is * called we get a NULL value and XATTR_REPLACE for flags. 
*/ @@ -123,26 +126,26 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, /* FIXME: enable IMA when the conditions are ready */ if (handler->flags == XATTR_SECURITY_T && (!strcmp(name, "ima") || !strcmp(name, "evm"))) - RETURN(-EOPNOTSUPP); + GOTO(out, rc = -EOPNOTSUPP); rc = xattr_type_filter(sbi, handler); if (rc) - RETURN(rc); + GOTO(out, rc); if ((handler->flags == XATTR_ACL_ACCESS_T || handler->flags == XATTR_ACL_DEFAULT_T) && !inode_owner_or_capable(mnt_userns, inode)) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); /* b10667: ignore lustre special xattr for now */ if (!strcmp(name, "hsm") || ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) || (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))) - RETURN(0); + GOTO(out, rc = 0); rc = ll_security_secctx_name_filter(sbi, handler->flags, name); if (rc) - RETURN(rc); + GOTO(out, rc); /* * In user.* namespace, only regular files and directories can have @@ -150,7 +153,7 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, */ if (handler->flags == XATTR_USER_T) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); } /* This check is required for compatibility with 2.14, in which @@ -161,11 +164,11 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, * context is set directly in the create request. 
*/ if (handler->flags == XATTR_SECURITY_T && strcmp(name, "c") == 0) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); fullname = kasprintf(GFP_KERNEL, "%s%s", xattr_prefix(handler), name); if (!fullname) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, pv, size, flags, ll_i2suppgid(inode), &req); @@ -175,7 +178,7 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); sbi->ll_flags &= ~LL_SBI_USER_XATTR; } - RETURN(rc); + GOTO(out, rc); } ptlrpc_req_finished(req); @@ -183,8 +186,10 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, ll_stats_ops_tally(ll_i2sbi(inode), valid == OBD_MD_FLXATTRRM ? LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR, ktime_us_delta(ktime_get(), kstart)); +out: + ll_clear_inode_lock_owner(inode); - RETURN(0); + RETURN(rc); } static int get_hsm_state(struct inode *inode, u32 *hus_states) @@ -352,11 +357,14 @@ static int ll_xattr_set(const struct xattr_handler *handler, int op_type = flags == XATTR_REPLACE ? 
LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR; struct lov_user_md *lum = (struct lov_user_md *)value; - int rc; + int rc = 0; LASSERT(inode); LASSERT(name); + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n", PFID(ll_inode2fid(inode)), inode, name); @@ -365,11 +373,11 @@ static int ll_xattr_set(const struct xattr_handler *handler, rc = ll_setstripe_ea(dentry, lum, size); ll_stats_ops_tally(ll_i2sbi(inode), op_type, ktime_us_delta(ktime_get(), kstart)); - return rc; + goto out; } else if (!strcmp(name, "lma") || !strcmp(name, "link")) { ll_stats_ops_tally(ll_i2sbi(inode), op_type, ktime_us_delta(ktime_get(), kstart)); - return 0; + goto out; } if (strncmp(name, "lov.", 4) == 0) { @@ -379,11 +387,15 @@ static int ll_xattr_set(const struct xattr_handler *handler, rc = lustre_check_lov_user_md(lum); if (rc < 0) - return rc; + goto out; } - return ll_xattr_set_common(handler, mnt_userns, dentry, inode, name, - value, size, flags); + rc = ll_xattr_set_common(handler, mnt_userns, dentry, inode, name, + value, size, flags); +out: + ll_clear_inode_lock_owner(inode); + + return rc; } int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer, diff --git a/lustre/llite/xattr_cache.c b/lustre/llite/xattr_cache.c index cd777c6..f54d8f6 100644 --- a/lustre/llite/xattr_cache.c +++ b/lustre/llite/xattr_cache.c @@ -85,7 +85,7 @@ static void ll_xattr_cache_init(struct ll_inode_info *lli) LASSERT(lli != NULL); INIT_LIST_HEAD(&lli->lli_xattrs); - ll_file_set_flag(lli, LLIF_XATTR_CACHE); + set_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -264,7 +264,7 @@ static int ll_xattr_cache_list(struct list_head *cache, */ static int ll_xattr_cache_valid(struct ll_inode_info *lli) { - return ll_file_test_flag(lli, LLIF_XATTR_CACHE); + return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -275,7 +275,7 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli) */ static 
int ll_xattr_cache_filled(struct ll_inode_info *lli) { - return ll_file_test_flag(lli, LLIF_XATTR_CACHE_FILLED); + return test_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); } /** @@ -295,8 +295,8 @@ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) /* empty loop */ ; - ll_file_clear_flag(lli, LLIF_XATTR_CACHE_FILLED); - ll_file_clear_flag(lli, LLIF_XATTR_CACHE); + clear_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); + clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags); RETURN(0); } @@ -530,7 +530,7 @@ static int ll_xattr_cache_refill(struct inode *inode) if (xdata != xtail || xval != xvtail) CERROR("a hole in xattr data\n"); else - ll_file_set_flag(lli, LLIF_XATTR_CACHE_FILLED); + set_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); ll_set_lock_data(sbi->ll_md_exp, inode, &oit, NULL); ll_intent_drop_lock(&oit); diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index d316c5c..235043a 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -311,10 +311,11 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov, LASSERT(r0->lo_sub[idx] == NULL); } -static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, +static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lov_layout_entry *lle) { struct lov_layout_raid0 *r0 = &lle->lle_raid0; + int rc; ENTRY; @@ -325,7 +326,9 @@ static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lovsub_object *los = r0->lo_sub[i]; if (los != NULL) { - cl_object_prune(env, &los->lso_cl); + rc = cl_object_prune(env, &los->lso_cl); + if (rc) + RETURN(rc); /* * If top-level object is to be evicted from * the cache, so are its sub-objects. 
@@ -335,7 +338,7 @@ static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, } } - EXIT; + RETURN(0); } static void lov_fini_raid0(const struct lu_env *env, @@ -857,6 +860,7 @@ static int lov_delete_composite(const struct lu_env *env, union lov_layout_state *state) { struct lov_layout_entry *entry; + int rc; ENTRY; @@ -867,7 +871,9 @@ static int lov_delete_composite(const struct lu_env *env, if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme)) continue; - lov_delete_raid0(env, lov, entry); + rc = lov_delete_raid0(env, lov, entry); + if (rc) + RETURN(rc); } RETURN(0); @@ -1389,6 +1395,9 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, struct lov_stripe_md *lsm = NULL; struct lov_object *lov = cl2lov(obj); int result = 0; + struct cl_object *top = cl_object_top(obj); + bool unlock_inode = false; + bool lock_inode_size = false; ENTRY; if (conf->coc_opc == OBJECT_CONF_SET && @@ -1401,6 +1410,7 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, dump_lsm(D_INODE, lsm); } +retry: lov_conf_lock(lov); if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { lov->lo_layout_invalid = true; @@ -1455,10 +1465,47 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, result = lov_layout_change(env, lov, lsm, conf); lov->lo_layout_invalid = result != 0; + if (result) { + if (result == -EAGAIN) { + /** + * we need unlocked lov conf and get inode lock. + * It's possible we have already taken inode's size + * mutex, so we need keep such lock order, lest deadlock + * happens: + * inode lock (ll_inode_lock()) + * inode size lock (ll_inode_size_lock()) + * lov conf lock (lov_conf_lock()) + * + * e.g. 
+ * vfs_setxattr inode locked + * ll_lov_setstripe_ea_info inode size locked + * ll_prep_inode + * ll_file_inode_init + * cl_conf_set + * lov_conf_set lov conf locked + */ + lov_conf_unlock(lov); + if (cl_object_inode_ops( + env, top, COIO_SIZE_UNLOCK, NULL) == 0) + lock_inode_size = true; + + /* take lock in order */ + if (cl_object_inode_ops( + env, top, COIO_INODE_LOCK, NULL) == 0) + unlock_inode = true; + if (lock_inode_size) + cl_object_inode_ops( + env, top, COIO_SIZE_LOCK, NULL); + goto retry; + } + } EXIT; out: lov_conf_unlock(lov); + if (unlock_inode) + cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL); + lov_lsm_put(lsm); CDEBUG(D_INODE, DFID" lo_layout_invalid=%d\n", PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid); diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c index d5d491f..0c392db 100644 --- a/lustre/obdclass/cl_object.c +++ b/lustre/obdclass/cl_object.c @@ -443,6 +443,25 @@ int cl_object_flush(const struct lu_env *env, struct cl_object *obj, } EXPORT_SYMBOL(cl_object_flush); +int cl_object_inode_ops(const struct lu_env *env, struct cl_object *top, + enum coo_inode_opc opc, void *data) +{ + struct cl_object *obj; + int rc = 0; + + ENTRY; + + cl_object_for_each(obj, top) { + if (obj->co_ops->coo_inode_ops) { + rc = obj->co_ops->coo_inode_ops(env, obj, opc, data); + if (rc) + break; + } + } + RETURN(rc); +} +EXPORT_SYMBOL(cl_object_inode_ops); + /** * Helper function removing all object locks, and marking object for * deletion. All object pages must have been deleted at this point. -- 1.8.3.1