From: Bobi Jam
Date: Wed, 2 Aug 2023 11:30:09 +0000 (+0800)
Subject: LU-16958 llite: call truncate_inode_pages() in inode lock
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=3797f7201fdd6b6589d775149821ad6aac3d5c15;p=fs%2Flustre-release.git

LU-16958 llite: call truncate_inode_pages() in inode lock

In some cases vvp_prune()->truncate_inode_pages() is called without an
IO context, so we need to protect it with the inode lock as well.

Add ll_inode_info::lli_inode_lock_owner and set it according to the VFS
locking rules (Documentation/filesystems/Locking or
Documentation/filesystems/locking.rst), so that before calling
truncate_inode_pages() we take the inode lock ourselves if the VFS has
not already locked it.

In lov_conf_set(), when the inode lock is needed, take into account the
inode size lock, inode layout lock and lov conf lock that may already be
held by the current task, and take these locks in the proper order to
avoid deadlock.

Lustre-commit: 51d62f2122fee14fbb3ff8333b5a830e1181e4e5
Lustre-change: https://review.whamcloud.com/50857
Lustre-commit: 8f2c1592c3bbd0351ab3984a88a3eed7075690c8
Lustre-change: https://review.whamcloud.com/51641
Fixes: ef9be34478 ("LU-16637 llite: call truncate_inode_pages() under inode lock")
Signed-off-by: Bobi Jam
Change-Id: I7ee58039a6d31daefc625ac571a52baf112f8151
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51644
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
---

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index a589192..adfa913 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -303,6 +303,15 @@ struct cl_layout { bool cl_is_rdonly; }; +enum coo_inode_opc { + COIO_INODE_LOCK, + COIO_INODE_UNLOCK, + COIO_SIZE_LOCK, + COIO_SIZE_UNLOCK, + COIO_LAYOUT_LOCK, + COIO_LAYOUT_UNLOCK, +}; + /** * Operations implemented for each cl object layer. * @@ -430,6 +439,11 @@ struct cl_object_operations { int (*coo_object_flush)(const struct lu_env *env, struct cl_object *obj, struct ldlm_lock *lock); + /** + * operate upon inode. Used in LOV to lock/unlock inode from vvp layer.
+ */ + int (*coo_inode_ops)(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data); }; /** @@ -2242,6 +2256,8 @@ int cl_object_layout_get(const struct lu_env *env, struct cl_object *obj, loff_t cl_object_maxbytes(struct cl_object *obj); int cl_object_flush(const struct lu_env *env, struct cl_object *obj, struct ldlm_lock *lock); +int cl_object_inode_ops(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data); /** diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 9fe520c..a1016cd 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -2512,7 +2512,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) loff_t ret = -EINVAL; ENTRY; - inode_lock(inode); + ll_inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -2550,7 +2550,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) GOTO(out, ret); out: - inode_unlock(inode); + ll_inode_unlock(inode); return ret; } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 47a152b..12bf18e 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -130,12 +130,13 @@ static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data, op_data->op_xvalid |= OP_XVALID_CTIME_SET; op_data->op_attr_blocks = inode->i_blocks; op_data->op_attr_flags = ll_inode_to_ext_flags(inode->i_flags); - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) op_data->op_attr_flags |= LUSTRE_PROJINHERIT_FL; op_data->op_open_handle = och->och_open_handle; if (och->och_flags & FMODE_WRITE && - ll_file_test_and_clear_flag(ll_i2info(inode), LLIF_DATA_MODIFIED)) + test_and_clear_bit(LLIF_DATA_MODIFIED, + &ll_i2info(inode)->lli_flags)) /* For HSM: if inode data has been modified, pack it so that * MDT can set data dirty flag in the archive. */ op_data->op_bias |= MDS_DATA_MODIFIED; @@ -1473,7 +1474,7 @@ static int ll_merge_attr_nolock(const struct lu_env *env, struct inode *inode) * POSIX. Solving this problem needs to send an RPC to MDT for each * read, this will hurt performance. */ - if (ll_file_test_and_clear_flag(lli, LLIF_UPDATE_ATIME) || + if (test_and_clear_bit(LLIF_UPDATE_ATIME, &lli->lli_flags) || inode->i_atime.tv_sec < lli->lli_atime) inode->i_atime.tv_sec = lli->lli_atime; @@ -2218,11 +2219,11 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) RETURN(0); if (unlikely(lock_inode)) - inode_lock(inode); + ll_inode_lock(inode); result = __generic_file_write_iter(iocb, iter); if (unlikely(lock_inode)) - inode_unlock(inode); + ll_inode_unlock(inode); /* If the page is not already dirty, ll_tiny_write_begin returns * -ENODATA. We continue on to normal write. 
@@ -2234,7 +2235,7 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter) ll_heat_add(inode, CIT_WRITE, result); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES, result); - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); } CDEBUG(D_VFSTRACE, "result: %zu, original count %zu\n", result, count); @@ -3087,9 +3088,9 @@ lookup: if (enckey == 0 || nameenc == 0) continue; - inode_lock(parent); + ll_inode_lock(parent); de = lookup_one_len(p, de_parent, len); - inode_unlock(parent); + ll_inode_unlock(parent); if (IS_ERR_OR_NULL(de) || !de->d_inode) { dput(de_parent); rc = -ENODATA; @@ -3505,11 +3506,10 @@ static int ll_hsm_import(struct inode *inode, struct file *file, ATTR_ATIME | ATTR_ATIME_SET; inode_lock(inode); - + /* inode lock owner set in ll_setattr_raw()*/ rc = ll_setattr_raw(file_dentry(file), attr, 0, true); if (rc == -ENODATA) rc = 0; - inode_unlock(inode); out: @@ -3558,6 +3558,7 @@ static int ll_file_futimes_3(struct file *file, const struct ll_futimes_3 *lfu) RETURN(-EINVAL); inode_lock(inode); + /* inode lock owner set in ll_setattr_raw()*/ rc = ll_setattr_raw(file_dentry(file), &ia, OP_XVALID_CTIME_SET, false); inode_unlock(inode); @@ -3815,7 +3816,7 @@ int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd, RETURN(-EFAULT); fsxattr.fsx_xflags = ll_inode_flags_to_xflags(inode->i_flags); - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) fsxattr.fsx_xflags |= FS_XFLAG_PROJINHERIT; fsxattr.fsx_projid = ll_i2info(inode)->lli_projid; if (copy_to_user((struct fsxattr __user *)arg, @@ -3848,7 +3849,7 @@ int ll_ioctl_check_project(struct inode *inode, __u32 xflags, if (ll_i2info(inode)->lli_projid != projid) return -EINVAL; - if (ll_file_test_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT)) { + if (test_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags)) { if (!(xflags & FS_XFLAG_PROJINHERIT)) return -EINVAL; } else { @@ -3947,10 +3948,10 @@ int ll_ioctl_project(struct file *file, unsigned int cmd, /* apply child dentry if name is valid */ name_len = strnlen(lu_project.project_name, NAME_MAX); if (name_len > 0 && name_len <= NAME_MAX) { - inode_lock(inode); + ll_inode_lock(inode); child_dentry = lookup_one_len(lu_project.project_name, dentry, name_len); - inode_unlock(inode); + ll_inode_unlock(inode); if (IS_ERR(child_dentry)) { rc = PTR_ERR(child_dentry); goto out; @@ -5282,7 +5283,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, if (IS_ERR(op_data)) GOTO(out_iput, rc = PTR_ERR(op_data)); - inode_lock(child_inode); + ll_inode_lock(child_inode); op_data->op_fid3 = *ll_inode2fid(child_inode); if (!fid_is_sane(&op_data->op_fid3)) { CERROR("%s: migrate %s, but FID "DFID" is insane\n", @@ -5363,7 +5364,7 @@ out_close: if (!rc) clear_nlink(child_inode); out_unlock: - inode_unlock(child_inode); + ll_inode_unlock(child_inode); ll_finish_md_op_data(op_data); out_iput: iput(child_inode); @@ -5649,7 +5650,7 @@ int ll_getattr_dentry(struct dentry *de, struct kstat *stat, u32 request_mask, * restore the MDT holds the layout lock so the glimpse will * block up to the end of restore (getattr will block) */ - if (!ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { + if (!test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) { rc = ll_glimpse_size(inode); if (rc < 0) RETURN(rc); @@ -6482,6 +6483,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) /* take layout lock mutex to 
enqueue layout lock exclusively. */ mutex_lock(&lli->lli_layout_mutex); + lli->lli_layout_lock_owner = current; while (1) { /* mostly layout lock is caching on the local side, so try to @@ -6503,6 +6505,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) if (rc == 0) *gen = ll_layout_version_get(lli); + lli->lli_layout_lock_owner = NULL; mutex_unlock(&lli->lli_layout_mutex); RETURN(rc); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index f61377d..e22d0ff 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -195,7 +195,8 @@ struct ll_inode_info { /* for non-directory */ struct { struct mutex lli_size_mutex; - char *lli_symlink_name; + struct task_struct *lli_size_lock_owner; + char *lli_symlink_name; struct ll_trunc_sem lli_trunc_sem; struct range_lock_tree lli_write_tree; struct mutex lli_setattr_mutex; @@ -278,6 +279,7 @@ struct ll_inode_info { /* mutex to request for layout lock exclusively. */ struct mutex lli_layout_mutex; + struct task_struct *lli_layout_lock_owner; /* Layout version, protected by lli_layout_lock */ __u32 lli_layout_gen; spinlock_t lli_layout_lock; @@ -289,6 +291,8 @@ struct ll_inode_info { struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */ struct list_head lli_lccs; /* list of ll_cl_context */ seqlock_t lli_page_inv_lock; + + struct task_struct *lli_inode_lock_owner; }; #ifndef HAVE_USER_NAMESPACE_ARG @@ -432,7 +436,7 @@ static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen) spin_unlock(&lli->lli_layout_lock); } -enum ll_file_flags { +enum ll_inode_flags { /* File data is modified. */ LLIF_DATA_MODIFIED = 0, /* File is being restored */ @@ -443,35 +447,12 @@ enum ll_file_flags { LLIF_PROJECT_INHERIT = 3, /* update atime from MDS even if it's older than local inode atime. 
*/ LLIF_UPDATE_ATIME = 4, + /* 6 is not used for now */ /* Xattr cache is filled */ LLIF_XATTR_CACHE_FILLED = 7, }; -static inline void ll_file_set_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - set_bit(flag, &lli->lli_flags); -} - -static inline void ll_file_clear_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - clear_bit(flag, &lli->lli_flags); -} - -static inline bool ll_file_test_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - return test_bit(flag, &lli->lli_flags); -} - -static inline bool ll_file_test_and_clear_flag(struct ll_inode_info *lli, - enum ll_file_flags flag) -{ - return test_and_clear_bit(flag, &lli->lli_flags); -} - int ll_xattr_cache_destroy(struct inode *inode); int ll_xattr_cache_empty(struct inode *inode); @@ -608,6 +589,35 @@ static inline struct pcc_inode *ll_i2pcci(struct inode *inode) return ll_i2info(inode)->lli_pcc_inode; } +static inline void ll_set_inode_lock_owner(struct inode *inode) +{ + ll_i2info(inode)->lli_inode_lock_owner = current; +} + +static inline void ll_clear_inode_lock_owner(struct inode *inode) +{ + ll_i2info(inode)->lli_inode_lock_owner = NULL; +} + +static inline struct task_struct *ll_get_inode_lock_owner(struct inode *inode) +{ + return ll_i2info(inode)->lli_inode_lock_owner; +} + +/* lock inode and set inode lock owener */ +static inline void ll_inode_lock(struct inode *inode) +{ + inode_lock(inode); + ll_set_inode_lock_owner(inode); +} + +/* clear inode lock owner and unlock it */ +static inline void ll_inode_unlock(struct inode *inode) +{ + ll_clear_inode_lock_owner(inode); + inode_unlock(inode); +} + /* default to use at least 16M for fast read if possible */ #define RA_REMAIN_WINDOW_MIN MiB_TO_PAGES(16UL) @@ -1375,7 +1385,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md); void ll_update_inode_flags(struct inode *inode, int ext_flags); void ll_update_dir_depth(struct inode *dir, struct inode *inode); int ll_read_inode2(struct inode *inode, void *opaque); -void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io); +void ll_truncate_inode_pages_final(struct inode *inode); void ll_delete_inode(struct inode *inode); int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 860c6ec..6421abe 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1134,10 +1134,12 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_group_gid = 0; } mutex_init(&lli->lli_layout_mutex); + lli->lli_layout_lock_owner = NULL; memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid)); /* ll_cl_context initialize */ INIT_LIST_HEAD(&lli->lli_lccs); seqlock_init(&lli->lli_page_inv_lock); + lli->lli_inode_lock_owner = NULL; } #define MAX_STRING_SIZE 128 @@ -1845,10 +1847,10 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data) * cache is not cleared yet. 
*/ op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE); if (S_ISREG(inode->i_mode)) - inode_lock(inode); + ll_inode_lock(inode); rc = simple_setattr(&nop_mnt_idmap, dentry, &op_data->op_attr); if (S_ISREG(inode->i_mode)) - inode_unlock(inode); + ll_inode_unlock(inode); op_data->op_attr.ia_valid = ia_valid; rc = ll_update_inode(inode, &md); @@ -2079,6 +2081,9 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, " "valid %x, hsm_import %d\n", ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid), @@ -2086,29 +2091,29 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, hsm_import); if (attr->ia_valid & ATTR_SIZE) { - /* Check new size against VFS/VM file size limit and rlimit */ - rc = inode_newsize_ok(inode, attr->ia_size); - if (rc) - RETURN(rc); - - /* The maximum Lustre file size is variable, based on the - * OST maximum object size and number of stripes. This - * needs another check in addition to the VFS check above. */ - if (attr->ia_size > ll_file_maxbytes(inode)) { + /* Check new size against VFS/VM file size limit and rlimit */ + rc = inode_newsize_ok(inode, attr->ia_size); + if (rc) + GOTO(clear, rc); + + /* The maximum Lustre file size is variable, based on the + * OST maximum object size and number of stripes. This + * needs another check in addition to the VFS check above. */ + if (attr->ia_size > ll_file_maxbytes(inode)) { CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n", - PFID(&lli->lli_fid), attr->ia_size, - ll_file_maxbytes(inode)); - RETURN(-EFBIG); - } + PFID(&lli->lli_fid), attr->ia_size, + ll_file_maxbytes(inode)); + GOTO(clear, rc = -EFBIG); + } - attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; - } + attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; + } /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */ if (attr->ia_valid & TIMES_SET_FLAGS) { if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_FOWNER)) - RETURN(-EPERM); + GOTO(clear, rc = -EPERM); } /* We mark all of the fields "set" so MDS/OST does not re-set them */ @@ -2125,8 +2130,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, if (!(attr->ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { attr->ia_mtime = current_time(inode); - attr->ia_valid |= ATTR_MTIME_SET; - } + attr->ia_valid |= ATTR_MTIME_SET; + } if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) CDEBUG(D_INODE, "setting mtime %lld, ctime %lld, now = %lld\n", @@ -2134,7 +2139,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, ktime_get_real_seconds()); if (S_ISREG(inode->i_mode)) - inode_unlock(inode); + ll_inode_unlock(inode); /* We always do an MDS RPC, even if we're only changing the size; * only the MDS knows whether truncate() should fail with -ETXTBUSY */ @@ -2149,7 +2154,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, */ xvalid |= OP_XVALID_OWNEROVERRIDE; op_data->op_bias |= MDS_DATA_MODIFIED; - ll_file_clear_flag(lli, LLIF_DATA_MODIFIED); + clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); } if (attr->ia_valid & ATTR_FILE) { @@ -2283,7 +2288,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()). * This way we can save an RPC for common open + trunc * operation. 
*/ - if (ll_file_test_and_clear_flag(lli, LLIF_DATA_MODIFIED)) { + if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) { struct hsm_state_set hss = { .hss_valid = HSS_SETMASK, .hss_setmask = HS_DIRTY, @@ -2307,7 +2312,7 @@ out: ll_finish_md_op_data(op_data); if (S_ISREG(inode->i_mode)) { - inode_lock(inode); + ll_inode_lock(inode); if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); /* Once we've got the i_mutex, it's safe to set the S_NOSEC @@ -2322,6 +2327,8 @@ out: ll_stats_ops_tally(ll_i2sbi(inode), attr->ia_valid & ATTR_SIZE ? LPROC_LL_TRUNC : LPROC_LL_SETATTR, ktime_us_delta(ktime_get(), kstart)); +clear: + ll_clear_inode_lock_owner(inode); RETURN(rc); } @@ -2515,6 +2522,7 @@ void ll_inode_size_lock(struct inode *inode) lli = ll_i2info(inode); mutex_lock(&lli->lli_size_mutex); + lli->lli_size_lock_owner = current; } void ll_inode_size_unlock(struct inode *inode) @@ -2522,6 +2530,7 @@ void ll_inode_size_unlock(struct inode *inode) struct ll_inode_info *lli; lli = ll_i2info(inode); + lli->lli_size_lock_owner = NULL; mutex_unlock(&lli->lli_size_mutex); } @@ -2541,9 +2550,9 @@ void ll_update_inode_flags(struct inode *inode, int ext_flags) ext_flags |= ll_inode_to_ext_flags(inode->i_flags) & LUSTRE_ENCRYPT_FL; inode->i_flags = ll_ext_to_inode_flags(ext_flags); if (ext_flags & LUSTRE_PROJINHERIT_FL) - ll_file_set_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT); + set_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags); else - ll_file_clear_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT); + clear_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags); } int ll_update_inode(struct inode *inode, struct lustre_md *md) @@ -2670,9 +2679,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) * glimpsing updated attrs */ if (body->mbo_t_state & MS_RESTORE) - ll_file_set_flag(lli, LLIF_FILE_RESTORING); + set_bit(LLIF_FILE_RESTORING, &lli->lli_flags); else - ll_file_clear_flag(lli, LLIF_FILE_RESTORING); + clear_bit(LLIF_FILE_RESTORING, &lli->lli_flags); } return 0; @@ -2726,14 +2735,16 @@ void ll_update_dir_depth(struct inode *dir, struct inode *inode) lli->lli_inherit_depth); } -void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io) +void ll_truncate_inode_pages_final(struct inode *inode) { struct address_space *mapping = &inode->i_data; unsigned long nrpages; unsigned long flags; - LASSERTF(io == NULL || inode_is_locked(inode), "io %p (type %d)\n", - io, io ? io->ci_type : 0); + LASSERTF((inode->i_state & I_FREEING) || inode_is_locked(inode), + DFID ":inode %p state %#lx, lli_flags %#lx\n", + PFID(ll_inode2fid(inode)), inode, inode->i_state, + ll_i2info(inode)->lli_flags); truncate_inode_pages_final(mapping); @@ -2752,11 +2763,11 @@ void ll_truncate_inode_pages_final(struct inode *inode, struct cl_io *io) } /* Workaround end */ LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu " - "io %p (io_type %d), " + "state %#lx, lli_flags %#lx, " "see https://jira.whamcloud.com/browse/LU-118\n", ll_i2sbi(inode)->ll_fsname, PFID(ll_inode2fid(inode)), inode, nrpages, - io, io ? 
io->ci_type : 0); + inode->i_state, ll_i2info(inode)->lli_flags); } int ll_read_inode2(struct inode *inode, void *opaque) @@ -2830,7 +2841,7 @@ void ll_delete_inode(struct inode *inode) CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1); } - ll_truncate_inode_pages_final(inode, NULL); + ll_truncate_inode_pages_final(inode); ll_clear_inode(inode); clear_inode(inode); diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index aba6d7b..449d8d5 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -224,7 +224,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage, } if (result == 0) - ll_file_set_flag(lli, LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &lli->lli_flags); } EXIT; diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 2d955f5..caa5a35 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -291,13 +291,13 @@ static int ll_get_name(struct dentry *dentry, char *name, struct dentry *child) if (IS_ERR(op_data)) GOTO(out, rc = PTR_ERR(op_data)); - inode_lock(dir); + ll_inode_lock(dir); #ifdef HAVE_DIR_CONTEXT rc = ll_dir_read(dir, &pos, op_data, &lgd.ctx); #else rc = ll_dir_read(dir, &pos, op_data, &lgd, ll_nfs_get_name_filldir); #endif - inode_unlock(dir); + ll_inode_unlock(dir); ll_finish_md_op_data(op_data); if (!rc && !lgd.lgd_found) rc = -ENOENT; diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index aa7a13e..c64351f 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -328,7 +328,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel) lli = ll_i2info(inode); if (bits & MDS_INODELOCK_UPDATE) - ll_file_set_flag(lli, LLIF_UPDATE_ATIME); + set_bit(LLIF_UPDATE_ATIME, &lli->lli_flags); if ((bits & MDS_INODELOCK_UPDATE) && S_ISDIR(inode->i_mode)) { CDEBUG(D_INODE, "invalidating inode "DFID" lli = %p, " @@ -1209,7 +1209,10 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, unsigned int flags) { struct lookup_intent *itp, it = { .it_op = IT_GETATTR }; - struct dentry *de; + struct dentry *de = NULL; + + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(parent); CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), flags=%u\n", dentry, PFID(ll_inode2fid(parent)), parent, flags); @@ -1222,7 +1225,7 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, if ((flags & LOOKUP_CREATE) && !(flags & LOOKUP_OPEN) && (inode_permission(&nop_mnt_idmap, parent, MAY_WRITE | MAY_EXEC) == 0)) - return NULL; + goto clear; if (flags & (LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE)) itp = NULL; @@ -1234,6 +1237,9 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, if (itp != NULL) ll_intent_release(itp); +clear: + ll_clear_inode_lock_owner(parent); + return de; } @@ -1279,6 +1285,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, int rc = 0; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), file %p, open_flags %x, mode %x opened %d\n", dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode, @@ -1294,7 +1303,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, * Either way it's a valid race to just return -ENOENT here. 
*/ if (!(open_flags & O_CREAT)) - return -ENOENT; + GOTO(clear, rc = -ENOENT); /* Otherwise we just unhash it to be rehashed afresh via * lookup if necessary @@ -1304,7 +1313,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, OBD_ALLOC(it, sizeof(*it)); if (!it) - RETURN(-ENOMEM); + GOTO(clear, rc = -ENOMEM); it->it_op = IT_OPEN; if (open_flags & O_CREAT) { @@ -1466,6 +1475,8 @@ out_release: ll_intent_release(it); out_free: OBD_FREE(it, sizeof(*it)); +clear: + ll_clear_inode_lock_owner(dir); RETURN(rc); } @@ -1883,6 +1894,9 @@ static int ll_mknod(struct mnt_idmap *map, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p) mode %o dev %x\n", dchild, PFID(ll_inode2fid(dir)), dir, mode, rdev); @@ -1911,6 +1925,7 @@ static int ll_mknod(struct mnt_idmap *map, struct inode *dir, if (!err) ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKNOD, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); RETURN(err); } @@ -1924,6 +1939,9 @@ static int ll_create_nd(struct mnt_idmap *map, struct inode *dir, ktime_t kstart = ktime_get(); int rc; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_FILE_PAUSE, cfs_fail_val); CDEBUG(D_VFSTRACE, @@ -1941,6 +1959,8 @@ static int ll_create_nd(struct mnt_idmap *map, struct inode *dir, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_CREATE, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); + return rc; } @@ -1953,13 +1973,16 @@ static int ll_symlink(struct mnt_idmap *map, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p), target=%.*s\n", dchild, PFID(ll_inode2fid(dir)), dir, 3000, oldpath); err = llcrypt_prepare_symlink(dir, oldpath, len, dir->i_sb->s_blocksize, &disk_link); if (err) - RETURN(err); + GOTO(out, err); err = ll_new_node(dir, dchild, oldpath, S_IFLNK | S_IRWXUGO, (__u64)&disk_link, LUSTRE_OPC_SYMLINK); @@ -1971,6 +1994,9 @@ static int ll_symlink(struct mnt_idmap *map, struct inode *dir, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_SYMLINK, ktime_us_delta(ktime_get(), kstart)); +out: + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -1986,6 +2012,10 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, int err; ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(src); + ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op: inode="DFID"(%p), dir="DFID"(%p), target=%pd\n", PFID(ll_inode2fid(src)), src, @@ -1993,12 +2023,12 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, err = llcrypt_prepare_link(old_dentry, dir, new_dentry); if (err) - RETURN(err); + GOTO(clear, err); op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(clear, err = PTR_ERR(op_data)); err = md_link(sbi->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); @@ -2011,6 +2041,10 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, EXIT; out: ptlrpc_req_finished(request); +clear: + ll_clear_inode_lock_owner(src); + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -2021,6 +2055,9 @@ static int ll_mkdir(struct mnt_idmap *map, struct inode *dir, int err; ENTRY; + /* VFS has locked the inode before calling this */ + 
ll_set_inode_lock_owner(dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); @@ -2034,6 +2071,8 @@ static int ll_mkdir(struct mnt_idmap *map, struct inode *dir, ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_MKDIR, ktime_us_delta(ktime_get(), kstart)); + ll_clear_inode_lock_owner(dir); + RETURN(err); } @@ -2047,16 +2086,20 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild) ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(dir); + ll_set_inode_lock_owner(dchild->d_inode); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); if (unlikely(d_mountpoint(dchild))) - RETURN(-EBUSY); + GOTO(out, rc = -EBUSY); op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, S_IFDIR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(out, rc = PTR_ERR(op_data)); if (dchild->d_inode != NULL) op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); @@ -2086,6 +2129,9 @@ static int ll_rmdir(struct inode *dir, struct dentry *dchild) } ptlrpc_req_finished(request); +out: + ll_clear_inode_lock_owner(dir); + ll_clear_inode_lock_owner(dchild->d_inode); RETURN(rc); } @@ -2132,6 +2178,10 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild) ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(dir); + ll_set_inode_lock_owner(dchild->d_inode); + CDEBUG(D_VFSTRACE, "VFS Op:name=%pd, dir="DFID"(%p)\n", dchild, PFID(ll_inode2fid(dir)), dir); @@ -2140,12 +2190,12 @@ static int ll_unlink(struct inode *dir, struct dentry *dchild) * just check it as vfs_unlink does. */ if (unlikely(d_mountpoint(dchild))) - RETURN(-EBUSY); + GOTO(clear, rc = -EBUSY); op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(clear, rc = PTR_ERR(op_data)); op_data->op_fid3 = *ll_inode2fid(dchild->d_inode); /* notify lower layer if inode has dirty pages */ @@ -2178,6 +2228,9 @@ out: if (!rc) ll_stats_ops_tally(ll_i2sbi(dir), LPROC_LL_UNLINK, ktime_us_delta(ktime_get(), kstart)); +clear: + ll_clear_inode_lock_owner(dir); + ll_clear_inode_lock_owner(dchild->d_inode); RETURN(rc); } @@ -2198,9 +2251,15 @@ static int ll_rename(struct mnt_idmap *map, int err; ENTRY; + /* VFS has locked the inodes before calling this */ + ll_set_inode_lock_owner(src); + ll_set_inode_lock_owner(tgt); + if (tgt_dchild->d_inode) + ll_set_inode_lock_owner(tgt_dchild->d_inode); + #if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS) if (flags) - return -EINVAL; + GOTO(out, err = -EINVAL); #endif CDEBUG(D_VFSTRACE, @@ -2209,7 +2268,7 @@ static int ll_rename(struct mnt_idmap *map, tgt_dchild, PFID(ll_inode2fid(tgt)), tgt); if (unlikely(d_mountpoint(src_dchild) || d_mountpoint(tgt_dchild))) - RETURN(-EBUSY); + GOTO(out, err = -EBUSY); #if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_IOPS_RENAME_WITH_FLAGS) err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, flags); @@ -2217,12 +2276,12 @@ static int ll_rename(struct mnt_idmap *map, err = llcrypt_prepare_rename(src, src_dchild, tgt, tgt_dchild, 0); #endif if (err) - RETURN(err); + GOTO(out, err); /* we prevent an encrypted file from being renamed * into an unencrypted dir */ if (IS_ENCRYPTED(src) && !IS_ENCRYPTED(tgt)) - RETURN(-EXDEV); + GOTO(out, err = -EXDEV); if (src_dchild->d_inode) mode = src_dchild->d_inode->i_mode; @@ -2233,7 +2292,7 @@ static int ll_rename(struct mnt_idmap 
*map, op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, mode, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) - RETURN(PTR_ERR(op_data)); + GOTO(out, err = PTR_ERR(op_data)); /* If the client is using a subdir mount and does a rename to what it * sees as /.fscrypt, interpret it as the .fscrypt dir at fs root. @@ -2253,11 +2312,11 @@ static int ll_rename(struct mnt_idmap *map, err = ll_setup_filename(src, &src_dchild->d_name, 1, &foldname, NULL); if (err) - RETURN(err); + GOTO(out, err); err = ll_setup_filename(tgt, &tgt_dchild->d_name, 1, &fnewname, NULL); if (err) { llcrypt_free_filename(&foldname); - RETURN(err); + GOTO(out, err); } err = md_rename(sbi->ll_md_exp, op_data, foldname.disk_name.name, foldname.disk_name.len, @@ -2278,7 +2337,11 @@ static int ll_rename(struct mnt_idmap *map, ll_stats_ops_tally(sbi, LPROC_LL_RENAME, ktime_us_delta(ktime_get(), kstart)); } - +out: + ll_clear_inode_lock_owner(src); + ll_clear_inode_lock_owner(tgt); + if (tgt_dchild->d_inode) + ll_clear_inode_lock_owner(tgt_dchild->d_inode); RETURN(err); } diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index 0758c55..2650153 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -2824,7 +2824,7 @@ int pcc_inode_getattr(struct inode *inode, u32 request_mask, GOTO(out, rc); ll_inode_size_lock(inode); - if (ll_file_test_and_clear_flag(lli, LLIF_UPDATE_ATIME) || + if (test_and_clear_bit(LLIF_UPDATE_ATIME, &lli->lli_flags) || inode->i_atime.tv_sec < lli->lli_atime) inode->i_atime.tv_sec = lli->lli_atime; diff --git a/lustre/llite/statahead.c b/lustre/llite/statahead.c index 3794dba..ac952a2 100644 --- a/lustre/llite/statahead.c +++ b/lustre/llite/statahead.c @@ -584,7 +584,7 @@ static void ll_agl_trigger(struct inode *inode, struct ll_statahead_info *sai) * the MDT holds the layout lock so the glimpse will block up to the * end of restore (statahead/agl will block) */ - if (ll_file_test_flag(lli, LLIF_FILE_RESTORING)) { + if (test_bit(LLIF_FILE_RESTORING, &lli->lli_flags)) { lli->lli_agl_index = 0; iput(inode); RETURN_EXIT; diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 2616e6f..124c356 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -360,8 +360,8 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) /* today successful restore is the only possible * case */ /* restore was done, clear restoring state */ - ll_file_clear_flag(ll_i2info(vvp_object_inode(obj)), - LLIF_FILE_RESTORING); + clear_bit(LLIF_FILE_RESTORING, + &ll_i2info(vvp_object_inode(obj))->lli_flags); } GOTO(out, 0); } @@ -812,7 +812,7 @@ static void vvp_io_setattr_fini(const struct lu_env *env, if (restore_needed && !ios->cis_io->ci_restore_needed) { /* restore finished, set data modified flag for HSM */ - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); } } @@ -1365,10 +1365,10 @@ static int vvp_io_write_start(const struct lu_env *env, iter = *vio->vui_iter; if (unlikely(lock_inode)) - inode_lock(inode); + ll_inode_lock(inode); result = __generic_file_write_iter(vio->vui_iocb, &iter); if (unlikely(lock_inode)) - inode_unlock(inode); + ll_inode_unlock(inode); written = result; if (result > 0) @@ -1417,7 +1417,7 @@ static int vvp_io_write_start(const struct lu_env *env, vio->vui_iocb->ki_pos = pos + io->ci_nob - nob; } if (result > 0 || result == -EIOCBQUEUED) { - ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED); + set_bit(LLIF_DATA_MODIFIED, &ll_i2info(inode)->lli_flags); if (result != -EIOCBQUEUED && result 
< cnt) io->ci_continue = 0; @@ -1767,7 +1767,7 @@ static int vvp_io_lseek_start(const struct lu_env *env, struct inode *inode = vvp_object_inode(io->ci_obj); __u64 start = io->u.ci_lseek.ls_start; - inode_lock(inode); + ll_inode_lock(inode); inode_dio_wait(inode); /* At the moment we have DLM lock so just update inode @@ -1790,7 +1790,7 @@ static void vvp_io_lseek_end(const struct lu_env *env, if (io->u.ci_lseek.ls_result > i_size_read(inode)) io->u.ci_lseek.ls_result = -ENXIO; - inode_unlock(inode); + ll_inode_unlock(inode); } static const struct cl_io_operations vvp_io_ops = { diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c index efd64b3..ea03a84 100644 --- a/lustre/llite/vvp_object.c +++ b/lustre/llite/vvp_object.c @@ -157,7 +157,6 @@ static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj, static int vvp_prune(const struct lu_env *env, struct cl_object *obj) { - struct cl_io *io = vvp_env_io(env)->vui_cl.cis_io; struct inode *inode = vvp_object_inode(obj); int rc; ENTRY; @@ -169,14 +168,16 @@ static int vvp_prune(const struct lu_env *env, struct cl_object *obj) RETURN(rc); } - if (io != NULL) - inode_lock(inode); + if (ll_get_inode_lock_owner(inode) != current) + /* ask LOV get inode lock then lo_type_guard */ + RETURN(-EAGAIN); - ll_truncate_inode_pages_final(inode, io); - clear_bit(AS_EXITING, &inode->i_mapping->flags); + LASSERTF(inode_is_locked(inode), DFID ":inode %p lli_flags %#lx\n", + PFID(lu_object_fid(&obj->co_lu)), inode, + ll_i2info(inode)->lli_flags); - if (io != NULL) - inode_unlock(inode); + ll_truncate_inode_pages_final(inode); + clear_bit(AS_EXITING, &inode->i_mapping->flags); RETURN(0); } @@ -227,6 +228,61 @@ static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, sizeof(attr->cra_jobid)); } +static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj, + enum coo_inode_opc opc, void *data) +{ + struct inode *inode = vvp_object_inode(obj); + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; + + ENTRY; + switch (opc) { + case COIO_INODE_LOCK: + if (ll_get_inode_lock_owner(inode) != current) + ll_inode_lock(inode); + else + rc = -EALREADY; + break; + case COIO_INODE_UNLOCK: + if (ll_get_inode_lock_owner(inode) == current) + ll_inode_unlock(inode); + else + rc = -ENOLCK; + break; + case COIO_SIZE_LOCK: + if (lli->lli_size_lock_owner != current) + ll_inode_size_lock(inode); + else + rc = -EALREADY; + break; + case COIO_SIZE_UNLOCK: + if (lli->lli_size_lock_owner == current) + ll_inode_size_unlock(inode); + else + rc = -ENOLCK; + break; + case COIO_LAYOUT_LOCK: + if (lli->lli_layout_lock_owner != current) { + mutex_lock(&lli->lli_layout_mutex); + lli->lli_layout_lock_owner = current; + } + break; + case COIO_LAYOUT_UNLOCK: + if (lli->lli_layout_lock_owner == current) { + lli->lli_layout_lock_owner = NULL; + mutex_unlock(&lli->lli_layout_mutex); + } else { + rc = -ENOLCK; + } + break; + default: + rc = -EINVAL; + break; + } + + RETURN(rc); +} + static const struct cl_object_operations vvp_ops = { .coo_page_init = vvp_page_init, .coo_io_init = vvp_io_init, @@ -235,7 +291,8 @@ static const struct cl_object_operations vvp_ops = { .coo_conf_set = vvp_conf_set, .coo_prune = vvp_prune, .coo_glimpse = vvp_object_glimpse, - .coo_req_attr_set = vvp_req_attr_set + .coo_req_attr_set = vvp_req_attr_set, + .coo_inode_ops = vvp_inode_ops, }; static int vvp_object_init0(const struct lu_env *env, diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 58af7f5..bfbc013 100644 --- a/lustre/llite/xattr.c +++ 
b/lustre/llite/xattr.c @@ -112,6 +112,9 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, int rc; ENTRY; + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + /* When setxattr() is called with a size of 0 the value is * unconditionally replaced by "". When removexattr() is * called we get a NULL value and XATTR_REPLACE for flags. */ @@ -123,26 +126,26 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, /* FIXME: enable IMA when the conditions are ready */ if (handler->flags == XATTR_SECURITY_T && (!strcmp(name, "ima") || !strcmp(name, "evm"))) - RETURN(-EOPNOTSUPP); + GOTO(out, rc = -EOPNOTSUPP); rc = xattr_type_filter(sbi, handler); if (rc) - RETURN(rc); + GOTO(out, rc); if ((handler->flags == XATTR_ACL_ACCESS_T || handler->flags == XATTR_ACL_DEFAULT_T) && !inode_owner_or_capable(map, inode)) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); /* b10667: ignore lustre special xattr for now */ if (!strcmp(name, "hsm") || ((handler->flags == XATTR_TRUSTED_T && !strcmp(name, "lov")) || (handler->flags == XATTR_LUSTRE_T && !strcmp(name, "lov")))) - RETURN(0); + GOTO(out, rc = 0); rc = ll_security_secctx_name_filter(sbi, handler->flags, name); if (rc) - RETURN(rc); + GOTO(out, rc); /* * In user.* namespace, only regular files and directories can have @@ -150,7 +153,7 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, */ if (handler->flags == XATTR_USER_T) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); } /* This check is required for compatibility with 2.14, in which @@ -161,11 +164,11 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, * context is set directly in the create request. */ if (handler->flags == XATTR_SECURITY_T && strcmp(name, "c") == 0) - RETURN(-EPERM); + GOTO(out, rc = -EPERM); fullname = kasprintf(GFP_KERNEL, "%s%s", xattr_prefix(handler), name); if (!fullname) - RETURN(-ENOMEM); + GOTO(out, rc = -ENOMEM); rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, fullname, pv, size, flags, ll_i2suppgid(inode), &req); @@ -175,7 +178,7 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, LCONSOLE_INFO("Disabling user_xattr feature because it is not supported on the server\n"); sbi->ll_flags &= ~LL_SBI_USER_XATTR; } - RETURN(rc); + GOTO(out, rc); } ptlrpc_req_finished(req); @@ -183,8 +186,10 @@ static int ll_xattr_set_common(const struct xattr_handler *handler, ll_stats_ops_tally(ll_i2sbi(inode), valid == OBD_MD_FLXATTRRM ? LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR, ktime_us_delta(ktime_get(), kstart)); +out: + ll_clear_inode_lock_owner(inode); - RETURN(0); + RETURN(rc); } static int get_hsm_state(struct inode *inode, u32 *hus_states) @@ -352,11 +357,14 @@ static int ll_xattr_set(const struct xattr_handler *handler, int op_type = flags == XATTR_REPLACE ? 
LPROC_LL_REMOVEXATTR : LPROC_LL_SETXATTR; struct lov_user_md *lum = (struct lov_user_md *)value; - int rc; + int rc = 0; LASSERT(inode); LASSERT(name); + /* VFS has locked the inode before calling this */ + ll_set_inode_lock_owner(inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=" DFID "(%p), xattr %s\n", PFID(ll_inode2fid(inode)), inode, name); @@ -365,11 +373,11 @@ static int ll_xattr_set(const struct xattr_handler *handler, rc = ll_setstripe_ea(dentry, lum, size); ll_stats_ops_tally(ll_i2sbi(inode), op_type, ktime_us_delta(ktime_get(), kstart)); - return rc; + goto out; } else if (!strcmp(name, "lma") || !strcmp(name, "link")) { ll_stats_ops_tally(ll_i2sbi(inode), op_type, ktime_us_delta(ktime_get(), kstart)); - return 0; + goto out; } if (strncmp(name, "lov.", 4) == 0) { @@ -379,11 +387,15 @@ static int ll_xattr_set(const struct xattr_handler *handler, rc = lustre_check_lov_user_md(lum); if (rc < 0) - return rc; + goto out; } - return ll_xattr_set_common(handler, map, dentry, inode, name, - value, size, flags); + rc = ll_xattr_set_common(handler, map, dentry, inode, name, + value, size, flags); +out: + ll_clear_inode_lock_owner(inode); + + return rc; } int ll_xattr_list(struct inode *inode, const char *name, int type, void *buffer, diff --git a/lustre/llite/xattr_cache.c b/lustre/llite/xattr_cache.c index cd777c6..f54d8f6 100644 --- a/lustre/llite/xattr_cache.c +++ b/lustre/llite/xattr_cache.c @@ -85,7 +85,7 @@ static void ll_xattr_cache_init(struct ll_inode_info *lli) LASSERT(lli != NULL); INIT_LIST_HEAD(&lli->lli_xattrs); - ll_file_set_flag(lli, LLIF_XATTR_CACHE); + set_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -264,7 +264,7 @@ static int ll_xattr_cache_list(struct list_head *cache, */ static int ll_xattr_cache_valid(struct ll_inode_info *lli) { - return ll_file_test_flag(lli, LLIF_XATTR_CACHE); + return test_bit(LLIF_XATTR_CACHE, &lli->lli_flags); } /** @@ -275,7 +275,7 @@ static int ll_xattr_cache_valid(struct ll_inode_info *lli) */ static int ll_xattr_cache_filled(struct ll_inode_info *lli) { - return ll_file_test_flag(lli, LLIF_XATTR_CACHE_FILLED); + return test_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); } /** @@ -295,8 +295,8 @@ static int ll_xattr_cache_destroy_locked(struct ll_inode_info *lli) while (ll_xattr_cache_del(&lli->lli_xattrs, NULL) == 0) /* empty loop */ ; - ll_file_clear_flag(lli, LLIF_XATTR_CACHE_FILLED); - ll_file_clear_flag(lli, LLIF_XATTR_CACHE); + clear_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); + clear_bit(LLIF_XATTR_CACHE, &lli->lli_flags); RETURN(0); } @@ -530,7 +530,7 @@ static int ll_xattr_cache_refill(struct inode *inode) if (xdata != xtail || xval != xvtail) CERROR("a hole in xattr data\n"); else - ll_file_set_flag(lli, LLIF_XATTR_CACHE_FILLED); + set_bit(LLIF_XATTR_CACHE_FILLED, &lli->lli_flags); ll_set_lock_data(sbi->ll_md_exp, inode, &oit, NULL); ll_intent_drop_lock(&oit); diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index d316c5c..778157d 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -311,10 +311,11 @@ static void lov_subobject_kill(const struct lu_env *env, struct lov_object *lov, LASSERT(r0->lo_sub[idx] == NULL); } -static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, +static int lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lov_layout_entry *lle) { struct lov_layout_raid0 *r0 = &lle->lle_raid0; + int rc; ENTRY; @@ -325,7 +326,9 @@ static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, struct lovsub_object *los = 
r0->lo_sub[i]; if (los != NULL) { - cl_object_prune(env, &los->lso_cl); + rc = cl_object_prune(env, &los->lso_cl); + if (rc) + RETURN(rc); /* * If top-level object is to be evicted from * the cache, so are its sub-objects. @@ -335,7 +338,7 @@ static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov, } } - EXIT; + RETURN(0); } static void lov_fini_raid0(const struct lu_env *env, @@ -857,6 +860,7 @@ static int lov_delete_composite(const struct lu_env *env, union lov_layout_state *state) { struct lov_layout_entry *entry; + int rc; ENTRY; @@ -867,7 +871,9 @@ static int lov_delete_composite(const struct lu_env *env, if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme)) continue; - lov_delete_raid0(env, lov, entry); + rc = lov_delete_raid0(env, lov, entry); + if (rc) + RETURN(rc); } RETURN(0); @@ -1389,6 +1395,10 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, struct lov_stripe_md *lsm = NULL; struct lov_object *lov = cl2lov(obj); int result = 0; + struct cl_object *top = cl_object_top(obj); + bool unlock_inode = false; + bool lock_inode_size = false; + bool lock_layout = false; ENTRY; if (conf->coc_opc == OBJECT_CONF_SET && @@ -1401,6 +1411,7 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, dump_lsm(D_INODE, lsm); } +retry: lov_conf_lock(lov); if (conf->coc_opc == OBJECT_CONF_INVALIDATE) { lov->lo_layout_invalid = true; @@ -1455,10 +1466,61 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, result = lov_layout_change(env, lov, lsm, conf); lov->lo_layout_invalid = result != 0; + if (result) { + if (result == -EAGAIN) { + /** + * we need unlocked lov conf and get inode lock. + * It's possible we have already taken inode's size + * mutex and/or layout mutex, so we need keep such lock + * order, lest deadlock happens: + * inode lock (ll_inode_lock()) + * inode size lock (ll_inode_size_lock()) + * inode layout lock (ll_layout_refresh()) + * lov conf lock (lov_conf_lock()) + * + * e.g. + * vfs_setxattr inode locked + * ll_lov_setstripe_ea_info inode size locked + * ll_prep_inode + * ll_file_inode_init + * cl_conf_set + * lov_conf_set lov conf locked + * + * ll_migrate inode locked + * ... 
+ * ll_layout_refresh inode layout locked + * ll_layout_conf + * cl_conf_set + * lov_conf_set lov conf locked + */ + lov_conf_unlock(lov); + if (cl_object_inode_ops(env, top, COIO_LAYOUT_UNLOCK, + NULL) == 0) + lock_layout = true; + if (cl_object_inode_ops(env, top, COIO_SIZE_UNLOCK, + NULL) == 0) + lock_inode_size = true; + + /* take lock in order */ + if (cl_object_inode_ops( + env, top, COIO_INODE_LOCK, NULL) == 0) + unlock_inode = true; + if (lock_inode_size) + cl_object_inode_ops(env, top, COIO_SIZE_LOCK, + NULL); + if (lock_layout) + cl_object_inode_ops(env, top, COIO_LAYOUT_LOCK, + NULL); + goto retry; + } + } EXIT; out: lov_conf_unlock(lov); + if (unlock_inode) + cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL); + lov_lsm_put(lsm); CDEBUG(D_INODE, DFID" lo_layout_invalid=%d\n", PFID(lu_object_fid(lov2lu(lov))), lov->lo_layout_invalid); diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c index d5d491f..0c392db 100644 --- a/lustre/obdclass/cl_object.c +++ b/lustre/obdclass/cl_object.c @@ -443,6 +443,25 @@ int cl_object_flush(const struct lu_env *env, struct cl_object *obj, } EXPORT_SYMBOL(cl_object_flush); +int cl_object_inode_ops(const struct lu_env *env, struct cl_object *top, + enum coo_inode_opc opc, void *data) +{ + struct cl_object *obj; + int rc = 0; + + ENTRY; + + cl_object_for_each(obj, top) { + if (obj->co_ops->coo_inode_ops) { + rc = obj->co_ops->coo_inode_ops(env, obj, opc, data); + if (rc) + break; + } + } + RETURN(rc); +} +EXPORT_SYMBOL(cl_object_inode_ops); + /** * Helper function removing all object locks, and marking object for * deletion. All object pages must have been deleted at this point.
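
Editor's note (not part of the commit): the following user-space C sketch models the owner-tracking pattern that ll_inode_lock()/ll_inode_unlock() and the COIO_INODE_LOCK branch of vvp_inode_ops() implement above -- record the locking task next to the mutex so a lower layer can tell whether the current task already holds the lock and skip re-locking instead of self-deadlocking. The names here (fake_inode, fake_inode_lock, ...) are hypothetical stand-ins, not Lustre or kernel APIs.

/*
 * Minimal model of the lli_inode_lock_owner pattern from the patch.
 * Build with: cc owner_lock_demo.c -o owner_lock_demo -lpthread
 */
#include <pthread.h>
#include <stdio.h>

struct fake_inode {
	pthread_mutex_t	i_mutex;	/* models inode_lock()/inode_unlock() */
	pthread_t	lock_owner;	/* models lli_inode_lock_owner */
	int		owner_valid;	/* pthread_t has no NULL, so track validity */
};

/* models ll_inode_lock(): take the mutex, then record the owner */
static void fake_inode_lock(struct fake_inode *inode)
{
	pthread_mutex_lock(&inode->i_mutex);
	inode->lock_owner = pthread_self();	/* ll_set_inode_lock_owner() */
	inode->owner_valid = 1;
}

/* models ll_inode_unlock(): clear the owner, then drop the mutex */
static void fake_inode_unlock(struct fake_inode *inode)
{
	inode->owner_valid = 0;			/* ll_clear_inode_lock_owner() */
	pthread_mutex_unlock(&inode->i_mutex);
}

/*
 * Models the COIO_INODE_LOCK branch of vvp_inode_ops(): only lock if the
 * current thread is not already the recorded owner.  As in the patch, the
 * unlocked read of the owner field is safe because only the owning task
 * ever sets or clears it, and each task only asks "am I the owner?".
 */
static int fake_inode_lock_if_needed(struct fake_inode *inode)
{
	if (inode->owner_valid &&
	    pthread_equal(inode->lock_owner, pthread_self()))
		return -1;			/* -EALREADY in the patch */
	fake_inode_lock(inode);
	return 0;
}

int main(void)
{
	struct fake_inode inode = { .i_mutex = PTHREAD_MUTEX_INITIALIZER };

	fake_inode_lock(&inode);		/* e.g. already taken by the VFS */
	if (fake_inode_lock_if_needed(&inode) < 0)
		printf("already locked by this thread, not re-locking\n");
	fake_inode_unlock(&inode);

	if (fake_inode_lock_if_needed(&inode) == 0)
		printf("lock taken here because no task owned it\n");
	fake_inode_unlock(&inode);
	return 0;
}

The same consideration drives the retry path in lov_conf_set() above: when vvp_prune() returns -EAGAIN because the current task does not own the inode lock, lov_conf_set() drops the lov conf lock (and releases any size/layout locks it finds held by this task), re-acquires them in the documented order -- inode lock, inode size lock, inode layout lock, lov conf lock -- and retries, so the lock hierarchy is never inverted.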