From be8c960ee0328dc0c44f1e4c34ef53bb238c7620 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Thu, 1 Sep 2022 14:54:23 +0800 Subject: [PATCH] EX-5014 pcc: avoid deadlock during DIO open attach on rhel7 The Maloo testing fails with sanity-pcc/45 due to the following deadlock on rhel7 kernel: ll_fid_path_cop D ffff9a32db5eb180 0 10783 10782 0x00000080 Call Trace: schedule_preempt_disabled+0x29/0x70 __mutex_lock_slowpath+0xc7/0x1d0 mutex_lock+0x1f/0x2f lookup_slow+0x33/0xa7 link_path_walk+0x80f/0x8b0 path_openat+0xae/0x5a0 do_filp_open+0x4d/0xb0 do_sys_open+0x124/0x220 SyS_open+0x1e/0x20 dd D ffff9a32fb5b6300 0 10779 10755 0x00000080 Call Trace: wait_for_completion+0xfd/0x140 call_usermodehelper_exec+0x179/0x1a0 call_usermodehelper+0x40/0x60 pcc_copy_data_dio+0x267/0x340 [lustre] pcc_attach_data_archive+0x6ff/0xe80 [lustre] pcc_readonly_attach+0x3d2/0xad0 [lustre] pcc_readonly_attach_sync+0x205/0x260 [lustre] pcc_file_open+0x798/0xdd0 [lustre] ll_atomic_open+0xd80/0x1780 [lustre] do_last+0xa53/0x1340 path_openat+0xcd/0x5a0 do_filp_open+0x4d/0xb0 do_sys_open+0x124/0x220 SyS_open+0x1e/0x20 This bug only happened on el7 kernel which uses mutex for inode locking. During ->ll_atomic_open(), the kernel will take this mutex on the parent inode. However, when copy data via the user space helper program ll_fid_path_copy, it will also try to obtain this mutex lock on the parent inode during lookup, resulting in deadlock. Test-Parameters: clientdistro=el7.9 testlist=sanity-pcc Test-Parameters: clientdistro=el8.5 mdscount=2 mdtcount=4 testlist=sanity-pcc env=ONLY=45,ONLY_REPEAT=10 Change-Id: I384c7b1979d93183b86bbde311d29a50346a8d56 Signed-off-by: Qian Yingjin Reviewed-on: https://review.whamcloud.com/48405 Reviewed-by: Andreas Dilger Tested-by: Andreas Dilger --- lustre/autoconf/lustre-core.m4 | 21 ++++++++++++ lustre/include/lustre_compat.h | 1 + lustre/llite/file.c | 6 ++-- lustre/llite/namei.c | 2 +- lustre/llite/pcc.c | 78 ++++++++++++++++++++++++++++-------------- lustre/llite/pcc.h | 3 +- lustre/tests/sanity-pcc.sh | 5 ++- 7 files changed, 84 insertions(+), 32 deletions(-) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 040a39f..d0f4c83 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1469,6 +1469,26 @@ inode_lock, [ ]) # LC_HAVE_INODE_LOCK # +# LC_HAVE_INODE_RWSEM +# +# kernel version 4.5 commit 9902af79c01a8e39bb99b922fa3eef6d4ea23d69 +# acutl switch to rwsem from mutex for inode lock +# +AC_DEFUN([LC_HAVE_INODE_RWSEM], [ +LB_CHECK_COMPILE([if struct inode has i_rwsem], +i_rwsem, [ + #include +], [ + struct inode inode = {}; + + init_rwsem(&inode.i_rwsem); +], [ + AC_DEFINE(HAVE_INODE_RWSEM, 1, + [struct inode uses i_rwsem for inode locking]) +]) +]) # LC_HAVE_INODE_RWSEM + +# # LC_HAVE_IOP_GET_LINK # # 4.5 vfs replaced iop->follow_link with @@ -2649,6 +2669,7 @@ AC_DEFUN([LC_PROG_LINUX], [ # 4.5 LC_HAVE_INODE_LOCK + LC_HAVE_INODE_RWSEM LC_HAVE_IOP_GET_LINK # 4.6 diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index ec2f649..cd49b06 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -141,6 +141,7 @@ static inline bool d_is_positive(const struct dentry *dentry) # define inode_lock(inode) mutex_lock(&(inode)->i_mutex) # define inode_unlock(inode) mutex_unlock(&(inode)->i_mutex) # define inode_trylock(inode) mutex_trylock(&(inode)->i_mutex) +# define inode_is_locked(inode) mutex_is_locked(&(inode)->i_mutex) #endif /* Old kernels lacked both Xarray support and the page cache diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 60a2538..b26abaa 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -823,6 +823,7 @@ int ll_file_open(struct inode *inode, struct file *file) __u64 *och_usecount = NULL; struct ll_file_data *fd; ktime_t kstart = ktime_get(); + bool is_atomic_open; int rc = 0; ENTRY; @@ -831,6 +832,7 @@ int ll_file_open(struct inode *inode, struct file *file) it = file->private_data; /* XXX: compat macro */ file->private_data = NULL; /* prevent ll_local_open assertion */ + is_atomic_open = it != NULL; if (S_ISREG(inode->i_mode)) { rc = ll_file_open_encrypt(inode, file); @@ -1018,8 +1020,8 @@ restart: mutex_unlock(&lli->lli_och_mutex); /* It is not from atomic_open(). */ - if (it == &oit) { - rc = pcc_file_open(inode, file); + if (!is_atomic_open) { + rc = pcc_file_open(inode, file, false); if (rc) GOTO(out_och_free, rc); } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 53345cd..5d30286 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1464,7 +1464,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, * the intent lock first before call PCC open. */ ll_intent_release(it); - rc = pcc_file_open(dentry->d_inode, file); + rc = pcc_file_open(dentry->d_inode, file, true); GOTO(out_free, rc); } } else { diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index d95ca12..79e250e 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1868,7 +1868,7 @@ static inline void pcc_readonly_attach_fini(struct inode *inode) } static int pcc_readonly_attach(struct file *file, struct inode *inode, - __u32 roid); + __u32 roid, bool atomic_open_locked); static int pcc_readonly_attach_thread(void *arg) { @@ -1891,7 +1891,8 @@ static int pcc_readonly_attach_thread(void *arg) if (IS_ERR_OR_NULL(file)) GOTO(out, rc = file == NULL ? -EINVAL : PTR_ERR(file)); - rc = pcc_readonly_attach(file, pccx->pccx_inode, pccx->pccx_attach_id); + rc = pcc_readonly_attach(file, pccx->pccx_inode, + pccx->pccx_attach_id, false); pcc_readonly_attach_fini(pccx->pccx_inode); CDEBUG(D_CACHE, "PCC-RO attach in background for %pd "DFID" rc = %d\n", @@ -1949,11 +1950,11 @@ out: RETURN(rc); } -static int pcc_readonly_attach_sync(struct file *file, - struct inode *inode, __u32 roid); +static int pcc_readonly_attach_sync(struct file *file, struct inode *inode, + __u32 roid, bool atomic_open_locked); -static inline int pcc_do_readonly_attach(struct file *file, - struct inode *inode, __u32 roid) +static inline int pcc_do_readonly_attach(struct file *file, struct inode *inode, + __u32 roid, bool atomic_open_locked) { struct pcc_super *super = ll_i2pccs(inode); bool async = true; @@ -1978,14 +1979,14 @@ static inline int pcc_do_readonly_attach(struct file *file, return rc; } - rc = pcc_readonly_attach_sync(file, inode, roid); + rc = pcc_readonly_attach_sync(file, inode, roid, atomic_open_locked); return rc; } /* Call with pcci_mutex hold */ static int pcc_try_readonly_open_attach(struct inode *inode, struct file *file, - bool *cached) + bool atomic_open_locked, bool *cached) { struct dentry *dentry = file->f_path.dentry; struct pcc_dataset *dataset; @@ -2014,7 +2015,8 @@ static int pcc_try_readonly_open_attach(struct inode *inode, struct file *file, if ((dataset->pccd_flags & PCC_DATASET_PCC_ALL) == PCC_DATASET_ROPCC) { pcc_inode_unlock(inode); - rc = pcc_do_readonly_attach(file, inode, dataset->pccd_roid); + rc = pcc_do_readonly_attach(file, inode, dataset->pccd_roid, + atomic_open_locked); pcc_inode_lock(inode); pcci = ll_i2pcci(inode); if (pcci && pcc_inode_has_layout(pcci)) @@ -2384,7 +2386,8 @@ bool pcc_inode_permission(struct inode *inode) (mask & (S_IROTH | S_IXOTH)); } -int pcc_file_open(struct inode *inode, struct file *file) +int pcc_file_open(struct inode *inode, struct file *file, + bool atomic_open_locked) { struct pcc_inode *pcci; struct ll_inode_info *lli = ll_i2info(inode); @@ -2421,7 +2424,9 @@ int pcc_file_open(struct inode *inode, struct file *file) rc = pcc_try_auto_attach(inode, &cached, PIT_OPEN); if (rc == 0 && !cached && pcc_inode_permission(inode)) - rc = pcc_try_readonly_open_attach(inode, file, &cached); + rc = pcc_try_readonly_open_attach(inode, file, + atomic_open_locked, + &cached); if (rc < 0) GOTO(out_unlock, rc); @@ -3858,18 +3863,37 @@ static ssize_t pcc_copy_data_dio(struct pcc_super *super, rc = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (rc != 0) { - CERROR("%s: Error copying data for attach, Lustre file: %s, PCC file: %s, Lustre mntpath %s, iosize %u, rc: %lu.\n", + CERROR("%s: Error copying data for attach, Lustre file: %s, PCC file: %s, Lustre mntpath %s, iosize %u, pid %d rc: %lu:%zd.\n", ll_i2sbi(file_inode(lu_file))->ll_fsname, fidstring, - pcc_filepath, super->pccs_lu_pathname, iosize, rc); + pcc_filepath, super->pccs_lu_pathname, iosize, + current->pid, rc, rc); } RETURN(rc); } static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file, - struct file *pcc_file, char *pcc_pathname) + struct file *pcc_file, char *pcc_pathname, + bool atomic_open_locked) { - if (pcc_pathname) + if (pcc_pathname) { +#ifndef HAVE_INODE_RWSEM + int rc; + struct dentry *dentry = file_dentry(lu_file); + struct inode *dir = dentry->d_parent->d_inode; + + if (atomic_open_locked) { + LASSERT(inode_is_locked(dir)); + inode_unlock(dir); + } + rc = pcc_copy_data_dio(super, lu_file, pcc_pathname); + if (atomic_open_locked) + inode_lock(dir); + + return rc; +#else return pcc_copy_data_dio(super, lu_file, pcc_pathname); +#endif + } return pcc_copy_data_buffered(lu_file, pcc_file); } @@ -3877,7 +3901,8 @@ static ssize_t pcc_copy_data(struct pcc_super *super, struct file *lu_file, static int pcc_attach_data_archive(struct file *lu_file, struct inode *lu_inode, struct pcc_dataset *dataset, - struct dentry **pcc_dentry) + struct dentry **pcc_dentry, + bool atomic_open_locked) { struct pcc_super *super = ll_i2pccs(lu_inode); const struct cred *old_cred; @@ -3970,7 +3995,8 @@ static int pcc_attach_data_archive(struct file *lu_file, direct = true; } - ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname); + ret = pcc_copy_data(super, lu_file, pcc_file, pcc_pathname, + atomic_open_locked); if (direct) lu_file->f_flags |= O_DIRECT; if (ret < 0) @@ -4030,7 +4056,7 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode, if (dataset == NULL) RETURN(-ENOENT); - rc = pcc_attach_data_archive(file, inode, dataset, &dentry); + rc = pcc_attach_data_archive(file, inode, dataset, &dentry, false); if (rc) GOTO(out_dataset_put, rc); @@ -4194,8 +4220,8 @@ repeat: RETURN(rc); } -static int pcc_readonly_attach(struct file *file, - struct inode *inode, __u32 roid) +static int pcc_readonly_attach(struct file *file, struct inode *inode, + __u32 roid, bool atomic_open_locked) { struct pcc_super *super = ll_i2pccs(inode); struct ll_inode_info *lli = ll_i2info(inode); @@ -4222,7 +4248,8 @@ static int pcc_readonly_attach(struct file *file, if (dataset == NULL) RETURN(-ENOENT); - rc = pcc_attach_data_archive(file, inode, dataset, &dentry); + rc = pcc_attach_data_archive(file, inode, dataset, + &dentry, atomic_open_locked); if (rc) GOTO(out_dataset_put, rc); @@ -4286,8 +4313,8 @@ out_dataset_put: RETURN(rc); } -static int pcc_readonly_attach_sync(struct file *file, - struct inode *inode, __u32 roid) +static int pcc_readonly_attach_sync(struct file *file, struct inode *inode, + __u32 roid, bool atomic_open_locked) { int rc; @@ -4311,7 +4338,7 @@ static int pcc_readonly_attach_sync(struct file *file, RETURN(rc); } - rc = pcc_readonly_attach(file, inode, roid); + rc = pcc_readonly_attach(file, inode, roid, atomic_open_locked); pcc_readonly_attach_fini(inode); RETURN(rc); } @@ -4328,7 +4355,8 @@ int pcc_ioctl_attach(struct file *file, struct inode *inode, rc = -ENOTSUPP; break; case LU_PCC_READONLY: - rc = pcc_readonly_attach_sync(file, inode, attach->pcca_id); + rc = pcc_readonly_attach_sync(file, inode, + attach->pcca_id, false); break; default: rc = -EINVAL; diff --git a/lustre/llite/pcc.h b/lustre/llite/pcc.h index 68ac95f..d6a647a 100644 --- a/lustre/llite/pcc.h +++ b/lustre/llite/pcc.h @@ -312,7 +312,8 @@ int pcc_ioctl_state(struct file *file, struct inode *inode, struct lu_pcc_state *state); void pcc_file_init(struct pcc_file *pccf); bool pcc_inode_permission(struct inode *inode); -int pcc_file_open(struct inode *inode, struct file *file); +int pcc_file_open(struct inode *inode, struct file *file, + bool atomic_open_locked); void pcc_file_release(struct inode *inode, struct file *file); ssize_t pcc_file_read_iter(struct kiocb *iocb, struct iov_iter *iter, bool *cached); diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 2ab6116..4b0c218 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -12,8 +12,8 @@ export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT="$SANITY_PCC_EXCEPT " -# bug number for skipped test: EX-3763 EX-5014 -ALWAYS_EXCEPT+=" 44 45" +# bug number for skipped test: EX-3763 +ALWAYS_EXCEPT+=" 44" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! ENABLE_PROJECT_QUOTAS=${ENABLE_PROJECT_QUOTAS:-true} @@ -3591,7 +3591,6 @@ test_45() { error "Write $file failed" local n=16 - local lpid local -a pids1 local -a pids2 -- 1.8.3.1