From 6805611f23c0a0c903a44ad4f0fc0c74e91164a4 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Thu, 27 Aug 2020 11:16:38 +0800 Subject: [PATCH] LU-13930 pcc: auto PCC-RO attach in atomic_open Auto PCC-RO attach during PCC open will try to change the layout into read-only state on MDT. If the intent open in atomic_open returns a lock with MDS_INODELOCK_LAYOUT bit set, it may cause a deadlock on the lock blocking callback caused by the layout change into read-only state on MDT. Thus it is better to release the open intent lock first before calling PCC open. Test-Parameters: clientcount=3 testlist=sanity-pcc,sanity-pcc,sanity-pcc Signed-off-by: Qian Yingjin Change-Id: I7ab86bd56da9bacc40b201162ffd022ab5a2352c --- lustre/llite/file.c | 12 ++++--- lustre/llite/namei.c | 12 +++++++ lustre/tests/sanity-pcc.sh | 81 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 100 insertions(+), 5 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index fbc2632..ca39303 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -749,6 +749,7 @@ int ll_file_open(struct inode *inode, struct file *file) struct ll_file_data *fd; ktime_t kstart = ktime_get(); int rc = 0; + ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n", @@ -912,12 +913,15 @@ restart: GOTO(out_och_free, rc); } - rc = pcc_file_open(inode, file); - if (rc) - GOTO(out_och_free, rc); - mutex_unlock(&lli->lli_och_mutex); + /* It is not from atomic_open(). 
*/ + if (it == &oit) { + rc = pcc_file_open(inode, file); + if (rc) + GOTO(out_och_free, rc); + } + /* lockless for direct IO so that it can do IO in parallel */ if (file->f_flags & O_DIRECT) fd->fd_flags |= LL_FILE_LOCKLESS_IO; diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index ef0ec44..1165469 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1193,6 +1193,17 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, */ if (de != NULL) dput(de); + + /* Auto PCC-RO attach during PCC open will try + * to change the layout to read-only state. If + * the intent open returns the lock with + * MDS_INODELOCK_LAYOUT bit set, it may cause + * dead lock. Thus it would better to release + * the intent lock first before call PCC open. + */ + ll_intent_release(it); + rc = pcc_file_open(dentry->d_inode, file); + GOTO(out_free, rc); } } else { rc = finish_no_open(file, de); @@ -1203,6 +1214,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, out_release: ll_intent_release(it); +out_free: OBD_FREE(it, sizeof(*it)); RETURN(rc); diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 1113e32..14affb2 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -198,7 +198,7 @@ setup_loopdev() { local mntpt=$3 local size=${4:-50} - do_facet $facet mkdir -p $mntpt || error "mkdir -p $hsm_root failed" + do_facet $facet mkdir -p $mntpt || error "mkdir -p $mntpt failed" stack_trap "do_facet $facet rm -rf $mntpt" EXIT do_facet $facet dd if=/dev/zero of=$file bs=1M count=$size stack_trap "do_facet $facet rm -f $file" EXIT @@ -1049,6 +1049,57 @@ test_10d() { } run_test 10d "Test RO-PCC with group quota on loop PCC device" +test_usrgrp_edquot() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tfile + local id=$RUNAS_ID + local ug=$1 + + [[ $ug == "g" ]] && id=$RUNAS_GID + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + do_facet 
$SINGLEAGT quotacheck -c$ug $mntpt || + error "quotacheck -c$ug $mntpt failed" + do_facet $SINGLEAGT quotaon -$ug $mntpt || + error "quotaon -$ug $mntpt failed" + do_facet $SINGLEAGT setquota -$ug $id 0 4096 0 0 $mntpt || + error "setquota -$ug $id on $mntpt failed" + do_facet $SINGLEAGT repquota -${ug}vs $mntpt + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "${ug}id={$id}\ roid=$HSM_ARCHIVE_NUMBER\ ropcc=1" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + + dd if=/dev/zero of=$file bs=1M count=2 || + error "dd write $file failed" + chown $RUNAS_ID:$RUNAS_GID $file || + error "chown $RUNAS_ID:$RUNAS_GID $file failed" + do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=2 || + error "dd read $file failed" + check_lpcc_state $file "readonly" + $LFS getstripe -v $file + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=5 || + error "dd write $file failed" + check_lpcc_state $file "none" + do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=5 || + error "dd read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + $LFS getstripe -v $file + do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file || + error "PCC-RO attach $file failed" +} + +test_10e() { + test_usrgrp_edquot "u" +} +run_test 10e "Tolerate -EDQUOT failure when auto PCC-RO attach with user quota" + +test_10f() { + test_usrgrp_edquot "g" +} +run_test 10f "Tolerate -EDQUOT failure when auto PCC-RO attach with group quota" + test_11() { local loopfile="$TMP/$tfile" local mntpt="/mnt/pcc.$tdir" @@ -2704,6 +2755,34 @@ test_34() { } run_test 34 "Cache rule with comparaotr (>, <) for Project ID range" +test_35() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/myfile.dat + + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "fname={*.dat}\ 
roid=$HSM_ARCHIVE_NUMBER\ ropcc=1" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=4k count=1 || + error "Write $file failed" + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 || + error "Read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + check_lpcc_state $file "readonly" + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=4k count=1 || + error "Write $file failed" + sysctl vm.drop_caches=3 + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 || + error "Read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + check_lpcc_state $file "readonly" +} +run_test 35 "Auto PCC-RO attach in atomic_open" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1