From 640b334a96a909ce141bae1625f496eecbe427fa Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 12 May 2021 11:43:28 +0800 Subject: [PATCH] EX-3080 pcc: avoid dead lock for auto attach in PCC-RO This patch releases the pcc inode lock while calling ll_layout_refresh() in pcc_try_auto_attach(), because holding the lock across that call may cause the following deadlock: 1. The client is writing or truncating a file in readonly mode. At this time, it sends a write layout intent lock request to clear the readonly state on the layout on the MDT. 2. A read process tries to auto attach the file with the pcc inode lock held. In the process of auto attach, it calls ll_layout_refresh(). If the client-side enqueue request for a layout lock returns a blocked lock, the process sleeps and waits for the lock to be granted. 3. The MDT takes an EX layout lock to cancel all cached layout locks on the client, so that it can change the layout to clear the PCC-RO state. 4. When the client handles the revocation of the layout lock, it needs to invalidate the PCC state, which must be done under the protection of the pcc inode lock. That lock is still held by the sleeping reader from step 2, so neither side can make progress. 
Signed-off-by: Qian Yingjin Change-Id: I18890d19d03726a5991c923505e8c5363382fdc2 Reviewed-on: https://review.whamcloud.com/43668 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Yang Sheng Tested-by: Maloo Reviewed-by: Li Xi --- lustre/llite/pcc.c | 10 +++--- lustre/tests/sanity-pcc.sh | 81 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index 9fd03ae..8ff4efb 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1354,12 +1354,15 @@ static void pcc_inode_put(struct pcc_inode *pcci) void pcc_inode_free(struct inode *inode) { - struct pcc_inode *pcci = ll_i2pcci(inode); + struct pcc_inode *pcci; + pcc_inode_lock(inode); + pcci = ll_i2pcci(inode); if (pcci) { WARN_ON(atomic_read(&pcci->pcci_refcount) > 1); pcc_inode_put(pcci); } + pcc_inode_unlock(inode); } /* @@ -1898,7 +1901,9 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached, if (ll_layout_version_get(lli) == CL_LAYOUT_GEN_NONE) RETURN(0); } else { + pcc_inode_unlock(inode); rc = ll_layout_refresh(inode, &gen); + pcc_inode_lock(inode); if (rc) RETURN(rc); } @@ -3648,10 +3653,8 @@ static int pcc_readonly_attach(struct file *file, if (rc) GOTO(out_dataset_put, rc); - mutex_lock(&lli->lli_layout_mutex); pcc_inode_lock(inode); old_cred = override_creds(super->pccs_cred); - lli->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING; if (gen != ll_layout_version_get(lli)) { CDEBUG(D_CACHE, "L.Gen mismatch %u:%u\n", gen, ll_layout_version_get(lli)); @@ -3693,7 +3696,6 @@ out_put_unlock: } revert_creds(old_cred); pcc_inode_unlock(inode); - mutex_unlock(&lli->lli_layout_mutex); out_dataset_put: pcc_dataset_put(dataset); diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 01df9af..ff0f639 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -3298,6 +3298,87 @@ test_41() { } run_test 41 "Test mtime rule for PCC-RO open attach with O_RDONLY mode" +test_100() 
{ + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tfile + + setup_loopdev $SINGLEAGT $loopfile $mntpt 60 + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "projid={0}\ roid=$HSM_ARCHIVE_NUMBER\ ropcc=1" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=50 || + error "Write $file failed" + + local rpid + local rpid2 + local wpid + local upid + local dpid + local lpcc_path + + lpcc_path=$(lpcc_fid2path $hsm_root $file) + ( + while [ ! -e $DIR/sanity-pcc.42.lck ]; do + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=50 || + error "failed to write $file" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + wpid=$! + + ( + while [ ! -e $DIR/sanity-pcc.42.lck ]; do + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=50 || + error "failed to write $file" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + rpid=$! + + ( + while [ ! -e $DIR/sanity-pcc.42.lck ]; do + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=50 || + error "failed to write $file" + sleep 0.$((RANDOM % 4 + 1)) + done + )& + rpid2=$! + + ( + while [ ! -e $DIR/sanity-pcc.42.lck ]; do + echo "Unlink $lpcc_path" + do_facet $SINGLEAGT unlink $lpcc_path + sleep 1 + done + true + )& + upid=$! + + ( + while [ ! -e $DIR/sanity-pcc.42.lck ]; do + echo "Detach $file ..." + do_facet $SINGLEAGT $LFS pcc detach $file + sleep 0.$((RANDOM % 8 + 1)) + done + )& + dpid=$! + + sleep 60 + touch $DIR/sanity-pcc.42.lck + wait $wpid || error "$?: write failed" + wait $rpid || error "$?: read failed" + wait $rpid2 || error "$?: read2 failed" + wait $upid || error "$?: unlink failed" + wait $dpid || error "$?: detach failed" + + do_facet $SINGLEAGT $LFS pcc detach $file +} +run_test 100 "race among PCC unlink | read | write | detach for PCC-RO file" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1