From ee25babfe72378f9496a9732742984f26eb7d4a5 Mon Sep 17 00:00:00 2001 From: Bruno Faccini Date: Tue, 7 Feb 2017 12:30:35 +0100 Subject: [PATCH] LU-8907 llite: handle client racy case during create Some very infrequent situations exist on the client side that can cause a race during create when concurrent access by fid occurs. The result of the race can allow a d_alias to be already present, which was not expected when the original code/LBUG was written. One of the identified scenarios is when a concurrent access of the inode through the .lustre/fid/<[FID]> method occurs. New sanity/test_161d has been added to reproduce this scenario. The final fix is to remove the inaccurate LASSERT(ll_d_hlist_empty(&inode->i_dentry)); in ll_create_node(). Signed-off-by: Bruno Faccini Change-Id: I1ff19883cc5b53831f1d5c577f4152225fa9e0fb Reviewed-on: https://review.whamcloud.com/25296 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/llite/namei.c | 3 ++- lustre/tests/sanity.sh | 49 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 0c47add..7322802 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -530,6 +530,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE 0x1409 #define OBD_FAIL_LLITE_NEWNODE_PAUSE 0x140a #define OBD_FAIL_LLITE_SETDIRSTRIPE_PAUSE 0x140b +#define OBD_FAIL_LLITE_CREATE_NODE_PAUSE 0x140c #define OBD_FAIL_FID_INDIR 0x1501 diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 1d9e9ce..3b8a889 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -904,7 +904,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it) if (rc) GOTO(out, inode = ERR_PTR(rc)); - LASSERT(ll_d_hlist_empty(&inode->i_dentry)); + /* Pause to allow for a race with concurrent access by fid */ 
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_NODE_PAUSE, cfs_fail_val); /* We asked for a lock on the directory, but were granted a * lock on the inode. Since we finally have an inode pointer, diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 75bb587..c7dde17 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11206,6 +11206,55 @@ test_161c() { } run_test 161c "check CL_RENME[UNLINK] changelog record flags" +test_161d() { + local user + local pid + local fid + + # cleanup previous run + rm -rf $DIR/$tdir/$tfile + + user=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + [[ $? -eq 0 ]] || error "changelog_register failed" + + # work in a standalone dir so that locking on $DIR/$MOUNT does not + # interfere with $MOUNT/.lustre/fid/ access + mkdir $DIR/$tdir + [[ $? -eq 0 ]] || error "mkdir failed" + + #define OBD_FAIL_LLITE_CREATE_NODE_PAUSE 0x140c | OBD_FAIL_ONCE + $LCTL set_param fail_loc=0x8000140c + # 5s pause + $LCTL set_param fail_val=5 + + # create file + echo foofoo > $DIR/$tdir/$tfile & + pid=$! + + # wait for create to be delayed + sleep 2 + + ps -q $pid + [[ $? -eq 0 ]] || error "create should be blocked" + + local tempfile=$(mktemp) + fid=$(changelog_extract_field $MDT0 "CREAT" "$tfile" "t=") + cat $MOUNT/.lustre/fid/$fid 2>/dev/null >$tempfile || error "cat failed" + # some delay may occur during ChangeLog publishing and file read just + # above, that could allow file write to happen finally + [[ -s $tempfile ]] && echo "file should be empty" + + $LCTL set_param fail_loc=0 + + wait $pid + [[ $? -eq 0 ]] || error "create failed" + + $LFS changelog_clear $MDT0 $user 0 + do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $user +} +run_test 161d "create with concurrent .lustre/fid access" + check_path() { local expected=$1 shift -- 1.8.3.1