From 79acb9a9e7d3c3185a047f5b067382a814c0e9e5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 10 Jan 2018 14:08:06 +0100 Subject: [PATCH] LU-10235 mdt: mdt_create: check EEXIST without lock mkdir() currently gets a write lock on the parent even if the new directory already exists. This patch adds an initial lookup of the new directory without a DLM lock so that other clients do not need to cancel their DLM lock if the "new" directory already exists, but will continue as usual if directory did not exist. There is a small race window that child was created by others after our check and before locking parent, but this can be detected later during index insert. Performance change on two haswell 16-core VMs with ib, mean values of mpirun -n 8 ./mdtest -D -i 8 -I 1000 test environment | directory creation | tree creation local, no patch | 1725/s | 769/s local, patch | 1821/s | 788/s remote, no patch | 1729/s | 772/s remote, patch | 1687/s | 787/s The differences are of the order of the noise here, with all mkdirs being effective. If directories exist, some simple stress on four nodes shows intended improvements: clush -w vm[0-3] 'seq 0 10000 | xargs -P 7 -I{} sh -c "(({}%3==0)) && mkdir /mnt/lustre/testdir/foo 2>/dev/null || stat /mnt/lustre/testdir > /dev/null"' with patch: 10s without patch: 19s (the difference grows exponentially with number of clients and hangs with over 60 clients without the patch; exact time was not re-measured with patch) Updated sanityn.sh 43a 45a to avoid race conditions. Add sanityn.sh test_43j to verify above scenario. Test-Parameters: envdefinitions=SLOW=yes testlist=replay-vbr,replay-vbr Change-Id: I37fc9c8ffc7ab334c0645042beda5bef01284564 Signed-off-by: Dominique Martinet Signed-off-by: Lai Siyao Reviewed-on: https://review.whamcloud.com/30880 Reviewed-by: Andreas Dilger Reviewed-by: Quentin Bouget Tested-by: jenkins Tested-by: Maloo --- lustre/include/obd_support.h | 1 + lustre/mdt/mdt_reint.c | 34 ++++++++++++------------ lustre/tests/sanityn.sh | 62 ++++++++++++++++++++++++-------------------- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3e2d73c..3d4f445 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -244,6 +244,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 #define OBD_FAIL_MDS_ORPHAN_DELETE 0x165 #define OBD_FAIL_MDS_RMFID_NET 0x166 +#define OBD_FAIL_MDS_CREATE_RACE 0x167 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 18323ce..e189d72 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -406,34 +406,36 @@ static int mdt_create(struct mdt_thread_info *info) if (!mdt_object_exists(parent)) GOTO(put_parent, rc = -ENOENT); - lh = &info->mti_lh[MDT_LH_PARENT]; - mdt_lock_pdo_init(lh, LCK_PW, &rr->rr_name); - rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE); - if (rc) - GOTO(put_parent, rc); - - if (!mdt_object_remote(parent)) { - rc = mdt_version_get_check_save(info, parent, 0); - if (rc) - GOTO(unlock_parent, rc); - } - /* - * Check child name version during replay. - * During create replay a file may exist with same name. + * LU-10235: check if name exists locklessly first to avoid massive + * lock recalls on existing directories. */ rc = mdt_lookup_version_check(info, parent, &rr->rr_name, &info->mti_tmp_fid1, 1); if (rc == 0) - GOTO(unlock_parent, rc = -EEXIST); + GOTO(put_parent, rc = -EEXIST); /* -ENOENT is expected here */ if (rc != -ENOENT) - GOTO(unlock_parent, rc); + GOTO(put_parent, rc); /* save version of file name for replay, it must be ENOENT here */ mdt_enoent_version_save(info, 1); + OBD_RACE(OBD_FAIL_MDS_CREATE_RACE); + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_pdo_init(lh, LCK_PW, &rr->rr_name); + rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE); + if (rc) + GOTO(put_parent, rc); + + if (!mdt_object_remote(parent)) { + rc = mdt_version_get_check_save(info, parent, 0); + if (rc) + GOTO(unlock_parent, rc); + } + child = mdt_object_new(info->mti_env, mdt, rr->rr_fid2); if (unlikely(IS_ERR(child))) GOTO(unlock_parent, rc = PTR_ERR(child)); diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 22f1570..fe85b4a 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -1949,23 +1949,15 @@ test_42h() { } run_test 42h "pdirops: mkdir vs readdir ==============" -# test 43: unlink and blocking operations +# test 43: rmdir,mkdir won't return -EEXIST test_43a() { - pdo_lru_clear - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - rm $DIR1/$tfile & - PID1=$! ; pdo_sched - mkdir $DIR2/$tfile & - PID2=$! ; pdo_sched - do_facet $SINGLEMDS lctl set_param fail_loc=0 - check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } - wait $PID2 ; [ $? -eq 0 ] || error "mkdir must succeed" - rm -rf $DIR/$tfile* + for i in {1..1000}; do + mkdir $DIR1/$tdir || error "mkdir $tdir failed" + rmdir $DIR2/$tdir || error "rmdir $tdir failed" + done return 0 } -run_test 43a "pdirops: unlink vs mkdir ==============" +run_test 43a "rmdir,mkdir doesn't return -EEXIST ==============" test_43b() { pdo_lru_clear @@ -2107,6 +2099,26 @@ test_43i() { } run_test 43i "pdirops: unlink vs remote mkdir" +test_43j() { + [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] && + skip "Need MDS version newer than 2.13.52" + + for i in {1..100}; do +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_CREATE_RACE 0x167 + do_facet $SINGLEMDS lctl set_param fail_loc=0x80000167 + OK=0 + mkdir $DIR1/$tdir & + PID1=$! + mkdir $DIR2/$tdir && ((OK++)) + wait $PID1 && ((OK++)) + (( OK == 1 )) || error "exactly one mkdir should succeed" + + rmdir $DIR1/$tdir || error "rmdir failed" + done + return 0 +} +run_test 43j "racy mkdir return EEXIST ==============" + # test 44: rename tgt and blocking operations test_44a() { pdo_lru_clear @@ -2269,23 +2281,17 @@ test_44i() { } run_test 44i "pdirops: rename tgt vs remote mkdir" -# test 45: rename src and blocking operations +# test 45: rename,mkdir doesn't fail with -EEXIST test_45a() { - pdo_lru_clear - touch $DIR1/$tfile -#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 - do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145 - mv $DIR1/$tfile $DIR1/$tfile-2 & - PID1=$! ; pdo_sched - mkdir $DIR2/$tfile & - PID2=$! ; pdo_sched - do_facet $SINGLEMDS lctl set_param fail_loc=0 - check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; } - wait $PID2 ; [ $? -eq 0 ] || error "mkdir must succeed" - rm -rf $DIR/$tfile* + for i in {1..1000}; do + mkdir $DIR1/$tdir || error "mkdir $tdir failed" + mrename $DIR2/$tdir $DIR2/$tdir.$i > /dev/null || + error "mrename to $tdir.$i failed" + done + rm -rf $DIR/$tdir* return 0 } -run_test 45a "pdirops: rename src vs mkdir ==============" +run_test 45a "rename,mkdir doesn't return -EEXIST ==============" test_45b() { pdo_lru_clear -- 1.8.3.1