mkdir() currently gets a write lock on the parent even if the new
directory already exists.
This patch adds an initial lookup of the new directory without a DLM
lock so that other clients do not need to cancel their DLM lock if the
"new" directory already exists, but will continue as usual if directory
did not exist.
There is a small race window that child was created by others after our
check and before locking parent, but this can be detected later during
index insert.
Performance change on two haswell 16-core VMs with ib, mean values of
mpirun -n 8 ./mdtest -D -i 8 -I 1000
test environment | directory creation | tree creation
local, no patch | 1725/s | 769/s
local, patch | 1821/s | 788/s
remote, no patch | 1729/s | 772/s
remote, patch | 1687/s | 787/s
The differences are of the order of the noise here, with all mkdirs
being effective.
If directories exist, some simple stress on four nodes shows intended
improvements:
clush -w vm[0-3] 'seq 0 10000 |
xargs -P 7 -I{} sh -c "(({}%3==0)) &&
mkdir /mnt/lustre/testdir/foo 2>/dev/null ||
stat /mnt/lustre/testdir > /dev/null"'
with patch: 10s
without patch: 19s
(the difference grows exponentially with number of clients and hangs
with over 60 clients without the patch; exact time was not re-measured
with patch)
Updated sanityn.sh 43a 45a to avoid race conditions.
Add sanityn.sh test_43j to verify above scenario.
Test-Parameters: envdefinitions=SLOW=yes testlist=replay-vbr,replay-vbr
Change-Id: I37fc9c8ffc7ab334c0645042beda5bef01284564
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/30880
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Quentin Bouget <quentin.bouget@cea.fr>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
#define OBD_FAIL_MDS_ORPHAN_DELETE 0x165
#define OBD_FAIL_MDS_RMFID_NET 0x166
#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
#define OBD_FAIL_MDS_ORPHAN_DELETE 0x165
#define OBD_FAIL_MDS_RMFID_NET 0x166
+#define OBD_FAIL_MDS_CREATE_RACE 0x167
/* layout lock */
#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170
/* layout lock */
#define OBD_FAIL_MDS_NO_LL_GETATTR 0x170
if (!mdt_object_exists(parent))
GOTO(put_parent, rc = -ENOENT);
if (!mdt_object_exists(parent))
GOTO(put_parent, rc = -ENOENT);
- lh = &info->mti_lh[MDT_LH_PARENT];
- mdt_lock_pdo_init(lh, LCK_PW, &rr->rr_name);
- rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE);
- if (rc)
- GOTO(put_parent, rc);
-
- if (!mdt_object_remote(parent)) {
- rc = mdt_version_get_check_save(info, parent, 0);
- if (rc)
- GOTO(unlock_parent, rc);
- }
-
- * Check child name version during replay.
- * During create replay a file may exist with same name.
+ * LU-10235: check if name exists locklessly first to avoid massive
+ * lock recalls on existing directories.
*/
rc = mdt_lookup_version_check(info, parent, &rr->rr_name,
&info->mti_tmp_fid1, 1);
if (rc == 0)
*/
rc = mdt_lookup_version_check(info, parent, &rr->rr_name,
&info->mti_tmp_fid1, 1);
if (rc == 0)
- GOTO(unlock_parent, rc = -EEXIST);
+ GOTO(put_parent, rc = -EEXIST);
/* -ENOENT is expected here */
if (rc != -ENOENT)
/* -ENOENT is expected here */
if (rc != -ENOENT)
- GOTO(unlock_parent, rc);
/* save version of file name for replay, it must be ENOENT here */
mdt_enoent_version_save(info, 1);
/* save version of file name for replay, it must be ENOENT here */
mdt_enoent_version_save(info, 1);
+ OBD_RACE(OBD_FAIL_MDS_CREATE_RACE);
+
+ lh = &info->mti_lh[MDT_LH_PARENT];
+ mdt_lock_pdo_init(lh, LCK_PW, &rr->rr_name);
+ rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE);
+ if (rc)
+ GOTO(put_parent, rc);
+
+ if (!mdt_object_remote(parent)) {
+ rc = mdt_version_get_check_save(info, parent, 0);
+ if (rc)
+ GOTO(unlock_parent, rc);
+ }
+
child = mdt_object_new(info->mti_env, mdt, rr->rr_fid2);
if (unlikely(IS_ERR(child)))
GOTO(unlock_parent, rc = PTR_ERR(child));
child = mdt_object_new(info->mti_env, mdt, rr->rr_fid2);
if (unlikely(IS_ERR(child)))
GOTO(unlock_parent, rc = PTR_ERR(child));
}
run_test 42h "pdirops: mkdir vs readdir =============="
}
run_test 42h "pdirops: mkdir vs readdir =============="
-# test 43: unlink and blocking operations
+# test 43: rmdir,mkdir won't return -EEXIST
- pdo_lru_clear
- touch $DIR1/$tfile
-#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145
- do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145
- rm $DIR1/$tfile &
- PID1=$! ; pdo_sched
- mkdir $DIR2/$tfile &
- PID2=$! ; pdo_sched
- do_facet $SINGLEMDS lctl set_param fail_loc=0
- check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; }
- wait $PID2 ; [ $? -eq 0 ] || error "mkdir must succeed"
- rm -rf $DIR/$tfile*
+ for i in {1..1000}; do
+ mkdir $DIR1/$tdir || error "mkdir $tdir failed"
+ rmdir $DIR2/$tdir || error "rmdir $tdir failed"
+ done
-run_test 43a "pdirops: unlink vs mkdir =============="
+run_test 43a "rmdir,mkdir doesn't return -EEXIST =============="
test_43b() {
pdo_lru_clear
test_43b() {
pdo_lru_clear
}
run_test 43i "pdirops: unlink vs remote mkdir"
}
run_test 43i "pdirops: unlink vs remote mkdir"
+test_43j() {
+ [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] &&
+ skip "Need MDS version newer than 2.13.52"
+
+ for i in {1..100}; do
+#define OBD_FAIL_ONCE|OBD_FAIL_MDS_CREATE_RACE 0x167
+ do_facet $SINGLEMDS lctl set_param fail_loc=0x80000167
+ OK=0
+ mkdir $DIR1/$tdir &
+ PID1=$!
+ mkdir $DIR2/$tdir && ((OK++))
+ wait $PID1 && ((OK++))
+ (( OK == 1 )) || error "exactly one mkdir should succeed"
+
+ rmdir $DIR1/$tdir || error "rmdir failed"
+ done
+ return 0
+}
+run_test 43j "racy mkdir return EEXIST =============="
+
# test 44: rename tgt and blocking operations
test_44a() {
pdo_lru_clear
# test 44: rename tgt and blocking operations
test_44a() {
pdo_lru_clear
}
run_test 44i "pdirops: rename tgt vs remote mkdir"
}
run_test 44i "pdirops: rename tgt vs remote mkdir"
-# test 45: rename src and blocking operations
+# test 45: rename,mkdir doesn't fail with -EEXIST
- pdo_lru_clear
- touch $DIR1/$tfile
-#define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145
- do_facet $SINGLEMDS lctl set_param fail_loc=0x80000145
- mv $DIR1/$tfile $DIR1/$tfile-2 &
- PID1=$! ; pdo_sched
- mkdir $DIR2/$tfile &
- PID2=$! ; pdo_sched
- do_facet $SINGLEMDS lctl set_param fail_loc=0
- check_pdo_conflict $PID1 && { wait $PID1; error "mkdir isn't blocked"; }
- wait $PID2 ; [ $? -eq 0 ] || error "mkdir must succeed"
- rm -rf $DIR/$tfile*
+ for i in {1..1000}; do
+ mkdir $DIR1/$tdir || error "mkdir $tdir failed"
+ mrename $DIR2/$tdir $DIR2/$tdir.$i > /dev/null ||
+ error "mrename to $tdir.$i failed"
+ done
+ rm -rf $DIR/$tdir*
-run_test 45a "pdirops: rename src vs mkdir =============="
+run_test 45a "rename,mkdir doesn't return -EEXIST =============="
test_45b() {
pdo_lru_clear
test_45b() {
pdo_lru_clear