X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanityn.sh;h=0ab469be3e6f86166ef2a1daba5fe0a18c849f07;hp=442b6e2082aea598ddff9472dd9c458bf689fd70;hb=cbc62b0b829afdceaa01820996e567b5bdeb281c;hpb=cda353e6efae5013a26aedbe49d8aa6fb8fe456e diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 442b6e2..0ab469b 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -389,6 +389,8 @@ test_16a() { local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -396,12 +398,12 @@ test_16a() { rm -f $file1 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || error "fsx failed" rm -f $file1 # O_DIRECT reads and writes must be aligned to the device block size. - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed." } run_test 16a "$FSXNUM iterations of dual-mount fsx" @@ -412,6 +414,8 @@ test_16b() { local file2=$DIR2/$tfile local stripe_size=($($LFS getstripe -S $DIR)) + check_set_fallocate + # to allocate grant because it may run out due to test_15. 
lfs setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync || @@ -423,7 +427,7 @@ test_16b() { lfs setstripe -c -1 $file1 # b=10919 # -o is set to 8192 because writes < 1 page and between 1 and 2 pages # create a mix of tiny writes & normal writes - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ $file1 $file2 || error "fsx with tiny write failed." } run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" @@ -436,6 +440,8 @@ test_16c() { [ "$ost1_FSTYPE" != ldiskfs ] && skip "dio on ldiskfs only" + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -452,7 +458,7 @@ test_16c() { set_osd_param $list '' writethrough_cache_enable 0 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || error "fsx failed" rm -f $file1 @@ -467,16 +473,17 @@ test_16d() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile local file3=$DIR1/file + local tmpfile=$(mktemp) local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") # to allocate grant because it may run out due to test_15. 
$LFS setstripe -c -1 $file1 + stack_trap "rm -f $file1 $file2 $file3 $tmpfile" dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync rm -f $file1 - local tmpfile=`mktemp` $LFS setstripe -c -1 $file1 # b=10919 $LCTL set_param ldlm.namespaces.*.lru_size=clear @@ -497,13 +504,15 @@ test_16d() { # buffer read from another client dd if=$file2 of=$file3 bs=1M count=100 diff $file3 $tmpfile || error "file different(3)" - - rm -f $file1 $file2 $file3 $tmpfile - } run_test 16d "Verify DIO and buffer IO with two clients" test_16e() { # LU-13227 + # issue: LU-14314 + + (( "$MDS1_VERSION" >= $(version_code 2.13.53) )) || + skip "Need MDS version at least 2.13.53" + local file1=$DIR1/$tfile local file2=$DIR2/$tfile @@ -559,15 +568,18 @@ run_test 18 "mmap sanity check =================================" test_19() { # bug3811 local node=$(facet_active_host ost1) + local device="$FSNAME-OST*" - [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) + [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) && + device="$FSNAME-MDT*" # check whether obdfilter is cache capable at all - get_osd_param $node '' read_cache_enable >/dev/null || + get_osd_param $node $device read_cache_enable >/dev/null || skip "not cache-capable obdfilter" - local MAX=$(get_osd_param $node '' readcache_max_filesize | head -n 1) - set_osd_param $node '' readcache_max_filesize 4096 + local max=$(get_osd_param $node $device readcache_max_filesize |\ + head -n 1) + set_osd_param $node $device readcache_max_filesize 4096 dd if=/dev/urandom of=$TMP/$tfile bs=512k count=32 local SUM=$(cksum $TMP/$tfile | cut -d" " -f 1,2) cp $TMP/$tfile $DIR1/$tfile @@ -582,7 +594,7 @@ test_19() { # bug3811 [ "$(cat $TMP/sum2)" = "$SUM" ] || \ error "$DIR2/$tfile $(cat $TMP/sum2) != $SUM" done - set_osd_param $node '' readcache_max_filesize $MAX + set_osd_param $node $device readcache_max_filesize $max rm $DIR1/$tfile } run_test 19 
"test concurrent uncached read races ===============" @@ -877,13 +889,15 @@ test_32a() { # bug 11270 log "checking cached lockless truncate" $TRUNCATE $DIR1/$tfile 8000000 - $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" + $CHECKSTAT -s 8000000 $DIR2/$tfile || + error "cached truncate - wrong file size" [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || error "cached truncate isn't lockless" log "checking not cached lockless truncate" $TRUNCATE $DIR2/$tfile 5000000 - $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" + $CHECKSTAT -s 5000000 $DIR1/$tfile || + error "not cached truncate - wrong file size" [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || error "not cached truncate isn't lockless" @@ -891,7 +905,8 @@ test_32a() { # bug 11270 enable_lockless_truncate 0 clear_stats $OSC.*.${OSC}_stats $TRUNCATE $DIR2/$tfile 3000000 - $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" + $CHECKSTAT -s 3000000 $DIR1/$tfile || + error "lockless truncate disabled - wrong file size" [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -eq 0 ] || error "lockless truncate disabling failed" rm -f $DIR1/$tfile @@ -1092,7 +1107,7 @@ test_33c() { local sync_count - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir sync_all_data do_facet mds1 "lctl set_param -n mdt.*.sync_count=0" # do twice in case transaction is committed before unlock, see LU-8200 @@ -1149,7 +1164,7 @@ test_33d() { skip "DNE CoS not supported" # remote directory create - op_trigger_cos "mkdir $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir" + op_trigger_cos "$LFS mkdir -i 0 $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir" # remote directory unlink op_trigger_cos "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR/$tdir" # striped directory create @@ -1560,25 +1575,26 @@ check_pdo_conflict() { # test 40: check non-blocking operations test_40a() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir $DIR2/$tdir pdo_lru_clear #define 
OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - touch $DIR2 - mkdir $DIR1/$tfile & + mkdir $DIR1/$tdir/$tfile & PID1=$!; pdo_sched - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1586,32 +1602,34 @@ test_40a() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40a "pdirops: create vs others ==============" test_40b() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir $DIR2/$tdir pdo_lru_clear #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - touch $DIR1/$tfile & + touch $DIR1/$tdir/$tfile & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 
$DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1619,33 +1637,35 @@ test_40b() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40b "pdirops: open|create and others ==============" test_40c() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - link $DIR1/$tfile $DIR1/$tfile-0 & + link $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 
+ rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1653,33 +1673,35 @@ test_40c() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40c "pdirops: link and others ==============" test_40d() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - rm $DIR1/$tfile & + rm $DIR1/$tdir/$tfile & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1693,24 +1715,26 @@ run_test 40d "pdirops: unlink and others ==============" test_40e() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes 
$(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - mv $DIR1/$tfile $DIR1/$tfile-0 & + mv $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-2 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-2 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1718,7 +1742,7 @@ test_40e() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40e "pdirops: rename and others ==============" @@ -1870,6 +1894,57 @@ test_41h() { } run_test 41h "pdirops: create vs readdir ==============" +sub_test_41i() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + $MULTIOP $DIR1/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID1=$! + sleep 0.2 + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID2=$! + + if ! wait $PID1 && ! 
wait $PID2; then + echo "Both creates failed (1 should fail, 1 should succeed)" + ret=1 + elif wait $PID1 && wait $PID2; then + echo "Both creates succeeded (1 should fail, 1 should succeed)" + ret=2 + fi + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -f $DIR/$tfile + + return $ret +} + +test_41i() { + [[ $MDS1_VERSION -le $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_41i "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 41i "reint_open: create vs create" + + # test 42: unlink and blocking operations test_42a() { pdo_lru_clear @@ -1982,20 +2057,20 @@ test_42f() { run_test 42f "pdirops: mkdir and rename (src) ==============" test_42g() { + mkdir_on_mdt0 $DIR1/$tdir pdo_lru_clear #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - mkdir $DIR1/$tfile & + mkdir $DIR1/$tdir/$tfile & PID1=$! ; pdo_sched - stat $DIR2/$tfile > /dev/null & + stat $DIR2/$tdir/$tfile > /dev/null & PID2=$! ; pdo_sched do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } wait $PID2 ; [ $? 
-eq 0 ] || error "stat must succeed" - rm -rf $DIR/$tfile* - return 0 + rm -rf $DIR/$tdir } run_test 42g "pdirops: mkdir vs getattr ==============" @@ -2187,24 +2262,81 @@ test_43j() { [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] && skip "Need MDS version newer than 2.13.52" + mkdir $DIR1/$tdir for i in {1..100}; do #define OBD_FAIL_ONCE|OBD_FAIL_MDS_CREATE_RACE 0x167 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000167 2>/dev/null || true" OK=0 - mkdir $DIR1/$tdir & + mkdir $DIR1/$tdir/sub & PID1=$! - mkdir $DIR2/$tdir && ((OK++)) + mkdir $DIR2/$tdir/sub && ((OK++)) wait $PID1 && ((OK++)) (( OK == 1 )) || error "exactly one mkdir should succeed" - rmdir $DIR1/$tdir || error "rmdir failed" + rmdir $DIR1/$tdir/sub || error "rmdir failed" done return 0 } run_test 43j "racy mkdir return EEXIST ==============" +sub_test_43k() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a separate directory to be able to unblock server thread in + # cfs_race() if LCK_PW is taken on the parent by mdt_reint_unlink. + test_mkdir $DIR2/$tdir + touch $DIR2/$tdir/$tfile + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + echo content > $DIR1/$tdir/$tfile & PID1=$! + pdo_sched + multiop $DIR2/$tdir/$tfile u & PID2=$! 
+ + wait $PID1 || + { ret=$?; \ + echo -n "overwriting $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "unlinking $tfile should succeed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_43k() { + [[ $MDS1_VERSION -le $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_43k "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done + + #Clean + reset_fail_loc + + return 0 +} +run_test 43k "unlink vs create" + # test 44: rename tgt and blocking operations test_44a() { pdo_lru_clear @@ -2552,6 +2684,60 @@ test_45i() { } run_test 45i "pdirops: rename src vs remote mkdir" +sub_test_45j() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a separate directory to be able to unblock server thread in + # cfs_race if LCK_PW is taken on the parent by mdt_reint_rename. + test_mkdir $DIR2/$tdir + echo file1 > $DIR2/$tdir/$tfile + echo file2 > $DIR2/$tdir/$tfile-2 + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + cat $DIR1/$tdir/$tfile >/dev/null & + PID1=$! + pdo_sched + mrename $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile > /dev/null & + PID2=$!
+ + wait $PID1 || + { ret=$?; echo -n "cat $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "mrename $tfile-2 to $tfile failed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_45j() { + [[ $MDS1_VERSION -le $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_45j "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 45j "read vs rename ==============" + # test 46: link and blocking operations test_46a() { pdo_lru_clear @@ -3012,6 +3198,9 @@ test_51d() { run_test 51d "layout lock: losing layout lock should clean up memory map region" test_51e() { + (( $MDS1_VERSION >= $(version_code 2.13.54.148) )) || + skip "MDS version must be at least 2.13.54.148" + local pid $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eW_E+eUc & @@ -3101,56 +3290,59 @@ test_54() { run_test 54 "rename locking" test_55a() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! 
sleep 1 - rm -r $DIR2/d3 + rm -r $DIR2/$tdir/d3 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55a "rename vs unlink target dir" test_55b() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 - rm -r $DIR2/d1 + rm -r $DIR2/$tdir/d1 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d3 + rm -rf $DIR/$tdir } run_test 55b "rename vs unlink source dir" test_55c() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 # while rename is sleeping, open and remove d3 - $MULTIOP $DIR2/d3 D_c & + $MULTIOP $DIR2/$tdir/d3 D_c & PID2=$! sleep 1 - rm -rf $DIR2/d3 + rm -rf $DIR2/$tdir/d3 sleep 5 # while rename is sleeping 2nd time, close d3 @@ -3159,28 +3351,30 @@ test_55c() wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55c "rename vs unlink orphan target dir" test_55d() { - touch $DIR/f1 + mkdir_on_mdt0 $DIR/$tdir + + touch $DIR/$tdir/f1 #define OBD_FAIL_MDS_RENAME3 0x155 do_facet mds1 $LCTL set_param fail_loc=0x155 - mv $DIR/f1 $DIR/$tdir & + mv $DIR/$tdir/f1 $DIR/$tdir/$tdir & PID1=$! 
sleep 2 # while rename is sleeping, create $tdir, but as a directory - mkdir -p $DIR2/$tdir || error "(1) mkdir failed" + mkdir -p $DIR2/$tdir/$tdir || error "(1) mkdir failed" # link in reverse locking order - ln $DIR2/f1 $DIR2/$tdir/ + ln $DIR2/$tdir/f1 $DIR2/$tdir/$tdir/ wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/f1 + rm -rf $DIR/$tdir } run_test 55d "rename file vs link" @@ -5023,8 +5217,28 @@ test_102() { echo "Test file_handle syscalls" > $DIR/$tfile || error "write failed" check_fhandle_syscalls $DIR/$tfile $DIR2 || - error "check_fhandle_syscalls failed" - rm -f $DIR2/$tfile + error "check_fhandle_syscalls $tfile failed" + + # test this is working on DNE directories also + if (( MDSCOUNT > 1 MDS1_VERSION >= $(version_code 2.14.52) )); then + $LFS mkdir -i 1 $DIR/$tdir.remote + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote $DIR2 || + error "check_fhandle_syscalls $tdir.remote failed" + $LFS mkdir -c -1 $DIR/$tdir.remote/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.remote/subdir fail" + + $LFS mkdir -c -1 $DIR/$tdir.stripe + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe $DIR2 || + error "check_fhandle_syscalls $tdir.stripe failed" + $LFS mkdir -c -1 $DIR/$tdir.stripe/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.stripe/subdir fail" + fi } run_test 102 "Test open by handle of unlinked file" @@ -5394,6 +5608,87 @@ test_108a() { } run_test 108a "lseek: parallel updates" +# LU-14110 +test_109() { + local i + local pid1 pid2 + + ! 
local_mode || + skip "Clients need to be on different nodes than the servers" + + umount_client $MOUNT + umount_client $MOUNT2 + + echo "Starting race between client mount instances (50 iterations):" + for i in {1..50}; do + log "Iteration $i" + +#define OBD_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT 0x80001417 + $LCTL set_param -n fail_loc=0x80001417 + + mount_client $MOUNT & pid1=$! + mount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Mount $MOUNT fails with $?" + wait $pid2 || error "Mount $MOUNT2 fails with $?" + + umount_client $MOUNT & pid1=$! + umount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Umount $MOUNT fails with $?" + wait $pid2 || error "Umount $MOUNT2 fails with $?" + + $LUSTRE_RMMOD || error "Fail to remove lustre modules" + load_modules + echo + done + + mount_client $MOUNT + mount_client $MOUNT2 +} + +run_test 109 "Race with several mount instances on 1 node" + +test_110() { + local before=$(date +%s) + local evict + + mkdir -p $DIR/$tdir + touch $DIR/$tdir/f1 + touch $DIR/$tfile + + #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525 + do_facet mds1 lctl set_param fail_loc=0x525 fail_val=3 + + # disable last_xid logic by dropping link reply + ln $DIR/$tdir/f1 $DIR/$tdir/f2 & + sleep 1 + + #define OBD_FAIL_PTLRPC_ENQ_RESEND 0x534 + do_facet mds1 lctl set_param fail_loc=0x534 + + # RPC will race with its Resend and the Resend will sleep to let + # the original lock to get granted & cancelled. 
+ # + # AST_SENT is set artificially, so an explicit conflict is not needed + # + # The woken up Resend gets a new lock, but client does not wait for it + stat $DIR/$tfile + sleep $TIMEOUT + do_facet mds1 lctl set_param fail_loc=0 fail_val=0 + + # Take a conflict to wait long enough to see the eviction + touch $DIR2/$tfile + + # let the client reconnect + client_reconnect + evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + + [ -z "$evict" ] || [[ $evict -le $before ]] || + (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state; + error "eviction happened: $evict before:$before") +} +run_test 110 "do not grant another lock on resend" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script