X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Ftests%2Fsanityn.sh;h=c1ead99bcdcc975373f7993bb5f7df40e1befd91;hb=56f69854dae234fa974ab4f1dff909aea601a592;hp=1aa42c7a73d3c028a289401ba78e2177e60c26dd;hpb=2acb48e6fd13b2b60bfb7cbad1fbb91c13497aa1;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 1aa42c7..c1ead99 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -283,7 +283,7 @@ test_11() { run_test 11 "execution of file opened for write should return error ====" test_12() { - DIR=$DIR DIR2=$DIR2 sh lockorder.sh + DIR=$DIR DIR2=$DIR2 bash lockorder.sh } run_test 12 "test lock ordering (link, stat, unlink)" @@ -361,7 +361,7 @@ run_test 14d "chmod of executing file is still possible ========" test_15() { # bug 974 - ENOSPC echo "PATH=$PATH" - sh oos2.sh $MOUNT1 $MOUNT2 + bash oos2.sh $MOUNT1 $MOUNT2 wait_delete_completed grant_error=$(dmesg | grep "< tot_grant") [ -z "$grant_error" ] || error "$grant_error" @@ -389,6 +389,8 @@ test_16a() { local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -396,12 +398,12 @@ test_16a() { rm -f $file1 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 \ - || error "fsx failed" + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + error "fsx failed" rm -f $file1 # O_DIRECT reads and writes must be aligned to the device block size. - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed." 
} run_test 16a "$FSXNUM iterations of dual-mount fsx" @@ -412,6 +414,8 @@ test_16b() { local file2=$DIR2/$tfile local stripe_size=($($LFS getstripe -S $DIR)) + check_set_fallocate + # to allocate grant because it may run out due to test_15. lfs setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync || @@ -423,8 +427,8 @@ test_16b() { lfs setstripe -c -1 $file1 # b=10919 # -o is set to 8192 because writes < 1 page and between 1 and 2 pages # create a mix of tiny writes & normal writes - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 $file1 \ - $file2 || error "fsx with tiny write failed." + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ + $file1 $file2 || error "fsx with tiny write failed." } run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" @@ -436,6 +440,8 @@ test_16c() { [ "$ost1_FSTYPE" != ldiskfs ] && skip "dio on ldiskfs only" + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -452,8 +458,8 @@ test_16c() { set_osd_param $list '' writethrough_cache_enable 0 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 \ - || error "fsx failed" + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + error "fsx failed" rm -f $file1 set_osd_param $list '' read_cache_enable 1 @@ -467,16 +473,17 @@ test_16d() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile local file3=$DIR1/file + local tmpfile=$(mktemp) local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") # to allocate grant because it may run out due to test_15. 
$LFS setstripe -c -1 $file1 + stack_trap "rm -f $file1 $file2 $file3 $tmpfile" dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync rm -f $file1 - local tmpfile=`mktemp` $LFS setstripe -c -1 $file1 # b=10919 $LCTL set_param ldlm.namespaces.*.lru_size=clear @@ -497,13 +504,15 @@ test_16d() { # buffer read from another client dd if=$file2 of=$file3 bs=1M count=100 diff $file3 $tmpfile || error "file different(3)" - - rm -f $file1 $file2 $file3 $tmpfile - } run_test 16d "Verify DIO and buffer IO with two clients" test_16e() { # LU-13227 + # issue: LU-14314 + + (( "$MDS1_VERSION" >= $(version_code 2.13.53) )) || + skip "Need MDS version at least 2.13.53" + local file1=$DIR1/$tfile local file2=$DIR2/$tfile @@ -550,22 +559,27 @@ test_18() { excepts="$excepts -e $(($(printf %d \'$idx)-96))" done - $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 $excepts + excepts="$excepts -e 7 -e 8 -e 9" + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 $excepts || + error "mmap_sanity test failed" sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" test_19() { # bug3811 local node=$(facet_active_host ost1) + local device="$FSNAME-OST*" - [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) + [ "x$DOM" = "xyes" ] && node=$(facet_active_host $SINGLEMDS) && + device="$FSNAME-MDT*" # check whether obdfilter is cache capable at all - get_osd_param $node '' read_cache_enable >/dev/null || + get_osd_param $node $device read_cache_enable >/dev/null || skip "not cache-capable obdfilter" - local MAX=$(get_osd_param $node '' readcache_max_filesize | head -n 1) - set_osd_param $node '' readcache_max_filesize 4096 + local max=$(get_osd_param $node $device readcache_max_filesize |\ + head -n 1) + set_osd_param $node $device readcache_max_filesize 4096 dd if=/dev/urandom of=$TMP/$tfile bs=512k count=32 local SUM=$(cksum $TMP/$tfile | cut -d" " -f 1,2) cp $TMP/$tfile 
$DIR1/$tfile @@ -580,22 +594,22 @@ test_19() { # bug3811 [ "$(cat $TMP/sum2)" = "$SUM" ] || \ error "$DIR2/$tfile $(cat $TMP/sum2) != $SUM" done - set_osd_param $node '' readcache_max_filesize $MAX + set_osd_param $node $device readcache_max_filesize $max rm $DIR1/$tfile } run_test 19 "test concurrent uncached read races ===============" test_20() { - test_mkdir $DIR1/d20 - cancel_lru_locks $OSC - CNT=$((`lctl get_param -n llite.*.dump_page_cache | wc -l`)) - $MULTIOP $DIR1/f20 Ow8190c - $MULTIOP $DIR2/f20 Oz8194w8190c - $MULTIOP $DIR1/f20 Oz0r8190c - cancel_lru_locks $OSC - CNTD=$((`lctl get_param -n llite.*.dump_page_cache | wc -l` - $CNT)) - [ $CNTD -gt 0 ] && \ - error $CNTD" page left in cache after lock cancel" || true + test_mkdir $DIR1/$tdir + cancel_lru_locks + CNT=$($LCTL get_param -n llite.*.dump_page_cache | wc -l) + $MULTIOP $DIR1/$tdir/$tfile Ow8190c + $MULTIOP $DIR2/$tdir/$tfile Oz8194w8190c + $MULTIOP $DIR1/$tdir/$tfile Oz0r8190c + cancel_lru_locks + CNT2=$($LCTL get_param -n llite.*.dump_page_cache | wc -l) + [[ $CNT2 == $CNT ]] || + error $((CNT2 - CNT))" page left in cache after lock cancel" } run_test 20 "test extra readahead page left in cache ====" @@ -751,7 +765,7 @@ test_27() { lctl clear dd if=/dev/zero of=$DIR2/$tfile bs=$((4096+4))k conv=notrunc count=4 seek=3 & DD2_PID=$! - usleep 50 + sleep 0.5 log "dd 1 started" dd if=/dev/zero of=$DIR1/$tfile bs=$((16384-1024))k conv=notrunc count=1 seek=4 & @@ -808,7 +822,7 @@ run_test 28 "read/write/truncate file with lost stripes" test_30() { #b=11110, LU-2523 test_mkdir $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash - /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & + bash -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & $DIR1/$tdir/bash -c 'sleep 2; openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?' 
wait @@ -852,49 +866,29 @@ test_31b() { } run_test 31b "voluntary OST cancel / blocking ast race==============" -# enable/disable lockless truncate feature, depending on the arg 0/1 -enable_lockless_truncate() { - lctl set_param -n $OSC.*.lockless_truncate $1 -} - -test_32a() { # bug 11270 - local save="$TMP/$TESTSUITE-$TESTNAME.parameters" - local stripe_size=$(do_facet $SINGLEMDS \ - "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") +#LU-14949 - multi-client version of the test 31r in sanity. +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source - save_lustre_params client "$OSC.*.lockless_truncate" > $save - # restore lockless_truncate default values on exit - stack_trap "restore_lustre_params < $save; rm -f $save" EXIT - cancel_lru_locks $OSC - enable_lockless_truncate 1 - rm -f $DIR1/$tfile - lfs setstripe -c -1 $DIR1/$tfile - dd if=/dev/zero of=$DIR1/$tfile count=$OSTCOUNT bs=$stripe_size > \ - /dev/null 2>&1 - clear_stats $OSC.*.${OSC}_stats + ls -l $DIR/$tfile.target # cache it for sure - log "checking cached lockless truncate" - $TRUNCATE $DIR1/$tfile 8000000 - $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || - error "cached truncate isn't lockless" + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! 
- log "checking not cached lockless truncate" - $TRUNCATE $DIR2/$tfile 5000000 - $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -ne 0 ] || - error "not cached truncate isn't lockless" + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR2/$tfile.source $DIR2/$tfile.target - log "disabled lockless truncate" - enable_lockless_truncate 0 - clear_stats $OSC.*.${OSC}_stats - $TRUNCATE $DIR2/$tfile 3000000 - $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" - [ $(calc_stats $OSC.*.${OSC}_stats lockless_truncate) -eq 0 ] || - error "lockless truncate disabling failed" - rm -f $DIR1/$tfile + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi } -run_test 32a "lockless truncate" +run_test 31r "open-rename(replace) race" test_32b() { # bug 11270 remote_ost_nodsh && skip "remote OST with nodsh" && return @@ -947,25 +941,27 @@ test_32b() { # bug 11270 restore_lustre_params <$p rm -f $p } -run_test 32b "lockless i/o" +# Disable test 32b prior to full removal +#run_test 32b "lockless i/o" print_jbd_stat () { - local dev - local mdts=$(get_facets MDS) - local varcvs - local mds - - local stat=0 - for mds in ${mdts//,/ }; do - varsvc=${mds}_svc - dev=$(basename $(do_facet $mds "lctl get_param -n osd*.${!varsvc}.mntdev|\ - xargs readlink -f" )) - val=$(do_facet $mds "cat /proc/fs/jbd*/${dev}{,:*,-*}/info 2>/dev/null | - head -n1") - val=${val%% *}; - stat=$(( stat + val)) - done - echo $stat + local mdts=$(get_facets MDS) + local stat=0 + local varsvc + local dev + local mds + + for mds in ${mdts//,/ }; do + varsvc=${mds}_svc + + dev=$(basename $(do_facet $mds "lctl get_param -n \ + osd*.${!varsvc}.mntdev | xargs readlink -f")) + val=$(do_facet $mds "cat /proc/fs/jbd*/${dev}{,:*,-*}/info \ + 2>/dev/null | head -n1") + val=${val%% *}; + stat=$((stat + val)) + done + echo $stat } # commit on sharing tests @@ -1084,9 +1080,13 @@ test_33c() { [ 
"$MDS1_VERSION" -lt $(version_code 2.7.63) ] && skip "DNE CoS not supported" + # LU-13522 + stop mds1 + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "start mds1 failed" + local sync_count - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir sync_all_data do_facet mds1 "lctl set_param -n mdt.*.sync_count=0" # do twice in case transaction is committed before unlock, see LU-8200 @@ -1123,9 +1123,9 @@ op_trigger_cos() { # trigger CoS twice in case transaction commit before unlock for i in 1 2; do - sh -c "$1" + bash -c "$1" do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0" - sh -c "$2" + bash -c "$2" commit_nr=$(do_nodes $nodes \ "lctl get_param -n mdt.*.async_commit_count" | calc_sum) total=$((total + commit_nr)); @@ -1143,7 +1143,7 @@ test_33d() { skip "DNE CoS not supported" # remote directory create - op_trigger_cos "mkdir $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir" + op_trigger_cos "$LFS mkdir -i 0 $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir" # remote directory unlink op_trigger_cos "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR/$tdir" # striped directory create @@ -1554,25 +1554,26 @@ check_pdo_conflict() { # test 40: check non-blocking operations test_40a() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir_on_mdt0 $DIR2/$tdir pdo_lru_clear #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - touch $DIR2 - mkdir $DIR1/$tfile & + mkdir $DIR1/$tdir/$tfile & PID1=$!; pdo_sched - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error 
"rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1580,32 +1581,34 @@ test_40a() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40a "pdirops: create vs others ==============" test_40b() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir_on_mdt0 $DIR2/$tdir pdo_lru_clear #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - touch $DIR1/$tfile & + touch $DIR1/$tdir/$tfile & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1613,33 +1616,35 @@ test_40b() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl 
set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40b "pdirops: open|create and others ==============" test_40c() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir_on_mdt0 $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - link $DIR1/$tfile $DIR1/$tfile-0 & + link $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1647,33 +1652,35 @@ test_40c() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40c "pdirops: link and others ==============" test_40d() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir_on_mdt0 $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl 
set_param -n fail_loc=0x80000145 2>/dev/null || true" - rm $DIR1/$tfile & + rm $DIR1/$tdir/$tfile & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - mv $DIR2/$tfile-2 $DIR2/$tfile-5 + mv $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-5 check_pdo_conflict $PID1 || error "rename is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-5 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-5 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1687,24 +1694,26 @@ run_test 40d "pdirops: unlink and others ==============" test_40e() { remote_mds_nodsh && skip "remote MDS with nodsh" && return + + mkdir_on_mdt0 $DIR2/$tdir pdo_lru_clear - touch $DIR1/$tfile + touch $DIR1/$tdir/$tfile #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - mv $DIR1/$tfile $DIR1/$tfile-0 & + mv $DIR1/$tdir/$tfile $DIR1/$tdir/$tfile-0 & PID1=$!; pdo_sched # open|create - touch $DIR2/$tfile-2 + touch $DIR2/$tdir/$tfile-2 check_pdo_conflict $PID1 || error "create is blocked" - mkdir $DIR2/$tfile-3 + mkdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "mkdir is blocked" - link $DIR2/$tfile-2 $DIR2/$tfile-4 + link $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile-4 check_pdo_conflict $PID1 || error "link is blocked" - stat $DIR2/$tfile-3 $DIR2/$tfile-4 > /dev/null + stat $DIR2/$tdir/$tfile-3 $DIR2/$tdir/$tfile-4 > /dev/null 
check_pdo_conflict $PID1 || error "getattr is blocked" - rm $DIR2/$tfile-4 $DIR2/$tfile-2 - rmdir $DIR2/$tfile-3 + rm $DIR2/$tdir/$tfile-4 $DIR2/$tdir/$tfile-2 + rmdir $DIR2/$tdir/$tfile-3 check_pdo_conflict $PID1 || error "unlink is blocked" # all operations above shouldn't wait the first one @@ -1712,7 +1721,7 @@ test_40e() { do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" wait $PID1 - rm -rf $DIR/$tfile* + rm -rf $DIR/$tdir return 0 } run_test 40e "pdirops: rename and others ==============" @@ -1864,6 +1873,57 @@ test_41h() { } run_test 41h "pdirops: create vs readdir ==============" +sub_test_41i() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + $MULTIOP $DIR1/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID1=$! + sleep 0.2 + $MULTIOP $DIR2/$tfile oO_CREAT:O_EXCL:c 2>/dev/null & + PID2=$! + + if ! wait $PID1 && ! wait $PID2; then + echo "Both creates failed (1 should fail, 1 should succeed)" + ret=1 + elif wait $PID1 && wait $PID2; then + echo "Both creates succeeded (1 should fail, 1 should succeed)" + ret=2 + fi + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -f $DIR/$tfile + + return $ret +} + +test_41i() { + [[ $MDS1_VERSION -ge $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_41i "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 41i "reint_open: create vs create" + + # test 42: unlink and blocking operations test_42a() { pdo_lru_clear @@ -1976,20 +2036,20 @@ test_42f() { run_test 
42f "pdirops: mkdir and rename (src) ==============" test_42g() { + mkdir_on_mdt0 $DIR1/$tdir pdo_lru_clear #define OBD_FAIL_ONCE|OBD_FAIL_MDS_PDO_LOCK 0x145 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000145 2>/dev/null || true" - mkdir $DIR1/$tfile & + mkdir $DIR1/$tdir/$tfile & PID1=$! ; pdo_sched - stat $DIR2/$tfile > /dev/null & + stat $DIR2/$tdir/$tfile > /dev/null & PID2=$! ; pdo_sched do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0 2>/dev/null || true" check_pdo_conflict $PID1 && { wait $PID1; error "getattr isn't blocked"; } wait $PID2 ; [ $? -eq 0 ] || error "stat must succeed" - rm -rf $DIR/$tfile* - return 0 + rm -rf $DIR/$tdir } run_test 42g "pdirops: mkdir vs getattr ==============" @@ -2181,24 +2241,81 @@ test_43j() { [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] && skip "Need MDS version newer than 2.13.52" + mkdir_on_mdt0 $DIR1/$tdir for i in {1..100}; do #define OBD_FAIL_ONCE|OBD_FAIL_MDS_CREATE_RACE 0x167 do_nodes $(comma_list $(mdts_nodes)) \ "lctl set_param -n fail_loc=0x80000167 2>/dev/null || true" OK=0 - mkdir $DIR1/$tdir & + mkdir $DIR1/$tdir/sub & PID1=$! - mkdir $DIR2/$tdir && ((OK++)) + mkdir $DIR2/$tdir/sub && ((OK++)) wait $PID1 && ((OK++)) (( OK == 1 )) || error "exactly one mkdir should succeed" - rmdir $DIR1/$tdir || error "rmdir failed" + rmdir $DIR1/$tdir/sub || error "rmdir failed" done return 0 } run_test 43j "racy mkdir return EEXIST ==============" +sub_test_43k() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a separate directory to be able to unblock server thread in + # cfs_race() if LCK_PW is taken on the parent by mdt_reint_unlink. + test_mkdir $DIR2/$tdir + touch $DIR2/$tdir/$tfile + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + echo content > $DIR1/$tdir/$tfile & PID1=$! + pdo_sched + multiop $DIR2/$tdir/$tfile u & PID2=$! 
+ + wait $PID1 || + { ret=$?; \ + echo -n "overwriting $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "unlinking $tfile should succeed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_43k() { + [[ $MDS1_VERSION -ge $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_43k "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done + + #Clean + reset_fail_loc + + return 0 +} +run_test 43k "unlink vs create" + # test 44: rename tgt and blocking operations test_44a() { pdo_lru_clear @@ -2546,6 +2663,60 @@ test_45i() { } run_test 45i "pdirops: rename src vs remote mkdir" +sub_test_45j() { + local PID1 PID2 + local fail_loc="$1" + local ret=0 + + # We test in a separate directory to be able to unblock server thread in + # cfs_race if LCK_PW is taken on the parent by mdt_reint_rename. + test_mkdir $DIR2/$tdir + echo file1 > $DIR2/$tdir/$tfile + echo file2 > $DIR2/$tdir/$tfile-2 + + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=${fail_loc} || true" &>/dev/null + + cat $DIR1/$tdir/$tfile >/dev/null & + PID1=$! + pdo_sched + mrename $DIR2/$tdir/$tfile-2 $DIR2/$tdir/$tfile > /dev/null & + PID2=$! 
+ + wait $PID1 || + { ret=$?; echo -n "cat $tfile should succeed (err=$ret); "; } + wait $PID2 || + { ret=$?; \ + echo -n "mrename $tfile-2 to $tfile failed (err=$ret);"; } + + #Clean + do_nodes $(comma_list $(mdts_nodes)) \ + "lctl set_param -n fail_loc=0x0 || true" &>/dev/null + rm -rf $DIR/$tdir + + return $ret +} + +test_45j() { + [[ $MDS1_VERSION -ge $(version_code 2.13.56) ]] || + skip "Need MDS version newer than 2.13.56" + local msg fail_loc + +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN 0x169 +#define OBD_FAIL_ONCE|OBD_FAIL_MDS_REINT_OPEN2 0x16a + for fail_loc in "0x80000169" "0x8000016a"; do + echo "Begin 100 tests with fail_loc=$fail_loc" + printf "Progress: " + for i in {1..100}; do + printf "*" + msg=$(sub_test_45j "$fail_loc") || + { echo; error "iter=$i : $msg"; } + done + echo + done +} +run_test 45j "read vs rename ==============" + # test 46: link and blocking operations test_46a() { pdo_lru_clear @@ -2913,29 +3084,32 @@ test_51a() { run_test 51a "layout lock: refresh layout should work" test_51b() { - [[ "$MDS1_VERSION" -ge $(version_code 2.3.59) ]] || + (( $MDS1_VERSION >= $(version_code 2.3.59) )) || skip "Need MDS version at least 2.3.59" local tmpfile=`mktemp` - # create an empty file - $MCREATE $DIR1/$tfile || error "mcreate $DIR1/$tfile failed" + $LFS setstripe -E 1m -S 1M -c 1 -E -1 -c 1 $DIR1/$tfile || + error "Create $DIR1/$tfile failed" + + dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc || + error "dd $DIR1/$tfile failed" # delay glimpse so that layout has changed when glimpse finish #define OBD_FAIL_GLIMPSE_DELAY 0x1404 - $LCTL set_param fail_loc=0x1404 + $LCTL set_param fail_loc=0x1404 fail_val=4 stat -c %s $DIR2/$tfile |tee $tmpfile & local pid=$! 
- sleep 1 + sleep 0.2 - # create layout of testing file - dd if=/dev/zero of=$DIR1/$tfile bs=1k count=1 conv=notrunc >/dev/null || + # extend layout of testing file + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=1 seek=2 conv=notrunc || error "dd $DIR1/$tfile failed" wait $pid local fsize=$(cat $tmpfile) - [ x$fsize = x1024 ] || error "file size is $fsize, should be 1024" + [ x$fsize = x3145728 ] || error "file size is $fsize, should be 3145728" rm -f $DIR1/$tfile $tmpfile } @@ -3002,6 +3176,30 @@ test_51d() { } run_test 51d "layout lock: losing layout lock should clean up memory map region" +test_51e() { + (( $MDS1_VERSION >= $(version_code 2.13.54.148) )) || + skip "MDS version must be at least 2.13.54.148" + + local pid + + $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eW_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR2/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" + + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR2/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" +} +run_test 51e "lfs getstripe does not break leases, part 2" + test_54_part1() { echo "==> rename vs getattr vs setxattr should not deadlock" @@ -3071,56 +3269,59 @@ test_54() { run_test 54 "rename locking" test_55a() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! 
sleep 1 - rm -r $DIR2/d3 + rm -r $DIR2/$tdir/d3 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55a "rename vs unlink target dir" test_55b() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x80000156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 - rm -r $DIR2/d1 + rm -r $DIR2/$tdir/d1 wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d3 + rm -rf $DIR/$tdir } run_test 55b "rename vs unlink source dir" test_55c() { - mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed" + mkdir_on_mdt0 $DIR/$tdir + mkdir -p $DIR/$tdir/d1/d2 $DIR/$tdir/d3 || error "(1) mkdir failed" #define OBD_FAIL_MDS_RENAME4 0x156 do_facet mds1 $LCTL set_param fail_loc=0x156 - mv -T $DIR/d1/d2 $DIR/d3/d2 & + mv -T $DIR/$tdir/d1/d2 $DIR/$tdir/d3/d2 & PID1=$! sleep 1 # while rename is sleeping, open and remove d3 - $MULTIOP $DIR2/d3 D_c & + $MULTIOP $DIR2/$tdir/d3 D_c & PID2=$! sleep 1 - rm -rf $DIR2/d3 + rm -rf $DIR2/$tdir/d3 sleep 5 # while rename is sleeping 2nd time, close d3 @@ -3129,28 +3330,30 @@ test_55c() wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/d1 + rm -rf $DIR/$tdir } run_test 55c "rename vs unlink orphan target dir" test_55d() { - touch $DIR/f1 + mkdir_on_mdt0 $DIR/$tdir + + touch $DIR/$tdir/f1 #define OBD_FAIL_MDS_RENAME3 0x155 do_facet mds1 $LCTL set_param fail_loc=0x155 - mv $DIR/f1 $DIR/$tdir & + mv $DIR/$tdir/f1 $DIR/$tdir/$tdir & PID1=$! 
sleep 2 # while rename is sleeping, create $tdir, but as a directory - mkdir -p $DIR2/$tdir || error "(1) mkdir failed" + mkdir -p $DIR2/$tdir/$tdir || error "(1) mkdir failed" # link in reverse locking order - ln $DIR2/f1 $DIR2/$tdir/ + ln $DIR2/$tdir/f1 $DIR2/$tdir/$tdir/ wait $PID1 && error "(2) mv succeeded" - rm -rf $DIR/f1 + rm -rf $DIR/$tdir } run_test 55d "rename file vs link" @@ -4214,6 +4417,77 @@ test_77n() { #LU-10802 } run_test 77n "check wildcard support for TBF JobID NRS policy" +test_77o() { + (( $OST1_VERSION > $(version_code 2.14.54) )) || + skip "need OST > 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf\ nid" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ name\ nid={192.168.*.*@tcp}\ rate=10000" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ name1\ nid={192.168.*.*@tcp}\ rate=10000" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="change\ name1\ rank=name" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="stop\ name" + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="fifo" +} +run_test 77o "Changing rank should not panic" + +test_77q() { + local i + + (( $MDS1_VERSION > $(version_code 2.14.54) )) || + skip "need MDS >= 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf" + stack_trap "do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies=fifo" + + for i in {1..50}; do + local pid1 pid2 + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start rule77q_1 uid={500}&gid={500} rate=100'" & + pid1=$! + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start rule77q_2 uid={1000}&gid={1000} rate=100'" & + pid2=$! + wait $pid1 || error "$i: Fail to start TBF rule 'rule77q_1'" + wait $pid2 || error "$i: Fail to start TBF rule 'rule77q_2'" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'stop rule77q_1'" & + pid1=$! + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'stop rule77q_2'" & + pid2=$! 
+ wait $pid1 || error "$i: Fail to stop TBF rule 'rule77q_1'" + wait $pid2 || error "$i: Fail to stop TBF rule 'rule77q_2'" + done +} +run_test 77q "Parallel TBF rule definitions should not panic" + +test_77p() { + local c + local -a spec_chars=( + '@' '.' '~' '#' '/' '^' '%' '*' ';' ',' '?' '<' '>' ':' + '+' '=' ')' '(' '{' '}' '|' '[' ']' '!' '&' '\$' '\`' '\\') + + (( $MDS1_VERSION > $(version_code 2.14.54) )) || + skip "need MDS >= 2.14.54" + + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies="tbf" + stack_trap "do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_policies=fifo" + + # TBF rule name size is 16 bytes + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ test_77p_overflo\ uid={500}\ rate=500" && + error "The length of tbf rule name is not checked" || true + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ \ uid={500}\ rate=500" && + error "The server should not accept empty tbf rule name" || true + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="start\ test_77p_empty" && + error "The server should not accept 'start ' without an expression" || true + + # Test with special chars + for c in "${spec_chars[@]}"; do + do_facet mds1 $LCTL set_param mds.MDS.mdt.nrs_tbf_rule="'start test77p${c}spec uid={500} rate=500'" && + error "Special char '${c}' should not be accepted in a tbf rule name" || true + done + +} +run_test 77p "Check validity of rule names for TBF policies" + test_78() { #LU-6673 local rc @@ -4272,6 +4546,7 @@ test_80a() { local file local pid + mkdir_on_mdt0 $DIR1/$tdir mkdir -p $DIR1/$tdir/dir createmany -o $DIR1/$tdir/dir/f 10 || error "create files under remote dir failed $i" @@ -4485,6 +4760,52 @@ test_81b() { } run_test 81b "rename under striped directory doesn't deadlock" +test_81c() { + [ $MDSCOUNT -lt 4 ] && skip_env "needs >= 4 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.13.52) ] && + skip "Need MDS version at least 2.13.52" + + # source is local, source parent is remote + $LFS mkdir -i 0 
$DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 3 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source is remote, source parent is local + $LFS mkdir -i 3 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 0 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source and source parent are remote + $LFS mkdir -i 0 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + mkdir $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub should be gone" + rm -rf $DIR1/${tdir}_src $DIR1/${tdir}_tgt + + # source and source parent are remote, and source is remote object + $LFS mkdir -i 0 $DIR1/${tdir}_src || error "mkdir ${tdir}_src" + $LFS mkdir -i 1 $DIR1/${tdir}_tgt || error "mkdir ${tdir}_tgt" + $LFS mkdir -i 2 $DIR1/${tdir}_src/sub || error "mkdir sub" + $LFS mkdir -i 3 $DIR1/${tdir}_tgt/sub || error "mkdir sub" + stat $DIR2/${tdir}_src/sub || error "stat sub failed" + mv $DIR1/${tdir}_src/sub $DIR1/${tdir}_tgt/ || error "mv failed" + [ -f $DIR2/${tdir}_src/sub ] && error "sub 
should be gone" || true +} +run_test 81c "rename revoke LOOKUP lock for remote object" + test_82() { [[ "$MDS1_VERSION" -gt $(version_code 2.6.91) ]] || skip "Need MDS version at least 2.6.92" @@ -4947,8 +5268,28 @@ test_102() { echo "Test file_handle syscalls" > $DIR/$tfile || error "write failed" check_fhandle_syscalls $DIR/$tfile $DIR2 || - error "check_fhandle_syscalls failed" - rm -f $DIR2/$tfile + error "check_fhandle_syscalls $tfile failed" + + # test this is working on DNE directories also + if (( MDSCOUNT > 1 MDS1_VERSION >= $(version_code 2.14.52) )); then + $LFS mkdir -i 1 $DIR/$tdir.remote + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote $DIR2 || + error "check_fhandle_syscalls $tdir.remote failed" + $LFS mkdir -c -1 $DIR/$tdir.remote/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.remote/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.remote/subdir fail" + + $LFS mkdir -c -1 $DIR/$tdir.stripe + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe $DIR2 || + error "check_fhandle_syscalls $tdir.stripe failed" + $LFS mkdir -c -1 $DIR/$tdir.stripe/subdir + cancel_lru_locks mdc + check_fhandle_syscalls $DIR/$tdir.stripe/subdir $DIR2 || + error "check_fhandle_syscalls $tdir.stripe/subdir fail" + fi } run_test 102 "Test open by handle of unlinked file" @@ -5033,7 +5374,7 @@ check_mdt_xtimes() local mdt_xtimes=($(get_mdt_xtimes $mdtdev)) echo "STAT a|m|ctime ${xtimes[*]}" - echo "MDT a|m|ctime ${xtimes[*]}" + echo "MDT a|m|ctime ${mdt_xtimes[*]}" [[ ${xtimes[0]} == ${mdt_xtimes[0]} ]] || error "$DIR/$tfile atime (${xtimes[0]}:${mdt_xtimes[0]}) diff" [[ ${xtimes[1]} == ${mdt_xtimes[1]} ]] || @@ -5111,6 +5452,359 @@ test_105() { } run_test 105 "Glimpse and lock cancel race" +test_106a() { + [ "$mds1_FSTYPE" == "ldiskfs" ] && statx_supported || + skip_env "Test only for ldiskfs and statx() supported" + + local btime + local mdt_btime + local output + local mdtdev=$(mdsdevname ${SINGLEMDS//mds/}) + + dd 
if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + btime=$($STATX -c %W $DIR/$tfile) + output=$(do_facet mds1 "$DEBUGFS -c -R 'stat ROOT/$tfile' $mdtdev") + echo $output + ((mdt_btime=$(awk -F ':' /crtime/'{ print $2 }' <<< "$output"))) + [[ $btime == $mdt_btime ]] || + error "$DIR/$tfile btime ($btime:$mdt_btime) diff" + +} +run_test 106a "Verify the btime via statx()" + +test_106b() { + statx_supported || skip_env "statx() only test" + + local rpcs_before + local rpcs_after + + $LFS setstripe -c 1 $DIR/$tfile || error "$DIR/$tfile setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=1k count=1 conv=notrunc + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + # %n: FILENAME; %i: STATX_INO; %A STATX_MODE; %h STATX_NLINK; + # %u: STATX_UID; %g: STATX_GID; %W STATX_BTIME; %X STATX_ATIME; + # %Z: STATX_CTIME + $STATX -c "%n %i %A %h %u %g %W %X %Z" $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX --cached=always $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %Y $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %Y should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats 
ldlm_glimpse_enqueue) + $STATX -c %s $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %s should send 1 glimpse RPC to $OSC" + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + $STATX -c %b $DIR/$tfile + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $((rpcs_before + 1)) ] || + error "$STATX -c %b should send 1 glimpse RPC to $OSC" +} +run_test 106b "Glimpse RPCs test for statx" + +test_106c() { + statx_supported || skip_env "statx() only test" + + local mask + + touch $DIR/$tfile + # Mask supported in stx_attributes by Lustre is + # STATX_ATTR_IMMUTABLE(0x10) | STATX_ATTR_APPEND(0x20) : (0x30). + mask=$($STATX -c %p $DIR/$tfile) + [[ $mask == "30" ]] || + error "supported stx_attributes: got '$mask', expected '30'" + chattr +i $DIR/$tfile || error "chattr +i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "10" ]] || + error "got immutable flags '$mask', expected '10'" + chattr -i $DIR/$tfile || error "chattr -i $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +a $DIR/$tfile || error "chattr +a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "20" ]] || error "got flags '$mask', expected '20'" + chattr -a $DIR/$tfile || error "chattr -a $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" + chattr +ia $DIR/$tfile || error "chattr +ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "30" ]] || error "got flags '$mask', expected '30'" + chattr -ia $DIR/$tfile || error "chattr -ia $DIR/$tfile failed" + mask=$($STATX -c %r $DIR/$tfile) + [[ $mask == "0" ]] || error "got flags '$mask', expected '0'" +} +run_test 106c "Verify statx attributes mask" + +test_107a() { # LU-1031 + dd if=/dev/zero 
of=$DIR1/$tfile bs=1M count=10 + local gid1=14091995 + local gid2=16022000 + + $LFS getstripe $DIR1/$tfile + + multiop_bg_pause $DIR1/$tfile OG${gid1}_g${gid1}c || return 1 + local MULTIPID1=$! + multiop_bg_pause $DIR2/$tfile O_G${gid2}r10g${gid2}c || return 2 + local MULTIPID2=$! + kill -USR1 $MULTIPID2 + sleep 2 + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "First grouplock does not block second one" + else + echo "First grouplock blocks second one" + fi + kill -USR1 $MULTIPID1 + wait $MULTIPID1 + wait $MULTIPID2 +} +run_test 107a "Basic grouplock conflict" + +test_107b() { + dd if=/dev/zero of=$DIR1/$tfile bs=1M count=10 + local gid1=14091995 + local gid2=16022000 + + $LFS getstripe $DIR1/$tfile + + multiop_bg_pause $DIR1/$tfile OG${gid1}_g${gid1}c || return 1 + local MULTIPID1=$! + multiop $DIR2/$tfile Or10c & + local MULTIPID2=$! + sleep 2 + + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "Grouplock does not block IO" + else + echo "Grouplock blocks IO" + fi + + multiop $DIR2/$tfile OG${gid2}_g${gid2}c & + local MULTIPID3=$! 
+ sleep 2 + if [[ $(ps h -o comm -p $MULTIPID3) == "" ]]; then + error "First grouplock does not block second one" + else + echo "First grouplock blocks second one" + fi + + kill -USR1 $MULTIPID1 + sleep 2 + + if [[ $(ps h -o comm -p $MULTIPID3) == "" ]]; then + error "Second grouplock thread disappeared" + fi + + if [[ $(ps h -o comm -p $MULTIPID2) == "" ]]; then + error "Second grouplock does not block IO" + else + echo "Second grouplock blocks IO" + fi + + kill -USR1 $MULTIPID3 + wait $MULTIPID1 + wait $MULTIPID2 + wait $MULTIPID3 +} +run_test 107b "Grouplock is added to the head of waiting list" + +test_108a() { + local offset + + $LFS setstripe -E 1M -c 1 -E -1 $DIR1/$tfile || + error "Create $DIR1/$tfile failed" + + dd if=/dev/zero of=$DIR1/$tfile bs=10000 count=1 || + error "dd $DIR1/$tfile failed" + offset=$(lseek_test -d 5000 $DIR2/$tfile) + [[ $offset == 5000 ]] || error "offset $offset != 5000" + + $TRUNCATE $DIR1/$tfile 2000 + offset=$(lseek_test -l 1000 $DIR2/$tfile) + [[ $offset == 2000 ]] || error "offset $offset != 2000" + + #define OBD_FAIL_OSC_DELAY_IO 0x414 + $LCTL set_param fail_val=4 fail_loc=0x80000414 + dd if=/dev/zero of=$DIR1/$tfile count=1 bs=8M conv=notrunc oflag=dsync & + local pid=$! + sleep 2 + + offset=$(lseek_test -l 8000 $DIR2/$tfile) + wait $pid + [[ $offset == 8388608 ]] || error "offset $offset != 8388608" +} +run_test 108a "lseek: parallel updates" + +# LU-14110 +test_109() { + local i + local pid1 pid2 + + ! local_mode || + skip "Clients need to be on different nodes than the servers" + + umount_client $MOUNT + umount_client $MOUNT2 + + echo "Starting race between client mount instances (50 iterations):" + for i in {1..50}; do + log "Iteration $i" + +#define OBD_FAIL_ONCE|OBD_FAIL_LLITE_RACE_MOUNT 0x80001417 + $LCTL set_param -n fail_loc=0x80001417 + + mount_client $MOUNT & pid1=$! + mount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Mount $MOUNT fails with $?" + wait $pid2 || error "Mount $MOUNT2 fails with $?" 
+ + umount_client $MOUNT & pid1=$! + umount_client $MOUNT2 & pid2=$! + wait $pid1 || error "Umount $MOUNT fails with $?" + wait $pid2 || error "Umount $MOUNT2 fails with $?" + + $LUSTRE_RMMOD || error "Fail to remove lustre modules" + load_modules + echo + done + + mount_client $MOUNT + mount_client $MOUNT2 +} + +run_test 109 "Race with several mount instances on 1 node" + +test_110() { + local before=$(date +%s) + local evict + + mkdir -p $DIR/$tdir + touch $DIR/$tdir/f1 + touch $DIR/$tfile + + #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525 + do_facet mds1 lctl set_param fail_loc=0x525 fail_val=3 + + # disable last_xid logic by dropping link reply + ln $DIR/$tdir/f1 $DIR/$tdir/f2 & + sleep 1 + + #define OBD_FAIL_PTLRPC_ENQ_RESEND 0x534 + do_facet mds1 lctl set_param fail_loc=0x534 + + # RPC will race with its Resend and the Resend will sleep to let + # the original lock to get granted & cancelled. + # + # AST_SENT is set artificially, so an explicit conflict is not needed + # + # The woken up Resend gets a new lock, but client does not wait for it + stat $DIR/$tfile + sleep $TIMEOUT + do_facet mds1 lctl set_param fail_loc=0 fail_val=0 + + # Take a conflict to wait long enough to see the eviction + touch $DIR2/$tfile + + # let the client reconnect + client_reconnect + evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + + [ -z "$evict" ] || [[ $evict -le $before ]] || + (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state; + error "eviction happened: $evict before:$before") +} +run_test 110 "do not grant another lock on resend" + +test_111() { + [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" + [[ $(facet_active_host mds1) = $(facet_active_host mds2) ]] || + skip "MDT0 and MDT1 should be on the same node" + + mkdir $DIR1/$tdir + $LFS mkdir -i 0 $DIR1/$tdir/mdt0dir + $LFS mkdir -i 1 $DIR1/$tdir/mdt1dir + + mkdir $DIR1/$tdir/mdt0dir/foodir + touch 
$DIR1/$tdir/mdt0dir/foodir/{file1,file2} + + $MULTIOP $DIR2/$tdir/mdt0dir/foodir/file2 Ow4096_c & + MULTIOP_PID=$! + ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2 + + #define OBD_FAIL_MDS_LINK_RENAME_RACE 0x18a + do_facet mds1 $LCTL set_param fail_loc=0x8000018a + + ln $DIR1/$tdir/mdt0dir/foodir/file2 $DIR1/$tdir/mdt1dir/file2x & + sleep 1 + + rm $DIR2/$tdir/mdt1dir/file2 + sleep 1 + + mv $DIR2/$tdir/mdt0dir/foodir/file1 $DIR2/$tdir/mdt0dir/foodir/file2 + sleep 1 + + kill $MULTIOP_PID + wait + rm -r $DIR1/$tdir || error "Removing test dir failed" +} +run_test 111 "A racy rename/link an open file should not cause fs corruption" + +test_112() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.14.54) )) || + skip "Need server version at least 2.14.54" + + local rr + local count + + rr=$($LCTL get_param -n lmv.*.qos_threshold_rr | head -n1) + rr=${rr%%%} + stack_trap "$LCTL set_param lmv.*.qos_threshold_rr=$rr > /dev/null" + + mkdir -p $DIR1/$tdir/s1/s2 || error "mkdir s2 failed" + $LFS mkdir -i 0 $DIR1/$tdir/s1/s2/s3 || error "mkdir s3 failed" + $LFS setdirstripe -D -i -1 --max-inherit-rr=0 $DIR1/$tdir/s1/s2/s3 || + error "setdirstripe s3 failed" + $LCTL set_param lmv.*.qos_threshold_rr=90 + mkdir $DIR2/$tdir/s1/s2/s3/d{1..64} + count=$($LFS getstripe -m $DIR2/$tdir/s1/s2/s3/d* | grep ^0 | wc -l) + (( count == 64 )) || error "only $count subdirs created on MDT0" + + $LFS setdirstripe -D -i -1 --max-inherit-rr=3 $DIR1/$tdir/s1/s2/s3 || + error "setdirstripe s3 failed" + mkdir $DIR2/$tdir/s1/s2/s3/s{1..64} + count=$($LFS getstripe -m $DIR2/$tdir/s1/s2/s3/s* | grep ^0 | wc -l) + (( count == 64 / MDSCOUNT )) || error "$count subdirs created on MDT0" +} +run_test 112 "update max-inherit in default LMV" + log "cleanup: ======================================================" # kill and wait in each test only guarentee script finish, but command in script