X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=c7b40bf61f3723a84f35c6a5c44b4694240fc4e1;hp=62a7f8940debc532f49ef80d83e726896e347057;hb=79c8abecdac052e3e00251547cc500f2cba742ab;hpb=66172e3274ca3187edd519dd87a81a478a6d7259 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 62a7f89..c7b40bf 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -41,13 +41,8 @@ init_logging ALWAYS_EXCEPT="$SANITY_EXCEPT " # bug number for skipped test: LU-9693 LU-6493 LU-9693 ALWAYS_EXCEPT+=" 42a 42b 42c " -# bug number: LU-8411 LU-9054 LU-13314 -ALWAYS_EXCEPT+=" 407 312 56ob" - -if $SHARED_KEY; then - # bug number: LU-9795 LU-9795 LU-9795 LU-9795 - ALWAYS_EXCEPT+=" 17n 60a 133g 300f" -fi +# bug number: LU-8411 LU-9054 +ALWAYS_EXCEPT+=" 407 312" selinux_status=$(getenforce) if [ "$selinux_status" != "Disabled" ]; then @@ -61,10 +56,12 @@ if [[ $(uname -m) = aarch64 ]]; then ALWAYS_EXCEPT+=" $GRANT_CHECK_LIST" # bug number: LU-11671 LU-11667 ALWAYS_EXCEPT+=" 45 317" + # bug number: LU-14067 LU-14067 + ALWAYS_EXCEPT+=" 400a 400b" fi -# skip nfs tests on kernels >= 4.14.0 until they are fixed -if [ $LINUX_VERSION_CODE -ge $(version_code 4.14.0) ]; then +# skip nfs tests on kernels >= 4.12.0 until they are fixed +if [ $LINUX_VERSION_CODE -ge $(version_code 4.12.0) ]; then # bug number: LU-12661 ALWAYS_EXCEPT+=" 817" fi @@ -144,6 +141,13 @@ check_swap_layouts_support() skip "Does not support layout lock." 
} +check_swap_layout_no_dom() +{ + local FOLDER=$1 + local SUPP=$(lfs getstripe $FOLDER | grep "pattern: mdt" | wc -l) + [ $SUPP -eq 0 ] || skip "layout swap does not support DOM files so far" +} + check_and_setup_lustre DIR=${DIR:-$MOUNT} assert_DIR @@ -1436,6 +1440,23 @@ test_24F () { } run_test 24F "hash order vs readdir (LU-11330)" +test_24G () { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + + local ino1 + local ino2 + + $LFS mkdir -i 0 $DIR/$tdir-0 || error "mkdir $tdir-0" + $LFS mkdir -i 1 $DIR/$tdir-1 || error "mkdir $tdir-1" + touch $DIR/$tdir-0/f1 || error "touch f1" + ln -s $DIR/$tdir-0/f1 $DIR/$tdir-0/s1 || error "ln s1" + ino1=$(stat -c%i $DIR/$tdir-0/s1) + mv $DIR/$tdir-0/s1 $DIR/$tdir-1 || error "mv s1" + ino2=$(stat -c%i $DIR/$tdir-1/s1) + [ $ino1 -ne $ino2 ] || error "s1 should be migrated" +} +run_test 24G "migrate symlink in rename" + test_25a() { echo '== symlink sanity =============================================' @@ -1780,17 +1801,24 @@ run_test 27m "create file while OST0 was full" # OSCs keep a NOSPC flag that will be reset after ~5s (qos_maxage) # if the OST isn't full anymore. 
reset_enospc() { - local OSTIDX=${1:-""} + local ostidx=${1:-""} + local delay + local ready + local get_prealloc local list=$(comma_list $(osts_nodes)) - [ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1))) + [ "$ostidx" ] && list=$(facet_host ost$((ostidx + 1))) do_nodes $list lctl set_param fail_loc=0 - sync # initiate all OST_DESTROYs from MDS to OST - sleep_maxage + wait_delete_completed # initiate all OST_DESTROYs from MDS to OST + delay=$(do_facet $SINGLEMDS lctl get_param -n lov.*.qos_maxage | + awk '{print $1 * 2;exit;}') + get_prealloc="$LCTL get_param -n osc.*MDT*.prealloc_status | + grep -v \"^0$\"" + wait_update_facet $SINGLEMDS "$get_prealloc" "" $delay } -exhaust_precreations() { +__exhaust_precreations() { local OSTIDX=$1 local FAILLOC=$2 local FAILIDX=${3:-$OSTIDX} @@ -1821,14 +1849,19 @@ exhaust_precreations() { createmany -o $DIR/$tdir/${OST}/f $next_id $((last_id - next_id + 2)) do_facet $mfacet lctl get_param osp.$mdtosc_proc2.prealloc* do_facet $ofacet lctl set_param fail_loc=$FAILLOC +} + +exhaust_precreations() { + __exhaust_precreations $1 $2 $3 sleep_maxage } exhaust_all_precreations() { local i for (( i=0; i < OSTCOUNT; i++ )) ; do - exhaust_precreations $i $1 -1 + __exhaust_precreations $i $1 -1 done + sleep_maxage } test_27n() { @@ -2600,6 +2633,7 @@ test_27G() { #LU-10629 local ostrange="0 0 1" test_mkdir $DIR/$tdir + touch $DIR/$tdir/$tfile.nopool pool_add $POOL || error "pool_add failed" pool_add_targets $POOL $ostrange || error "pool_add_targets failed" $LFS setstripe -p $POOL $DIR/$tdir @@ -2607,14 +2641,18 @@ test_27G() { #LU-10629 local pool=$($LFS getstripe -p $DIR/$tdir) [ "$pool" = "$POOL" ] || error "Striping failed got '$pool' not '$POOL'" + touch $DIR/$tdir/$tfile.default + $LFS setstripe -E 1M --pool $POOL -c 1 -E eof -c 1 $DIR/$tdir/$tfile.pfl + $LFS find $DIR/$tdir -type f --pool $POOL + local found=$($LFS find $DIR/$tdir -type f --pool $POOL | wc -l) + [[ "$found" == "2" ]] || + error "found $found != 2 files in 
'$DIR/$tdir' in '$POOL'" $LFS setstripe -d $DIR/$tdir - pool=$($LFS getstripe -p $DIR/$tdir) + pool=$($LFS getstripe -p -d $DIR/$tdir) - rmdir $DIR/$tdir - - [ -z "$pool" ] || error "'$pool' is not empty" + [[ "$pool" != "$POOL" ]] || error "$DIR/$tdir is still '$pool'" } run_test 27G "Clear OST pool from stripe" @@ -3075,7 +3113,7 @@ run_test 30b "execute binary from Lustre as non-root ===========" test_30c() { # b=22376 [ $PARALLEL == "yes" ] && skip "skip parallel run" - cp `which ls` $DIR || cp /bin/ls $DIR + cp $(which ls) $DIR || cp /bin/ls $DIR chmod a-rw $DIR/ls cancel_lru_locks mdc cancel_lru_locks osc @@ -3084,6 +3122,22 @@ test_30c() { # b=22376 } run_test 30c "execute binary from Lustre without read perms ====" +test_30d() { + cp $(which dd) $DIR || error "failed to copy dd to $DIR/dd" + + for i in {1..10}; do + $DIR/dd bs=1M count=128 if=/dev/zero of=$DIR/$tfile & + local PID=$! + sleep 1 + $LCTL set_param ldlm.namespaces.*MDT*.lru_size=clear + wait $PID || error "executing dd from Lustre failed" + rm -f $DIR/$tfile + done + + rm -f $DIR/dd +} +run_test 30d "execute binary from Lustre while clear locks" + test_31a() { $OPENUNLINK $DIR/f31 $DIR/f31 || error "openunlink failed" $CHECKSTAT -a $DIR/f31 || error "$DIR/f31 exists" @@ -3293,6 +3347,21 @@ test_31p() { } run_test 31p "remove of open striped directory" +test_31q() { + [ $MDSCOUNT -lt 3 ] && skip_env "needs >= 3 MDTs" + + $LFS mkdir -i 3,1 $DIR/$tdir || error "mkdir failed" + index=$($LFS getdirstripe -i $DIR/$tdir) + [ $index -eq 3 ] || error "first stripe index $index != 3" + index=$($LFS getdirstripe $DIR/$tdir | tail -1 | awk '{print $1}') + [ $index -eq 1 ] || error "second stripe index $index != 1" + + # when "-c " is set, the number of MDTs specified after + # "-i" should equal to the stripe count + $LFS mkdir -i 3,1 -c 3 $DIR/$tdir.2 && error "mkdir should fail" || true +} +run_test 31q "create striped directory on specific MDTs" + cleanup_test32_mount() { local rc=0 trap 0 @@ -3794,7 
+3863,7 @@ test_33h() { done local failed=0 - for i in {1..50}; do + for i in {1..250}; do for fname in $(mktemp -u $DIR/$tdir/.$tfile.XXXXXX) \ $(mktemp $DIR/$tdir/$tfile.XXXXXXXX); do touch $fname || error "touch $fname failed" @@ -3806,7 +3875,7 @@ test_33h() { done done echo "$failed MDT index mismatches" - (( failed < 4 )) || error "MDT index mismatch $failed times" + (( failed < 20 )) || error "MDT index mismatch $failed times" } run_test 33h "temp file is located on the same MDT as target" @@ -4840,6 +4909,8 @@ test_43A() { # was test_43 $DIR/$tdir/$tfile && error "execute $DIR/$tdir/$tfile succeeded" || true kill -USR1 $pid + # Wait for multiop to exit + wait $pid } run_test 43A "execution of file opened for write should return -ETXTBSY" @@ -5089,6 +5160,27 @@ test_48e() { # bug 4134 } run_test 48e "Access to recreated parent subdir (should return errors)" +test_48f() { + [[ $MDS1_VERSION -ge $(version_code 2.13.55) ]] || + skip "need MDS >= 2.13.55" + [[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs" + [[ "$(facet_host mds1)" != "$(facet_host mds2)" ]] || + skip "needs different host for mdt1 mdt2" + [[ $(facet_fstype mds1) == ldiskfs ]] || skip "ldiskfs only" + + $LFS mkdir -i0 $DIR/$tdir + $LFS mkdir -i 1 $DIR/$tdir/sub1 $DIR/$tdir/sub2 $DIR/$tdir/sub3 + + for d in sub1 sub2 sub3; do + #define OBD_FAIL_OSD_REF_DEL 0x19c + do_facet mds1 $LCTL set_param fail_loc=0x8000019c + rm -rf $DIR/$tdir/$d && error "rm $d should fail" + done + + rm -d --interactive=never $DIR/$tdir || error "rm $tdir fail" +} +run_test 48f "non-zero nlink dir unlink won't LBUG()" + test_49() { # LU-1030 [ $PARALLEL == "yes" ] && skip "skip parallel run" remote_ost_nodsh && skip "remote OST with nodsh" @@ -5608,16 +5700,24 @@ test_56c() { local ost_idx=0 local ost_name=$(ostname_from_index $ost_idx) local old_status=$(ost_dev_status $ost_idx) + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" [[ -z "$old_status" ]] || skip_env "OST $ost_name is in $old_status status" do_facet ost1 $LCTL 
set_param -n obdfilter.$ost_name.degraded=1 - [[ $OST1_VERSION -ge $(version_code 2.12.55) ]] && do_facet ost1 \ + [[ $OST1_VERSION -lt $(version_code 2.12.55) ]] || do_facet ost1 \ $LCTL set_param -n obdfilter.$ost_name.no_precreate=1 + if [[ $OST1_VERSION -ge $(version_code 2.12.57) ]]; then + save_lustre_params ost1 osd-*.$ost_name.nonrotational > $p + do_facet ost1 $LCTL set_param -n osd-*.$ost_name.nonrotational=1 + fi + + [[ $($LFS df -v $MOUNT |& grep -c "inactive device") -eq 0 ]] || + error "$LFS df -v showing inactive devices" sleep_maxage - local new_status=$(ost_dev_status $ost_idx) + local new_status=$(ost_dev_status $ost_idx $MOUNT -v) [[ "$new_status" =~ "D" ]] || error "$ost_name status is '$new_status', missing 'D'" @@ -5625,18 +5725,37 @@ test_56c() { [[ "$new_status" =~ "N" ]] || error "$ost_name status is '$new_status', missing 'N'" fi + if [[ $OST1_VERSION -ge $(version_code 2.12.57) ]]; then + [[ "$new_status" =~ "f" ]] || + error "$ost_name status is '$new_status', missing 'f'" + fi do_facet ost1 $LCTL set_param -n obdfilter.$ost_name.degraded=0 - [[ $OST1_VERSION -ge $(version_code 2.12.55) ]] && do_facet ost1 \ + [[ $OST1_VERSION -lt $(version_code 2.12.55) ]] || do_facet ost1 \ $LCTL set_param -n obdfilter.$ost_name.no_precreate=0 + [[ -z "$p" ]] && restore_lustre_params < $p || true sleep_maxage new_status=$(ost_dev_status $ost_idx) [[ ! "$new_status" =~ "D" && ! 
"$new_status" =~ "N" ]] || error "$ost_name status is '$new_status', has 'D' and/or 'N'" + # can't check 'f' as devices may actually be on flash } run_test 56c "check 'lfs df' showing device status" +test_56d() { + local mdts=$($LFS df -v $MOUNT | grep -c MDT) + local osts=$($LFS df -v $MOUNT | grep -c OST) + + $LFS df $MOUNT + + (( mdts == MDSCOUNT )) || + error "lfs df -v showed $mdts MDTs, not $MDSCOUNT" + (( osts == OSTCOUNT )) || + error "lfs df -v showed $osts OSTs, not $OSTCOUNT" +} +run_test 56d "'lfs df -v' prints only configured devices" + NUMFILES=3 NUMDIRS=3 setup_56() { @@ -5852,7 +5971,7 @@ test_56ob() { cmd="$LFS find $dir -ctime +1s -type f" nums=$($cmd | wc -l) (( $nums == $count * 2 + 1)) || - error "'$cmd' wrong: found $nums, expected $((expected*2+1))" + error "'$cmd' wrong: found $nums, expected $((count * 2 + 1))" } run_test 56ob "check lfs find -atime -mtime -ctime with units" @@ -5864,37 +5983,48 @@ test_newerXY_base() { local negref if [ $y == "t" ]; then - ref="\"$(date +"%Y-%m-%d %H:%M:%S")\"" + if [ $x == "b" ]; then + ref="\"$(do_facet mds1 date +"%Y-%m-%d\ %H:%M:%S")\"" + else + ref="\"$(date +"%Y-%m-%d %H:%M:%S")\"" + fi else - ref=$DIR/$tfile.newer + ref=$DIR/$tfile.newer.$x$y touch $ref || error "touch $ref failed" fi + + echo "before = $ref" sleep 2 setup_56 $dir $NUMFILES $NUMDIRS "-i0 -c1" "-i0 -c1" sleep 2 if [ $y == "t" ]; then - negref="\"$(date +"%Y-%m-%d %H:%M:%S")\"" + if [ $x == "b" ]; then + negref="\"$(do_facet mds1 date +"%Y-%m-%d\ %H:%M:%S")\"" + else + negref="\"$(date +"%Y-%m-%d %H:%M:%S")\"" + fi else - negref=$DIR/$tfile.newerneg + negref=$DIR/$tfile.negnewer.$x$y touch $negref || error "touch $negref failed" fi + echo "after = $negref" local cmd="$LFS find $dir -newer$x$y $ref" local nums=$(eval $cmd | wc -l) local expected=$(((NUMFILES + 2) * NUMDIRS + 1)) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" + [ $nums -eq $expected ] || { ls -lauR --full-time $dir ; + error "'$cmd' 
wrong: found $nums newer, expected $expected" ; } cmd="$LFS find $dir ! -newer$x$y $negref" nums=$(eval $cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" + [ $nums -eq $expected ] || { ls -lauR --full-time $dir ; + error "'$cmd' wrong: found $nums older, expected $expected" ; } cmd="$LFS find $dir -newer$x$y $ref ! -newer$x$y $negref" nums=$(eval $cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" + [ $nums -eq $expected ] || { ls -lauR --full-time $dir ; + error "'$cmd' wrong: found $nums between, expected $expected"; } rm -rf $DIR/* } @@ -5909,12 +6039,105 @@ test_56oc() { test_newerXY_base "c" "a" test_newerXY_base "c" "m" test_newerXY_base "c" "c" + + [[ -n "$sles_version" ]] && + echo "skip timestamp tests on SLES, LU-13665" && return 0 + test_newerXY_base "a" "t" test_newerXY_base "m" "t" test_newerXY_base "c" "t" + + [[ $MDS1_VERSION -lt $(version_code 2.13.54) || + $CLIENT_VERSION -lt $(version_code 2.13.54) ]] && + ! btime_supported && echo "btime unsupported" && return 0 + + test_newerXY_base "b" "b" + test_newerXY_base "b" "t" } run_test 56oc "check lfs find -newerXY work" +btime_supported() { + local dir=$DIR/$tdir + local rc + + mkdir -p $dir + touch $dir/$tfile + $LFS find $dir -btime -1d -type f + rc=$? + rm -rf $dir + return $rc +} + +test_56od() { + [ $MDS1_VERSION -lt $(version_code 2.13.53) ] && + ! btime_supported && skip "btime unsupported on MDS" + + [ $CLIENT_VERSION -lt $(version_code 2.13.53) ] && + ! 
btime_supported && skip "btime unsupported on clients" + + local dir=$DIR/$tdir + local ref=$DIR/$tfile.ref + local negref=$DIR/$tfile.negref + + mkdir $dir || error "mkdir $dir failed" + touch $dir/$tfile.n1 || error "touch $dir/$tfile.n1 failed" + touch $dir/$tfile.n2 || error "touch $dir/$tfile.n2 failed" + mkdir $dir/$tdir.n1 || error "mkdir $dir/$tdir.n1 failed" + mkdir $dir/$tdir.n2 || error "mkdir $dir/$tdir.n2 failed" + touch $ref || error "touch $ref failed" + # sleep 3 seconds at least + sleep 3 + + local before=$(do_facet mds1 date +%s) + local skew=$(($(date +%s) - before + 1)) + + if (( skew < 0 && skew > -5 )); then + sleep $((0 - skew + 1)) + skew=0 + fi + + # Set the dir stripe params to limit files all on MDT0, + # otherwise we need to calc the max clock skew between + # the client and MDTs. + setup_56 $dir/d.btime $NUMFILES $NUMDIRS "-i0 -c1" "-i0 -c1" + sleep 2 + touch $negref || error "touch $negref failed" + + local cmd="$LFS find $dir -newerbb $ref ! -newerbb $negref -type f" + local nums=$($cmd | wc -l) + local expected=$(((NUMFILES + 1) * NUMDIRS)) + + [ $nums -eq $expected ] || + error "'$cmd' wrong: found $nums, expected $expected" + + cmd="$LFS find $dir -newerbb $ref ! 
-newerbb $negref -type d" + nums=$($cmd | wc -l) + expected=$((NUMFILES + 1)) + [ $nums -eq $expected ] || + error "'$cmd' wrong: found $nums, expected $expected" + + [ $skew -lt 0 ] && return + + local after=$(do_facet mds1 date +%s) + local age=$((after - before + 1 + skew)) + + cmd="$LFS find $dir -btime -${age}s -type f" + nums=$($cmd | wc -l) + expected=$(((NUMFILES + 1) * NUMDIRS)) + + echo "Clock skew between client and server: $skew, age:$age" + [ $nums -eq $expected ] || + error "'$cmd' wrong: found $nums, expected $expected" + + expected=$(($NUMDIRS + 1)) + cmd="$LFS find $dir -btime -${age}s -type d" + nums=$($cmd | wc -l) + [ $nums -eq $expected ] || + error "'$cmd' wrong: found $nums, expected $expected" + rm -f $ref $negref || error "Failed to remove $ref $negref" +} +run_test 56od "check lfs find -btime with units" + test_56p() { [ $RUNAS_ID -eq $UID ] && skip_env "RUNAS_ID = UID = $UID -- skipping" @@ -6043,154 +6266,72 @@ test_56r() { } run_test 56r "check lfs find -size works" -test_56ra() { - [[ $MDS1_VERSION -gt $(version_code 2.12.58) ]] || - skip "MDS < 2.12.58 doesn't return LSOM data" - local dir=$DIR/$tdir - - [[ $OSC == "mdc" ]] && skip "DoM files" && return - - setup_56 $dir $NUMFILES $NUMDIRS "-c 1" +test_56ra_sub() { + local expected=$1 + local glimpses=$2 + local cmd="$3" cancel_lru_locks $OSC local rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - local expected=12 - local cmd="$LFS find -size 0 -type f -lazy $dir" local nums=$($cmd | wc -l) [ $nums -eq $expected ] || error "'$cmd' wrong: found $nums, expected $expected" local rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size 0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before 
After: $rpcs_after $NUMFILES" - $LCTL get_param osc.*.stats - [ $rpcs_after -eq $((rpcs_before + 12)) ] || - error "'$cmd' should send 12 glimpse RPCs to OST" - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=0 - cmd="$LFS find ! -size 0 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - $LCTL get_param mdc.*.stats - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find ! -size 0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 12)) ] || - error "'$cmd' should send 12 glimpse RPCs to OST" + if (( rpcs_before + glimpses != rpcs_after )); then + echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" + $LCTL get_param osc.*.stats | grep ldlm_glimpse_enqueue - echo "test" > $dir/$tfile - echo "test2" > $dir/$tfile.2 && sync - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=1 - cmd="$LFS find -size 5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size 5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to 
OST" + if [[ $glimpses == 0 ]]; then + error "'$cmd' should not send glimpse RPCs to OST" + else + error "'$cmd' should send $glimpses glimpse RPCs to OST" + fi + fi +} - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=1 - cmd="$LFS find -size +5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size +5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" +test_56ra() { + [[ $MDS1_VERSION -gt $(version_code 2.12.58) ]] || + skip "MDS < 2.12.58 doesn't return LSOM data" + local dir=$DIR/$tdir + local old_agl=$($LCTL get_param -n llite.*.statahead_agl) - cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=2 - cmd="$LFS find -size +0 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size +0 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + [[ $OSC == "mdc" ]] && skip "statahead not needed for 
DoM files" + + # statahead_agl may cause extra glimpse which confuses results. LU-13017 + $LCTL set_param -n llite.*.statahead_agl=0 + stack_trap "$LCTL set_param -n llite.*.statahead_agl=$old_agl" + setup_56 $dir $NUMFILES $NUMDIRS "-c 1" + # open and close all files to ensure LSOM is updated cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=2 - cmd="$LFS find ! -size -5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find ! -size -5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + find $dir -type f | xargs cat > /dev/null + + # expect_found glimpse_rpcs command_to_run + test_56ra_sub 12 0 "$LFS find -size 0 -type f -lazy $dir" + test_56ra_sub 12 12 "$LFS find -size 0 -type f $dir" + test_56ra_sub 0 0 "$LFS find ! -size 0 -type f -lazy $dir" + test_56ra_sub 0 12 "$LFS find ! 
-size 0 -type f $dir" + echo "test" > $dir/$tfile + echo "test2" > $dir/$tfile.2 && sync cancel_lru_locks $OSC - rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - expected=12 - cmd="$LFS find -size -5 -type f -lazy $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - [ $rpcs_before -eq $rpcs_after ] || - error "'$cmd' should not send glimpse RPCs to OST" - cmd="$LFS find -size -5 -type f $dir" - nums=$($cmd | wc -l) - [ $nums -eq $expected ] || - error "'$cmd' wrong: found $nums, expected $expected" - rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) - echo "Before: $rpcs_before After: $rpcs_after $NUMFILES" - [ $rpcs_after -eq $((rpcs_before + 14)) ] || - error "'$cmd' should send 14 glimpse RPCs to OST" + cat $dir/$tfile $dir/$tfile.2 > /dev/null + + test_56ra_sub 1 0 "$LFS find -size 5 -type f -lazy $dir" + test_56ra_sub 1 14 "$LFS find -size 5 -type f $dir" + test_56ra_sub 1 0 "$LFS find -size +5 -type f -lazy $dir" + test_56ra_sub 1 14 "$LFS find -size +5 -type f $dir" + + test_56ra_sub 2 0 "$LFS find -size +0 -type f -lazy $dir" + test_56ra_sub 2 14 "$LFS find -size +0 -type f $dir" + test_56ra_sub 2 0 "$LFS find ! -size -5 -type f -lazy $dir" + test_56ra_sub 2 14 "$LFS find ! 
-size -5 -type f $dir" + test_56ra_sub 12 0 "$LFS find -size -5 -type f -lazy $dir" + test_56ra_sub 12 14 "$LFS find -size -5 -type f $dir" } run_test 56ra "check lfs find -size -lazy works for data on OSTs" @@ -6202,14 +6343,15 @@ test_56rb() { test_mkdir -p $dir || error "failed to mkdir $dir" $LFS setstripe -c 1 -i 0 $dir/$tfile || error "failed to setstripe $dir/$tfile" + mdt_idx=$($LFS getdirstripe -i $dir) dd if=/dev/zero of=$dir/$tfile bs=1M count=1 stack_trap "rm -f $tmp" EXIT - $LFS find --size +100K --ost 0 $dir 2>&1 | tee $tmp - [ -z "$(cat $tmp | grep "obd_uuid: ")" ] || + $LFS find --size +100K --ost 0 $dir |& tee $tmp + ! grep -q obd_uuid $tmp || error "failed to find --size +100K --ost 0 $dir" - $LFS find --size +100K --mdt $mdt_idx $dir 2>&1 | tee $tmp - [ -z "$(cat $tmp | grep "obd_uuid: ")" ] || + $LFS find --size +100K --mdt $mdt_idx $dir |& tee $tmp + ! grep -q obd_uuid $tmp || error "failed to find --size +100K --mdt $mdt_idx $dir" } run_test 56rb "check lfs find --size --ost/--mdt works" @@ -6735,6 +6877,24 @@ test_56wd() { } run_test 56wd "check lfs_migrate --rsync and --no-rsync work" +test_56we() { + local td=$DIR/$tdir + local tf=$td/$tfile + + test_mkdir $td || error "cannot create $td" + touch $tf || error "cannot touch $tf" + + echo -n "Make sure --non-direct|-D works..." + $LFS_MIGRATE -y --non-direct -v $tf 2>&1 | + grep -q "lfs migrate --non-direct" || + error "--non-direct option cannot work correctly" + $LFS_MIGRATE -y -D -v $tf 2>&1 | + grep -q "lfs migrate -D" || + error "-D option cannot work correctly" + echo "done." +} +run_test 56we "check lfs_migrate --non-direct|-D support" + test_56x() { [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs" check_swap_layouts_support @@ -6908,7 +7068,7 @@ test_56xc() { error "cannot setstripe 20MB file" echo "done" echo -n "Sizing 20MB test file..." 
- truncate "$dir/20mb" 20971520 || error "cannot create 20MB test file" + $TRUNCATE "$dir/20mb" 20971520 || error "cannot create 20MB test file" echo "done" echo -n "Verifying small file autostripe count is 1..." $LFS_MIGRATE -y -A -C 1 "$dir/20mb" || @@ -6928,7 +7088,7 @@ test_56xc() { echo "done" echo -n "Sizing 1GB test file..." # File size is 1GB + 3KB - truncate "$dir/1gb" 1073744896 || error "cannot create 1GB test file" + $TRUNCATE "$dir/1gb" 1073744896 || error "cannot create 1GB test file" echo "done" # need at least 512MB per OST for 1GB file to fit in 2 stripes @@ -7039,6 +7199,46 @@ test_56xe() { } run_test 56xe "migrate a composite layout file" +test_56xf() { + [[ $OSTCOUNT -ge 2 ]] || skip_env "needs >= 2 OSTs" + + [[ $MDS1_VERSION -ge $(version_code 2.13.53) ]] || + skip "Need server version at least 2.13.53" + + local dir=$DIR/$tdir + local f_comp=$dir/$tfile + local layout="-E 1M -c1 -E -1 -c2" + local fid_before="" + local fid_after="" + + test_mkdir "$dir" || error "cannot create dir $dir" + $LFS setstripe $layout $f_comp || + error "cannot setstripe $f_comp with layout $layout" + fid_before=$($LFS getstripe --fid $f_comp) + dd if=/dev/zero of=$f_comp bs=1M count=4 + + # 1. migrate a comp layout file to a comp layout + $LFS migrate $f_comp || error "cannot migrate $f_comp by lfs migrate" + fid_after=$($LFS getstripe --fid $f_comp) + [ "$fid_before" == "$fid_after" ] || + error "comp-to-comp migrate: $fid_before != $fid_after" + + # 2. migrate a comp layout file to a plain layout + $LFS migrate -c2 $f_comp || + error "cannot migrate $f_comp by lfs migrate" + fid_after=$($LFS getstripe --fid $f_comp) + [ "$fid_before" == "$fid_after" ] || + error "comp-to-plain migrate: $fid_before != $fid_after" + + # 3. 
migrate a plain layout file to a comp layout + $LFS migrate $layout $f_comp || + error "cannot migrate $f_comp by lfs migrate" + fid_after=$($LFS getstripe --fid $f_comp) + [ "$fid_before" == "$fid_after" ] || + error "plain-to-comp migrate: $fid_before != $fid_after" +} +run_test 56xf "FID is not lost during migration of a composite layout file" + test_56y() { [ $MDS1_VERSION -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" @@ -7562,7 +7762,7 @@ test_60g() { do_facet mds$index $LCTL set_param fail_loc=0x8000019a \ > /dev/null - usleep 100 + sleep 0.01 done kill -9 $pid @@ -7715,18 +7915,25 @@ test_64c() { } run_test 64c "verify grant shrink" +import_param() { + local tgt=$1 + local param=$2 + + $LCTL get_param osc.$tgt.import | awk "/$param/ { print \$2 }" +} + # this does exactly what osc_request.c:osc_announce_cached() does in # order to calculate max amount of grants to ask from server want_grant() { local tgt=$1 - local nrpages=$($LCTL get_param -n osc.${tgt}.max_pages_per_rpc) - local rpc_in_flight=$($LCTL get_param -n osc.${tgt}.max_rpcs_in_flight) + local nrpages=$($LCTL get_param -n osc.$tgt.max_pages_per_rpc) + local rpc_in_flight=$($LCTL get_param -n osc.$tgt.max_rpcs_in_flight) - ((rpc_in_flight ++)); + ((rpc_in_flight++)); nrpages=$((nrpages * rpc_in_flight)) - local dirty_max_pages=$($LCTL get_param -n osc.${tgt}.max_dirty_mb) + local dirty_max_pages=$($LCTL get_param -n osc.$tgt.max_dirty_mb) dirty_max_pages=$((dirty_max_pages * 1024 * 1024 / PAGE_SIZE)) @@ -7734,13 +7941,11 @@ want_grant() { local undirty=$((nrpages * PAGE_SIZE)) local max_extent_pages - max_extent_pages=$($LCTL get_param osc.${tgt}.import | - grep grant_max_extent_size | awk '{print $2}') + max_extent_pages=$(import_param $tgt grant_max_extent_size) max_extent_pages=$((max_extent_pages / PAGE_SIZE)) local nrextents=$(((nrpages + max_extent_pages - 1) / max_extent_pages)) local grant_extent_tax - grant_extent_tax=$($LCTL get_param 
osc.${tgt}.import | - grep grant_extent_tax | awk '{print $2}') + grant_extent_tax=$(import_param $tgt grant_extent_tax) undirty=$((undirty + nrextents * grant_extent_tax)) @@ -7754,70 +7959,185 @@ grant_chunk() { local max_brw_size local grant_extent_tax - max_brw_size=$($LCTL get_param osc.${tgt}.import | - grep max_brw_size | awk '{print $2}') + max_brw_size=$(import_param $tgt max_brw_size) - grant_extent_tax=$($LCTL get_param osc.${tgt}.import | - grep grant_extent_tax | awk '{print $2}') + grant_extent_tax=$(import_param $tgt grant_extent_tax) echo $(((max_brw_size + grant_extent_tax) * 2)) } test_64d() { - [ $OST1_VERSION -lt $(version_code 2.10.56) ] && + [ $OST1_VERSION -ge $(version_code 2.10.56) ] || skip "OST < 2.10.55 doesn't limit grants enough" - local tgt=$($LCTL dl | grep "0000-osc-[^mM]" | awk '{print $4}') - local file=$DIR/$tfile + local tgt=$($LCTL dl | awk '/OST0000-osc-[^mM]/ { print $4 }') - [[ $($LCTL get_param osc.${tgt}.import | - grep "connect_flags:.*grant_param") ]] || + [[ "$($LCTL get_param osc.${tgt}.import)" =~ "grant_param" ]] || skip "no grant_param connect flag" - local olddebug=$($LCTL get_param -n debug 2> /dev/null) + local olddebug="$($LCTL get_param -n debug 2> /dev/null)" + + $LCTL set_param -n -n debug="$OLDDEBUG" || true + stack_trap "$LCTL set_param -n debug='$olddebug'" EXIT - $LCTL set_param debug="$OLDDEBUG" 2> /dev/null || true local max_cur_granted=$(($(want_grant $tgt) + $(grant_chunk $tgt))) - stack_trap "rm -f $file" EXIT + stack_trap "rm -f $DIR/$tfile && wait_delete_completed" EXIT - $LFS setstripe $file -i 0 -c 1 - dd if=/dev/zero of=$file bs=1M count=1000 & + $LFS setstripe $DIR/$tfile -i 0 -c 1 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1000 & ddpid=$! 
- while true - do - local cur_grant=$($LCTL get_param -n osc.${tgt}.cur_grant_bytes) - if [[ $cur_grant -gt $max_cur_granted ]] - then + while kill -0 $ddpid; do + local cur_grant=$($LCTL get_param -n osc.$tgt.cur_grant_bytes) + + if [[ $cur_grant -gt $max_cur_granted ]]; then kill $ddpid error "cur_grant $cur_grant > $max_cur_granted" fi - kill -0 $ddpid - [[ $? -ne 0 ]] && break; - sleep 2 - done - rm -f $DIR/$tfile - wait_delete_completed - $LCTL set_param debug="$olddebug" 2> /dev/null || true + sleep 1 + done } run_test 64d "check grant limit exceed" -# bug 1414 - set/get directories' stripe info -test_65a() { - [ $PARALLEL == "yes" ] && skip "skip parallel run" +check_grants() { + local tgt=$1 + local expected=$2 + local msg=$3 + local cur_grants=$($LCTL get_param -n osc.$tgt.cur_grant_bytes) - test_mkdir $DIR/$tdir - touch $DIR/$tdir/f1 - $LVERIFY $DIR/$tdir $DIR/$tdir/f1 || error "lverify failed" + ((cur_grants == expected)) || + error "$msg: grants mismatch: $cur_grants, expected $expected" } -run_test 65a "directory with no stripe info" -test_65b() { +round_up_p2() { + echo $((($1 + $2 - 1) & ~($2 - 1))) +} + +test_64e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ $OST1_VERSION -ge $(version_code 2.11.56) ] || + skip "Need OSS version at least 2.11.56" - test_mkdir $DIR/$tdir + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + local init_grants=$(import_param $osc_tgt initial_grant) + + check_grants $osc_tgt $init_grants "init grants" + + local extent_tax=$(import_param $osc_tgt grant_extent_tax) + local max_brw_size=$(import_param $osc_tgt max_brw_size) + local gbs=$(import_param $osc_tgt grant_block_size) + + # write random number of bytes from max_brw_size / 4 to max_brw_size + local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1) + # align for direct io + write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE) + # round to 
grant consumption unit + local wb_round_up=$(round_up_p2 $write_bytes gbs) + + local grants=$((wb_round_up + extent_tax)) + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + # define OBD_FAIL_TGT_NO_GRANT 0x725 + # make the server not grant more back + do_facet ost1 $LCTL set_param fail_loc=0x725 + dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct + + do_facet ost1 $LCTL set_param fail_loc=0 + + check_grants $osc_tgt $((init_grants - grants)) "dio w/o grant alloc" + + rm -f $DIR/$tfile || error "rm failed" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + # define OBD_FAIL_TGT_NO_GRANT 0x725 + # make the server not grant more back + do_facet ost1 $LCTL set_param fail_loc=0x725 + $MULTIOP $DIR/$tfile "oO_WRONLY:w${write_bytes}yc" + do_facet ost1 $LCTL set_param fail_loc=0 + + check_grants $osc_tgt $((init_grants - grants)) "buf io w/o grant alloc" +} +run_test 64e "check grant consumption (no grant allocation)" + +test_64f() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + local osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + local init_grants=$(import_param $osc_tgt initial_grant) + local extent_tax=$(import_param $osc_tgt grant_extent_tax) + local max_brw_size=$(import_param $osc_tgt max_brw_size) + local gbs=$(import_param $osc_tgt grant_block_size) + local chunk=$(grant_chunk $osc_tgt) + + # write random number of bytes from max_brw_size / 4 to max_brw_size + local write_bytes=$(shuf -i $((max_brw_size / 4))-$max_brw_size -n 1) + # align for direct io + write_bytes=$(round_up_p2 $write_bytes PAGE_SIZE) + # round to grant consumption unit + local wb_round_up=$(round_up_p2 $write_bytes gbs) + + local grants=$((wb_round_up + 
extent_tax)) + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + dd if=/dev/zero of=$DIR/$tfile bs=$write_bytes count=1 oflag=direct || + error "error writing to $DIR/$tfile" + + check_grants $osc_tgt $((init_grants - grants + chunk)) \ + "direct io with grant allocation" + + rm -f $DIR/$tfile || error "rm failed" + + # Remount client to reset grant + remount_client $MOUNT || error "failed to remount client" + osc_tgt="$FSNAME-OST0000-osc-$($LFS getname -i $DIR)" + + $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "lfs setstripe failed" + + local cmd="oO_WRONLY:w${write_bytes}_yc" + + $MULTIOP $DIR/$tfile $cmd & + MULTIPID=$! + sleep 1 + + check_grants $osc_tgt $((init_grants - grants)) \ + "buffered io, not write rpc" + + kill -USR1 $MULTIPID + wait + + check_grants $osc_tgt $((init_grants - grants + chunk)) \ + "buffered io, one RPC" +} +run_test 64f "check grant consumption (with grant allocation)" + +# bug 1414 - set/get directories' stripe info +test_65a() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + test_mkdir $DIR/$tdir + touch $DIR/$tdir/f1 + $LVERIFY $DIR/$tdir $DIR/$tdir/f1 || error "lverify failed" +} +run_test 65a "directory with no stripe info" + +test_65b() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + test_mkdir $DIR/$tdir local STRIPESIZE=$($LFS getstripe -S $DIR/$tdir) $LFS setstripe -S $((STRIPESIZE * 2)) -i 0 -c 1 $DIR/$tdir || @@ -8084,7 +8404,11 @@ test_65n() { local file2_stripe_size=$($LFS getstripe -S $file2) [[ $file2_stripe_size -eq $new_def_stripe_size ]] || + { + echo "file2_stripe_size: '$file2_stripe_size'" + echo "new_def_stripe_size: '$new_def_stripe_size'" error "$file2 didn't inherit stripe size $new_def_stripe_size" + } local dir3=$MOUNT/$tdir-3 mkdir $dir3 || error "mkdir $dir3 failed" @@ -8094,7 +8418,11 @@ test_65n() { local dir3_layout=$(get_layout_param $dir3) local root_dir_layout=$(get_layout_param $MOUNT) [[ "$dir3_layout" = "$root_dir_layout" ]] || + { + echo "dir3_layout: 
'$dir3_layout'" + echo "root_dir_layout: '$root_dir_layout'" error "$dir3 should show the default layout from $MOUNT" + } # set OST pool on root directory local pool=$TESTNAME @@ -8112,7 +8440,7 @@ test_65n() { local file3_pool=$($LFS getstripe -p $file3) [[ "$file3_pool" = "$pool" ]] || - error "$file3 didn't inherit OST pool $pool" + error "$file3 ('$file3_pool') didn't inherit OST pool '$pool'" local dir4=$MOUNT/$tdir-4 mkdir $dir4 || error "mkdir $dir4 failed" @@ -8123,7 +8451,11 @@ test_65n() { echo "$LFS getstripe -d $MOUNT" $LFS getstripe -d $MOUNT [[ "$dir4_layout" = "$root_dir_layout" ]] || + { + echo "dir4_layout: '$dir4_layout'" + echo "root_dir_layout: '$root_dir_layout'" error "$dir4 should show the default layout from $MOUNT" + } # new file created in $dir4 should inherit the pool from # the filesystem default @@ -8132,7 +8464,7 @@ test_65n() { local file4_pool=$($LFS getstripe -p $file4) [[ "$file4_pool" = "$pool" ]] || - error "$file4 didn't inherit OST pool $pool" + error "$file4 ('$file4_pool') didn't inherit OST pool $pool" # new subdirectory under non-root directory should inherit # the default layout from its parent directory @@ -8145,7 +8477,11 @@ test_65n() { dir4_layout=$(get_layout_param $dir4) local dir5_layout=$(get_layout_param $dir5) [[ "$dir4_layout" = "$dir5_layout" ]] || + { + echo "dir4_layout: '$dir4_layout'" + echo "dir5_layout: '$dir5_layout'" error "$dir5 should inherit the default layout from $dir4" + } # though subdir under ROOT doesn't inherit default layout, but # its sub dir/file should be created with default layout. 
@@ -8355,39 +8691,78 @@ test_74c() { } run_test 74c "ldlm_lock_create error path, (shouldn't LBUG)" -num_inodes() { - awk '/lustre_inode_cache/ {print $2; exit}' /proc/slabinfo +slab_lic=/sys/kernel/slab/lustre_inode_cache +num_objects() { + [ -f $slab_lic/shrink ] && echo 1 > $slab_lic/shrink + [ -f $slab_lic/objects ] && awk '{ print $1 }' $slab_lic/objects || + awk '/lustre_inode_cache/ { print $2; exit }' /proc/slabinfo } -test_76() { # Now for bug 20433, added originally in bug 1443 +test_76a() { # Now for b=20433, added originally in b=1443 [ $PARALLEL == "yes" ] && skip "skip parallel run" cancel_lru_locks osc + # there may be some slab objects cached per core local cpus=$(getconf _NPROCESSORS_ONLN 2>/dev/null) - local before=$(num_inodes) + local before=$(num_objects) local count=$((512 * cpus)) - [ "$SLOW" = "no" ] && count=$((64 * cpus)) + [ "$SLOW" = "no" ] && count=$((128 * cpus)) + local margin=$((count / 10)) + if [[ -f $slab_lic/aliases ]]; then + local aliases=$(cat $slab_lic/aliases) + (( aliases > 0 )) && margin=$((margin * aliases)) + fi - echo "before inodes: $before" + echo "before slab objects: $before" for i in $(seq $count); do touch $DIR/$tfile rm -f $DIR/$tfile done cancel_lru_locks osc - local after=$(num_inodes) - echo "after inodes: $after" - while (( after > before + 8 * ${cpus:-1} )); do + local after=$(num_objects) + echo "created: $count, after slab objects: $after" + # shared slab counts are not very accurate, allow significant margin + # the main goal is that the cache growth is not permanently > $count + while (( after > before + margin )); do sleep 1 - after=$(num_inodes) + after=$(num_objects) wait=$((wait + 1)) - (( wait % 5 == 0 )) && echo "wait $wait seconds inodes: $after" - if (( wait > 30 )); then - error "inode slab grew from $before to $after" + (( wait % 5 == 0 )) && echo "wait $wait seconds objects: $after" + if (( wait > 60 )); then + error "inode slab grew from $before+$margin to $after" fi done } -run_test 76 
"confirm clients recycle inodes properly ====" +run_test 76a "confirm clients recycle inodes properly ====" + +test_76b() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ $CLIENT_VERSION -ge $(version_code 2.13.55) ] || skip "not supported" + + local count=512 + local before=$(num_objects) + + for i in $(seq $count); do + mkdir $DIR/$tdir + rmdir $DIR/$tdir + done + + local after=$(num_objects) + local wait=0 + + while (( after > before )); do + sleep 1 + after=$(num_objects) + wait=$((wait + 1)) + (( wait % 5 == 0 )) && echo "wait $wait seconds objects: $after" + if (( wait > 60 )); then + error "inode slab grew from $before to $after" + fi + done + echo "slab objects before: $before, after: $after" +} +run_test 76b "confirm clients recycle directory inodes properly ====" export ORIG_CSUM="" set_checksums() @@ -8645,9 +9020,8 @@ test_77k() { # LU-10906 local i [ "$ORIG_CSUM" ] || ORIG_CSUM=$(eval $get_checksum) - stack_trap "wait_update $HOSTNAME '$get_checksum' $ORIG_CSUM" EXIT - stack_trap "do_facet mgs $LCTL set_param -P $cksum_param=$ORIG_CSUM" \ - EXIT + stack_trap "wait_update $HOSTNAME '$get_checksum' $ORIG_CSUM || true" + stack_trap "do_facet mgs $LCTL set_param -P $cksum_param=$ORIG_CSUM" for i in 0 1; do do_facet mgs $LCTL set_param -P $cksum_param=$i || @@ -8870,28 +9244,6 @@ test_81b() { # LU-456 } run_test 81b "OST should return -ENOSPC when retry still fails =======" -test_82() { # LU-1031 - dd if=/dev/zero of=$DIR/$tfile bs=1M count=10 - local gid1=14091995 - local gid2=16022000 - - multiop_bg_pause $DIR/$tfile OG${gid1}_g${gid1}c || return 1 - local MULTIPID1=$! - multiop_bg_pause $DIR/$tfile O_G${gid2}r10g${gid2}c || return 2 - local MULTIPID2=$! 
- kill -USR1 $MULTIPID2 - sleep 2 - if [[ `ps h -o comm -p $MULTIPID2` == "" ]]; then - error "First grouplock does not block second one" - else - echo "Second grouplock blocks first one" - fi - kill -USR1 $MULTIPID1 - wait $MULTIPID1 - wait $MULTIPID2 -} -run_test 82 "Basic grouplock test" - test_99() { [ -z "$(which cvs 2>/dev/null)" ] && skip_env "could not find cvs" @@ -9161,17 +9513,21 @@ test_101d() { stack_trap "$LCTL set_param -n llite.*.max_read_ahead_mb $old_RA" EXIT $LCTL get_param -n llite.*.max_read_ahead_mb - echo Reading the test file $file with read-ahead disabled + echo "Reading the test file $file with read-ahead disabled" local sz_KB=$((sz_MB * 1024 / 4)) - local raOFF=$(do_and_time "dd if=$file of=/dev/null bs=4k count=$sz_KB") + # 10485760 bytes transferred in 0.000938 secs (11179579337 bytes/sec) + # 104857600 bytes (105 MB) copied, 0.00876352 s, 12.0 GB/s + local raOFF=$(LANG=C dd if=$file of=/dev/null bs=4k count=$sz_KB |& + sed -e '/records/d' -e 's/.* \([0-9]*\.[0-9]*\) *s.*/\1/') - echo Cancel LRU locks on lustre client to flush the client cache + echo "Cancel LRU locks on lustre client to flush the client cache" cancel_lru_locks osc echo Enable read-ahead with ${ra_MB}MB $LCTL set_param -n llite.*.max_read_ahead_mb=$ra_MB - echo Reading the test file $file with read-ahead enabled - local raON=$(do_and_time "dd if=$file of=/dev/null bs=4k count=$sz_KB") + echo "Reading the test file $file with read-ahead enabled" + local raON=$(LANG=C dd if=$file of=/dev/null bs=4k count=$sz_KB |& + sed -e '/records/d' -e 's/.* \([0-9]*\.[0-9]*\) *s.*/\1/') echo "read-ahead disabled time read $raOFF" echo "read-ahead enabled time read $raON" @@ -9179,7 +9535,8 @@ test_101d() { rm -f $file wait_delete_completed - [ $raOFF -le 1 ] || [ $raON -lt $raOFF ] || + # use awk for this check instead of bash because it handles decimals + awk "{ exit !($raOFF < 1.0 || $raOFF > $raON) }" <<<"ignore_me" || error "readahead ${raON}s > no-readahead ${raOFF}s ${sz_MB}M" } 
run_test 101d "file read with and without read-ahead enabled" @@ -9283,12 +9640,11 @@ test_101g_brw_size_test() { sed -n '/pages per rpc/,/^$/p' | awk '/'$pages':/ { reads += $2; writes += $6 }; \ END { print reads,writes }')) - [ ${rpcs[0]} -ne $count ] && error "${rpcs[0]} != $count read RPCs" && - return 5 - [ ${rpcs[1]} -ne $count ] && error "${rpcs[1]} != $count write RPCs" && - return 6 - - return 0 + # allow one extra full-sized read RPC for async readahead + [[ ${rpcs[0]} == $count || ${rpcs[0]} == $((count + 1)) ]] || + { error "${rpcs[0]} != $count read RPCs"; return 5; } + [[ ${rpcs[1]} == $count ]] || + { error "${rpcs[1]} != $count write RPCs"; return 6; } } test_101g() { @@ -9504,36 +9860,54 @@ test_102a() { } run_test 102a "user xattr test ==================================" +check_102b_layout() { + local layout="$*" + local testfile=$DIR/$tfile + + echo "test layout '$layout'" + $LFS setstripe $layout $testfile || error "setstripe failed" + $LFS getstripe -y $testfile + + echo "get/set/list trusted.lov xattr ..." 
# b=10930 + local value=$(getfattr -n trusted.lov -e hex $testfile | grep trusted) + [[ "$value" =~ "trusted.lov" ]] || + error "can't get trusted.lov from $testfile" + local stripe_count_orig=$($LFS getstripe -c $testfile) || + error "getstripe failed" + + $MCREATE $testfile.2 || error "mcreate $testfile.2 failed" + + value=$(cut -d= -f2 <<<$value) + # LU-13168: truncated xattr should fail if short lov_user_md header + [ $CLIENT_VERSION -lt $(version_code 2.13.53) ] && + lens="${#value}" || lens="$(seq 4 2 ${#value})" + for len in $lens; do + echo "setfattr $len $testfile.2" + setfattr -n trusted.lov -v ${value:0:$len} $testfile.2 && + [ $len -lt 66 ] && error "short xattr len=$len worked" + done + local stripe_size=$($LFS getstripe -S $testfile.2) + local stripe_count=$($LFS getstripe -c $testfile.2) + [[ $stripe_size -eq 65536 ]] || + error "stripe size $stripe_size != 65536" + [[ $stripe_count -eq $stripe_count_orig ]] || + error "stripe count $stripe_count != $stripe_count_orig" + rm $testfile $testfile.2 +} + test_102b() { [ -z "$(which setfattr 2>/dev/null)" ] && skip_env "could not find setfattr" [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs" - # b10930: get/set/list trusted.lov xattr - echo "get/set/list trusted.lov xattr ..." 
- local testfile=$DIR/$tfile - $LFS setstripe -S 65536 -i 1 -c $OSTCOUNT $testfile || - error "setstripe failed" - local STRIPECOUNT=$($LFS getstripe -c $testfile) || - error "getstripe failed" - getfattr -d -m "^trusted" $testfile 2>/dev/null | grep "trusted.lov" || - error "can't get trusted.lov from $testfile" + # check plain layout + check_102b_layout -S 65536 -i 1 -c $OSTCOUNT - local testfile2=${testfile}2 - local value=$(getfattr -n trusted.lov $testfile 2>/dev/null | - grep "trusted.lov" | sed -e 's/[^=]\+=//') + # and also check composite layout + check_102b_layout -E 1M -S 65536 -i 1 -c $OSTCOUNT -Eeof -S4M - $MCREATE $testfile2 - setfattr -n trusted.lov -v $value $testfile2 - local stripe_size=$($LFS getstripe -S $testfile2) - local stripe_count=$($LFS getstripe -c $testfile2) - [[ $stripe_size -eq 65536 ]] || - error "stripe size $stripe_size != 65536" - [[ $stripe_count -eq $STRIPECOUNT ]] || - error "stripe count $stripe_count != $STRIPECOUNT" - rm -f $DIR/$tfile } -run_test 102b "getfattr/setfattr for trusted.lov EAs ============" +run_test 102b "getfattr/setfattr for trusted.lov EAs" test_102c() { [ -z "$(which setfattr 2>/dev/null)" ] && @@ -11313,8 +11687,8 @@ test_121() { #bug #10589 } run_test 121 "read cancel race =========" -test_123a() { # was test 123, statahead(bug 11401) - [ $PARALLEL == "yes" ] && skip "skip parallel run" +test_123a_base() { # was test 123, statahead(bug 11401) + local lsx="$1" SLOWOK=0 if ! 
grep -q "processor.*: 1" /proc/cpuinfo; then @@ -11326,76 +11700,120 @@ test_123a() { # was test 123, statahead(bug 11401) test_mkdir $DIR/$tdir NUMFREE=$(df -i -P $DIR | tail -n 1 | awk '{ print $4 }') [[ $NUMFREE -gt 100000 ]] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) - MULT=10 - for ((i=100, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do - createmany -o $DIR/$tdir/$tfile $j $((i - j)) - - max=`lctl get_param -n llite.*.statahead_max | head -n 1` - lctl set_param -n llite.*.statahead_max 0 - lctl get_param llite.*.statahead_max - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta=$((etime - stime)) - log "ls $i files without statahead: $delta sec" - lctl set_param llite.*.statahead_max=$max - - swrong=`lctl get_param -n llite.*.statahead_stats | grep "statahead wrong:" | awk '{print $3}'` - lctl get_param -n llite.*.statahead_max | grep '[0-9]' - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta_sa=$((etime - stime)) - log "ls $i files with statahead: $delta_sa sec" - lctl get_param -n llite.*.statahead_stats - ewrong=`lctl get_param -n llite.*.statahead_stats | grep "statahead wrong:" | awk '{print $3}'` + MULT=10 + for ((i=100, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do + createmany -o $DIR/$tdir/$tfile $j $((i - j)) + + max=$(lctl get_param -n llite.*.statahead_max | head -n 1) + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date +%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta=$((etime - stime)) + log "$lsx $i files without statahead: $delta sec" + lctl set_param llite.*.statahead_max=$max + + swrong=$(lctl get_param -n llite.*.statahead_stats | + grep "statahead wrong:" | awk '{print $3}') + lctl get_param -n llite.*.statahead_max | grep '[0-9]' + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date 
+%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta_sa=$((etime - stime)) + log "$lsx $i files with statahead: $delta_sa sec" + lctl get_param -n llite.*.statahead_stats + ewrong=$(lctl get_param -n llite.*.statahead_stats | + grep "statahead wrong:" | awk '{print $3}') [[ $swrong -lt $ewrong ]] && log "statahead was stopped, maybe too many locks held!" [[ $delta -eq 0 || $delta_sa -eq 0 ]] && continue - if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then - max=`lctl get_param -n llite.*.statahead_max | head -n 1` - lctl set_param -n llite.*.statahead_max 0 - lctl get_param llite.*.statahead_max - cancel_lru_locks mdc - cancel_lru_locks osc - stime=`date +%s` - time ls -l $DIR/$tdir | wc -l - etime=`date +%s` - delta=$((etime - stime)) - log "ls $i files again without statahead: $delta sec" - lctl set_param llite.*.statahead_max=$max - if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then - if [ $SLOWOK -eq 0 ]; then - error "ls $i files is slower with statahead!" - else - log "ls $i files is slower with statahead!" - fi - break - fi - fi + if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then + max=$(lctl get_param -n llite.*.statahead_max | + head -n 1) + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=$(date +%s) + time $lsx $DIR/$tdir | wc -l + etime=$(date +%s) + delta=$((etime - stime)) + log "$lsx $i files again without statahead: $delta sec" + lctl set_param llite.*.statahead_max=$max + if [ $((delta_sa * 100 > delta * 105 && delta_sa > delta + 2)) ]; then + if [ $SLOWOK -eq 0 ]; then + error "$lsx $i files is slower with statahead!" + else + log "$lsx $i files is slower with statahead!" 
+ fi + break + fi + fi - [ $delta -gt 20 ] && break - [ $delta -gt 8 ] && MULT=$((50 / delta)) - [ "$SLOW" = "no" -a $delta -gt 5 ] && break - done - log "ls done" + [ $delta -gt 20 ] && break + [ $delta -gt 8 ] && MULT=$((50 / delta)) + [ "$SLOW" = "no" -a $delta -gt 5 ] && break + done + log "$lsx done" - stime=`date +%s` - rm -r $DIR/$tdir - sync - etime=`date +%s` - delta=$((etime - stime)) - log "rm -r $DIR/$tdir/: $delta seconds" - log "rm done" - lctl get_param -n llite.*.statahead_stats + stime=$(date +%s) + rm -r $DIR/$tdir + sync + etime=$(date +%s) + delta=$((etime - stime)) + log "rm -r $DIR/$tdir/: $delta seconds" + log "rm done" + lctl get_param -n llite.*.statahead_stats +} + +test_123aa() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + test_123a_base "ls -l" } -run_test 123a "verify statahead work" +run_test 123aa "verify statahead work" + +test_123ab() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + statx_supported || skip_env "Test must be statx() syscall supported" + + test_123a_base "$STATX -l" +} +run_test 123ab "verify statahead work by using statx" + +test_123ac() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + statx_supported || skip_env "Test must be statx() syscall supported" + + local rpcs_before + local rpcs_after + local agl_before + local agl_after + + cancel_lru_locks $OSC + rpcs_before=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + agl_before=$($LCTL get_param -n llite.*.statahead_stats | + awk '/agl.total:/ {print $3}') + test_123a_base "$STATX -c \"%n %i %A %h %u %g %W %X %Z\" -D" + test_123a_base "$STATX --cached=always -D" + agl_after=$($LCTL get_param -n llite.*.statahead_stats | + awk '/agl.total:/ {print $3}') + [ $agl_before -eq $agl_after ] || + error "Should not trigger AGL thread - $agl_before:$agl_after" + rpcs_after=$(calc_stats $OSC.*$OSC*.stats ldlm_glimpse_enqueue) + [ $rpcs_after -eq $rpcs_before ] || + error "$STATX should not send glimpse RPCs to $OSC" +} +run_test 123ac 
"verify statahead work by using statx without glimpse RPCs" test_123b () { # statahead(bug 15027) [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -11484,18 +11902,18 @@ test_124a() { skip "Limit is too small $LIMIT" fi - # Make LVF so higher that sleeping for $SLEEP is enough to _start_ - # killing locks. Some time was spent for creating locks. This means - # that up to the moment of sleep finish we must have killed some of - # them (10-100 locks). This depends on how fast ther were created. - # Many of them were touched in almost the same moment and thus will - # be killed in groups. - local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE)) - - # Use $LRU_SIZE_B here to take into account real number of locks - # created in the case of CMD, LRU_SIZE_B != $NR in most of cases - local LRU_SIZE_B=$LRU_SIZE - log "LVF=$LVF" + # Make LVF so higher that sleeping for $SLEEP is enough to _start_ + # killing locks. Some time was spent for creating locks. This means + # that up to the moment of sleep finish we must have killed some of + # them (10-100 locks). This depends on how fast ther were created. + # Many of them were touched in almost the same moment and thus will + # be killed in groups. 
+ local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP * $LIMIT / $LRU_SIZE * 100)) + + # Use $LRU_SIZE_B here to take into account real number of locks + # created in the case of CMD, LRU_SIZE_B != $NR in most of cases + local LRU_SIZE_B=$LRU_SIZE + log "LVF=$LVF" local OLD_LVF=$($LCTL get_param -n $NSDIR.pool.lock_volume_factor) log "OLD_LVF=$OLD_LVF" $LCTL set_param -n $NSDIR.pool.lock_volume_factor $LVF @@ -11920,7 +12338,7 @@ set_dir_limits () { check_mds_dmesg() { local facets=$(get_facets MDS) for facet in ${facets//,/ }; do - do_facet $facet "dmesg | tail -3 | grep -q $1" && return 0 + do_facet $facet "dmesg | tail -3 | grep $1" && return 0 done return 1 } @@ -11935,55 +12353,49 @@ test_129() { remote_mds_nodsh && skip "remote MDS with nodsh" local ENOSPC=28 - local EFBIG=27 local has_warning=false rm -rf $DIR/$tdir mkdir -p $DIR/$tdir # block size of mds1 - local maxsize=$(($($LCTL get_param -n mdc.*MDT0000*.blocksize) * 5)) - set_dir_limits $maxsize $maxsize + local maxsize=$(($($LCTL get_param -n mdc.*MDT0000*.blocksize) * 8)) + set_dir_limits $maxsize $((maxsize * 6 / 8)) + stack_trap "set_dir_limits 0 0" + stack_trap "unlinkmany $DIR/$tdir/file_base_ 2000 || true" local dirsize=$(stat -c%s "$DIR/$tdir") local nfiles=0 - while [[ $dirsize -le $maxsize ]]; do - $MULTIOP $DIR/$tdir/file_base_$nfiles Oc + while (( $dirsize <= $maxsize )); do + $MCREATE $DIR/$tdir/file_base_$nfiles rc=$? - if ! 
$has_warning; then - check_mds_dmesg '"is approaching"' && has_warning=true - fi # check two errors: - # ENOSPC for new ext4 max_dir_size (kernel commit df981d03ee) - # EFBIG for previous versions included in ldiskfs series - if [ $rc -eq $EFBIG ] || [ $rc -eq $ENOSPC ]; then + # ENOSPC for ext4 max_dir_size, which has been used since + # kernel v3.6-rc1-8-gdf981d03ee, lustre v2_4_50_0-79-gaed82035c0 + if (( rc == ENOSPC )); then set_dir_limits 0 0 - echo "return code $rc received as expected" + echo "rc=$rc returned as expected after $nfiles files" createmany -o $DIR/$tdir/file_extra_$nfiles. 5 || - error_exit "create failed w/o dir size limit" + error "create failed w/o dir size limit" - check_mds_dmesg '"has reached"' || - error_exit "reached message should be output" - - [ $has_warning = "false" ] && - error_exit "warning message should be output" + # messages may be rate limited if test is run repeatedly + check_mds_dmesg '"is approaching max"' || + echo "warning message should be output" + check_mds_dmesg '"has reached max"' || + echo "reached message should be output" dirsize=$(stat -c%s "$DIR/$tdir") [[ $dirsize -ge $maxsize ]] && return 0 - error_exit "current dir size $dirsize, " \ - "previous limit $maxsize" - elif [ $rc -ne 0 ]; then - set_dir_limits 0 0 - error_exit "return $rc received instead of expected " \ - "$EFBIG or $ENOSPC, files in dir $dirsize" + error "dirsize $dirsize < $maxsize after $nfiles files" + elif (( rc != 0 )); then + break fi nfiles=$((nfiles + 1)) dirsize=$(stat -c%s "$DIR/$tdir") done - set_dir_limits 0 0 - error "exceeded dir size limit $maxsize($MDSCOUNT) : $dirsize bytes" + error "rc=$rc, size=$dirsize/$maxsize, mdt=$MDSCOUNT, nfiles=$nfiles" } run_test 129 "test directory size limit ========================" @@ -12710,38 +13122,21 @@ test_133e() { } run_test 133e "Verifying OST {read,write}_bytes nid stats =================" -proc_regexp="/{proc,sys}/{fs,sys,kernel/debug}/{lustre,lnet}/" - -# Some versions of find (4.5.11, 
4.5.14) included in CentOS 7.3-7.5 do -# not honor the -ignore_readdir_race option correctly. So we call -# error_ignore() rather than error() in these cases. See LU-11152. -error_133() { - if (find --version; do_facet mds1 find --version) | - grep -q '\b4\.5\.1[1-4]\b'; then - error_ignore LU-11152 "$@" - else - error "$@" - fi -} - test_133f() { - # First without trusting modes. - local proc_dirs=$(eval \ls -d $proc_regexp 2>/dev/null) - echo "proc_dirs='$proc_dirs'" - [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME" - find $proc_dirs -exec cat '{}' \; &> /dev/null + [[ $(lustre_version_code $facet) -ge $(version_code 2.7.65) ]] || + skip "too old lustre for get_param -R ($facet_ver)" - # Second verifying readability. + # verifying readability. $LCTL get_param -R '*' &> /dev/null # Verifing writability with badarea_io. - find $proc_dirs \ - -ignore_readdir_race \ - -type f \ - -not -name force_lbug \ - -not -name changelog_mask \ - -exec badarea_io '{}' \; || - error_133 "find $proc_dirs failed" + $LCTL list_param -FR '*' | grep '=' | tr -d = | + egrep -v 'force_lbug|changelog_mask' | xargs badarea_io || + error "client badarea_io failed" + + # remount the FS in case writes/reads /proc break the FS + cleanup || error "failed to unmount" + setup || error "failed to setup" } run_test 133f "Check reads/writes of client lustre proc files with bad area io" @@ -12758,10 +13153,10 @@ test_133g() { log "$facet: too old lustre for get_param -R" fi if [ $facet_ver -ge $(version_code 2.5.54) ]; then - do_facet $facet "$LCTL list_param -R '*' | grep '=' | + do_facet $facet "$LCTL list_param -FR '*' | grep '=' | tr -d = | egrep -v 'force_lbug|changelog_mask' | xargs badarea_io" || - error_133 "$facet badarea_io failed" + error "$facet badarea_io failed" else skip_noexit "$facet: too old lustre for get_param -R" fi @@ -12770,7 +13165,6 @@ test_133g() { # remount the FS in case writes/reads /proc break the FS cleanup || error "failed to unmount" setup || error "failed 
to setup" - true } run_test 133g "Check reads/writes of server lustre proc files with bad area io" @@ -12781,24 +13175,21 @@ test_133h() { skip "Need MDS version at least 2.9.54" local facet - for facet in client mds1 ost1; do - local facet_proc_dirs=$(do_facet $facet \ - \\\ls -d $proc_regexp 2> /dev/null) - [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet" - echo "${facet}_proc_dirs='$facet_proc_dirs'" # Get the list of files that are missing the terminating newline - local missing=($(do_facet $facet \ - find ${facet_proc_dirs} -type f \| \ - while read F\; do \ - awk -v FS='\v' -v RS='\v\v' \ - "'END { if(NR>0 && \ - \\\$NF !~ /.*\\\n\$/) \ - print FILENAME}'" \ - '\$F'\; \ - done 2>/dev/null)) - [ ${#missing[*]} -eq 0 ] || - error "files do not end with newline: ${missing[*]}" + local plist=$(do_facet $facet + $LCTL list_param -FR '*' | grep '=' | tr -d =) + local ent + for ent in $plist; do + local missing=$(do_facet $facet $LCTL get_param $ent \|\ + awk -v FS='\v' -v RS='\v\v' \ + "'END { if(NR>0 && \\\$NF !~ /.*\\\n\$/) \ + print FILENAME}'" 2>/dev/null) + [ -z $missing ] || { + do_facet $facet $LCTL get_param $ent | od -An -tx1 + error "file does not end with newline: $facet-$ent" + } + done done } run_test 133h "Proc files should end with newlines" @@ -12987,7 +13378,7 @@ test_140() { #bug-17379 } run_test 140 "Check reasonable stack depth (shouldn't LBUG) ====" -test_150() { +test_150a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" local TF="$TMP/$tfile" @@ -13018,7 +13409,105 @@ test_150() { rm -f $TF true } -run_test 150 "truncate/append tests" +run_test 150a "truncate/append tests" + +test_150b() { + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + [ $OST1_VERSION -lt $(version_code 2.13.50) ] && + skip "Need OST version at least 2.13.53" + touch $DIR/$tfile + check_fallocate $DIR/$tfile || error "fallocate failed" +} +run_test 150b "Verify fallocate (prealloc) functionality" + +test_150c() { + local bytes + local want + 
+ [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + [ $OST1_VERSION -lt $(version_code 2.13.50) ] && + skip "Need OST version at least 2.13.53" + + $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed" + fallocate -l ${OSTCOUNT}m $DIR/$tdir || error "fallocate failed" + sync; sync_all_data + cancel_lru_locks $OSC + sleep 5 + bytes=$(($(stat -c '%b * %B' $DIR/$tdir))) + want=$((OSTCOUNT * 1048576)) + + # Must allocate all requested space, not more than 5% extra + (( $bytes >= $want && $bytes < $want * 105 / 100 )) || + error "bytes $bytes is not $want" +} +run_test 150c "Verify fallocate Size and Blocks" + +test_150d() { + local bytes + local want + + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + [ $OST1_VERSION -lt $(version_code 2.13.50) ] && + skip "Need OST version at least 2.13.53" + + $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed" + fallocate -o 1G -l ${OSTCOUNT}m $DIR/$tdir || error "fallocate failed" + sync; sync_all_data + cancel_lru_locks $OSC + sleep 5 + bytes=$(($(stat -c '%b * %B' $DIR/$tdir))) + want=$((OSTCOUNT * 1048576)) + + # Must allocate all requested space, not more than 5% extra + (( $bytes >= $want && $bytes < $want * 105 / 100 )) || + error "bytes $bytes is not $want" +} +run_test 150d "Verify fallocate Size and Blocks - Non zero start" + +test_150e() { + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + [ $OST1_VERSION -ge $(version_code 2.13.55) ] || + skip "Need OST version at least 2.13.55" + + echo "df before:" + $LFS df + $LFS setstripe -c${OSTCOUNT} $DIR/$tfile || + error "$LFS setstripe -c${OSTCOUNT} $DIR/$tfile failed" + + # Find OST with Minimum Size + min_size_ost=$($LFS df | awk "/$FSNAME-OST/ { print \$4 }" | + sort -un | head -1) + + # Get 90% of the available space + local space=$(((min_size_ost * 90)/100 * OSTCOUNT)) + + fallocate -l${space}k $DIR/$tfile || + error "fallocate ${space}k $DIR/$tfile failed" + echo "'fallocate -l ${space}k 
$DIR/$tfile' succeeded" + + # get size immediately after fallocate. This should be correctly + # updated + local size=$(stat -c '%s' $DIR/$tfile) + local used=$(( $(stat -c '%b * %B' $DIR/$tfile) / 1024)) + + # Sleep for a while for statfs to get updated. And not pull from cache. + sleep 2 + + echo "df after fallocate:" + $LFS df + + (( size / 1024 == space )) || error "size $size != requested $space" + [ "$ost1_FSTYPE" != ldiskfs ] || (( used >= space )) || + error "used $used < space $space" + + rm $DIR/$tfile || error "rm failed" + sync + wait_delete_completed + + echo "df after unlink:" + $LFS df +} +run_test 150e "Verify 90% of available OST space consumed by fallocate" #LU-2902 roc_hit was not able to read all values from lproc function roc_hit_init() { @@ -13305,10 +13794,19 @@ test_154A() { [ -z "$fid" ] && error "path2fid unable to get $tf FID" # check that we get the same pathname back - local found=$($LFS fid2path $MOUNT "$fid") - [ -z "$found" ] && error "fid2path unable to get '$fid' path" - [ "$found" == "$tf" ] || - error "fid2path($fid=path2fid($tf)) = $found != $tf" + local rootpath + local found + for rootpath in "$MOUNT" "$MOUNT///" "$MOUNT/$tfile"; do + echo "$rootpath $fid" + found=$($LFS fid2path $rootpath "$fid") + [ -z "$found" ] && error "fid2path unable to get '$fid' path" + [ "$found" == "$tf" ] || error "fid2path $found != $tf" + done + + # check wrong root path format + rootpath=$MOUNT"_wrong" + found=$($LFS fid2path $rootpath "$fid") + [ -z "$found" ] || error "should fail ($rootpath != $MOUNT)" } run_test 154A "lfs path2fid and fid2path basic checks" @@ -14648,6 +15146,70 @@ test_160k() { } run_test 160k "Verify that changelog records are not lost" +# Verifies that a file passed as a parameter has recently had an operation +# performed on it that has generated an MTIME changelog which contains the +# correct parent FID. 
As files might reside on a different MDT from the +# parent directory in DNE configurations, the FIDs are translated to paths +# before being compared, which should be identical +compare_mtime_changelog() { + local file="${1}" + local mdtidx + local mtime + local cl_fid + local pdir + local dir + + mdtidx=$($LFS getstripe --mdt-index $file) + mdtidx=$(printf "%04x" $mdtidx) + + # Obtain the parent FID from the MTIME changelog + mtime=$($LFS changelog $FSNAME-MDT$mdtidx | tail -n 1 | grep MTIME) + [ -z "$mtime" ] && error "MTIME changelog not recorded" + + cl_fid=$(sed -e 's/.* p=//' -e 's/ .*//' <<<$mtime) + [ -z "$cl_fid" ] && error "parent FID not present" + + # Verify that the path for the parent FID is the same as the path for + # the test directory + pdir=$($LFS fid2path $MOUNT "$cl_fid") + + dir=$(dirname $1) + + [[ "${pdir%/}" == "$dir" ]] || + error "MTIME changelog parent FID is wrong, expected $dir, got $pdir" +} + +test_160l() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + remote_mds_nodsh && skip "remote MDS with nodsh" + [[ $MDS1_VERSION -ge $(version_code 2.13.55) ]] || + skip "Need MDS version at least 2.13.55" + + local cl_user + + changelog_register || error "changelog_register failed" + cl_user="${CL_USERS[$SINGLEMDS]%% *}" + + changelog_users $SINGLEMDS | grep -q $cl_user || + error "User '$cl_user' not found in changelog_users" + + # Clear some types so that MTIME changelogs are generated + changelog_chmask "-CREAT" + changelog_chmask "-CLOSE" + + test_mkdir $DIR/$tdir || error "failed to mkdir $DIR/$tdir" + + # Test CL_MTIME during setattr + touch $DIR/$tdir/$tfile + compare_mtime_changelog $DIR/$tdir/$tfile + + # Test CL_MTIME during close + $MULTIOP $DIR/$tdir/${tfile}_2 O_2w4096c || error "multiop failed" + compare_mtime_changelog $DIR/$tdir/${tfile}_2 +} +run_test 160l "Verify that MTIME changelog records contain the parent FID" + test_161a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -15019,8 +15581,11 @@ 
test_165a() { local rc local count - do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & + (( $OST1_VERSION >= $(version_code 2.13.54) )) || + skip "OFD access log unsupported" + setup_165 + do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & sleep 5 do_facet ost1 ofd_access_log_reader --list @@ -15052,13 +15617,19 @@ test_165b() { local size local flags + (( $OST1_VERSION >= $(version_code 2.13.54) )) || + skip "OFD access log unsupported" + setup_165 + do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & + sleep 5 - lfs setstripe -c 1 -i 0 "${file}" - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'" do_facet ost1 ofd_access_log_reader --list - do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & + lfs setstripe -c 1 -i 0 "${file}" + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || + error "cannot create '${file}'" + sleep 5 do_facet ost1 killall -TERM ofd_access_log_reader wait @@ -15094,8 +15665,12 @@ test_165b() { fi do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r524288c || error "cannot read '${file}'" sleep 5 + + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r524288c || + error "cannot read '${file}'" + sleep 5 + do_facet ost1 killall -TERM ofd_access_log_reader wait rc=$? @@ -15127,89 +15702,202 @@ test_165b() { run_test 165b "ofd access log entries are produced and consumed" test_165c() { + local trace="/tmp/${tfile}.trace" local file="${DIR}/${tdir}/${tfile}" + + (( $OST1_VERSION >= $(version_code 2.13.54) )) || + skip "OFD access log unsupported" + test_mkdir "${DIR}/${tdir}" setup_165 + do_facet ost1 ofd_access_log_reader --debug=- --trace=- > "${trace}" & + sleep 5 lfs setstripe -c 1 -i 0 "${DIR}/${tdir}" # 4096 / 64 = 64. Create twice as many entries. 
for ((i = 0; i < 128; i++)); do - $MULTIOP "${file}-${i}" oO_CREAT:O_WRONLY:w512c || error "cannot create file" + $MULTIOP "${file}-${i}" oO_CREAT:O_WRONLY:w512c || + error "cannot create file" done sync - do_facet ost1 ofd_access_log_reader --list + + do_facet ost1 killall -TERM ofd_access_log_reader + wait + rc=$? + if ((rc != 0)); then + error "ofd_access_log_reader exited with rc = '${rc}'" + fi + unlinkmany "${file}-%d" 128 } run_test 165c "full ofd access logs do not block IOs" -oal_peek_entry_count() { - do_facet ost1 ofd_access_log_reader --list | awk '$1 == "_entry_count:" { print $2; }' +oal_get_read_count() { + local stats="$1" + + # STATS lustre-OST0001 alr_read_count 1 + + do_facet ost1 cat "${stats}" | + awk '$1 == "STATS" && $3 == "alr_read_count" { count = $4; } + END { print count; }' } -oal_expect_entry_count() { - local entry_count=$(oal_peek_entry_count) - local expect="$1" +oal_expect_read_count() { + local stats="$1" + local count + local expect="$2" - if ((entry_count == expect)); then + # Ask ofd_access_log_reader to write stats. + do_facet ost1 killall -USR1 ofd_access_log_reader + + # Allow some time for things to happen. 
+ sleep 1 + + count=$(oal_get_read_count "${stats}") + if ((count == expect)); then return 0 fi - error_noexit "bad entry count, got ${entry_count}, expected ${expect}" - do_facet ost1 ofd_access_log_reader --list >&2 + error_noexit "bad read count, got ${count}, expected ${expect}" + do_facet ost1 cat "${stats}" >&2 exit 1 } test_165d() { - local trace="/tmp/${tfile}.trace" + local stats="/tmp/${tfile}.stats" local file="${DIR}/${tdir}/${tfile}" local param="obdfilter.${FSNAME}-OST0000.access_log_mask" - local entry_count + + (( $OST1_VERSION >= $(version_code 2.13.54) )) || + skip "OFD access log unsupported" + test_mkdir "${DIR}/${tdir}" setup_165 + do_facet ost1 ofd_access_log_reader --stats="${stats}" & + sleep 5 + lfs setstripe -c 1 -i 0 "${file}" do_facet ost1 lctl set_param "${param}=rw" - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'" - oal_expect_entry_count 1 + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || + error "cannot create '${file}'" + oal_expect_read_count "${stats}" 1 - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'" - oal_expect_entry_count 2 + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || + error "cannot read '${file}'" + oal_expect_read_count "${stats}" 2 do_facet ost1 lctl set_param "${param}=r" - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'" - oal_expect_entry_count 2 + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || + error "cannot create '${file}'" + oal_expect_read_count "${stats}" 2 - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'" - oal_expect_entry_count 3 + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || + error "cannot read '${file}'" + oal_expect_read_count "${stats}" 3 do_facet ost1 lctl set_param "${param}=w" - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'" - 
oal_expect_entry_count 4 + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || + error "cannot create '${file}'" + oal_expect_read_count "${stats}" 4 + + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || + error "cannot read '${file}'" + oal_expect_read_count "${stats}" 4 + + do_facet ost1 lctl set_param "${param}=0" + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || + error "cannot create '${file}'" + oal_expect_read_count "${stats}" 4 + + $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || + error "cannot read '${file}'" + oal_expect_read_count "${stats}" 4 + + do_facet ost1 killall -TERM ofd_access_log_reader + wait + rc=$? + if ((rc != 0)); then + error "ofd_access_log_reader exited with rc = '${rc}'" + fi +} +run_test 165d "ofd_access_log mask works" + +test_165e() { + local stats="/tmp/${tfile}.stats" + local file0="${DIR}/${tdir}-0/${tfile}" + local file1="${DIR}/${tdir}-1/${tfile}" + + (( $OST1_VERSION >= $(version_code 2.13.54) )) || + skip "OFD access log unsupported" + + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" + + test_mkdir -c 1 -i 0 "${DIR}/${tdir}-0" + test_mkdir -c 1 -i 1 "${DIR}/${tdir}-1" + + lfs setstripe -c 1 -i 0 "${file0}" + lfs setstripe -c 1 -i 0 "${file1}" + + setup_165 + do_facet ost1 ofd_access_log_reader -I 1 --stats="${stats}" & + sleep 5 + + $MULTIOP "${file0}" oO_CREAT:O_WRONLY:w512c || + error "cannot create '${file0}'" + sync + oal_expect_read_count "${stats}" 0 + + $MULTIOP "${file1}" oO_CREAT:O_WRONLY:w512c || + error "cannot create '${file1}'" + sync + oal_expect_read_count "${stats}" 1 + + do_facet ost1 killall -TERM ofd_access_log_reader + wait + rc=$? 
+ if ((rc != 0)); then + error "ofd_access_log_reader exited with rc = '${rc}'" + fi +} +run_test 165e "ofd_access_log MDT index filter works" + +test_165f() { + local trace="/tmp/${tfile}.trace" + local rc + local count - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'" - oal_expect_entry_count 4 + setup_165 + do_facet ost1 timeout 60 ofd_access_log_reader \ + --exit-on-close --debug=- --trace=- > "${trace}" & + sleep 5 + stop ost1 - do_facet ost1 lctl set_param "${param}=0" - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_WRONLY:w1048576c || error "cannot create '${file}'" - oal_expect_entry_count 4 + wait + rc=$? - $MULTIOP "${file}" oO_CREAT:O_DIRECT:O_RDONLY:r1048576c || error "cannot read '${file}'" - oal_expect_entry_count 4 + if ((rc != 0)); then + error_noexit "ofd_access_log_reader exited with rc = '${rc}'" + cat "${trace}" + exit 1 + fi } -run_test 165d "ofd_access_log mask works" +run_test 165f "ofd_access_log_reader --exit-on-close works" test_169() { # do directio so as not to populate the page cache log "creating a 10 Mb file" - $MULTIOP $DIR/$tfile oO_CREAT:O_DIRECT:O_RDWR:w$((10*1048576))c || error "multiop failed while creating a file" + $MULTIOP $DIR/$tfile oO_CREAT:O_DIRECT:O_RDWR:w$((10*1048576))c || + error "multiop failed while creating a file" log "starting reads" dd if=$DIR/$tfile of=/dev/null bs=4096 & log "truncating the file" - $MULTIOP $DIR/$tfile oO_TRUNC:c || error "multiop failed while truncating the file" + $MULTIOP $DIR/$tfile oO_TRUNC:c || + error "multiop failed while truncating the file" log "killing dd" kill %+ || true # reads might have finished echo "wait until dd is finished" @@ -15357,7 +16045,8 @@ obdecho_test() { test_180a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" - if ! module_loaded obdecho; then + if ! [ -d /sys/fs/lustre/echo_client ] && + ! 
module_loaded obdecho; then load_module obdecho/obdecho && stack_trap "rmmod obdecho" EXIT || error "unable to load obdecho on client" @@ -15568,6 +16257,7 @@ test_184c() { local cmpn_arg=$(cmp -n 2>&1 | grep "invalid option") [ -n "$cmpn_arg" ] && skip_env "cmp does not support -n" check_swap_layouts_support + check_swap_layout_no_dom $DIR local dir0=$DIR/$tdir/$testnum mkdir -p $dir0 || error "creating dir $dir0" @@ -15586,8 +16276,9 @@ test_184c() { dd if=$ref1 of=$file1 bs=16k & local DD_PID=$! - # Make sure dd starts to copy file - while [ ! -f $file1 ]; do sleep 0.1; done + # Make sure dd starts to copy file, but wait at most 5 seconds + local loops=0 + while [ ! -s $file1 -a $((loops++)) -lt 50 ]; do sleep 0.1; done $LFS swap_layouts $file1 $file2 local rc=$? @@ -15615,6 +16306,7 @@ run_test 184c "Concurrent write and layout swap" test_184d() { check_swap_layouts_support + check_swap_layout_no_dom $DIR [ -z "$(which getfattr 2>/dev/null)" ] && skip_env "no getfattr command" @@ -15651,6 +16343,7 @@ test_184e() { [[ $MDS1_VERSION -ge $(version_code 2.6.94) ]] || skip "Need MDS version at least 2.6.94" check_swap_layouts_support + check_swap_layout_no_dom $DIR [ -z "$(which getfattr 2>/dev/null)" ] && skip_env "no getfattr command" @@ -16133,24 +16826,55 @@ test_205a() { # Job stats error "Unexpected jobids when jobid_var=$JOBENV" fi - lctl set_param jobid_var=USER jobid_name="S.%j.%e.%u.%h.E" - JOBENV="JOBCOMPLEX" - JOBCOMPLEX="S.$USER.touch.$(id -u).$(hostname).E" + # test '%j' access to environment variable - if supported + if lctl set_param jobid_var=USER jobid_name="S.%j.%e.%u.%h.E"; then + JOBENV="JOBCOMPLEX" + JOBCOMPLEX="S.$USER.touch.$(id -u).$(hostname).E" + + verify_jobstats "touch $DIR/$tfile" $SINGLEMDS + fi + + # test '%j' access to per-session jobid - if supported + if lctl list_param jobid_this_session > /dev/null 2>&1 + then + lctl set_param jobid_var=session jobid_name="S.%j.%e.%u.%h.E" + lctl set_param jobid_this_session=$USER + + 
JOBENV="JOBCOMPLEX" + JOBCOMPLEX="S.$USER.touch.$(id -u).$(hostname).E" - verify_jobstats "touch $DIR/$tfile" $SINGLEMDS + verify_jobstats "touch $DIR/$tfile" $SINGLEMDS + fi } run_test 205a "Verify job stats" -# LU-13117 +# LU-13117, LU-13597 test_205b() { - $LCTL set_param jobid_var=USER jobid_name="%e.%u" + job_stats="mdt.*.job_stats" + $LCTL set_param $job_stats=clear + # Setting jobid_var to USER might not be supported + $LCTL set_param jobid_var=USER || true + $LCTL set_param jobid_name="%e.%u" env -i USERTESTJOBSTATS=foolish touch $DIR/$tfile.1 - do_facet $SINGLEMDS $LCTL get_param mdt.*.job_stats | - grep job_id: | grep foolish && + do_facet $SINGLEMDS $LCTL get_param $job_stats | + grep "job_id:.*foolish" && error "Unexpected jobid found" - true + do_facet $SINGLEMDS $LCTL get_param $job_stats | + grep "open:.*min.*max.*sum" || + error "wrong job_stats format found" +} +run_test 205b "Verify job stats jobid and output format" + +# LU-13733 +test_205c() { + $LCTL set_param llite.*.stats=0 + dd if=/dev/zero of=$DIR/$tfile.1 bs=4k count=1 + $LCTL get_param llite.*.stats + $LCTL get_param llite.*.stats | grep \ + "write_bytes *1 samples \[bytes\] 4096 4096 4096 16777216" || + error "wrong client stats format found" } -run_test 205b "Verify job stats jobid parsing" +run_test 205c "Verify client stats format" # LU-1480, LU-1773 and LU-1657 test_206() { @@ -16258,6 +16982,8 @@ test_209() { sync; sleep 5; sync; echo 3 > /proc/sys/vm/drop_caches + [ -f /sys/kernel/slab/ptlrpc_cache/shrink ] && + echo 1 > /sys/kernel/slab/ptlrpc_cache/shrink req_before=$(awk '/ptlrpc_cache / { print $2 }' /proc/slabinfo) # open/close 500 times @@ -16266,6 +16992,8 @@ test_209() { done echo 3 > /proc/sys/vm/drop_caches + [ -f /sys/kernel/slab/ptlrpc_cache/shrink ] && + echo 1 > /sys/kernel/slab/ptlrpc_cache/shrink req_after=$(awk '/ptlrpc_cache / { print $2 }' /proc/slabinfo) echo "before: $req_before, after: $req_after" @@ -16275,6 +17003,27 @@ test_209() { } run_test 209 "read-only 
open/close requests should be freed promptly" +test_210() { + local pid + + $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eW_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" + + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc & + pid=$! + sleep 1 + + $LFS getstripe $DIR/$tfile + kill -USR1 $pid + wait $pid || error "multiop failed" +} +run_test 210 "lfs getstripe does not break leases" + test_212() { size=`date +%s` size=$((size % 8192 + 1)) @@ -16838,6 +17587,34 @@ test_226b () { } run_test 226b "call path2fid and fid2path on files of all type under remote dir" +test_226c () { + [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" + [[ $MDS1_VERSION -ge $(version_code 2.13.55) ]] || + skip "Need MDS version at least 2.13.55" + + local submnt=/mnt/submnt + local srcfile=/etc/passwd + local dstfile=$submnt/passwd + local path + local fid + + rm -rf $DIR/$tdir + rm -rf $submnt + $LFS setdirstripe -c -1 -i 1 $DIR/$tdir || + error "create remote directory failed" + mkdir -p $submnt || error "create $submnt failed" + $MOUNT_CMD $MGSNID:/$FSNAME/$tdir $submnt || + error "mount $submnt failed" + stack_trap "umount $submnt" EXIT + + cp $srcfile $dstfile + fid=$($LFS path2fid $dstfile) + path=$($LFS fid2path $submnt "$fid") + [ "$path" = "$dstfile" ] || + error "fid2path $submnt $fid failed ($path != $dstfile)" +} +run_test 226c "call path2fid and fid2path under remote dir with subdir mount" + # LU-1299 Executing or running ldd on a truncated executable does not # cause an out-of-memory condition. 
test_227() { @@ -17535,7 +18312,7 @@ run_test 230i "lfs migrate -m tolerates trailing slashes" test_230j() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" - [ $MDS1_VERSION -lt $(version_code 2.11.52) ] && - skip "Need MDS version at least 2.11.52" + [ $MDS1_VERSION -lt $(version_code 2.13.52) ] && + skip "Need MDS version at least 2.13.52" $LFS mkdir -m 0 -c 1 $DIR/$tdir || error "mkdir $tdir failed" @@ -17543,7 +18320,7 @@ test_230j() { error "create $tfile failed" cat /etc/passwd > $DIR/$tdir/$tfile - $LFS migrate -m 1 $DIR/$tdir + $LFS migrate -m 1 $DIR/$tdir || error "migrate failed" cmp /etc/passwd $DIR/$tdir/$tfile || error "DoM file mismatch after migration" @@ -17682,6 +18459,200 @@ test_230n() { } run_test 230n "Dir migration with mirrored file" +test_230o() { + [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" + [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + skip "Need MDS version at least 2.13.52" + + local mdts=$(comma_list $(mdts_nodes)) + local timeout=100 + + local restripe_status + local delta + local i + local j + + [[ $(facet_fstype mds1) == zfs ]] && timeout=300 + + # in case "crush" hash type is not set + do_nodes $mdts "$LCTL set_param lod.*.mdt_hash=crush" + + restripe_status=$(do_facet mds1 $LCTL get_param -n \ + mdt.*MDT0000.enable_dir_restripe) + do_nodes $mdts "$LCTL set_param mdt.*.enable_dir_restripe=1" + stack_trap "do_nodes $mdts $LCTL set_param \ + mdt.*.enable_dir_restripe=$restripe_status" + + mkdir $DIR/$tdir + createmany -m $DIR/$tdir/f 100 || + error "create files under remote dir failed $i" + createmany -d $DIR/$tdir/d 100 || + error "create dirs under remote dir failed $i" + + for i in $(seq 2 $MDSCOUNT); do + do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear > /dev/null" + $LFS setdirstripe -c $i $DIR/$tdir || + error "split -c $i $tdir failed" + wait_update $HOSTNAME \ + "$LFS getdirstripe -H $DIR/$tdir" "crush" $timeout || + error "dir split not finished" + delta=$(do_nodes $mdts "lctl get_param -n mdt.*MDT*.md_stats" | + awk '/migrate/ {sum += $2} END { print sum 
}') + echo "$delta files migrated when dir split from $((i - 1)) to $i stripes" + # delta is around total_files/stripe_count + [ $delta -lt $((200 /(i - 1))) ] || + error "$delta files migrated" + done +} +run_test 230o "dir split" + +test_230p() { + [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" + [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + skip "Need MDS version at least 2.13.52" + + local mdts=$(comma_list $(mdts_nodes)) + local timeout=100 + + local restripe_status + local delta + local i + local j + + [[ $(facet_fstype mds1) == zfs ]] && timeout=300 + + do_nodes $mdts "$LCTL set_param lod.*.mdt_hash=crush" + + restripe_status=$(do_facet mds1 $LCTL get_param -n \ + mdt.*MDT0000.enable_dir_restripe) + do_nodes $mdts "$LCTL set_param mdt.*.enable_dir_restripe=1" + stack_trap "do_nodes $mdts $LCTL set_param \ + mdt.*.enable_dir_restripe=$restripe_status" + + test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir + createmany -m $DIR/$tdir/f 100 || + error "create files under remote dir failed $i" + createmany -d $DIR/$tdir/d 100 || + error "create dirs under remote dir failed $i" + + for i in $(seq $((MDSCOUNT - 1)) -1 1); do + local mdt_hash="crush" + + do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear > /dev/null" + $LFS setdirstripe -c $i $DIR/$tdir || + error "split -c $i $tdir failed" + [ $i -eq 1 ] && mdt_hash="none" + wait_update $HOSTNAME \ + "$LFS getdirstripe -H $DIR/$tdir" $mdt_hash $timeout || + error "dir merge not finished" + delta=$(do_nodes $mdts "lctl get_param -n mdt.*MDT*.md_stats" | + awk '/migrate/ {sum += $2} END { print sum }') + echo "$delta files migrated when dir merge from $((i + 1)) to $i stripes" + # delta is around total_files/stripe_count + [ $delta -lt $((200 / i)) ] || + error "$delta files migrated" + done +} +run_test 230p "dir merge" + +test_230q() { + [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" + [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + skip "Need MDS version at least 2.13.52" + + local mdts=$(comma_list 
$(mdts_nodes)) + local saved_threshold=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000.dir_split_count) + local saved_delta=$(do_facet mds1 \ + $LCTL get_param -n mdt.*-MDT0000.dir_split_delta) + local threshold=100 + local delta=2 + local total=0 + local stripe_count=0 + local stripe_index + local nr_files + + # test with fewer files on ZFS + [ "$mds1_FSTYPE" == "zfs" ] && threshold=40 + + stack_trap "do_nodes $mdts $LCTL set_param \ + mdt.*.dir_split_count=$saved_threshold" + stack_trap "do_nodes $mdts $LCTL set_param \ + mdt.*.dir_split_delta=$saved_delta" + stack_trap "do_nodes $mdts $LCTL set_param mdt.*.dir_restripe_nsonly=1" + do_nodes $mdts "$LCTL set_param mdt.*.enable_dir_auto_split=1" + do_nodes $mdts "$LCTL set_param mdt.*.dir_split_count=$threshold" + do_nodes $mdts "$LCTL set_param mdt.*.dir_split_delta=$delta" + do_nodes $mdts "$LCTL set_param mdt.*.dir_restripe_nsonly=0" + do_nodes $mdts "$LCTL set_param lod.*.mdt_hash=crush" + + $LFS mkdir -i -1 -c 1 $DIR/$tdir || error "mkdir $tdir failed" + stripe_index=$($LFS getdirstripe -i $DIR/$tdir) + + while [ $stripe_count -lt $MDSCOUNT ]; do + createmany -m $DIR/$tdir/f $total $((threshold * 3 / 2)) || + error "create sub files failed" + stat $DIR/$tdir > /dev/null + total=$((total + threshold * 3 / 2)) + stripe_count=$((stripe_count + delta)) + [ $stripe_count -gt $MDSCOUNT ] && stripe_count=$MDSCOUNT + + wait_update $HOSTNAME \ + "$LFS getdirstripe -c $DIR/$tdir" "$stripe_count" 40 || + error "stripe count $($LFS getdirstripe -c $DIR/$tdir) != $stripe_count" + + wait_update $HOSTNAME \ + "$LFS getdirstripe -H $DIR/$tdir" "crush" 200 || + error "stripe hash $($LFS getdirstripe -H $DIR/$tdir) != crush" + + nr_files=$($LFS getstripe -m $DIR/$tdir/* | + grep -w $stripe_index | wc -l) + echo "$nr_files files on MDT$stripe_index after split" + [ $nr_files -lt $((total / (stripe_count - 1))) ] || + error "$nr_files files on MDT$stripe_index after split" + + nr_files=$(ls $DIR/$tdir | wc -w) + [ $nr_files 
-eq $total ] || + error "total sub files $nr_files != $total" + done +} +run_test 230q "dir auto split" + +test_230r() { + [[ $PARALLEL != "yes" ]] || skip "skip parallel run" + [[ $MDSCOUNT -ge 2 ]] || skip_env "needs >= 2 MDTs" + [[ $MDS1_VERSION -ge $(version_code 2.13.54) ]] || + skip "Need MDS version at least 2.13.54" + + # maximum amount of local locks: + # parent striped dir - 2 locks + # new stripe in parent to migrate to - 1 lock + # source and target - 2 locks + # Total 5 locks for regular file + mkdir -p $DIR/$tdir + $LFS mkdir -i1 -c2 $DIR/$tdir/dir1 + touch $DIR/$tdir/dir1/eee + + # create 4 hardlink for 4 more locks + # Total: 9 locks > RS_MAX_LOCKS (8) + $LFS mkdir -i1 -c1 $DIR/$tdir/dir2 + $LFS mkdir -i1 -c1 $DIR/$tdir/dir3 + $LFS mkdir -i1 -c1 $DIR/$tdir/dir4 + $LFS mkdir -i1 -c1 $DIR/$tdir/dir5 + ln $DIR/$tdir/dir1/eee $DIR/$tdir/dir2/eee + ln $DIR/$tdir/dir1/eee $DIR/$tdir/dir3/eee + ln $DIR/$tdir/dir1/eee $DIR/$tdir/dir4/eee + ln $DIR/$tdir/dir1/eee $DIR/$tdir/dir5/eee + + cancel_lru_locks mdc + + $LFS migrate -m1 -c1 $DIR/$tdir/dir1 || + error "migrate dir fails" + + rm -rf $DIR/$tdir || error "rm dir failed after migration" +} +run_test 230r "migrate with too many local locks" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc @@ -18143,7 +19114,7 @@ test_247c() { $LFS fid2path $submount $fid && error "fid2path should fail" cleanup_247 $submount } -run_test 247c "running fid2path outside root" +run_test 247c "running fid2path outside subdirectory root" test_247d() { lctl get_param -n mdc.$FSNAME-MDT0000*.import | grep -q subtree || @@ -18156,11 +19127,28 @@ test_247d() { FILESET="$FILESET/$tdir" mount_client $submount || error "mount $submount failed" trap "cleanup_247 $submount" EXIT - local fid=$($LFS path2fid $submount/dir1) - $LFS fid2path $submount $fid || error "fid2path should succeed" + + local td=$submount/dir1 + local fid=$($LFS path2fid $td) + [ -z "$fid" ] && error "path2fid unable to get $td FID" + + # 
check that we get the same pathname back + local rootpath + local found + for rootpath in "$submount" "$submount///" "$submount/dir1"; do + echo "$rootpath $fid" + found=$($LFS fid2path $rootpath "$fid") + [ -n "$found" ] || error "fid2path should succeed" + [ "$found" == "$td" ] || error "fid2path $found != $td" + done + # check wrong root path format + rootpath=$submount"_wrong" + found=$($LFS fid2path $rootpath "$fid") + [ -z "$found" ] || error "fid2path should fail ($rootpath != $submount)" + cleanup_247 $submount } -run_test 247d "running fid2path inside root" +run_test 247d "running fid2path inside subdirectory root" # LU-8037 test_247e() { @@ -18178,6 +19166,38 @@ test_247e() { } run_test 247e "mount .. as fileset" +test_247f() { + [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.13.52) ] && + skip "Need at least version 2.13.52" + lctl get_param -n mdc.$FSNAME-MDT0000*.import | + grep -q subtree || + skip "Fileset feature is not supported" + + mkdir $DIR/$tdir || error "mkdir $tdir failed" + $LFS mkdir -i $((MDSCOUNT - 1)) $DIR/$tdir/remote || + error "mkdir remote failed" + mkdir $DIR/$tdir/remote/subdir || error "mkdir remote/subdir failed" + $LFS mkdir -c $MDSCOUNT $DIR/$tdir/striped || + error "mkdir striped failed" + mkdir $DIR/$tdir/striped/subdir || error "mkdir striped/subdir failed" + + local submount=${MOUNT}_$tdir + + mkdir -p $submount || error "mkdir $submount failed" + + local dir + local fileset=$FILESET + + for dir in $tdir/remote $tdir/remote/subdir \ + $tdir/striped $tdir/striped/subdir $tdir/striped/. 
; do + FILESET="$fileset/$dir" mount_client $submount || + error "mount $dir failed" + umount_client $submount + done +} +run_test 247f "mount striped or remote directory as fileset" + test_248a() { local fast_read_sav=$($LCTL get_param -n llite.*.fast_read 2>/dev/null) [ -z "$fast_read_sav" ] && skip "no fast read support" @@ -18619,7 +19639,7 @@ ladvise_willread_performance() return 0 lowest_speedup=$(bc <<<"scale=2; $average_cache / 2") - [ ${average_ladvise%.*} -gt $lowest_speedup ] || + (( $(bc <<<"$average_ladvise > $lowest_speedup") )) || error_not_in_vm "Speedup with willread is less than " \ "$lowest_speedup%, got $average_ladvise%" } @@ -19397,6 +20417,36 @@ test_270g() { } run_test 270g "DoM: default DoM stripe size depends on free space" +test_270h() { + [[ $MDS1_VERSION -ge $(version_code 2.13.53) ]] || + skip "Need MDS version at least 2.13.53" + + local mdtname=${FSNAME}-MDT0000-mdtlov + local dom=$DIR/$tdir/$tfile + local save="$TMP/$TESTSUITE-$TESTNAME.parameters" + + save_lustre_params mds1 "lod.*.dom_stripesize" > $save + stack_trap "restore_lustre_params < $save; rm -f $save" EXIT + + $LFS mkdir -i 0 -c 1 $DIR/$tdir + $LFS setstripe -E 1M -c1 -E -1 -c2 ${dom}_1 || + error "can't create OST file" + # mirrored file with DOM entry in the second mirror + $LFS mirror extend -N -E 1M -L mdt -E eof -c2 ${dom}_1 || + error "can't create mirror with DoM component" + + do_facet mds1 $LCTL set_param -n lod.$mdtname.dom_stripesize=0 + + # DOM component in the middle and has other entries in the same mirror, + # should succeed but lost DoM component + $LFS setstripe --copy=${dom}_1 $dom || + error "Can't create file from OST|DOM mirror layout" + # check new file has no DoM layout after all + [[ $($LFS getstripe -L $dom) != "mdt" ]] || + error "File has DoM component while DoM is disabled" +} +run_test 270h "DoM: DoM stripe removal when disabled on server" + test_271a() { [ $MDS1_VERSION -lt $(version_code 2.10.55) ] && skip "Need MDS version at least 2.10.55" @@ 
-20727,6 +21777,56 @@ test_300r() { } run_test 300r "test -1 striped directory" +test_300s_helper() { + local count=$1 + + local stripe_dir=$DIR/$tdir/striped_dir.$count + + $LFS mkdir -c $count $stripe_dir || + error "lfs mkdir -c error" + + $LFS getdirstripe $stripe_dir || + error "lfs getdirstripe fails" + + local stripe_count + stripe_count=$($LFS getdirstripe $stripe_dir | + awk '/lmv_stripe_count:/ { print $2 }') + + [ $count -ne $stripe_count ] && + error_noexit "bad stripe count $stripe_count expected $count" + + local dupe_stripes + dupe_stripes=$($LFS getdirstripe $stripe_dir | + awk '/0x/ {count[$1] += 1}; END { + for (idx in count) { + if (count[idx]>1) { + print "index " idx " count " count[idx] + } + } + }') + + if [[ -n "$dupe_stripes" ]] ; then + lfs getdirstripe $stripe_dir + error_noexit "Dupe MDT above: $dupe_stripes " + fi + + rm -rf $stripe_dir || + error_noexit "unlink $stripe_dir fails" +} + +test_300s() { + [ $MDS1_VERSION -lt $(version_code 2.7.55) ] && + skip "Need MDS version at least 2.7.55" && return + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + mkdir $DIR/$tdir + for count in $(seq 2 $MDSCOUNT); do + test_300s_helper $count + done +} +run_test 300s "test lfs mkdir -c without -i" + + prepare_remote_file() { mkdir $DIR/$tdir/src_dir || error "create remote source failed" @@ -21254,11 +22354,44 @@ test_398c() { # LU-4198 --filename=$DIR/$tfile [ $? -eq 0 ] || error "fio mixed read write error" + echo "AIO with large block size ${size}M" + fio --name=rand-rw --rw=randrw --bs=${size}M --direct=1 \ + --numjobs=1 --fallocate=none --ioengine=libaio \ + --iodepth=16 --allow_file_create=0 --size=${size}M \ + --filename=$DIR/$tfile + [ $? 
-eq 0 ] || error "fio large block size failed" + rm -rf $DIR/$tfile $LCTL set_param debug="$saved_debug" } run_test 398c "run fio to test AIO" +test_398d() { # LU-13846 + command -v aiocp > /dev/null || skip_env "no aiocp installed" + local aio_file=$DIR/aio_file + + $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file + + dd if=/dev/urandom of=$DIR/$tfile bs=1M count=64 + aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file + + diff $DIR/$tfile $aio_file || error "file diff after aiocp" + + # make sure we don't crash and fail properly + aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file && + error "aio not aligned with PAGE SIZE should fail" + + rm -rf $DIR/$tfile $aio_file +} +run_test 398d "run aiocp to verify block size > stripe size" + +test_398e() { + dd if=/dev/zero of=$DIR/$tfile bs=1234 count=1 + touch $DIR/$tfile.new + dd if=$DIR/$tfile of=$DIR/$tfile.new bs=1M count=1 oflag=direct +} +run_test 398e "O_Direct open cleared by fcntl doesn't cause hang" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then @@ -21281,7 +22414,7 @@ test_fake_rw() { [ $blocks -gt 1000 ] && blocks=1000 # 1G in maximum if [ "$read_write" = "read" ]; then - truncate -s $(expr 1048576 \* $blocks) $DIR/$tfile + $TRUNCATE $DIR/$tfile $(expr 1048576 \* $blocks) fi local start_time=$(date +%s.%N) @@ -21401,6 +22534,7 @@ test_401a() { #LU-7437 #count the number of parameters by "list_param -R" local params=$($LCTL list_param -R '*' 2>/dev/null | wc -l) #count the number of parameters by listing proc files + local proc_regexp="/{proc,sys}/{fs,sys,kernel/debug}/{lustre,lnet}/" local proc_dirs=$(eval \ls -d $proc_regexp 2>/dev/null) echo "proc_dirs='$proc_dirs'" [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME" @@ -21422,70 +22556,92 @@ test_401a() { #LU-7437 run_test 401a "Verify if 'lctl list_param -R' can list parameters recursively" test_401b() { - local save=$($LCTL get_param -n jobid_var) - local tmp=testing + # jobid_var may not allow arbitrary values, 
so use jobid_name + # if available + if $LCTL list_param jobid_name > /dev/null 2>&1; then + local testname=jobid_name tmp='testing%p' + else + local testname=jobid_var tmp=testing + fi - $LCTL set_param foo=bar jobid_var=$tmp bar=baz && + local save=$($LCTL get_param -n $testname) + + $LCTL set_param foo=bar $testname=$tmp bar=baz && error "no error returned when setting bad parameters" - local jobid_new=$($LCTL get_param -n foe jobid_var baz) + local jobid_new=$($LCTL get_param -n foe $testname baz) [[ "$jobid_new" == "$tmp" ]] || error "jobid tmp $jobid_new != $tmp" - $LCTL set_param -n fog=bam jobid_var=$save bat=fog - local jobid_old=$($LCTL get_param -n foe jobid_var bag) + $LCTL set_param -n fog=bam $testname=$save bat=fog + local jobid_old=$($LCTL get_param -n foe $testname bag) [[ "$jobid_old" == "$save" ]] || error "jobid new $jobid_old != $save" } run_test 401b "Verify 'lctl {get,set}_param' continue after error" test_401c() { - local jobid_var_old=$($LCTL get_param -n jobid_var) + # jobid_var may not allow arbitrary values, so use jobid_name + # if available + if $LCTL list_param jobid_name > /dev/null 2>&1; then + local testname=jobid_name + else + local testname=jobid_var + fi + + local jobid_var_old=$($LCTL get_param -n $testname) local jobid_var_new - $LCTL set_param jobid_var= && + $LCTL set_param $testname= && error "no error returned for 'set_param a='" - jobid_var_new=$($LCTL get_param -n jobid_var) + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_old" == "$jobid_var_new" ]] || - error "jobid_var was changed by setting without value" + error "$testname was changed by setting without value" - $LCTL set_param jobid_var && + $LCTL set_param $testname && error "no error returned for 'set_param a'" - jobid_var_new=$($LCTL get_param -n jobid_var) + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_old" == "$jobid_var_new" ]] || - error "jobid_var was changed by setting without value" + error "$testname was changed by setting 
without value" } run_test 401c "Verify 'lctl set_param' without value fails in either format." test_401d() { - local jobid_var_old=$($LCTL get_param -n jobid_var) + # jobid_var may not allow arbitrary values, so use jobid_name + # if available + if $LCTL list_param jobid_name > /dev/null 2>&1; then + local testname=jobid_name new_value='foo=bar%p' + else + local testname=jobid_var new_value=foo=bar + fi + + local jobid_var_old=$($LCTL get_param -n $testname) local jobid_var_new - local new_value="foo=bar" - $LCTL set_param jobid_var=$new_value || + $LCTL set_param $testname=$new_value || error "'set_param a=b' did not accept a value containing '='" - jobid_var_new=$($LCTL get_param -n jobid_var) + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_new" == "$new_value" ]] || error "'set_param a=b' failed on a value containing '='" - # Reset the jobid_var to test the other format - $LCTL set_param jobid_var=$jobid_var_old - jobid_var_new=$($LCTL get_param -n jobid_var) + # Reset the $testname to test the other format + $LCTL set_param $testname=$jobid_var_old + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_new" == "$jobid_var_old" ]] || - error "failed to reset jobid_var" + error "failed to reset $testname" - $LCTL set_param jobid_var $new_value || + $LCTL set_param $testname $new_value || error "'set_param a b' did not accept a value containing '='" - jobid_var_new=$($LCTL get_param -n jobid_var) + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_new" == "$new_value" ]] || error "'set_param a b' failed on a value containing '='" - $LCTL set_param jobid_var $jobid_var_old - jobid_var_new=$($LCTL get_param -n jobid_var) + $LCTL set_param $testname $jobid_var_old + jobid_var_new=$($LCTL get_param -n $testname) [[ "$jobid_var_new" == "$jobid_var_old" ]] || - error "failed to reset jobid_var" + error "failed to reset $testname" } run_test 401d "Verify 'lctl set_param' accepts values containing '='" @@ -21572,6 +22728,7 @@ test_405()
{ skip "Layout swap lock is not supported" check_swap_layouts_support + check_swap_layout_no_dom $DIR test_mkdir $DIR/$tdir swap_lock_test -d $DIR/$tdir || @@ -21722,6 +22879,8 @@ test_410() { [[ $CLIENT_VERSION -lt $(version_code 2.9.59) ]] && skip "Need client version at least 2.9.59" + [ -f $LUSTRE/tests/kernel/kinode.ko ] || + skip "Need MODULES build" # Create a file, and stat it from the kernel local testfile=$DIR/$tfile @@ -22561,6 +23720,283 @@ test_423() { } run_test 423 "statfs should return a right data" +test_424() { +#define OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH 0x522 | OBD_FAIL_ONCE + $LCTL set_param fail_loc=0x80000522 + dd if=/dev/zero of=$DIR/$tfile bs=2M count=1 oflag=sync + rm -f $DIR/$tfile +} +run_test 424 "simulate ENOMEM in ptl_send_rpc bulk reply ME attach" + +test_425() { + test_mkdir -c -1 $DIR/$tdir + $LFS setstripe -c -1 $DIR/$tdir + + lru_resize_disable "" 100 + stack_trap "lru_resize_enable" EXIT + + sleep 5 + + for i in $(seq $((MDSCOUNT * 125))); do + local t=$DIR/$tdir/${tfile}_$i + + dd if=/dev/zero of=$t bs=4K count=1 > /dev/null 2>&1 || + error_noexit "Create file $t" + done + stack_trap "rm -rf $DIR/$tdir" EXIT + + for oscparam in $($LCTL list_param ldlm.namespaces.*osc-[-0-9a-f]*); do + local lru_size=$($LCTL get_param -n $oscparam.lru_size) + local lock_count=$($LCTL get_param -n $oscparam.lock_count) + + [ $lock_count -le $lru_size ] || + error "osc lock count $lock_count > lru size $lru_size" + done + + for mdcparam in $($LCTL list_param ldlm.namespaces.*mdc-*); do + local lru_size=$($LCTL get_param -n $mdcparam.lru_size) + local lock_count=$($LCTL get_param -n $mdcparam.lock_count) + + [ $lock_count -le $lru_size ] || + error "mdc lock count $lock_count > lru size $lru_size" + done +} +run_test 425 "lock count should not exceed lru size" + +test_426() { + splice-test -r $DIR/$tfile + splice-test -rd $DIR/$tfile + splice-test $DIR/$tfile + splice-test -d $DIR/$tfile +} +run_test 426 "splice test on Lustre" + +lseek_test_430() { +
local offset + local file=$1 + + # data at [256K, 512K) + dd if=/dev/urandom of=$file bs=256K count=1 seek=1 || + error "256K->512K dd fails" + # data at [2M, 3M) + dd if=/dev/urandom of=$file bs=1M count=1 seek=2 || + error "2M->3M dd fails" + # data at [4M, 5M) + dd if=/dev/urandom of=$file bs=1M count=1 seek=4 || + error "4M->5M dd fails" + echo "Data at 256K...512K, 2M...3M and 4M...5M" + # start at first component hole #1 + printf "Seeking hole from 1000 ... " + offset=$(lseek_test -l 1000 $file) + echo $offset + [[ $offset == 1000 ]] || error "offset $offset != 1000" + printf "Seeking data from 1000 ... " + offset=$(lseek_test -d 1000 $file) + echo $offset + [[ $offset == 262144 ]] || error "offset $offset != 262144" + + # start at first component data block + printf "Seeking hole from 300000 ... " + offset=$(lseek_test -l 300000 $file) + echo $offset + [[ $offset == 524288 ]] || error "offset $offset != 524288" + printf "Seeking data from 300000 ... " + offset=$(lseek_test -d 300000 $file) + echo $offset + [[ $offset == 300000 ]] || error "offset $offset != 300000" + + # start at the first component but beyond end of object size + printf "Seeking hole from 1000000 ... " + offset=$(lseek_test -l 1000000 $file) + echo $offset + [[ $offset == 1000000 ]] || error "offset $offset != 1000000" + printf "Seeking data from 1000000 ... " + offset=$(lseek_test -d 1000000 $file) + echo $offset + [[ $offset == 2097152 ]] || error "offset $offset != 2097152" + + # start at second component stripe 2 (empty file) + printf "Seeking hole from 1500000 ... " + offset=$(lseek_test -l 1500000 $file) + echo $offset + [[ $offset == 1500000 ]] || error "offset $offset != 1500000" + printf "Seeking data from 1500000 ... " + offset=$(lseek_test -d 1500000 $file) + echo $offset + [[ $offset == 2097152 ]] || error "offset $offset != 2097152" + + # start at second component stripe 1 (all data) + printf "Seeking hole from 3000000 ...
" + offset=$(lseek_test -l 3000000 $file) + echo $offset + [[ $offset == 3145728 ]] || error "offset $offset != 3145728" + printf "Seeking data from 3000000 ... " + offset=$(lseek_test -d 3000000 $file) + echo $offset + [[ $offset == 3000000 ]] || error "offset $offset != 3000000" + + dd if=/dev/urandom of=$file bs=640K count=1 seek=1 || + error "2nd dd fails" + echo "Add data block at 640K...1280K" + + # start at before new data block, in hole + printf "Seeking hole from 600000 ... " + offset=$(lseek_test -l 600000 $file) + echo $offset + [[ $offset == 600000 ]] || error "offset $offset != 600000" + printf "Seeking data from 600000 ... " + offset=$(lseek_test -d 600000 $file) + echo $offset + [[ $offset == 655360 ]] || error "offset $offset != 655360" + + # start at the first component new data block + printf "Seeking hole from 1000000 ... " + offset=$(lseek_test -l 1000000 $file) + echo $offset + [[ $offset == 1310720 ]] || error "offset $offset != 1310720" + printf "Seeking data from 1000000 ... " + offset=$(lseek_test -d 1000000 $file) + echo $offset + [[ $offset == 1000000 ]] || error "offset $offset != 1000000" + + # start at second component stripe 2, new data + printf "Seeking hole from 1200000 ... " + offset=$(lseek_test -l 1200000 $file) + echo $offset + [[ $offset == 1310720 ]] || error "offset $offset != 1310720" + printf "Seeking data from 1200000 ... " + offset=$(lseek_test -d 1200000 $file) + echo $offset + [[ $offset == 1200000 ]] || error "offset $offset != 1200000" + + # start beyond file end + printf "Using offset > filesize ... " + lseek_test -l 4000000 $file && error "lseek should fail" + printf "Using offset > filesize ... 
" + lseek_test -d 4000000 $file && error "lseek should fail" + + printf "Done\n\n" +} + +test_430a() { + $LCTL get_param mdc.*.import | grep -q 'connect_flags:.*seek' || + skip "MDT does not support SEEK_HOLE" + + $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' || + skip "OST does not support SEEK_HOLE" + + local file=$DIR/$tdir/$tfile + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1M -L mdt -E eof -c2 $file + # OST stripe #1 will have continuous data at [1M, 3M) + # OST stripe #2 is empty + echo "Component #1: 1M DoM, component #2: EOF, 2 stripes 1M" + lseek_test_430 $file + rm $file + $LFS setstripe -E 1M -c2 -S 64K -E 10M -c2 -S 1M $file + echo "Component #1: 1M, 2 stripes 64K, component #2: EOF, 2 stripes 1M" + lseek_test_430 $file + rm $file + $LFS setstripe -c2 -S 512K $file + echo "Two stripes, stripe size 512K" + lseek_test_430 $file + rm $file + # FLR with stale mirror + $LFS setstripe -N -E 512K -c1 -S 64K -E eof -c2 -S 512K \ + -N -c2 -S 1M $file + echo "Mirrored file:" + echo "Component #1: 512K, stripe 64K, component #2: EOF, 2 stripes 512K" + echo "Plain 2 stripes 1M" + lseek_test_430 $file + rm $file +} +run_test 430a "lseek: SEEK_DATA/SEEK_HOLE basic functionality" + +test_430b() { + $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' || + skip "OST does not support SEEK_HOLE" + + local offset + local file=$DIR/$tdir/$tfile + + mkdir -p $DIR/$tdir + # Empty layout lseek should fail + $MCREATE $file + # seek from 0 + printf "Seeking hole from 0 ... " + lseek_test -l 0 $file && error "lseek should fail" + printf "Seeking data from 0 ... " + lseek_test -d 0 $file && error "lseek should fail" + rm $file + + # 1M-hole file + $LFS setstripe -E 1M -c2 -E eof $file + $TRUNCATE $file 1048576 + printf "Seeking hole from 1000000 ... " + offset=$(lseek_test -l 1000000 $file) + echo $offset + [[ $offset == 1000000 ]] || error "offset $offset != 1000000" + printf "Seeking data from 1000000 ... 
" + lseek_test -d 1000000 $file && error "lseek should fail" + # full first component, non-inited second one + dd if=/dev/urandom of=$file bs=1M count=1 + printf "Seeking hole from 1000000 ... " + offset=$(lseek_test -l 1000000 $file) + echo $offset + [[ $offset == 1048576 ]] || error "offset $offset != 1048576" + printf "Seeking hole from 1048576 ... " + lseek_test -l 1048576 $file && error "lseek should fail" + # init second component and truncate back + echo "123" >> $file + $TRUNCATE $file 1048576 + ls -lia $file + printf "Seeking hole from 1000000 ... " + offset=$(lseek_test -l 1000000 $file) + echo $offset + [[ $offset == 1048576 ]] || error "offset $offset != 1048576" + printf "Seeking hole from 1048576 ... " + lseek_test -l 1048576 $file && error "lseek should fail" + # boundary checks for big values + dd if=/dev/urandom of=$file.10g bs=1 count=1 seek=10G + offset=$(lseek_test -d 0 $file.10g) + [[ $offset == 10737418240 ]] || error "offset $offset != 10737418240" + dd if=/dev/urandom of=$file.100g bs=1 count=1 seek=100G + offset=$(lseek_test -d 0 $file.100g) + [[ $offset == 107374182400 ]] || error "offset $offset != 107374182400" + return 0 +} +run_test 430b "lseek: SEEK_DATA/SEEK_HOLE special cases" + +test_430c() { + $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' || + skip "OST does not support SEEK_HOLE" + + local file=$DIR/$tdir/$tfile + local start + + mkdir -p $DIR/$tdir + dd if=/dev/urandom of=$file bs=1k count=1 seek=5M + + # cp version 8.33+ prefers lseek over fiemap + if [[ $(cp --version | head -n1 | sed "s/[^0-9]//g") -ge 833 ]]; then + start=$SECONDS + time cp $file /dev/null + (( SECONDS - start < 5 )) || + error "cp: too long runtime $((SECONDS - start))" + + fi + # tar version 1.29+ supports SEEK_HOLE/DATA + if [[ $(tar --version | head -n1 | sed "s/[^0-9]//g") -ge 129 ]]; then + start=$SECONDS + time tar cS $file - | cat > /dev/null + (( SECONDS - start < 5 )) || + error "tar: too long runtime $((SECONDS - start))" + fi +} 
+run_test 430c "lseek: external tools check" + prep_801() { [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] || [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] && @@ -22869,7 +24305,7 @@ test_802b() { } run_test 802b "be able to set MDTs to readonly" -test_803() { +test_803a() { [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" [ $MDS1_VERSION -lt $(version_code 2.10.54) ] && skip "MDS needs to be newer than 2.10.54" @@ -22917,7 +24353,39 @@ test_803() { [ $after_used -le $((before_used + 1)) ] || error "after ($after_used) > before ($before_used) + 1" } -run_test 803 "verify agent object for remote object" +run_test 803a "verify agent object for remote object" + +test_803b() { + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.13.56) ] && + skip "MDS needs to be newer than 2.13.56" + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + for i in $(seq 0 $((MDSCOUNT - 1))); do + $LFS mkdir -i $i $DIR/$tdir.$i || error "mkdir $tdir.$i" + done + + local before=0 + local after=0 + + local tmp + + stat $DIR/$tdir.* >/dev/null || error "stat $tdir.*" + for i in $(seq 0 $((MDSCOUNT - 1))); do + tmp=$(do_facet mds$((i + 1)) $LCTL get_param mdt.*-MDT000$i.md_stats | + awk '/getattr/ { print $2 }') + before=$((before + tmp)) + done + stat $DIR/$tdir.* >/dev/null || error "stat $tdir.*" + for i in $(seq 0 $((MDSCOUNT - 1))); do + tmp=$(do_facet mds$((i + 1)) $LCTL get_param mdt.*-MDT000$i.md_stats | + awk '/getattr/ { print $2 }') + after=$((after + tmp)) + done + + [ $before -eq $after ] || error "getattr count $before != $after" +} +run_test 803b "remote object can getattr from cache" test_804() { [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" @@ -23316,8 +24784,6 @@ run_test 810 "partial page writes on ZFS (LU-11663)" test_812a() { [ $OST1_VERSION -lt $(version_code 2.12.51) ] && skip "OST < 2.12.51 doesn't support this fail_loc" - [ "$SHARED_KEY" = true ] && - skip "OSC connections never go IDLE with Shared-Keys enabled" $LFS setstripe -c 1
-i 0 $DIR/$tfile # ensure ost1 is connected @@ -23339,8 +24805,6 @@ run_test 812a "do not drop reqs generated when imp is going to idle (LU-11951)" test_812b() { # LU-12378 [ $OST1_VERSION -lt $(version_code 2.12.51) ] && skip "OST < 2.12.51 doesn't support this fail_loc" - [ "$SHARED_KEY" = true ] && - skip "OSC connections never go IDLE with Shared-Keys enabled" $LFS setstripe -c 1 -i 0 $DIR/$tfile || error "setstripe failed" # ensure ost1 is connected @@ -23548,9 +25012,6 @@ test_815() run_test 815 "zero byte tiny write doesn't hang (LU-12382)" test_816() { - [ "$SHARED_KEY" = true ] && - skip "OSC connections never go IDLE with Shared-Keys enabled" - $LFS setstripe -c 1 -i 0 $DIR/$tfile # ensure ost1 is connected stat $DIR/$tfile >/dev/null || error "can't stat" @@ -23659,8 +25120,18 @@ test_820() { # open intent should update default EA size # see mdc_update_max_ea_from_body() # notice this is the very first RPC to MDS2 - cp /etc/services $DIR/$tdir/mds2 || - error "Failed to copy files to mds$n" + out=$(cp /etc/services $DIR/$tdir/mds2 2>&1) + ret=$? + echo $out + # With SSK, this situation can lead to -EPERM being returned. + # In that case, simply retry. + if [ $ret -ne 0 ] && $SHARED_KEY; then + if echo "$out" | grep -q "not permitted"; then + cp /etc/services $DIR/$tdir/mds2 + ret=$? + fi + fi + [ $ret -eq 0 ] || error "Failed to copy files to mds$n" } run_test 820 "update max EA from open intent"