X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=2338abcb8c87b0e7e555108fc52ff4fe8f2f5893;hp=1ece84ed0e313b4ef34b50df2509b945a7a13ea1;hb=f276f1cb0859e8718448e69bd99ee305f5e62d42;hpb=c4ff4aef7eb939d536acffaac4465039f3cfa935

diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 1ece84e..2338abc 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -28,7 +28,6 @@ ACCEPTOR_PORT=${ACCEPTOR_PORT:-988}
 DEF_STRIPE_COUNT=-1
 CHECK_GRANT=${CHECK_GRANT:-"yes"}
 GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""}
-export PARALLEL=${PARALLEL:-"no"}
 
 TRACE=${TRACE:-""}
 LUSTRE=${LUSTRE:-$(dirname $0)/..}
@@ -52,7 +51,6 @@ fi
 # skip the grant tests for ARM until they are fixed
 if [[ $(uname -m) = aarch64 ]]; then
 	always_except LU-11671 45
-	always_except LU-14067 400a 400b
 fi
 
 # skip nfs tests on kernels >= 4.12.0 until they are fixed
@@ -2895,6 +2893,8 @@ test_27K() {
 		$DIR/$tdir/${tdir}2 ||
 		error "$DIR/$tdir/${tdir}2: create failed"
 
+	$LFS getdirstripe -v $DIR/$tdir/${tdir}2
+
 	$LFS getdirstripe -v $DIR/$tdir/${tdir}2 |
 		grep "lfm_magic:.*0x0CD50CD0" ||
 		error "$DIR/$tdir/${tdir}2: invalid LMV EA magic"
@@ -2961,11 +2961,10 @@ test_27M() {
 	remote_mds_nodsh && skip "remote MDS with nodsh"
 	[[ $OSTCOUNT -lt 2 ]] && skip_env "need > 1 OST"
 
-	test_mkdir $DIR/$tdir
-
 	# Set default striping on directory
 	local setcount=4
 	local stripe_opt
+	local mdts=$(comma_list $(mdts_nodes))
 
 	# if we run against a 2.12 server which lacks overstring support
 	# then the connect_flag will not report overstriping, even if client
@@ -2977,6 +2976,18 @@ test_27M() {
 	else
 		skip "server does not support overstriping"
 	fi
+
+	test_mkdir $DIR/$tdir
+
+	# Validate existing append_* params and ensure restore
+	local pool=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_pool)
+	[[ "$pool" == "" ]] || error "expected append_pool == '', got '$pool'"
+	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool=none"
+
+	local orig_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count)
+	((orig_count == 1)) || error "expected append_stripe_count == 1, got $orig_count"
+	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=1"
+
 	$LFS setstripe $stripe_opt $DIR/$tdir
 	echo 1 > $DIR/$tdir/${tfile}.1
@@ -2984,11 +2995,6 @@ test_27M() {
 	[ $count -eq $setcount ] ||
 		error "(1) stripe count $count, should be $setcount"
 
-	# Capture existing append_stripe_count setting for restore
-	local orig_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count)
-	local mdts=$(comma_list $(mdts_nodes))
-	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=$orig_count" EXIT
-
 	local appendcount=$orig_count
 
 	echo 1 >> $DIR/$tdir/${tfile}.2_append
 	count=$($LFS getstripe -c $DIR/$tdir/${tfile}.2_append)
@@ -3066,12 +3072,9 @@ test_27M() {
 		error "(9) stripe count $count, should be $appendcount for append"
 
 	# Now test O_APPEND striping with pools
-	do_nodes $mdts $LCTL set_param mdd.*.append_pool="$TESTNAME"
-	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool='none'" EXIT
-
-	# Create the pool
 	pool_add $TESTNAME || error "pool creation failed"
 	pool_add_targets $TESTNAME 0 1 || error "Pool add targets failed"
+	do_nodes $mdts $LCTL set_param mdd.*.append_pool="$TESTNAME"
 
 	echo 1 >> $DIR/$tdir/${tfile}.10_append
@@ -3236,6 +3239,8 @@ test_27P() {
 		--flags=0xda05 --mode 0750 $DIR/$tdir/${tdir} ||
 		error "$DIR/$tdir/${tdir}: create failed"
 
+	$LFS getdirstripe -v $DIR/$tdir/${tdir}
+
 	$LFS getdirstripe -v $DIR/$tdir/${tdir} |
 		grep "lfm_magic:.*0x0CD50CD0" ||
 		error "$DIR/$tdir/${tdir}: invalid LMV EA magic"
@@ -3373,31 +3378,6 @@ test_27R() {
 }
 run_test 27R "test max_stripecount limitation when stripe count is set to -1"
 
-test_27S() {
-	(( $MDS1_VERSION >= $(version_code 2.14.54) )) ||
-		skip "Need MDS version at least 2.14.54"
-	[[ "$(facet_host mds1)" != "$(facet_host ost1)" ]] ||
-		skip "needs different host for mdt1 ost1"
-
-	local count=$(precreated_ost_obj_count 0 0)
-
-	echo "precreate count $count"
-	mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
-	$LFS setstripe -i 0 -c 1 $DIR/$tdir || error "setstripe $tdir failed"
-	#define OBD_FAIL_OSP_GET_LAST_FID	0x2109
-	do_facet mds1 $LCTL set_param fail_loc=0x2109
-	#define OBD_FAIL_OST_GET_LAST_FID	0x252
-	do_facet ost1 $LCTL set_param fail_loc=0x252
-	createmany -o $DIR/$tdir/f $count &
-	pid=$!
-	echo "precreate count $(precreated_ost_obj_count 0 0)"
-	do_facet mds1 $LCTL set_param fail_loc=0
-	do_facet ost1 $LCTL set_param fail_loc=0
-	wait $pid || error "createmany failed"
-	echo "precreate count $(precreated_ost_obj_count 0 0)"
-}
-run_test 27S "don't deactivate OSP on network issue"
-
 test_27T() {
 	[ $(facet_host client) == $(facet_host ost1) ] &&
 		skip "need ost1 and client on different nodes"
@@ -3412,6 +3392,66 @@ test_27T() {
 }
 run_test 27T "no eio on close on partial write due to enosp"
 
+test_27U() {
+	local dir=$DIR/$tdir
+	local file=$dir/$tfile
+	local append_pool=${TESTNAME}-append
+	local normal_pool=${TESTNAME}-normal
+	local pool
+	local stripe_count
+	local stripe_count2
+	local mdts=$(comma_list $(mdts_nodes))
+
+	# FIXME
+	# (( $MDS1_VERSION >= $(version_code 2.15.42) )) ||
+	#	skip "Need MDS version at least 2.15.42"
+
+	# Validate existing append_* params and ensure restore
+	pool=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_pool)
+	[[ "$pool" == "" ]] || error "expected append_pool == '', got '$pool'"
+	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_pool=none"
+
+	stripe_count=$(do_facet mds1 $LCTL get_param -n mdd.$FSNAME-MDT0000.append_stripe_count)
+	((stripe_count == 1)) || error "expected append_stripe_count == 1, got $stripe_count"
+	stack_trap "do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=$stripe_count"
+
+	pool_add $append_pool || error "pool creation failed"
+	pool_add_targets $append_pool 0 1 || error "Pool add targets failed"
+
+	pool_add $normal_pool || error "pool creation failed"
+	pool_add_targets $normal_pool 0 1 || error "Pool add targets failed"
+
+	test_mkdir $dir
+	$LFS setstripe -E 1M -c 1 -p $normal_pool -E 2M -c 2 -p $normal_pool -E eof -c -1 $dir
+
+	echo XXX >> $file.1
+	$LFS getstripe $file.1
+
+	pool=$($LFS getstripe -p $file.1)
+	[[ "$pool" == "$normal_pool" ]] || error "got pool '$pool', expected '$normal_pool'"
+
+	stripe_count2=$($LFS getstripe -c $file.1)
+	((stripe_count2 == stripe_count)) ||
+		error "got stripe_count '$stripe_count2', expected '$stripe_count'"
+
+	do_nodes $mdts $LCTL set_param mdd.*.append_pool=$append_pool
+
+	echo XXX >> $file.2
+	$LFS getstripe $file.2
+
+	pool=$($LFS getstripe -p $file.2)
+	[[ "$pool" == "$append_pool" ]] || error "got pool '$pool', expected '$append_pool'"
+
+	do_nodes $mdts $LCTL set_param mdd.*.append_stripe_count=2
+
+	echo XXX >> $file.3
+	$LFS getstripe $file.3
+
+	stripe_count2=$($LFS getstripe -c $file.3)
+	((stripe_count2 == 2)) || error "got stripe_count '$stripe_count2', expected 2"
+}
+run_test 27U "append pool and stripe count work with composite default layout"
+
 # createtest also checks that device nodes are created and
 # then visible correctly (#2091)
 test_28() { # bug 2091
@@ -4237,45 +4277,122 @@ test_33g() {
 }
 run_test 33g "nonroot user create already existing root created file"
 
-test_33h() {
-	[ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs"
-	[ $MDS1_VERSION -lt $(version_code 2.13.50) ] &&
-		skip "Need MDS version at least 2.13.50"
+sub_33h() {
+	local hash_type=$1
+	local count=250
 
-	test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir ||
-		error "mkdir $tdir failed"
+	test_mkdir -c $MDSCOUNT -H $hash_type $DIR/$tdir ||
+		error "lfs mkdir -H $hash_type $tdir failed"
 	touch $DIR/$tdir/$tfile || error "touch $tfile failed"
 
 	local index=$($LFS getstripe -m $DIR/$tdir/$tfile)
 	local index2
+	local fname
 
 	for fname in $DIR/$tdir/$tfile.bak \
 		     $DIR/$tdir/$tfile.SAV \
 		     $DIR/$tdir/$tfile.orig \
 		     $DIR/$tdir/$tfile~; do
-		touch $fname  || error "touch $fname failed"
+		touch $fname || error "touch $fname failed"
 		index2=$($LFS getstripe -m $fname)
-		[ $index -eq $index2 ] ||
+		(( $index == $index2 )) ||
 			error "$fname MDT index mismatch $index != $index2"
 	done
 
 	local failed=0
-	for i in {1..250}; do
-		for fname in $(mktemp -u $DIR/$tdir/.$tfile.XXXXXX) \
-			     $(mktemp $DIR/$tdir/$tfile.XXXXXXXX); do
-			touch $fname  || error "touch $fname failed"
+	local patterns=(".$tfile.XXXXXX" "$tfile.XXXXXXXX")
+	local pattern
+
+	for pattern in ${patterns[*]}; do
+		echo "pattern $pattern"
+		fname=$DIR/$tdir/$pattern
+		for (( i = 0; i < $count; i++ )); do
+			fname=$(mktemp $DIR/$tdir/$pattern) ||
+				error "mktemp $DIR/$tdir/$pattern failed"
 			index2=$($LFS getstripe -m $fname)
-			if [[ $index != $index2 ]]; then
-				failed=$((failed + 1))
-				echo "$fname MDT index mismatch $index != $index2"
-			fi
+			(( $index == $index2 )) && continue
+
+			failed=$((failed + 1))
+			echo "$fname MDT index mismatch $index != $index2"
+		done
+	done
+
+	echo "$failed/$count MDT index mismatches, expect ~2-4"
+	(( failed < 10 )) || error "MDT index mismatch $failed/$count times"
+
+	local same=0
+	local expect
+
+	# verify that "crush" is still broken with all files on same MDT,
+	# crush2 should have about 1/MDSCOUNT files on each MDT, with margin
+	[[ "$hash_type" == "crush" ]] && expect=$count ||
+		expect=$((count / MDSCOUNT))
+
+	# crush2 doesn't put all-numeric suffixes on the same MDT,
+	# filename like $tfile.12345678 should *not* be considered temp
+	for pattern in ${patterns[*]}; do
+		local base=${pattern%%X*}
+		local suff=${pattern#$base}
+
+		echo "pattern $pattern"
+		for (( i = 0; i < $count; i++ )); do
+			fname=$DIR/$tdir/$base$((${suff//X/1} + i))
+			touch $fname || error "touch $fname failed"
+			index2=$($LFS getstripe -m $fname)
+			(( $index != $index2 )) && continue
+
+			same=$((same + 1))
 		done
 	done
 
-	echo "$failed MDT index mismatches"
-	(( failed < 20 )) || error "MDT index mismatch $failed times"
+	echo "$((same/${#patterns[*]}))/$count matches, expect ~$expect for $1"
+	(( same / ${#patterns[*]} < expect * 5 / 4 &&
+	   same / ${#patterns[*]} > expect * 4 / 5 )) ||
+		error "MDT index match $((same / ${#patterns[*]}))/$count times"
+	same=0
+
+	# crush2 doesn't put suffixes with special characters on the same MDT
+	# filename like $tfile.txt.1234 should *not* be considered temp
+	for pattern in ${patterns[*]}; do
+		local base=${pattern%%X*}
+		local suff=${pattern#$base}
+
+		pattern=$base...${suff/XXX}
+		echo "pattern=$pattern"
+		for (( i = 0; i < $count; i++ )); do
+			fname=$(mktemp $DIR/$tdir/$pattern) ||
+				error "touch $fname failed"
+			index2=$($LFS getstripe -m $fname)
+			(( $index != $index2 )) && continue
+
+			same=$((same + 1))
+		done
+	done
+
+	echo "$((same/${#patterns[*]}))/$count matches, expect ~$expect for $1"
+	(( same / ${#patterns[*]} < expect * 5 / 4 &&
+	   same / ${#patterns[*]} > expect * 4 / 5 )) ||
+		error "MDT index match $((same / ${#patterns[*]}))/$count times"
 }
-run_test 33h "temp file is located on the same MDT as target"
+
+test_33h() {
+	(( $MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs"
+	(( $MDS1_VERSION >= $(version_code 2.13.50) )) ||
+		skip "Need MDS version at least 2.13.50"
+
+	sub_33h crush
+}
+run_test 33h "temp file is located on the same MDT as target (crush)"
+
+test_33hh() {
+	(( $MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs"
+	echo "MDS1_VERSION=$MDS1_VERSION version_code=$(version_code 2.15.0)"
+	(( $MDS1_VERSION > $(version_code 2.15.0) )) ||
+		skip "Need MDS version at least 2.15.0 for crush2"
+
+	sub_33h crush2
+}
+run_test 33hh "temp file is located on the same MDT as target (crush2)"
 
 test_33i() {
@@ -5728,22 +5845,13 @@ test_51b() {
 }
 run_test 51b "exceed 64k subdirectory nlink limit on create, verify unlink"
 
-test_51d() {
-	[ $PARALLEL == "yes" ] && skip "skip parallel run"
-	[[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs"
-	local qos_old
-
-	test_mkdir $DIR/$tdir
-	$LFS setstripe -c $OSTCOUNT $DIR/$tdir
+test_51d_sub() {
+	local stripecount=$1
+	local nfiles=$((200 * $OSTCOUNT))
 
-	qos_old=$(do_facet mds1 \
-		"$LCTL get_param -n lod.$FSNAME-*.qos_threshold_rr" | head -n 1)
-	do_nodes $(comma_list $(mdts_nodes)) \
-		"$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=100"
-	stack_trap "do_nodes $(comma_list $(mdts_nodes)) \
-		'$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=${qos_old%%%}'"
-
-	createmany -o $DIR/$tdir/t- 1000
+	log "create files with stripecount=$stripecount"
+	$LFS setstripe -C $stripecount $DIR/$tdir
+	createmany -o $DIR/$tdir/t- $nfiles
 	$LFS getstripe $DIR/$tdir > $TMP/$tfile
 	for ((n = 0; n < $OSTCOUNT; n++)); do
 		objs[$n]=$(awk -vobjs=0 '($1 == '$n') { objs += 1 } \
@@ -5753,28 +5861,58 @@ test_51d() {
			   END { printf("%0.0f", objs) }')
		objs0[$n]=$(awk -vobjs=0 '($1 == '$n' && $4 == 0) { objs += 1 } \
			   END { printf("%0.0f", objs) }')
 		log "OST$n has ${objs[$n]} objects, ${objs0[$n]} are index 0"
 	done
-	unlinkmany $DIR/$tdir/t- 1000
-
-	nlast=0
-	for ((n = 0; n < $OSTCOUNT; n++)); do
+	unlinkmany $DIR/$tdir/t- $nfiles
+	rm -f $TMP/$tfile
+
+	local nlast
+	local min=4
+	local max=6 # allow variance of (1 - $min/$max) = 33% by default
+
+	# For some combinations of stripecount and OSTCOUNT current code
+	# is not ideal, and allocates 50% fewer *first* objects to some OSTs
+	# than others.  Rather than skipping this test entirely, check that
+	# and keep testing to ensure imbalance does not get worse. LU-15282
+	(( (OSTCOUNT == 6 && stripecount == 4) ||
+	   (OSTCOUNT == 10 && (stripecount == 4 || stripecount == 8)) ||
+	   (OSTCOUNT == 12 && (stripecount == 8 || stripecount == 9)))) && max=9
+	for ((nlast=0, n = 1; n < $OSTCOUNT; nlast=n,n++)); do
 		(( ${objs[$n]} > ${objs[$nlast]} * 4 / 5 )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer objects vs. OST $nlast" \
-			      " (${objs[$n]} < ${objs[$nlast]}"; }
+			error "OST $n has fewer objects vs. OST $nlast " \
+			      "(${objs[$n]} < ${objs[$nlast]} x 4/5)"; }
 		(( ${objs[$n]} < ${objs[$nlast]} * 5 / 4 )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer objects vs. OST $nlast" \
-			      " (${objs[$n]} < ${objs[$nlast]}"; }
+			error "OST $n has more objects vs. OST $nlast " \
+			      "(${objs[$n]} > ${objs[$nlast]} x 5/4)"; }
 
-		(( ${objs0[$n]} > ${objs0[$nlast]} * 4 / 5 )) ||
+		(( ${objs0[$n]} > ${objs0[$nlast]} * $min / $max )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer #0 objects vs. OST $nlast" \
-			      " (${objs0[$n]} < ${objs0[$nlast]}"; }
-		(( ${objs0[$n]} < ${objs0[$nlast]} * 5 / 4 )) ||
+			error "OST $n has fewer #0 objects vs. OST $nlast " \
+			      "(${objs0[$n]} < ${objs0[$nlast]} x $min/$max)"; }
+		(( ${objs0[$n]} < ${objs0[$nlast]} * $max / $min )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer #0 objects vs. OST $nlast" \
-			      " (${objs0[$n]} < ${objs0[$nlast]}"; }
-		nlast=$n
+			error "OST $n has more #0 objects vs. OST $nlast " \
+			      "(${objs0[$n]} > ${objs0[$nlast]} x $max/$min)"; }
+	done
+}
+
+test_51d() {
+	[ $PARALLEL == "yes" ] && skip "skip parallel run"
+	[[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs"
+
+	local stripecount
+	local qos_old=$(do_facet mds1 \
+		"$LCTL get_param -n lod.$FSNAME-*.qos_threshold_rr" | head -n 1)
+
+	do_nodes $(comma_list $(mdts_nodes)) \
+		"$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=100"
+	stack_trap "do_nodes $(comma_list $(mdts_nodes)) \
+		'$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=${qos_old%%%}'"
+
+	test_mkdir $DIR/$tdir
+
+	for ((stripecount = 3; stripecount <= $OSTCOUNT; stripecount++)); do
+		test_51d_sub $stripecount
 	done
 }
 run_test 51d "check object distribution"
@@ -8967,6 +9105,9 @@ test_64g() {
 run_test 64g "grant shrink on MDT"
 
 test_64h() {
+	(( $OST1_VERSION >= $(version_code 2.14.56) )) ||
+		skip "need OST at least 2.14.56 to avoid grant shrink on read"
+
 	local instance=$($LFS getname -i $DIR)
 	local osc_tgt="$FSNAME-OST0000-osc-$instance"
 	local num_exps=$(do_facet ost1 \
@@ -9021,8 +9162,8 @@ test_64h() {
 run_test 64h "grant shrink on read"
 
 test_64i() {
-	(( $OST1_VERSION >= $(version_code 2.14.55) )) ||
-		skip "need OST at least 2.14.55 to avoid grant shrink on replay"
+	(( $OST1_VERSION >= $(version_code 2.14.56) )) ||
+		skip "need OST at least 2.14.56 to avoid grant shrink on replay"
 
 	[ $PARALLEL == "yes" ] && skip "skip parallel run"
 	remote_ost_nodsh && skip "remote OSTs with nodsh"
@@ -11653,7 +11794,7 @@ test_104c() {
 	echo "Before recordsize change"
 	lfs_df=($($LFS df -h | grep "filesystem_summary:"))
-	df=($(df -h | grep "/mnt/lustre"$))
+	df=($(df -h | grep "$MOUNT"$))
 
 	# For checking.
 	echo "lfs output : ${lfs_df[*]}"
@@ -11685,7 +11826,7 @@ test_104c() {
 
 	echo "After recordsize change"
 	lfs_df_after=($($LFS df -h | grep "filesystem_summary:"))
-	df_after=($(df -h | grep "/mnt/lustre"$))
+	df_after=($(df -h | grep "$MOUNT"$))
 
 	# For checking.
 	echo "lfs output : ${lfs_df_after[*]}"
@@ -11946,29 +12087,6 @@ test_115() {
 }
 run_test 115 "verify dynamic thread creation===================="
 
-free_min_max () {
-	wait_delete_completed
-	AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail))
-	echo "OST kbytes available: ${AVAIL[*]}"
-	MAXV=${AVAIL[0]}
-	MAXI=0
-	MINV=${AVAIL[0]}
-	MINI=0
-	for ((i = 0; i < ${#AVAIL[@]}; i++)); do
-		#echo OST $i: ${AVAIL[i]}kb
-		if [[ ${AVAIL[i]} -gt $MAXV ]]; then
-			MAXV=${AVAIL[i]}
-			MAXI=$i
-		fi
-		if [[ ${AVAIL[i]} -lt $MINV ]]; then
-			MINV=${AVAIL[i]}
-			MINI=$i
-		fi
-	done
-	echo "Min free space: OST $MINI: $MINV"
-	echo "Max free space: OST $MAXI: $MAXV"
-}
-
 test_116a() { # was previously test_116()
 	[ $PARALLEL == "yes" ] && skip "skip parallel run"
 	[[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs"
@@ -12896,6 +13014,7 @@ test_123a_base() { # was test 123, statahead(bug 11401)
 		log "testing UP system. Performance may be lower than expected."
 		SLOWOK=1
 	fi
+	running_in_vm && SLOWOK=1
 
 	rm -rf $DIR/$tdir
 	test_mkdir $DIR/$tdir
@@ -15512,6 +15631,12 @@ test_155_big_load() {
 	free_min_max
 	local cache_size=$(do_facet ost$((MAXI+1)) \
 		"awk '/cache/ {sum+=\\\$4} END {print sum}' /proc/cpuinfo")
+
+	# LU-16042: can not get the cache size on Arm64 VM here, fallback to a
+	# pre-set value
+	if [ -z "$cache_size" ]; then
+		cache_size=256
+	fi
 	local large_file_size=$((cache_size * 2))
 
 	echo "OSS cache size: $cache_size KB"
@@ -17712,6 +17837,36 @@ test_171() { # bug20592
 }
 run_test 171 "test libcfs_debug_dumplog_thread stuck in do_exit() ======"
 
+test_172() {
+
+	#define OBD_FAIL_OBD_CLEANUP  0x60e
+	$LCTL set_param fail_loc=0x60e
+	umount $MOUNT || error "umount $MOUNT failed"
+	stack_trap "mount_client $MOUNT"
+
+	(( $($LCTL dl | egrep -c " osc | lov | lmv | mdc ") > 0 )) ||
+		error "no client OBDs found"
+
+	$LCTL dl | while read devno state type name foo; do
+		case $type in
+		lov|osc|lmv|mdc)
+			$LCTL --device $name cleanup
+			$LCTL --device $name detach
+			;;
+		*)
+			# skip server devices
+			;;
+		esac
+	done
+
+	if (( $($LCTL dl | egrep -c " osc | lov | lmv | mdc ") > 0 )); then
+		$LCTL dl | egrep " osc | lov | lmv | mdc "
+		error "some client OBDs still remain"
+	fi
+
+}
+run_test 172 "manual device removal with lctl cleanup/detach ======"
+
 # it would be good to share it with obdfilter-survey/iokit-libecho code
 setup_obdecho_osc () {
 	local rc=0
@@ -17773,33 +17928,7 @@ obdecho_test() {
 }
 
 test_180a() {
-	[ $PARALLEL == "yes" ] && skip "skip parallel run"
-
-	if ! [ -d /sys/fs/lustre/echo_client ] &&
-	   ! module_loaded obdecho; then
-		load_module obdecho/obdecho &&
-			stack_trap "rmmod obdecho" EXIT ||
-			error "unable to load obdecho on client"
-	fi
-
-	local osc=$($LCTL dl | grep -v mdt | awk '$3 == "osc" {print $4; exit}')
-	local host=$($LCTL get_param -n osc.$osc.import |
-		     awk '/current_connection:/ { print $2 }' )
-	local target=$($LCTL get_param -n osc.$osc.import |
-		       awk '/target:/ { print $2 }' )
-	target=${target%_UUID}
-
-	if [ -n "$target" ]; then
-		setup_obdecho_osc $host $target &&
-			stack_trap "cleanup_obdecho_osc $target" EXIT ||
-			{ error "obdecho setup failed with $?"; return; }
-
-		obdecho_test ${target}_osc client ||
-			error "obdecho_test failed on ${target}_osc"
-	else
-		$LCTL get_param osc.$osc.import
-		error "there is no osc.$osc.import target"
-	fi
+	skip "obdecho on osc is no longer supported"
 }
 run_test 180a "test obdecho on osc"
 
@@ -17867,7 +17996,7 @@ test_181() { # bug 22177
 }
 run_test 181 "Test open-unlinked dir ========================"
 
-test_182() {
+test_182a() {
 	local fcount=1000
 	local tcount=10
 
@@ -17893,7 +18022,75 @@ test_182() {
 
 	rm -rf $DIR/$tdir
 }
-run_test 182 "Test parallel modify metadata operations ================"
+run_test 182a "Test parallel modify metadata operations from mdc"
+
+test_182b() {
+	[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+	local dcount=1000
+	local tcount=10
+	local stime
+	local etime
+	local delta
+
+	do_facet mds1 $LCTL list_param \
+		osp.$FSNAME-MDT*-osp-MDT*.rpc_stats ||
+		skip "MDS lacks parallel RPC handling"
+
+	$LFS mkdir -i 0 $DIR/$tdir || error "creating dir $DIR/$tdir"
+
+	rpc_count=$(do_facet mds1 $LCTL get_param -n \
+		osp.$FSNAME-MDT0001-osp-MDT0000.max_mod_rpcs_in_flight)
+
+	stime=$(date +%s)
+	createmany -i 0 -d $DIR/$tdir/t- $tcount
+
+	for (( i = 0; i < $tcount; i++ )) ; do
+		createmany -i 0 -d $DIR/$tdir/t-$i/d- 0 $dcount &
+	done
+	wait
+	etime=$(date +%s)
+	delta=$((etime - stime))
+	echo "Time for file creation $delta sec for $rpc_count parallel RPCs"
+
+	stime=$(date +%s)
+	for (( i = 0; i < $tcount; i++ )) ; do
+		unlinkmany -d $DIR/$tdir/t-$i/d- $dcount &
+	done
+	wait
+	etime=$(date +%s)
+	delta=$((etime - stime))
+	echo "Time for file removal $delta sec for $rpc_count parallel RPCs"
+
+	rm -rf $DIR/$tdir
+
+	$LFS mkdir -i 0 $DIR/$tdir || error "creating dir $DIR/$tdir"
+
+	do_facet mds1 $LCTL set_param osp.$FSNAME-MDT0001-osp-MDT0000.max_mod_rpcs_in_flight=1
+
+	stime=$(date +%s)
+	createmany -i 0 -d $DIR/$tdir/t- $tcount
+
+	for (( i = 0; i < $tcount; i++ )) ; do
+		createmany -i 0 -d $DIR/$tdir/t-$i/d- 0 $dcount &
+	done
+	wait
+	etime=$(date +%s)
+	delta=$((etime - stime))
+	echo "Time for file creation $delta sec for 1 RPC sent at a time"
+
+	stime=$(date +%s)
+	for (( i = 0; i < $tcount; i++ )) ; do
+		unlinkmany -d $DIR/$tdir/t-$i/d- $dcount &
+	done
+	wait
+	etime=$(date +%s)
+	delta=$((etime - stime))
+	echo "Time for file removal $delta sec for 1 RPC sent at a time"
+
+	do_facet mds1 $LCTL set_param osp.$FSNAME-MDT0001-osp-MDT0000.max_mod_rpcs_in_flight=$rpc_count
+}
+run_test 182b "Test parallel modify metadata operations from osp"
 
 test_183() { # LU-2275
 	[ $PARALLEL == "yes" ] && skip "skip parallel run"
@@ -18590,18 +18787,32 @@ test_205b() {
 	(( $MDS1_VERSION >= $(version_code 2.13.54.91) )) ||
 		skip "Need MDS version at least 2.13.54.91"
 
-	job_stats="mdt.*.job_stats"
-	$LCTL set_param $job_stats=clear
+	local job_stats="mdt.*.job_stats"
+	local old_jobid=$(do_facet mds1 $LCTL get_param jobid_var)
+
+	do_facet mds1 $LCTL set_param $job_stats=clear
+	# Setting jobid_var to USER might not be supported
+	[[ -n "$old_jobid" ]] && stack_trap "$LCTL set_param $old_jobid"
 	$LCTL set_param jobid_var=USER || true
-	$LCTL set_param jobid_name="%e.%u"
+	stack_trap "$LCTL set_param $($LCTL get_param jobid_name)"
+	$LCTL set_param jobid_name="%j.%e.%u"
+
 	env -i USERTESTJOBSTATS=foolish touch $DIR/$tfile.1
-	do_facet $SINGLEMDS $LCTL get_param $job_stats |
-		grep "job_id:.*foolish" &&
-		error "Unexpected jobid found"
-	do_facet $SINGLEMDS $LCTL get_param $job_stats |
-		grep "open:.*min.*max.*sum" ||
-		error "wrong job_stats format found"
+	do_facet mds1 $LCTL get_param $job_stats | grep "job_id:.*foolish" &&
+		{ do_facet mds1 $LCTL get_param $job_stats;
+		  error "Unexpected jobid found"; }
+	do_facet mds1 $LCTL get_param $job_stats | grep "open:.*min.*max.*sum"||
+		{ do_facet mds1 $LCTL get_param $job_stats;
		  error "wrong job_stats format found"; }
+
+	(( $MDS1_VERSION <= $(version_code 2.15.0) )) &&
+		echo "MDS does not yet escape jobid" && return 0
+	$LCTL set_param jobid_var=TEST205b
+	env -i TEST205b="has sp" touch $DIR/$tfile.2
+	do_facet mds1 $LCTL get_param $job_stats | grep "has.*x20sp" ||
+		{ do_facet mds1 $LCTL get_param $job_stats;
+		  error "jobid not escaped"; }
 }
 run_test 205b "Verify job stats jobid and output format"
@@ -20514,10 +20725,12 @@ run_test 230v "subdir migrated to the MDT where its parent is located"
 
 test_230w() {
 	(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
-	(( MDS1_VERSION >= $(version_code 2.14.53) )) ||
-		skip "Need MDS version at least 2.14.53"
+	(( MDS1_VERSION >= $(version_code 2.15.0) )) ||
+		skip "Need MDS version at least 2.15.0"
 
-	mkdir -p $DIR/$tdir/sub || error "mkdir failed"
+	mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+	createmany -o $DIR/$tdir/f 10 || error "create files failed"
+	createmany -d $DIR/$tdir/d 10 || error "create dirs failed"
 
 	$LFS migrate -m 1 -c $MDSCOUNT -d $DIR/$tdir ||
 		error "migrate failed"
@@ -20525,8 +20738,10 @@ test_230w() {
 	(( $($LFS getdirstripe -c $DIR/$tdir) == MDSCOUNT )) ||
 		error "$tdir stripe count mismatch"
 
-	(( $($LFS getdirstripe -c $DIR/$tdir/sub) == 0 )) ||
-		error "$tdir/sub is striped"
+	for i in $(seq 0 9); do
+		(( $($LFS getdirstripe -c $DIR/$tdir/d$i) == 0 )) ||
+			error "d$i is striped"
+	done
 }
 run_test 230w "non-recursive mode dir migration"
@@ -20909,7 +21124,7 @@ test_244b()
 }
 run_test 244b "multi-threaded write with group lock"
 
-test_245() {
+test_245a() {
 	local flagname="multi_mod_rpcs"
 	local connect_data_name="max_mod_rpcs"
 	local out
@@ -20932,7 +21147,35 @@ test_245() {
 	echo "$out" | grep -qw $connect_data_name ||
 		error "import should have connect data $connect_data_name"
 }
-run_test 245 "check mdc connection flag/data: multiple modify RPCs"
+run_test 245a "check mdc connection flag/data: multiple modify RPCs"
+
+test_245b() {
+	local flagname="multi_mod_rpcs"
+	local connect_data_name="max_mod_rpcs"
+	local out
+
+	remote_mds_nodsh && skip "remote MDS with nodsh"
+	[[ $MDSCOUNT -ge 2 ]] || skip "needs >= 2 MDTs"
+
+	# check if multiple modify RPCs flag is set
+	out=$(do_facet mds1 \
+		$LCTL get_param osp.$FSNAME-MDT0001-osp-MDT0000.import |
		grep "connect_flags:")
+	echo "$out"
+
+	[[ "$out" =~ $flagname ]] || skip "connect flag $flagname is not set"
+
+	# check if multiple modify RPCs data is set
+	out=$(do_facet mds1 \
+		$LCTL get_param osp.$FSNAME-MDT0001-osp-MDT0000.import)
+
+	[[ "$out" =~ $connect_data_name ]] ||
+		{
+			echo "$out"
+			error "missing connect data $connect_data_name"
+		}
+}
+run_test 245b "check osp connection flag/data: multiple modify RPCs"
 
 cleanup_247() {
 	local submount=$1
@@ -23418,9 +23661,9 @@ test_300h() {
 run_test 300h "check default striped directory for striped directory"
 
 test_300i() {
-	[ $PARALLEL == "yes" ] && skip "skip parallel run"
-	[ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs"
-	[ $MDS1_VERSION -lt $(version_code 2.7.55) ] &&
+	[[ $PARALLEL == "yes" ]] && skip "skip parallel run"
+	(( $MDSCOUNT >= 2 )) || skip_env "needs >= 2 MDTs"
+	(( $MDS1_VERSION >= $(version_code 2.7.55) )) ||
 		skip "Need MDS version at least 2.7.55"
 
 	local stripe_count
@@ -23451,11 +23694,31 @@ test_300i() {
 	$LFS find -H fnv_1a_64,crush $DIR/$tdir/hashdir
 	local dircnt=$($LFS find -H fnv_1a_64,crush $DIR/$tdir/hashdir | wc -l)
 
-	[ $dircnt -eq 2 ] || error "lfs find striped dir got:$dircnt,except:1"
-
-	#set the stripe to be unknown hash type
-	#define OBD_FAIL_UNKNOWN_LMV_STRIPE	0x1901
-	$LCTL set_param fail_loc=0x1901
+	(( $dircnt == 2 )) || error "lfs find striped dir got $dircnt != 2"
+
+	if (( $MDS1_VERSION > $(version_code 2.15.0) )); then
+		$LFS mkdir -i0 -c$MDSCOUNT -H crush2 $DIR/$tdir/hashdir/d3 ||
+			error "create crush2 dir $tdir/hashdir/d3 failed"
+		$LFS find -H crush2 $DIR/$tdir/hashdir
+		dircnt=$($LFS find -H crush2 $DIR/$tdir/hashdir | wc -l)
+		(( $dircnt == 1 )) || error "find crush2 dir got $dircnt != 1"
+
+		# mkdir with an invalid hash type (hash=fail_val) from client
+		# should be replaced on MDS with a valid (default) hash type
+		#define OBD_FAIL_LMV_UNKNOWN_STRIPE	0x1901
+		$LCTL set_param fail_loc=0x1901 fail_val=99
+		$LFS mkdir -c2 $DIR/$tdir/hashdir/d99
+
+		local hash=$($LFS getdirstripe -H $DIR/$tdir/hashdir/d99)
+		local expect=$(do_facet mds1 \
+			$LCTL get_param -n lod.$FSNAME-MDT0000-mdtlov.mdt_hash)
+		[[ $hash == $expect ]] ||
+			error "d99 hash '$hash' != expected hash '$expect'"
+	fi
+
+	#set the stripe to be unknown hash type on read
+	#define OBD_FAIL_LMV_UNKNOWN_STRIPE	0x1901
+	$LCTL set_param fail_loc=0x1901 fail_val=99
 	for ((i = 0; i < 10; i++)); do
 		$CHECKSTAT -t file $DIR/$tdir/striped_dir/f-$i ||
			error "stat f-$i failed"
@@ -24300,25 +24563,37 @@ run_test 398a "direct IO should cancel lock otherwise lockless"
 test_398b() { # LU-4198
 	which fio || skip_env "no fio installed"
 
-	$LFS setstripe -c -1 $DIR/$tfile
+	$LFS setstripe -c -1 -S 1M $DIR/$tfile
 
-	local size=12
+	local size=48
 	dd if=/dev/zero of=$DIR/$tfile bs=1M count=$size
 
 	local njobs=4
-	echo "mix direct rw ${size}M to OST0 by fio with $njobs jobs..."
-	fio --name=rand-rw --rw=randrw --bs=$PAGE_SIZE --direct=1 \
-		--numjobs=$njobs --fallocate=none \
-		--iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \
-		--filename=$DIR/$tfile &
-	bg_pid=$!
-
-	echo "mix buffer rw ${size}M to OST0 by fio with $njobs jobs..."
-	fio --name=rand-rw --rw=randrw --bs=$PAGE_SIZE \
-		--numjobs=$njobs --fallocate=none \
-		--iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \
-		--filename=$DIR/$tfile || true
-	wait $bg_pid
+	# Single page, multiple pages, stripe size, 4*stripe size
+	for bsize in $(( $PAGE_SIZE )) $(( 4*$PAGE_SIZE )) 1048576 4194304; do
+		echo "mix direct rw ${bsize} by fio with $njobs jobs..."
+		fio --name=rand-rw --rw=randrw --bs=$bsize --direct=1 \
+			--numjobs=$njobs --fallocate=none \
+			--iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \
+			--filename=$DIR/$tfile &
+		bg_pid=$!
+
+		echo "mix buffer rw ${bsize} by fio with $njobs jobs..."
+		fio --name=rand-rw --rw=randrw --bs=$bsize \
+			--numjobs=$njobs --fallocate=none \
+			--iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \
+			--filename=$DIR/$tfile || true
+		wait $bg_pid
+	done
+
+	evict=$(do_facet client $LCTL get_param \
+		osc.$FSNAME-OST*-osc-*/state |
+	    awk -F"[ [,]" '/EVICTED ]$/ { if (t<$5) {t=$5;} } END { print t }')
+
+	[ -z "$evict" ] || [[ $evict -le $before ]] ||
+		(do_facet client $LCTL get_param \
+			osc.$FSNAME-OST*-osc-*/state;
+		    error "eviction happened: $evict before:$before")
 
 	rm -f $DIR/$tfile
 }
@@ -25308,6 +25583,7 @@ test_412() {
 }
 run_test 412 "mkdir on specific MDTs"
 
+TEST413_COUNT=${TEST413_COUNT:-200}
 generate_uneven_mdts() {
 	local threshold=$1
 	local lmv_qos_maxage
@@ -25365,23 +25641,24 @@ generate_uneven_mdts() {
 	local testdir=$DIR/$tdir-fillmdt
 	local start
 
-	mkdir -p $testdir
-
 	i=0
 	while (( diff < threshold )); do
+		mkdir -p $testdir
 		# generate uneven MDTs, create till $threshold% diff
 		echo -n "weight diff=$diff% must be > $threshold% ..."
- echo "Fill MDT$min_index with 1000 files: loop $i" + echo "Fill MDT$min_index with $TEST413_COUNT files: loop $i" testdir=$DIR/$tdir-fillmdt/$i - [ -d $testdir ] || $LFS mkdir -i $min_index $testdir || + [ -d $testdir ] && continue + $LFS mkdir -i $min_index $testdir || error "mkdir $testdir failed" $LFS setstripe -E 1M -L mdt $testdir || error "setstripe $testdir failed" start=$SECONDS - for F in f.{0..999}; do - dd if=/dev/zero of=$testdir/$F bs=64K count=1 > \ + for ((F=0; F < TEST413_COUNT; F++)); do + dd if=/dev/zero of=$testdir/f.$F bs=128K count=1 > \ /dev/null 2>&1 || error "dd $F failed" done + sync; sleep 1; sync # wait for QOS to update (( SECONDS < start + 1 )) && sleep $((start + 1 - SECONDS)) @@ -25768,26 +26045,15 @@ test_413e() { } run_test 413e "check default max-inherit value" -test_413f() { - (( MDSCOUNT >= 2 )) || skip "We need at least 2 MDTs for this test" - - (( MDS1_VERSION >= $(version_code 2.14.55) )) || - skip "Need server version at least 2.14.55" - - getfattr -d -m trusted.dmv --absolute-names $DIR > $TMP/dmv.ea || - error "dump $DIR default LMV failed" - stack_trap "setfattr --restore=$TMP/dmv.ea" - - $LFS setdirstripe -D -i -1 -c 1 -X 3 --max-inherit-rr 3 $DIR || - error "set $DIR default LMV failed" - +test_fs_dmv_inherit() +{ local testdir=$DIR/$tdir local count local inherit local inherit_rr - for i in $(seq 3); do + for i in 1 2 3; do mkdir $testdir || error "mkdir $testdir failed" count=$($LFS getdirstripe -D -c $testdir) (( count == 1 )) || @@ -25806,15 +26072,107 @@ test_413f() { (( count == 0 )) || error "$testdir default LMV count not zero: $count" } + +test_413f() { + (( MDSCOUNT >= 2 )) || skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.14.55) )) || + skip "Need server version at least 2.14.55" + + getfattr -d -m trusted.dmv --absolute-names $DIR > $TMP/dmv.ea || + error "dump $DIR default LMV failed" + stack_trap "setfattr --restore=$TMP/dmv.ea" + + $LFS setdirstripe -D -i -1 -c 1 -X 3 --max-inherit-rr 3 $DIR || + error "set $DIR default LMV failed" + + test_fs_dmv_inherit +} run_test 413f "lfs getdirstripe -D list ROOT default LMV if it's not set on dir" +test_413g() { + (( MDSCOUNT >= 2 )) || skip "We need at least 2 MDTs for this test" + + mkdir -p $DIR/$tdir/l2/l3/l4 || error "mkdir $tdir/l1/l2/l3 failed" + getfattr -d -m trusted.dmv --absolute-names $DIR > $TMP/dmv.ea || + error "dump $DIR default LMV failed" + stack_trap "setfattr --restore=$TMP/dmv.ea" + + $LFS setdirstripe -D -i -1 -c 1 -X 3 --max-inherit-rr 3 $DIR || + error "set $DIR default LMV failed" + + FILESET="$FILESET/$tdir/l2/l3/l4" mount_client $MOUNT2 || + error "mount $MOUNT2 failed" + stack_trap "umount_client $MOUNT2" + + local saved_DIR=$DIR + + export DIR=$MOUNT2 + + stack_trap "export DIR=$saved_DIR" + + # first check filesystem-wide default LMV inheritance + test_fs_dmv_inherit || error "incorrect fs default LMV inheritance" + + # then check subdirs are spread to all MDTs + createmany -d $DIR/s $((MDSCOUNT * 100)) || error "createmany failed" + + local count=$($LFS getstripe -m $DIR/s* | sort -u | wc -l) + + (( $count == $MDSCOUNT )) || error "dirs are spread to $count MDTs" +} +run_test 413g "enforce ROOT default LMV on subdir mount" + +test_413h() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.15.50.6) )) || + skip "Need server version at least 2.15.50.6" + + local lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage) + + stack_trap "$LCTL set_param \ + 
+		lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null"
+	$LCTL set_param lmv.*.qos_maxage=1
+
+	local depth=5
+	local rr_depth=4
+	local dir=$DIR/$tdir/l1/l2/l3/l4/l5
+	local count=$((MDSCOUNT * 20))
+
+	generate_uneven_mdts 50
+
+	mkdir -p $dir || error "mkdir $dir failed"
+	stack_trap "rm -rf $dir"
+	$LFS setdirstripe -D -c 1 -i -1 --max-inherit=$depth \
+		--max-inherit-rr=$rr_depth $dir
+
+	for ((d=0; d < depth + 2; d++)); do
+		log "dir=$dir:"
+		for ((sub=0; sub < count; sub++)); do
+			mkdir $dir/d$sub
+		done
+		$LFS getdirstripe -i $dir/d* | sort | uniq -c | sort -nr
+		local num=($($LFS getdirstripe -i $dir/d* | sort | uniq -c))
+		# subdirs within $rr_depth should be created round-robin
+		if (( d < rr_depth )); then
+			(( ${num[0]} != count )) ||
+				error "all objects created on MDT ${num[1]}"
+		fi
+
+		dir=$dir/d0
+	done
+}
+run_test 413h "don't stick to parent for round-robin dirs"
+
 test_413z() {
 	local pids=""
 	local subdir
 	local pid
 
 	for subdir in $(\ls -1 -d $DIR/d413*-fillmdt/*); do
-		unlinkmany $subdir/f. 1000 &
+		unlinkmany $subdir/f. $TEST413_COUNT &
 		pids="$pids $!"
 	done
@@ -26786,6 +27144,45 @@ test_432() {
 }
 run_test 432 "mv dir from outside Lustre"
 
+test_433() {
+	[ $PARALLEL == "yes" ] && skip "skip parallel run"
+
+	[[ -n "$($LCTL list_param llite.*.inode_cache 2>/dev/null)" ]] ||
+		skip "inode cache not supported"
+
+	$LCTL set_param llite.*.inode_cache=0
+	stack_trap "$LCTL set_param llite.*.inode_cache=1"
+
+	local count=256
+	local before
+	local after
+
+	cancel_lru_locks mdc
+	test_mkdir $DIR/$tdir || error "mkdir $tdir"
+	createmany -m $DIR/$tdir/f $count
+	createmany -d $DIR/$tdir/d $count
+	ls -l $DIR/$tdir > /dev/null
+	stack_trap "rm -rf $DIR/$tdir"
+
+	before=$(num_objects)
+	cancel_lru_locks mdc
+	after=$(num_objects)
+
+	# sometimes even @before is less than 2 * count
+	while (( before - after < count )); do
+		sleep 1
+		after=$(num_objects)
+		wait=$((wait + 1))
+		(( wait % 5 == 0 )) && echo "wait $wait seconds objects: $after"
+		if (( wait > 60 )); then
+			error "inode slab grew from $before to $after"
+		fi
+	done
+
+	echo "lustre_inode_cache $before objs before lock cancel, $after after"
+}
+run_test 433 "ldlm lock cancel releases dentries and inodes"
+
 prep_801() {
 	[[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] ||
	[[ $OST1_VERSION -lt $(version_code 2.9.55) ]] &&
@@ -27436,11 +27833,6 @@ test_807() {
 	changelog_users $SINGLEMDS | grep -q $cl_user ||
		error "User $cl_user not found in changelog_users"
 
-	local save="$TMP/$TESTSUITE-$TESTNAME.parameters"
-	save_lustre_params client "llite.*.xattr_cache" > $save
-	lctl set_param llite.*.xattr_cache=0
-	stack_trap "restore_lustre_params < $save; rm -f $save" EXIT
-
 	rm -rf $DIR/$tdir || error "rm $tdir failed"
 	mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
 	touch $DIR/$tdir/trunc || error "touch $tdir/trunc failed"
@@ -27961,34 +28353,6 @@ test_820() {
 }
 run_test 820 "update max EA from open intent"
 
-test_822() {
-	local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
-
-	save_lustre_params mds1 \
-		"osp.$FSNAME-OST*-osc-MDT0000.max_create_count" > $p
-	do_facet $SINGLEMDS "$LCTL set_param -n \
-		osp.$FSNAME-OST*MDT0000.max_create_count=0"
-	do_facet $SINGLEMDS "$LCTL set_param -n \
-		osp.$FSNAME-OST0000*MDT0000.max_create_count=20000"
-
-	# wait for statfs update to clear OS_STATFS_NOPRECREATE
-	local maxage=$(do_facet mds1 $LCTL get_param -n \
-		osp.$FSNAME-OST0000*MDT0000.maxage)
-	sleep $((maxage + 1))
-
-	#define OBD_FAIL_NET_ERROR_RPC          0x532
-	do_facet mds1 "$LCTL set_param fail_loc=0x80000532 fail_val=5"
-
-	stack_trap "restore_lustre_params < $p; rm $p"
-
-	local count=$(do_facet $SINGLEMDS "lctl get_param -n \
-		osp.$FSNAME-OST0000*MDT0000.create_count")
-	for i in $(seq 1 $count); do
-		touch $DIR/$tfile.${i} || error "touch failed"
-	done
-}
-run_test 822 "test precreate failure"
-
 test_823() {
 	local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
 	local OST_MAX_PRECREATE=20000
@@ -28229,6 +28593,27 @@ test_904() {
 }
 run_test 904 "virtual project ID xattr"
 
+# LU-8582
+test_905() {
+	(( $OST1_VERSION >= $(version_code 2.8.54) )) ||
+		skip "lustre < 2.8.54 does not support ladvise"
+
+	remote_ost_nodsh && skip "remote OST with nodsh"
+	$LFS setstripe -c -1 -i 0 $DIR/$tfile || error "setstripe failed"
+
+	$LFS ladvise -a willread $DIR/$tfile || error "ladvise does not work"
+
+	#define OBD_FAIL_OST_OPCODE 0x253
+	# OST_LADVISE = 21
+	do_facet ost1 "$LCTL set_param fail_val=21 fail_loc=0x0253"
+	$LFS ladvise -a willread $DIR/$tfile &&
+		error "unexpected success of ladvise with fault injection"
+	$LFS ladvise -a willread $DIR/$tfile |&
+		grep -q "Operation not supported"
+	(( $? == 0 )) || error "unexpected stderr of ladvise with fault injection"
+}
+run_test 905 "bad or new opcode should not stuck client"
+
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre