From: Patrick Farrell
Date: Thu, 13 Jun 2019 22:30:24 +0000 (+0300)
Subject: LU-10070 tests: New test-framework functionality
X-Git-Tag: 2.12.56~24
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=c1aaa3e55090c7a5e067ec52cf74b2e6406133d2

LU-10070 tests: New test-framework functionality

The self-extending layout tests will make heavy use of setting OST low &
high watermarks to simulate low/out of space conditions.  To this end, add
improved ways of working with these to the test framework and use them in
sanity 253.

Add a component-count helper in sanity-pfl.

Fix pool_add_targets so it can add only 1 target.

Also move one helper from sanity to test-framework so it can be used from
sanity-pfl.

Cray-bug-id: LUS-2528
Signed-off-by: Patrick Farrell
Change-Id: I4e75c7db07b201ff2c410734d5daa991e74bd5c1
Reviewed-on: https://review.whamcloud.com/33778
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Elena Gryaznova
Reviewed-by: Patrick Farrell
Reviewed-by: Oleg Drokin
---
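Usage sketch (illustrative only, not part of the change itself): a test that
needs a nearly-full OST is expected to drive the new helpers roughly as
follows, which is the pattern sanity test_253 adopts below.  This assumes the
usual test-framework environment ($DIR, $tdir, $tfile, an OST index in
$ostidx, error() and stack_trap()); the file names are examples, not fixed by
the helpers.

	# push OST $ostidx into a simulated out-of-space state and make sure
	# the old watermarks are restored however the test exits
	local wms=$(ost_watermarks_set_enospc $tfile $ostidx | grep "watermarks")
	stack_trap "ost_watermarks_clear_enospc $tfile $ostidx $wms" EXIT

	# new objects on that OST should now fail with ENOSPC ...
	dd if=/dev/zero of=$DIR/$tdir/$tfile.1 bs=1M count=1 &&
		error "File creation should fail"

	# ... until the fill files are deleted and the OSP has resynced
	ost_watermarks_enospc_delete_files $tfile $ostidx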
diff --git a/lustre/tests/sanity-pfl.sh b/lustre/tests/sanity-pfl.sh
index 35640cf..b21434c 100644
--- a/lustre/tests/sanity-pfl.sh
+++ b/lustre/tests/sanity-pfl.sh
@@ -174,6 +174,11 @@ test_1c() {
 }
 run_test 1c "Test overstriping w/max stripe count"
 
+check_component_count() {
+	local comp_cnt=$($LFS getstripe --component-count $1)
+	[ $comp_cnt -ne $2 ] && error "$1, component count $comp_cnt != $2"
+}
+
 test_2() {
 	local comp_file=$DIR/$tdir/$tfile
 	local rw_len=$((5 * 1024 * 1024))	# 5M
@@ -184,8 +189,7 @@ test_2() {
 	$LFS setstripe -E 1m -S 1m $comp_file ||
 		error "Create $comp_file failed"
 
-	local comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 1 ] && error "component count $comp_cnt != 1"
+	check_component_count $comp_file 1
 
 	dd if=/dev/zero of=$comp_file bs=1M count=1 > /dev/null 2>&1 ||
 		error "Write first component failed"
@@ -200,14 +204,12 @@ test_2() {
 	$LFS setstripe --component-add -E 2M -S 1M -c 1 $comp_file ||
 		error "Add component to $comp_file failed"
 
-	comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 2 ] && error "component count $comp_cnt != 2"
+	check_component_count $comp_file 2
 
 	$LFS setstripe --component-add -E -1 -c 3 $comp_file ||
 		error "Add last component to $comp_file failed"
 
-	comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 3 ] && error "component count $comp_cnt != 3"
+	check_component_count $comp_file 3
 
 	small_write $comp_file $rw_len ||
 		error "Verify RW failed"
@@ -247,8 +249,7 @@ test_3() {
 	$LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 3 $comp_file ||
 		error "Create $comp_file failed"
 
-	local comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 3 ] && error "component count $comp_cnt != 3"
+	check_component_count $comp_file 3
 
 	dd if=/dev/zero of=$comp_file bs=1M count=2
 
@@ -292,8 +293,7 @@ test_5() {
 	# create file under parent
 	touch $comp_file || error "Create $comp_file failed"
 
-	local comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 2 ] && error "file $comp_cnt != 2"
+	check_component_count $comp_file 2
 
 	#instantiate all components, so that objs are allocted
 	dd if=/dev/zero of=$comp_file bs=1k count=1 seek=65k
@@ -315,8 +315,7 @@ test_5() {
 	# create file under subdir
 	touch $subdir/$tfile || error "Create $subdir/$tfile failed"
 
-	comp_cnt=$($LFS getstripe --component-count $subdir/$tfile)
-	[ $comp_cnt -ne 2 ] && error "$subdir/$tfile $comp_cnt != 2"
+	check_component_count $subdir/$tfile 2
 
 	# delete default layout setting from parent
 	$LFS setstripe -d $parent ||
@@ -341,8 +340,7 @@ test_6() {
 	$LFS setstripe -c 1 -S 128K $comp_file ||
 		error "Create v1 $comp_file failed"
 
-	local comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 0 ] && error "Wrong component count $comp_cnt"
+	check_component_count $comp_file 0
 
 	dd if=/dev/urandom of=$comp_file bs=1M count=5 oflag=sync ||
 		error "Write to v1 $comp_file failed"
@@ -353,8 +351,7 @@ test_6() {
 	$LFS migrate -E 1M -S 512K -c 1 -E -1 -S 1M -c 2 $comp_file ||
 		error "Migrate(v1 -> composite) $comp_file failed"
 
-	comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ "$comp_cnt" -ne 2 ] && error "$comp_cnt != 2"
+	check_component_count $comp_file 2
 
 	local chksum=$(md5sum $comp_file)
 	[ "$old_chksum" != "$chksum" ] &&
@@ -365,8 +362,7 @@ test_6() {
 		-E -1 -S 3M -c 3 $comp_file ||
 		error "Migrate(compsoite -> composite) $comp_file failed"
 
-	comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ "$comp_cnt" -ne 3 ] && error "$comp_cnt != 3"
+	check_component_count $comp_file 3
 
 	chksum=$(md5sum $comp_file)
 	[ "$old_chksum" != "$chksum" ] &&
@@ -376,8 +372,7 @@ test_6() {
 	$LFS migrate -c 2 -S 2M $comp_file ||
 		error "Migrate(composite -> v1) $comp_file failed"
 
-	comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 0 ] && error "$comp_cnt isn't 0"
+	check_component_count $comp_file 0
 
 	chksum=$(md5sum $comp_file)
 	[ "$old_chksum" != "$chksum" ] &&
@@ -433,8 +428,7 @@ test_9() {
 	$LFS setstripe -E 1M -S 1M -E -1 -c 1 $comp_file ||
 		error "Create $comp_file failed"
 
-	local comp_cnt=$($LFS getstripe --component-count $comp_file)
-	[ $comp_cnt -ne 2 ] && error "component count $comp_cnt != 2"
+	check_component_count $comp_file 2
 
 	replay_barrier $SINGLEMDS
 
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index c7ee8d1..d98df7a 100644
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -41,8 +41,8 @@ init_logging
 ALWAYS_EXCEPT="$SANITY_EXCEPT "
 # bug number for skipped test: LU-9693 LU-6493 LU-9693
 ALWAYS_EXCEPT+="               42a     42b     42c "
-# bug number:    LU-8411 LU-9096 LU-9054
-ALWAYS_EXCEPT+=" 407     253     312 "
+# bug number:    LU-8411 LU-9054
+ALWAYS_EXCEPT+=" 407     312 "
 if $SHARED_KEY; then
 	# bug number:	LU-9795 LU-9795 LU-9795 LU-9795
@@ -1742,12 +1742,6 @@ test_27m() {
 }
 run_test 27m "create file while OST0 was full"
 
-sleep_maxage() {
-	local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage |
-		awk '{ print $1 * 2; exit; }')
-	sleep $delay
-}
-
 # OSCs keep a NOSPC flag that will be reset after ~5s (qos_maxage)
 # if the OST isn't full anymore.
 reset_enospc() {
@@ -16874,33 +16868,6 @@ test_252() {
 }
 run_test 252 "check lr_reader tool"
 
-test_253_fill_ost() {
-	local size_mb #how many MB should we write to pass watermark
-	local lwm=$3 #low watermark
-	local free_10mb #10% of free space
-
-	free_kb=$($LFS df $MOUNT | grep $1 | awk '{ print $4 }')
-	size_mb=$((free_kb / 1024 - lwm))
-	free_10mb=$((free_kb / 10240))
-	#If 10% of free space cross low watermark use it
-	if (( free_10mb > size_mb )); then
-		size_mb=$free_10mb
-	else
-		#At least we need to store 1.1 of difference between
-		#free space and low watermark
-		size_mb=$((size_mb + size_mb / 10))
-	fi
-	if (( lwm <= $((free_kb / 1024)) )) || [ ! -f $DIR/$tdir/1 ]; then
-		dd if=/dev/zero of=$DIR/$tdir/1 bs=1M count=$size_mb \
-			oflag=append conv=notrunc
-	fi
-
-	sleep_maxage
-
-	free_kb=$($LFS df $MOUNT | grep $1 | awk '{ print $4 }')
-	echo "OST still has $((free_kb / 1024)) mbytes free"
-}
-
 test_253() {
 	[ $PARALLEL == "yes" ] && skip "skip parallel run"
 	remote_mds_nodsh && skip "remote MDS with nodsh"
 
@@ -16908,9 +16875,8 @@ test_253() {
 	local ostidx=0
 	local rc=0
+	local ost_name=$(ostname_from_index $ostidx)
 
-	local ost_name=$($LFS osts |
-		sed -n 's/^'$ostidx': \(.*\)_UUID .*/\1/p')
 	# on the mdt's osc
 	local mdtosc_proc1=$(get_mdtosc_proc_path $SINGLEMDS $ost_name)
 	do_facet $SINGLEMDS $LCTL get_param -n \
@@ -16922,60 +16888,45 @@ test_253() {
 	wait_delete_completed
 	mkdir $DIR/$tdir
 
-	local last_wm_h=$(do_facet $SINGLEMDS $LCTL get_param -n \
-		osp.$mdtosc_proc1.reserved_mb_high)
-	local last_wm_l=$(do_facet $SINGLEMDS $LCTL get_param -n \
-		osp.$mdtosc_proc1.reserved_mb_low)
-	echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l"
-
 	if ! combined_mgs_mds ; then
 		mount_mgs_client
 	fi
-	create_pool $FSNAME.$TESTNAME || error "Pool creation failed"
-	do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name ||
-		error "Adding $ost_name to pool failed"
-
-	# Wait for client to see a OST at pool
-	wait_update $HOSTNAME "$LCTL get_param -n
-		lov.$FSNAME-*.pools.$TESTNAME | sort -u |
-		grep $ost_name" "$ost_name""_UUID" $((TIMEOUT/2)) ||
-		error "Client can not see the pool"
+	pool_add $TESTNAME || error "Pool creation failed"
+	pool_add_targets $TESTNAME 0 || error "Pool add targets failed"
+
 	$LFS setstripe $DIR/$tdir -i $ostidx -c 1 -p $FSNAME.$TESTNAME ||
 		error "Setstripe failed"
 
-	dd if=/dev/zero of=$DIR/$tdir/0 bs=1M count=10
-	local blocks=$($LFS df $MOUNT | grep $ost_name | awk '{ print $4 }')
-	echo "OST still has $((blocks/1024)) mbytes free"
-
-	local new_lwm=$((blocks/1024-10))
-	do_facet $SINGLEMDS $LCTL set_param \
-		osp.$mdtosc_proc1.reserved_mb_high=$((new_lwm+5))
-	do_facet $SINGLEMDS $LCTL set_param \
-		osp.$mdtosc_proc1.reserved_mb_low=$new_lwm
+	dd if=/dev/zero of=$DIR/$tdir/$tfile.0 bs=1M count=10
 
-	test_253_fill_ost $ost_name $mdtosc_proc1 $new_lwm
-
-	#First enospc could execute orphan deletion so repeat.
-	test_253_fill_ost $ost_name $mdtosc_proc1 $new_lwm
+	local wms=$(ost_watermarks_set_enospc $tfile $ostidx |
+		grep "watermarks")
+	stack_trap "ost_watermarks_clear_enospc $tfile $ostidx $wms" EXIT
 
 	local oa_status=$(do_facet $SINGLEMDS $LCTL get_param -n \
 		osp.$mdtosc_proc1.prealloc_status)
 	echo "prealloc_status $oa_status"
 
-	dd if=/dev/zero of=$DIR/$tdir/2 bs=1M count=1 &&
+	dd if=/dev/zero of=$DIR/$tdir/$tfile.1 bs=1M count=1 &&
 		error "File creation should fail"
+
 	#object allocation was stopped, but we still able to append files
-	dd if=/dev/zero of=$DIR/$tdir/1 bs=1M seek=6 count=5 oflag=append ||
-		error "Append failed"
-	rm -f $DIR/$tdir/1 $DIR/$tdir/0 $DIR/$tdir/r*
+	dd if=/dev/zero of=$DIR/$tdir/$tfile.0 bs=1M seek=6 count=5 \
+		oflag=append || error "Append failed"
+
+	rm -f $DIR/$tdir/$tfile.0
+
+	# For this test, we want to delete the files we created to go out of
+	# space but leave the watermark, so we remain nearly out of space
+	ost_watermarks_enospc_delete_files $tfile $ostidx
 
 	wait_delete_completed
 	sleep_maxage
 
 	for i in $(seq 10 12); do
-		dd if=/dev/zero of=$DIR/$tdir/$i bs=1M count=1 2>/dev/null ||
-			error "File creation failed after rm";
+		dd if=/dev/zero of=$DIR/$tdir/$tfile.$i bs=1M count=1 \
+			2>/dev/null || error "File creation failed after rm"
 	done
 
 	oa_status=$(do_facet $SINGLEMDS $LCTL get_param -n \
@@ -16985,16 +16936,6 @@ test_253() {
 	if (( oa_status != 0 )); then
 		error "Object allocation still disable after rm"
 	fi
-	do_facet $SINGLEMDS $LCTL set_param \
-		osp.$mdtosc_proc1.reserved_mb_high=$last_wm_h
-	do_facet $SINGLEMDS $LCTL set_param \
-		osp.$mdtosc_proc1.reserved_mb_low=$last_wm_l
-
-
-	do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $ost_name ||
-		error "Remove $ost_name from pool failed"
-	do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
-		error "Pool destroy fialed"
 
 	if ! combined_mgs_mds ; then
 		umount_mgs_client
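A note on the $wms handling above (explanatory only, not part of the diff):
ost_watermarks_set_enospc, added to test-framework.sh below, echoes the line
produced by ost_watermarks_set_low_space, i.e. "watermarks: <old_lwm>
<old_hwm> <new_lwm> <new_hwm>", and test_253 isolates it with grep.  Because
$wms is expanded unquoted inside the stack_trap command string, the cleanup
helper receives the saved values as extra positional parameters, with the
literal "watermarks:" tag landing in $3; this is why
ost_watermarks_clear_enospc reads the old watermarks from $4 and $5.  With
made-up numbers:

	wms="watermarks: 16384 17408 50 55"
	stack_trap "ost_watermarks_clear_enospc $tfile $ostidx $wms" EXIT
	# on exit this runs:
	# ost_watermarks_clear_enospc <tfile> <ostidx> watermarks: 16384 17408 50 55
	#                                 $1      $2       $3        $4    $5   ...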
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index e842f10..03d7655 100755
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -3005,6 +3005,142 @@ wait_zfs_commit() {
 	fi
 }
 
+fill_ost() {
+	local filename=$1
+	local ost_idx=$2
+	local lwm=$3 #low watermark
+	local size_mb #how many MB should we write to pass watermark
+	local ost_name=$(ostname_from_index $ost_idx)
+
+	free_kb=$($LFS df $MOUNT | awk "/$ost_name/ { print \$4 }")
+	size_mb=0
+	if (( $free_kb / 1024 > lwm )); then
+		size_mb=$((free_kb / 1024 - lwm))
+	fi
+	#If 10% of free space cross low watermark use it
+	if (( $free_kb / 10240 > size_mb )); then
+		size_mb=$((free_kb / 10240))
+	else
+		#At least we need to store 1.1 of difference between
+		#free space and low watermark
+		size_mb=$((size_mb + size_mb / 10))
+	fi
+	if (( lwm <= $free_kb / 1024 )) ||
+	   [ ! -f $DIR/${filename}.fill_ost$ost_idx ]; then
+		$LFS setstripe -i $ost_idx -c1 $DIR/${filename}.fill_ost$ost_idx
+		dd if=/dev/zero of=$DIR/${filename}.fill_ost$ost_idx bs=1M \
+			count=$size_mb oflag=append conv=notrunc
+	fi
+
+	sleep_maxage
+
+	free_kb=$($LFS df $MOUNT | awk "/$ost_name/ { print \$4 }")
+	echo "OST still has $((free_kb / 1024)) MB free"
+}
+
+# This checks only the primary MDS
+ost_watermarks_get() {
+	local ost_idx=$1
+	local ost_name=$(ostname_from_index $ost_idx)
+	local mdtosc_proc=$(get_mdtosc_proc_path $SINGLEMDS $ost_name)
+
+	local hwm=$(do_facet $SINGLEMDS $LCTL get_param -n \
+		osp.$mdtosc_proc.reserved_mb_high)
+	local lwm=$(do_facet $SINGLEMDS $LCTL get_param -n \
+		osp.$mdtosc_proc.reserved_mb_low)
+
+	echo "$lwm $hwm"
+}
+
+# Note that we set watermarks on all MDSes (necessary for striped dirs)
+ost_watermarks_set() {
+	local ost_idx=$1
+	local lwm=$2
+	local hwm=$3
+	local ost_name=$(ostname_from_index $ost_idx)
+	local facets=$(get_facets MDS)
+
+	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
+		osp.*$ost_name*.reserved_mb_low=$lwm \
+		osp.*$ost_name*.reserved_mb_high=$hwm > /dev/null
+
+	# sleep to ensure we see the change
+	sleep_maxage
+}
+
+ost_watermarks_set_low_space() {
+	local ost_idx=$1
+	local wms=$(ost_watermarks_get $ost_idx)
+	local ost_name=$(ostname_from_index $ost_idx)
+
+	local old_lwm=$(echo $wms | awk '{ print $1 }')
+	local old_hwm=$(echo $wms | awk '{ print $2 }')
+
+	local blocks=$($LFS df $MOUNT | awk "/$ost_name/ { print \$4 }")
+	# minimal extension size is 64M
+	local new_lwm=50
+	if (( $blocks / 1024 > 50 )); then
+		new_lwm=$((blocks / 1024 - 50))
+	fi
+	local new_hwm=$((new_lwm + 5))
+
+	ost_watermarks_set $ost_idx $new_lwm $new_hwm
+	echo "watermarks: $old_lwm $old_hwm $new_lwm $new_hwm"
+}
+
+# Set watermarks to ~current available space & then write data to fill it
+# Note OST is not *actually* full after this, it just reports ENOSPC in the
+# internal statfs used by the stripe allocator
+#
+# first parameter is the filename-prefix, which must get under t-f cleanup
+# requirements (rm -rf $DIR/[Rdfs][0-9]*), i.e. $tfile work fine
+ost_watermarks_set_enospc() {
+	local filename=$1
+	local ost_idx=$2
+	# on the mdt's osc
+	local ost_name=$(ostname_from_index $ost_idx)
+	local facets=$(get_facets MDS)
+	local wms
+	local MDS
+
+	for MDS in ${facets//,/ }; do
+		local mdtosc_proc=$(get_mdtosc_proc_path $MDS $ost_name)
+
+		do_facet $MDS $LCTL get_param -n \
+			osp.$mdtosc_proc.reserved_mb_high ||
+			skip "remote MDS does not support reserved_mb_high"
+	done
+
+	wms=$(ost_watermarks_set_low_space $ost_idx)
+	local new_lwm=$(echo $wms | awk '{ print $4 }')
+	fill_ost $filename $ost_idx $new_lwm
+	#First enospc could execute orphan deletion so repeat
+	fill_ost $filename $ost_idx $new_lwm
+	echo $wms
+}
+
+ost_watermarks_enospc_delete_files() {
+	local filename=$1
+	local ost_idx=$2
+
+	rm -f $DIR/${filename}.fill_ost$ost_idx
+
+	wait_delete_completed
+	wait_mds_ost_sync
+}
+
+# clean up from "ost_watermarks_set_enospc"
+ost_watermarks_clear_enospc() {
+	local filename=$1
+	local ost_idx=$2
+	local old_lwm=$4
+	local old_hwm=$5
+
+	ost_watermarks_enospc_delete_files $filename $ost_idx
+	ost_watermarks_set $ost_idx $old_lwm $old_hwm
+	echo "set OST$ost_idx lwm back to $old_lwm, hwm back to $old_hwm"
+}
+
 wait_delete_completed_mds() {
 	local max_wait=${1:-20}
 	local mds2sync=""
@@ -10017,3 +10153,9 @@ rmultiop_stop() {
 
 	wait ${!do_node_pid}
 }
+
+sleep_maxage() {
+	local delay=$(do_facet $SINGLEMDS lctl get_param -n lo[vd].*.qos_maxage |
+		awk '{ print $1 * 2; exit; }')
+	sleep $delay
+}
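A worked example of the fill_ost sizing logic above, with made-up numbers
(this only illustrates the arithmetic of the helper and assumes free space is
still above the low watermark, the usual case right after
ost_watermarks_set_low_space):

	free_kb=2048000                      # ~2000 MB free on the OST
	lwm=1950                             # low watermark set to free - 50 MB
	size_mb=$((free_kb / 1024 - lwm))    # 50 MB: bare minimum to reach the watermark
	if (( free_kb / 10240 > size_mb )); then
		size_mb=$((free_kb / 10240))         # 200 MB: 10% of free space crosses
		                                     # the watermark with a clear margin
	else
		size_mb=$((size_mb + size_mb / 10))  # otherwise pad the difference by 10%
	fi
	echo $size_mb                        # 200 -> dd writes 200 MB, leaving ~1800 MB
	                                     # free, well below the 1950 MB low watermark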