X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Ftests%2Fsanity.sh;h=027271c65a37e1486cc3c277a93db1e857ce1aaf;hb=0ad54d59777366fba8ee61eaaa27b3060c91782f;hp=1e7d1e197be3ddee9d9bcdbfa780aa8e7a6dcf60;hpb=84097792f56c38265b6eb0417a47c557f81f9461;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 1e7d1e1..027271c 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -45,8 +45,14 @@ ALWAYS_EXCEPT+=" 42a 42b 42c " ALWAYS_EXCEPT+=" 407 312 " if $SHARED_KEY; then - # bug number: LU-9795 LU-9795 LU-9795 LU-9795 - ALWAYS_EXCEPT+=" 17n 60a 133g 300f " + # bug number: LU-9795 LU-9795 LU-9795 LU-9795 LU-12781 + ALWAYS_EXCEPT+=" 17n 60a 133g 300f 272a" +fi + +selinux_status=$(getenforce) +if [ "$selinux_status" != "Disabled" ]; then + # bug number: + ALWAYS_EXCEPT+="" fi # skip the grant tests for ARM until they are fixed @@ -5495,19 +5501,27 @@ test_56c() { skip_env "OST $ost_name is in $old_status status" do_facet ost1 $LCTL set_param -n obdfilter.$ost_name.degraded=1 + [[ $OST1_VERSION -ge $(version_code 2.12.55) ]] && do_facet ost1 \ + $LCTL set_param -n obdfilter.$ost_name.no_precreate=1 sleep_maxage local new_status=$(ost_dev_status $ost_idx) - [[ "$new_status" = "D" ]] || - error "OST $ost_name is in status of '$new_status', not 'D'" + [[ "$new_status" =~ "D" ]] || + error "$ost_name status is '$new_status', missing 'D'" + if [[ $OST1_VERSION -ge $(version_code 2.12.55) ]]; then + [[ "$new_status" =~ "N" ]] || + error "$ost_name status is '$new_status', missing 'N'" + fi do_facet ost1 $LCTL set_param -n obdfilter.$ost_name.degraded=0 + [[ $OST1_VERSION -ge $(version_code 2.12.55) ]] && do_facet ost1 \ + $LCTL set_param -n obdfilter.$ost_name.no_precreate=0 sleep_maxage new_status=$(ost_dev_status $ost_idx) - [[ -z "$new_status" ]] || - error "OST $ost_name is in status of '$new_status', not ''" + [[ ! "$new_status" =~ "D" && ! "$new_status" =~ "N" ]] || + error "$ost_name status is '$new_status', has 'D' and/or 'N'" } run_test 56c "check 'lfs df' showing device status" @@ -6231,7 +6245,7 @@ test_56w() { done # $LFS_MIGRATE will fail if hard link migration is unsupported - if [[ $(lustre_version_code mds1) -gt $(version_code 2.5.55) ]]; then + if [[ $MDS1_VERSION -gt $(version_code 2.5.55) ]]; then createmany -l$dir/dir1/file1 $dir/dir1/link 200 || error "creating links to $dir/dir1/file1 failed" fi @@ -6569,6 +6583,7 @@ check_migrate_links() { local file1="$dir/file1" local begin="$2" local count="$3" + local runas="$4" local total_count=$(($begin + $count - 1)) local symlink_count=10 local uniq_count=10 @@ -6613,7 +6628,7 @@ check_migrate_links() { fi echo -n "migrating files..." - local migrate_out=$($LFS_MIGRATE -y -S '1m' $dir) + local migrate_out=$($runas $LFS_MIGRATE -y -S '1m' $dir) local rc=$? [ $rc -eq 0 ] || error "migrate failed rc = $rc" echo "done" @@ -6668,6 +6683,9 @@ test_56xb() { echo "testing rsync mode when all links do not fit within xattrs" LFS_MIGRATE_RSYNC_MODE=true check_migrate_links "$dir" 101 100 + chown -R $RUNAS_ID $dir + echo "testing non-root lfs migrate mode when not all links are in xattr" + LFS_MIGRATE_RSYNC_MODE=false check_migrate_links "$dir" 101 100 "$RUNAS" # clean up rm -rf $dir @@ -11343,41 +11361,52 @@ run_test 126 "check that the fsgid provided by the client is taken into account" test_127a() { # bug 15521 [ $PARALLEL == "yes" ] && skip "skip parallel run" + local name count samp unit min max sum sumsq $LFS setstripe -i 0 -c 1 $DIR/$tfile || error "setstripe failed" + echo "stats before reset" + $LCTL get_param osc.*.stats $LCTL set_param osc.*.stats=0 - FSIZE=$((2048 * 1024)) - dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1 + local fsize=$((2048 * 1024)) + + dd if=/dev/zero of=$DIR/$tfile bs=$fsize count=1 cancel_lru_locks osc - dd if=$DIR/$tfile of=/dev/null bs=$FSIZE - - $LCTL get_param osc.*0000-osc-*.stats | grep samples > $DIR/${tfile}.tmp - while read NAME COUNT SAMP UNIT MIN MAX SUM SUMSQ; do - echo "got $COUNT $NAME" - [ ! $MIN ] && error "Missing min value for $NAME proc entry" - eval $NAME=$COUNT || error "Wrong proc format" - - case $NAME in - read_bytes|write_bytes) - [ $MIN -lt 4096 ] && error "min is too small: $MIN" - [ $MIN -gt $FSIZE ] && error "min is too big: $MIN" - [ $MAX -lt 4096 ] && error "max is too small: $MAX" - [ $MAX -gt $FSIZE ] && error "max is too big: $MAX" - [ $SUM -ne $FSIZE ] && error "sum is wrong: $SUM" - [ $SUMSQ -lt $(((FSIZE /4096) * (4096 * 4096))) ] && - error "sumsquare is too small: $SUMSQ" - [ $SUMSQ -gt $((FSIZE * FSIZE)) ] && - error "sumsquare is too big: $SUMSQ" - ;; - *) ;; - esac - done < $DIR/${tfile}.tmp - - #check that we actually got some stats - [ "$read_bytes" ] || error "Missing read_bytes stats" - [ "$write_bytes" ] || error "Missing write_bytes stats" - [ "$read_bytes" != 0 ] || error "no read done" - [ "$write_bytes" != 0 ] || error "no write done" + dd if=$DIR/$tfile of=/dev/null bs=$fsize + + $LCTL get_param osc.*0000-osc-*.stats | grep samples > $DIR/$tfile.tmp + stack_trap "rm -f $TMP/$tfile.tmp" + while read name count samp unit min max sum sumsq; do + echo "got name=$name count=$count unit=$unit min=$min max=$max" + [ ! $min ] && error "Missing min value for $name proc entry" + eval $name=$count || error "Wrong proc format" + + case $name in + read_bytes|write_bytes) + [[ "$unit" =~ "bytes" ]] || + error "unit is not 'bytes': $unit" + (( $min >= 4096 )) || error "min is too small: $min" + (( $min <= $fsize )) || error "min is too big: $min" + (( $max >= 4096 )) || error "max is too small: $max" + (( $max <= $fsize )) || error "max is too big: $max" + (( $sum == $fsize )) || error "sum is wrong: $sum" + (( $sumsq >= ($fsize / 4096) * (4096 * 4096) )) || + error "sumsquare is too small: $sumsq" + (( $sumsq <= $fsize * $fsize )) || + error "sumsquare is too big: $sumsq" + ;; + ost_read|ost_write) + [[ "$unit" =~ "usec" ]] || + error "unit is not 'usec': $unit" + ;; + *) ;; + esac + done < $DIR/$tfile.tmp + + #check that we actually got some stats + [ "$read_bytes" ] || error "Missing read_bytes stats" + [ "$write_bytes" ] || error "Missing write_bytes stats" + [ "$read_bytes" != 0 ] || error "no read done" + [ "$write_bytes" != 0 ] || error "no write done" } run_test 127a "verify the client stats are sane" @@ -11385,6 +11414,8 @@ test_127b() { # bug LU-333 [ $PARALLEL == "yes" ] && skip "skip parallel run" local name count samp unit min max sum sumsq + echo "stats before reset" + $LCTL get_param llite.*.stats $LCTL set_param llite.*.stats=0 # perform 2 reads and writes so MAX is different from SUM. @@ -11395,30 +11426,28 @@ test_127b() { # bug LU-333 dd if=$DIR/$tfile of=/dev/null bs=$PAGE_SIZE count=1 $LCTL get_param llite.*.stats | grep samples > $TMP/$tfile.tmp + stack_trap "rm -f $TMP/$tfile.tmp" while read name count samp unit min max sum sumsq; do - echo "got $count $name" + echo "got name=$name count=$count unit=$unit min=$min max=$max" eval $name=$count || error "Wrong proc format" case $name in - read_bytes) - [ $count -ne 2 ] && error "count is not 2: $count" - [ $min -ne $PAGE_SIZE ] && + read_bytes|write_bytes) + [[ "$unit" =~ "bytes" ]] || + error "unit is not 'bytes': $unit" + (( $count == 2 )) || error "count is not 2: $count" + (( $min == $PAGE_SIZE )) || error "min is not $PAGE_SIZE: $min" - [ $max -ne $PAGE_SIZE ] && - error "max is incorrect: $max" - [ $sum -ne $((PAGE_SIZE * 2)) ] && - error "sum is wrong: $sum" + (( $max == $PAGE_SIZE )) || + error "max is not $PAGE_SIZE: $max" + (( $sum == $PAGE_SIZE * 2 )) || + error "sum is not $((PAGE_SIZE * 2)): $sum" ;; - write_bytes) - [ $count -ne 2 ] && error "count is not 2: $count" - [ $min -ne $PAGE_SIZE ] && - error "min is not $PAGE_SIZE: $min" - [ $max -ne $PAGE_SIZE ] && - error "max is incorrect: $max" - [ $sum -ne $((PAGE_SIZE * 2)) ] && - error "sum is wrong: $sum" + read|write) + [[ "$unit" =~ "usec" ]] || + error "unit is not 'usec': $unit" ;; - *) ;; + *) ;; esac done < $TMP/$tfile.tmp @@ -11427,8 +11456,6 @@ test_127b() { # bug LU-333 [ "$write_bytes" ] || error "Missing write_bytes stats" [ "$read_bytes" != 0 ] || error "no read done" [ "$write_bytes" != 0 ] || error "no write done" - - rm -f $TMP/${tfile}.tmp } run_test 127b "verify the llite client stats are sane" @@ -12680,8 +12707,9 @@ test_151() { error "NOT IN CACHE: before: $BEFORE, after: $AFTER" fi - # the following read invalidates the cache cancel_lru_locks osc + # invalidates OST cache + do_nodes $list "echo 1 > /proc/sys/vm/drop_caches" set_osd_param $list '' read_cache_enable 0 cat $DIR/$tfile >/dev/null @@ -13326,9 +13354,9 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + error "NOT IN CACHE (2): before: $BEFORE, after: $AFTER" else - log "cache hits:: before: $BEFORE, after: $AFTER" + log "cache hits: before: $BEFORE, after: $AFTER" fi log "Read again; it should be satisfied from the cache." @@ -13337,7 +13365,7 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + error "NOT IN CACHE (3): before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi @@ -13352,20 +13380,23 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + error "NOT IN CACHE (4): before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi - log "Read again; it should not be satisfied from the cache." - BEFORE=$AFTER - cancel_lru_locks osc - cat $file >/dev/null - AFTER=$(roc_hit) - if ! let "AFTER - BEFORE == 0"; then - error "IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" + if [ $OST1_VERSION -lt $(version_code 2.12.55) ]; then + # > 2.12.56 uses pagecache if cached + log "Read again; it should not be satisfied from the cache." + BEFORE=$AFTER + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == 0"; then + error "IN CACHE (5): before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi fi log "Write data and read it back." @@ -13376,20 +13407,23 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + error "NOT IN CACHE (6): before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi - log "Read again; it should not be satisfied from the cache." - BEFORE=$AFTER - cancel_lru_locks osc - cat $file >/dev/null - AFTER=$(roc_hit) - if ! let "AFTER - BEFORE == 0"; then - error "IN CACHE: before: $BEFORE, after: $AFTER" - else - log "cache hits:: before: $BEFORE, after: $AFTER" + if [ $OST1_VERSION -lt $(version_code 2.12.55) ]; then + # > 2.12.56 uses pagecache if cached + log "Read again; it should not be satisfied from the cache." + BEFORE=$AFTER + cancel_lru_locks osc + cat $file >/dev/null + AFTER=$(roc_hit) + if ! let "AFTER - BEFORE == 0"; then + error "IN CACHE (7): before: $BEFORE, after: $AFTER" + else + log "cache hits:: before: $BEFORE, after: $AFTER" + fi fi log "Turn off read and write cache" @@ -13405,7 +13439,7 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == 0"; then - error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER" + error_ignore bz20762 "IN CACHE (8):before:$BEFORE,after:$AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi @@ -13423,7 +13457,7 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == 0"; then - error_ignore bz20762 "IN CACHE: before: $BEFORE, after: $AFTER" + error_ignore bz20762 "IN CACHE (9):before:$BEFORE,after:$AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi @@ -13434,7 +13468,7 @@ test_156() { cat $file >/dev/null AFTER=$(roc_hit) if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + error "NOT IN CACHE (1): before: $BEFORE, after: $AFTER" else log "cache hits:: before: $BEFORE, after: $AFTER" fi @@ -16892,6 +16926,61 @@ test_230l() { } run_test 230l "readdir between MDTs won't crash" +test_230m() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "Need MDS version at least 2.11.56" + + local MDTIDX=1 + local mig_dir=$DIR/$tdir/migrate_dir + local longstr="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + local shortstr="b" + local val + + echo "Creating files and dirs with xattrs" + test_mkdir $DIR/$tdir + test_mkdir -i0 -c1 $mig_dir + mkdir $mig_dir/dir + setfattr -n user.attr1 -v $longstr $mig_dir/dir || + error "cannot set xattr attr1 on dir" + setfattr -n user.attr2 -v $shortstr $mig_dir/dir || + error "cannot set xattr attr2 on dir" + touch $mig_dir/dir/f0 + setfattr -n user.attr1 -v $longstr $mig_dir/dir/f0 || + error "cannot set xattr attr1 on file" + setfattr -n user.attr2 -v $shortstr $mig_dir/dir/f0 || + error "cannot set xattr attr2 on file" + sync ; sync ; echo 3 > /proc/sys/vm/drop_caches + val=$(getfattr --only-values -n user.attr1 $mig_dir/dir 2>/dev/null) + [ "$val" = $longstr ] || error "xattr attr1 not set properly on dir" + val=$(getfattr --only-values -n user.attr2 $mig_dir/dir 2>/dev/null) + [ "$val" = $shortstr ] || error "xattr attr2 not set properly on dir" + val=$(getfattr --only-values -n user.attr1 $mig_dir/dir/f0 2>/dev/null) + [ "$val" = $longstr ] || error "xattr attr1 not set properly on file" + val=$(getfattr --only-values -n user.attr2 $mig_dir/dir/f0 2>/dev/null) + [ "$val" = $shortstr ] || error "xattr attr2 not set properly on file" + + echo "Migrating to MDT1" + $LFS migrate -m $MDTIDX $mig_dir || + error "fails on migrating dir to MDT1" + + sync ; sync ; echo 3 > /proc/sys/vm/drop_caches + echo "Checking xattrs" + val=$(getfattr --only-values -n user.attr1 $mig_dir/dir 2>/dev/null) + [ "$val" = $longstr ] || + error "expecting xattr1 $longstr on dir, found $val" + val=$(getfattr --only-values -n user.attr2 $mig_dir/dir 2>/dev/null) + [ "$val" = $shortstr ] || + error "expecting xattr2 $shortstr on dir, found $val" + val=$(getfattr --only-values -n user.attr1 $mig_dir/dir/f0 2>/dev/null) + [ "$val" = $longstr ] || + error "expecting xattr1 $longstr on file, found $val" + val=$(getfattr --only-values -n user.attr2 $mig_dir/dir/f0 2>/dev/null) + [ "$val" = $shortstr ] || + error "expecting xattr2 $shortstr on file, found $val" +} +run_test 230m "xattrs not changed after dir migration" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc @@ -17296,21 +17385,6 @@ test_245() { } run_test 245 "check mdc connection flag/data: multiple modify RPCs" -test_246() { # LU-7371 - remote_ost_nodsh && skip "remote OST with nodsh" - [ $OST1_VERSION -lt $(version_code 2.7.62) ] && - skip "Need OST version >= 2.7.62" - - do_facet ost1 $LCTL set_param fail_val=4095 -#define OBD_FAIL_OST_READ_SIZE 0x234 - do_facet ost1 $LCTL set_param fail_loc=0x234 - $LFS setstripe $DIR/$tfile -i 0 -c 1 - dd if=/dev/zero of=$DIR/$tfile bs=4095 count=1 > /dev/null 2>&1 - cancel_lru_locks $FSNAME-OST0000 - dd if=$DIR/$tfile of=/dev/null bs=1048576 || error "Read failed" -} -run_test 246 "Read file of size 4095 should return right length" - cleanup_247() { local submount=$1 @@ -17403,7 +17477,7 @@ test_247e() { } run_test 247e "mount .. as fileset" -test_248() { +test_248a() { local fast_read_sav=$($LCTL get_param -n llite.*.fast_read 2>/dev/null) [ -z "$fast_read_sav" ] && skip "no fast read support" @@ -17454,7 +17528,89 @@ test_248() { $LCTL set_param -n llite.*.fast_read=$fast_read_sav rm -f $DIR/$tfile } -run_test 248 "fast read verification" +run_test 248a "fast read verification" + +test_248b() { + # Default short_io_bytes=16384, try both smaller and larger sizes. + # Lustre O_DIRECT read and write needs to be a multiple of PAGE_SIZE. + # 6017024 = 2^12*13*113 = 47008*128 = 11752*512 = 4096*1469 = 53248*113 + echo "bs=53248 count=113 normal buffered write" + dd if=/dev/urandom of=$TMP/$tfile.0 bs=53248 count=113 || + error "dd of initial data file failed" + stack_trap "rm -f $DIR/$tfile.[0-3] $TMP/$tfile.[0-3]" EXIT + + echo "bs=47008 count=128 oflag=dsync normal write $tfile.0" + dd if=$TMP/$tfile.0 of=$DIR/$tfile.0 bs=47008 count=128 oflag=dsync || + error "dd with sync normal writes failed" + cmp $TMP/$tfile.0 $DIR/$tfile.0 || error "compare $DIR/$tfile.0 failed" + + echo "bs=11752 count=512 oflag=dsync small write $tfile.1" + dd if=$TMP/$tfile.0 of=$DIR/$tfile.1 bs=11752 count=512 oflag=dsync || + error "dd with sync small writes failed" + cmp $TMP/$tfile.0 $DIR/$tfile.1 || error "compare $DIR/$tfile.1 failed" + + cancel_lru_locks osc + + # calculate the small O_DIRECT size and count for the client PAGE_SIZE + local num=$((13 * 113 / (PAGE_SIZE / 4096))) + echo "bs=$PAGE_SIZE count=$num iflag=direct small read $tfile.1" + dd if=$DIR/$tfile.1 of=$TMP/$tfile.1 bs=$PAGE_SIZE count=$num \ + iflag=direct || error "dd with O_DIRECT small read failed" + # adjust bytes checked to handle larger PAGE_SIZE for ARM/PPC + cmp --bytes=$((PAGE_SIZE * num)) $TMP/$tfile.0 $TMP/$tfile.1 || + error "compare $TMP/$tfile.1 failed" + + local save=$($LCTL get_param -n osc.*OST000*.short_io_bytes | head -n 1) + stack_trap "$LCTL set_param osc.$FSNAME-*.short_io_bytes=$save" EXIT + + # just to see what the maximum tunable value is, and test parsing + echo "test invalid parameter 2MB" + $LCTL set_param osc.$FSNAME-OST0000*.short_io_bytes=2M && + error "too-large short_io_bytes allowed" + echo "test maximum parameter 512KB" + # if we can set a larger short_io_bytes, run test regardless of version + if ! $LCTL set_param osc.$FSNAME-OST0000*.short_io_bytes=512K; then + # older clients may not allow setting it this large, that's OK + [ $CLIENT_VERSION -ge $(version_code 2.13.50) ] || + skip "Need at least client version 2.13.50" + error "medium short_io_bytes failed" + fi + $LCTL get_param osc.$FSNAME-OST0000*.short_io_bytes + size=$($LCTL get_param -n osc.$FSNAME-OST0000*.short_io_bytes) + + echo "test large parameter 64KB" + $LCTL set_param osc.$FSNAME-*.short_io_bytes=65536 + $LCTL get_param osc.$FSNAME-OST0000*.short_io_bytes + + echo "bs=47008 count=128 oflag=dsync large write $tfile.2" + dd if=$TMP/$tfile.0 of=$DIR/$tfile.2 bs=47008 count=128 oflag=dsync || + error "dd with sync large writes failed" + cmp $TMP/$tfile.0 $DIR/$tfile.2 || error "compare $DIR/$tfile.2 failed" + + # calculate the large O_DIRECT size and count for the client PAGE_SIZE + local size=$(((4096 * 13 + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE)) + num=$((113 * 4096 / PAGE_SIZE)) + echo "bs=$size count=$num oflag=direct large write $tfile.3" + dd if=$TMP/$tfile.0 of=$DIR/$tfile.3 bs=$size count=$num oflag=direct || + error "dd with O_DIRECT large writes failed" + cmp --bytes=$((size * num)) $TMP/$tfile.0 $DIR/$tfile.3 || + error "compare $DIR/$tfile.3 failed" + + cancel_lru_locks osc + + echo "bs=$size count=$num iflag=direct large read $tfile.2" + dd if=$DIR/$tfile.2 of=$TMP/$tfile.2 bs=$size count=$num iflag=direct || + error "dd with O_DIRECT large read failed" + cmp --bytes=$((size * num)) $TMP/$tfile.0 $TMP/$tfile.2 || + error "compare $TMP/$tfile.2 failed" + + echo "bs=$size count=$num iflag=direct large read $tfile.3" + dd if=$DIR/$tfile.3 of=$TMP/$tfile.3 bs=$size count=$num iflag=direct || + error "dd with O_DIRECT large read failed" + cmp --bytes=$((size * num)) $TMP/$tfile.0 $TMP/$tfile.3 || + error "compare $TMP/$tfile.3 failed" +} +run_test 248b "test short_io read and write for both small and large sizes" test_249() { # LU-7890 [ $MDS1_VERSION -lt $(version_code 2.8.53) ] && @@ -20738,87 +20894,86 @@ test_412() { } run_test 412 "mkdir on specific MDTs" -test_413a() { - [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDTs for this test" - - if [ $(lustre_version_code mds1) -lt $(version_code 2.10.55) ]; then - skip "Need server version at least 2.10.55" - fi - - mkdir $DIR/$tdir || error "mkdir failed" - - # find MDT that is the most full - local max=$($LFS df | grep MDT | - awk 'BEGIN { a=0 } - { sub("%", "", $5) - if (0+$5 >= a) - { - a = $5 - b = $6 - } - } - END { split(b, c, ":") - sub("]", "", c[2]) - print c[2] - }') - - for i in $(seq $((MDSCOUNT - 1))); do - $LFS mkdir -c $i $DIR/$tdir/d$i || - error "mkdir d$i failed" - $LFS getdirstripe $DIR/$tdir/d$i - local stripe_index=$($LFS getdirstripe -i $DIR/$tdir/d$i) - [ $stripe_index -ne $max ] || - error "don't expect $max" - done -} -run_test 413a "mkdir on less full MDTs" - -test_413b() { - [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDTs for this test" - - [ $MDS1_VERSION -lt $(version_code 2.12.52) ] && - skip "Need server version at least 2.12.52" - - mkdir $DIR/$tdir || error "mkdir failed" - $LFS setdirstripe -D -i -1 -H space $DIR/$tdir || - error "setdirstripe failed" +test_qos_mkdir() { + local mkdir_cmd=$1 + local stripe_count=$2 + local mdts=$(comma_list $(mdts_nodes)) - local qos_prio_free - local qos_threshold_rr + local testdir + local lmv_qos_prio_free + local lmv_qos_threshold_rr + local lmv_qos_maxage + local lod_qos_prio_free + local lod_qos_threshold_rr + local lod_qos_maxage local count + local i - qos_prio_free=$($LCTL get_param -n lmv.*.qos_prio_free | head -n1) - qos_prio_free=${qos_prio_free%%%} - qos_threshold_rr=$($LCTL get_param -n lmv.*.qos_threshold_rr | head -n1) - qos_threshold_rr=${qos_threshold_rr%%%} - qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage) - - stack_trap "$LCTL set_param lmv.*.qos_prio_free=$qos_prio_free" EXIT - stack_trap "$LCTL set_param lmv.*.qos_threshold_rr=$qos_threshold_rr" \ + lmv_qos_prio_free=$($LCTL get_param -n lmv.*.qos_prio_free | head -n1) + lmv_qos_prio_free=${lmv_qos_prio_free%%%} + lmv_qos_threshold_rr=$($LCTL get_param -n lmv.*.qos_threshold_rr | + head -n1) + lmv_qos_threshold_rr=${lmv_qos_threshold_rr%%%} + lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage) + stack_trap "$LCTL set_param \ + lmv.*.qos_prio_free=$lmv_qos_prio_free > /dev/null" EXIT + stack_trap "$LCTL set_param \ + lmv.*.qos_threshold_rr=$lmv_qos_threshold_rr > /dev/null" EXIT + stack_trap "$LCTL set_param \ + lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null" EXIT + + lod_qos_prio_free=$(do_facet mds1 $LCTL get_param -n \ + lod.lustre-MDT0000-mdtlov.mdt_qos_prio_free | head -n1) + lod_qos_prio_free=${lod_qos_prio_free%%%} + lod_qos_threshold_rr=$(do_facet mds1 $LCTL get_param -n \ + lod.lustre-MDT0000-mdtlov.mdt_qos_threshold_rr | head -n1) + lod_qos_threshold_rr=${lod_qos_threshold_rr%%%} + lod_qos_maxage=$(do_facet mds1 $LCTL get_param -n \ + lod.lustre-MDT0000-mdtlov.qos_maxage | awk '{ print $1 }') + stack_trap "do_nodes $mdts $LCTL set_param \ + lod.*.mdt_qos_prio_free=$lod_qos_prio_free > /dev/null" EXIT + stack_trap "do_nodes $mdts $LCTL set_param \ + lod.*.mdt_qos_threshold_rr=$lod_qos_threshold_rr > /dev/null" \ EXIT - stack_trap "$LCTL set_param lmv.*.qos_maxage=$qos_maxage" EXIT + stack_trap "do_nodes $mdts $LCTL set_param \ + lod.*.mdt_qos_maxage=$lod_qos_maxage > /dev/null" EXIT + + echo + echo "Mkdir (stripe_count $stripe_count) roundrobin:" - echo "mkdir with roundrobin" + $LCTL set_param lmv.*.qos_threshold_rr=100 > /dev/null + do_nodes $mdts $LCTL set_param lod.*.mdt_qos_threshold_rr=100 > /dev/null + + testdir=$DIR/$tdir-s$stripe_count/rr - $LCTL set_param lmv.*.qos_threshold_rr=100 for i in $(seq $((100 * MDSCOUNT))); do - mkdir $DIR/$tdir/subdir$i || error "mkdir subdir$i failed" + eval $mkdir_cmd $testdir/subdir$i || + error "$mkdir_cmd subdir$i failed" done + for i in $(seq $MDSCOUNT); do - count=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$((i - 1))$ | - wc -w) + count=$($LFS getdirstripe -i $testdir/* | + grep ^$((i - 1))$ | wc -l) echo "$count directories created on MDT$((i - 1))" [ $count -eq 100 ] || error "subdirs are not evenly distributed" + + if [ $stripe_count -gt 1 ]; then + count=$($LFS getdirstripe $testdir/* | + grep -P "^\s+$((i - 1))\t" | wc -l) + echo "$count stripes created on MDT$((i - 1))" + # deviation should < 5% of average + [ $count -lt $((95 * stripe_count)) ] || + [ $count -gt $((105 * stripe_count)) ] && + error "stripes are not evenly distributed" + fi done - rm -rf $DIR/$tdir/* + $LCTL set_param lmv.*.qos_threshold_rr=$lmv_qos_threshold_rr > /dev/null + do_nodes $mdts $LCTL set_param \ + lod.*.mdt_qos_threshold_rr=$lod_qos_threshold_rr > /dev/null - $LCTL set_param lmv.*.qos_threshold_rr=$qos_threshold_rr - # Shorten statfs result age, so that it can be updated in time - $LCTL set_param lmv.*.qos_maxage=1 - sleep_maxage + echo + echo "Check for uneven MDTs: " local ffree local bavail @@ -20855,9 +21010,8 @@ test_413b() { # Check if we need to generate uneven MDTs local threshold=50 - local diff=$(((max - min ) * 100 / min)) + local diff=$(((max - min) * 100 / min)) local value="$(generate_string 1024)" - local i while [ $diff -lt $threshold ]; do # generate uneven MDTs, create till $threshold% diff @@ -20872,11 +21026,11 @@ test_413b() { error "mkdir $tdir-MDT$min_index failed" for i in $(seq $count); do $OPENFILE -f O_CREAT:O_LOV_DELAY_CREATE \ - $DIR/$tdir-MDT$min_index/f$i > /dev/null || - error "create f$i failed" + $DIR/$tdir-MDT$min_index/f$j_$i > /dev/null || + error "create f$j_$i failed" setfattr -n user.413b -v $value \ - $DIR/$tdir-MDT$min_index/f$i || - error "setfattr f$i failed" + $DIR/$tdir-MDT$min_index/f$j_$i || + error "setfattr f$j_$i failed" done ffree=($(lctl get_param -n mdc.*[mM][dD][cC]-*.filesfree)) @@ -20892,31 +21046,95 @@ test_413b() { echo "MDT blocks available: ${bavail[@]}" echo "weight diff=$diff%" - echo "mkdir with balanced space usage" - $LCTL set_param lmv.*.qos_prio_free=100 + echo + echo "Mkdir (stripe_count $stripe_count) with balanced space usage:" + + $LCTL set_param lmv.*.qos_prio_free=100 > /dev/null + do_nodes $mdts $LCTL set_param lod.*.mdt_qos_prio_free=100 > /dev/null + # decrease statfs age, so that it can be updated in time + $LCTL set_param lmv.*.qos_maxage=1 > /dev/null + do_nodes $mdts $LCTL set_param lod.*.mdt_qos_maxage=1 > /dev/null + + sleep 1 + + testdir=$DIR/$tdir-s$stripe_count/qos + for i in $(seq $((100 * MDSCOUNT))); do - mkdir $DIR/$tdir/subdir$i || error "mkdir subdir$i failed" + eval $mkdir_cmd $testdir/subdir$i || + error "$mkdir_cmd subdir$i failed" done for i in $(seq $MDSCOUNT); do - count=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$((i - 1))$ | - wc -w) + count=$($LFS getdirstripe -i $testdir/* | grep ^$((i - 1))$ | + wc -l) echo "$count directories created on MDT$((i - 1))" + + if [ $stripe_count -gt 1 ]; then + count=$($LFS getdirstripe $testdir/* | + grep -P "^\s+$((i - 1))\t" | wc -l) + echo "$count stripes created on MDT$((i - 1))" + fi done - max=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$max_index$ | wc -l) - min=$($LFS getdirstripe -i $DIR/$tdir/* | grep ^$min_index$ | wc -l) + max=$($LFS getdirstripe -i $testdir/* | grep ^$max_index$ | wc -l) + min=$($LFS getdirstripe -i $testdir/* | grep ^$min_index$ | wc -l) + # D-value should > 10% of averge [ $((max - min)) -lt 10 ] && error "subdirs shouldn't be evenly distributed" - which getfattr > /dev/null 2>&1 || skip_env "no getfattr command" + # ditto + if [ $stripe_count -gt 1 ]; then + max=$($LFS getdirstripe $testdir/* | + grep -P "^\s+$max_index\t" | wc -l) + min=$($LFS getdirstripe $testdir/* | + grep -P "^\s+$min_index\t" | wc -l) + [ $((max - min)) -le $((10 * stripe_count)) ] && + error "stripes shouldn't be evenly distributed"|| true + fi +} + +test_413a() { + [ $MDSCOUNT -lt 2 ] && + skip "We need at least 2 MDTs for this test" + + [ $MDS1_VERSION -lt $(version_code 2.12.52) ] && + skip "Need server version at least 2.12.52" + + local stripe_count + + for stripe_count in $(seq 1 $((MDSCOUNT - 1))); do + mkdir $DIR/$tdir-s$stripe_count || error "mkdir failed" + mkdir $DIR/$tdir-s$stripe_count/rr || error "mkdir failed" + mkdir $DIR/$tdir-s$stripe_count/qos || error "mkdir failed" + test_qos_mkdir "$LFS mkdir -c $stripe_count" $stripe_count + done +} +run_test 413a "QoS mkdir with 'lfs mkdir -i -1'" + +test_413b() { + [ $MDSCOUNT -lt 2 ] && + skip "We need at least 2 MDTs for this test" + + [ $MDS1_VERSION -lt $(version_code 2.12.52) ] && + skip "Need server version at least 2.12.52" - $LFS setdirstripe -D -d $DIR/$tdir || error "setdirstripe -d failed" - getfattr -n trusted.dmv $DIR/$tdir && - error "default dir layout exists" || true + local stripe_count + + for stripe_count in $(seq 1 $((MDSCOUNT - 1))); do + mkdir $DIR/$tdir-s$stripe_count || error "mkdir failed" + mkdir $DIR/$tdir-s$stripe_count/rr || error "mkdir failed" + mkdir $DIR/$tdir-s$stripe_count/qos || error "mkdir failed" + $LFS setdirstripe -D -c $stripe_count \ + $DIR/$tdir-s$stripe_count/rr || + error "setdirstripe failed" + $LFS setdirstripe -D -c $stripe_count \ + $DIR/$tdir-s$stripe_count/qos || + error "setdirstripe failed" + test_qos_mkdir "mkdir" $stripe_count + done } -run_test 413b "mkdir with balanced space usage" +run_test 413b "QoS mkdir under dir whose default LMV starting MDT offset is -1" test_414() { #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 @@ -21871,7 +22089,7 @@ cleanup_805() { } test_805() { - local zfs_version=$(do_node $SINGLEMDS cat /sys/module/zfs/version) + local zfs_version=$(do_facet mds1 cat /sys/module/zfs/version) [ "$mds1_FSTYPE" != "zfs" ] && skip "ZFS specific test" [ $(version_code $zfs_version) -lt $(version_code 0.7.2) ] && skip "netfree not implemented before 0.7" @@ -22500,6 +22718,31 @@ test_900() { } run_test 900 "umount should not race with any mgc requeue thread" +# LUS-6253/LU-11185 +test_901() { + local oldc + local newc + local olds + local news + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + # some get_param have a bug to handle dot in param name + cancel_lru_locks MGC + oldc=$($LCTL get_param -n 'ldlm.namespaces.MGC*.lock_count') + olds=$(do_facet mgs $LCTL get_param -n 'ldlm.namespaces.MGS*.lock_count') + umount_client $MOUNT || error "umount failed" + mount_client $MOUNT || error "mount failed" + cancel_lru_locks MGC + newc=$($LCTL get_param -n 'ldlm.namespaces.MGC*.lock_count') + news=$(do_facet mgs $LCTL get_param -n 'ldlm.namespaces.MGS*.lock_count') + + [ $oldc -lt $newc ] && error "mgc lock leak ($oldc != $newc)" + [ $olds -lt $news ] && error "mgs lock leak ($olds != $news)" + + return 0 +} +run_test 901 "don't leak a mgc lock on client umount" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre