X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=c50b0406bfc19eea4f7d0422f06181a040524fac;hp=bd56ac5c1e991d13a69752c11b7849ad8e85e7a7;hb=3e4061862e751dbbbbba273403b56201e705a830;hpb=5b53406773c7fe9b9fa9f2ef4c342f85b5db1de1 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index bd56ac5..c50b040 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -9,10 +9,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 12653 12653 5188 16260 19742 -ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68b $SANITY_EXCEPT" -# bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443 -#ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} +# bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 5188 +ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 68b $SANITY_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! # Tests that fail on uml @@ -468,6 +466,7 @@ test_17g() { run_test 17g "symlinks: really long symlink name ===============================" test_17h() { #bug 17378 + remote_mds_nodsh && skip "remote MDS with nodsh" && return mkdir -p $DIR/$tdir $SETSTRIPE $DIR/$tdir -c -1 #define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 @@ -477,6 +476,7 @@ test_17h() { #bug 17378 run_test 17h "create objects: lov_free_memmd() doesn't lbug" test_17i() { #bug 20018 + remote_mds_nodsh && skip "remote MDS with nodsh" && return mkdir -p $DIR/$tdir local foo=$DIR/$tdir/$tfile ln -s $foo $foo || error "create symlink failed" @@ -784,6 +784,10 @@ test_24u() { # bug12192 } run_test 24u "create stripe file" +page_size() { + getconf PAGE_SIZE +} + test_24v() { local NRFILES=100000 local FREE_INODES=`lfs df -i|grep "filesystem summary" | awk '{print $5}'` @@ -793,8 +797,26 @@ test_24v() { mkdir -p $DIR/d24v createmany -m $DIR/d24v/$tfile $NRFILES + + cancel_lru_locks mdc + lctl set_param mdc.*.stats clear + ls $DIR/d24v >/dev/null || error "error in listing large dir" + # LU-5 large readdir + # DIRENT_SIZE = 32 bytes for sizeof(struct lu_dirent) + + # 8 bytes for name(filename is mostly 5 in this test) + + # 8 bytes for luda_type + # take into account of overhead in lu_dirpage header and end mark in + # each page, plus one in RPC_NUM calculation. + DIRENT_SIZE=48 + RPC_SIZE=$(($(lctl get_param -n mdc.*.max_pages_per_rpc)*$(page_size))) + RPC_NUM=$(((NRFILES * DIRENT_SIZE + RPC_SIZE - 1) / RPC_SIZE + 1)) + mds_readpage=`lctl get_param mdc.*.stats | \ + awk '/^mds_readpage/ {print $2}'` + [ $mds_readpage -gt $RPC_NUM ] && \ + error "large readdir doesn't take effect" + rm $DIR/d24v -rf } run_test 24v "list directory with large files (handle hash collision, bug: 17560)" @@ -1023,8 +1045,7 @@ exhaust_precreations() { local MDSIDX=$(get_mds_dir "$DIR/$tdir") echo OSTIDX=$OSTIDX MDSIDX=$MDSIDX - local OST=$(lfs osts | grep ${OSTIDX}": " | \ - awk '{print $2}' | sed -e 's/_UUID$//') + local OST=$(ostname_from_index $OSTIDX) local MDT_INDEX=$(lfs df | grep "\[MDT:$((MDSIDX - 1))\]" | awk '{print $1}' | \ sed -e 's/_UUID$//;s/^.*-//') @@ -1118,6 +1139,7 @@ test_27q() { reset_enospc rm -f $DIR/$tdir/$tfile + mkdir -p $DIR/$tdir $MCREATE $DIR/$tdir/$tfile || error "mcreate $DIR/$tdir/$tfile failed" $TRUNCATE $DIR/$tdir/$tfile 80000000 ||error "truncate $DIR/$tdir/$tfile failed" $CHECKSTAT -s 80000000 $DIR/$tdir/$tfile || error "checkstat failed" @@ -1187,9 +1209,9 @@ test_27u() { # bug 4900 run_test 27u "skip object creation on OSC w/o objects ==========" test_27v() { # bug 4900 - [ "$OSTCOUNT" -lt "2" ] && skip_env "too few OSTs" && return - remote_mds_nodsh && skip "remote MDS with nodsh" && return - remote_ost_nodsh && skip "remote OST with nodsh" && return + [ "$OSTCOUNT" -lt "2" ] && skip_env "too few OSTs" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return exhaust_all_precreations 0x215 reset_enospc @@ -1200,7 +1222,7 @@ test_27v() { # bug 4900 touch $DIR/$tdir/$tfile #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 # all except ost1 - for (( i=0; i < OSTCOUNT; i++ )) ; do + for (( i=1; i < OSTCOUNT; i++ )); do do_facet ost$i lctl set_param fail_loc=0x705 done local START=`date +%s` @@ -1208,9 +1230,10 @@ test_27v() { # bug 4900 local FINISH=`date +%s` local TIMEOUT=`lctl get_param -n timeout` - [ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \ + local PROCESS=$((FINISH - START)) + [ $PROCESS -ge $((TIMEOUT / 2)) ] && \ error "$FINISH - $START >= $TIMEOUT / 2" - + sleep $((TIMEOUT / 2 - PROCESS)) reset_enospc } run_test 27v "skip object creation on slow OST =================" @@ -1236,10 +1259,11 @@ test_27w() { # bug 10997 run_test 27w "check lfs setstripe -c -s -i options =============" test_27x() { + remote_ost_nodsh && skip "remote OST with nodsh" && return [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs" && return OFFSET=$(($OSTCOUNT - 1)) OSTIDX=0 - local OST=$(lfs osts | awk '/'${OSTIDX}': / { print $2 }' | sed -e 's/_UUID$//') + local OST=$(ostname_from_index $OSTIDX) mkdir -p $DIR/$tdir $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe per file @@ -1257,6 +1281,7 @@ run_test 27x "create files while OST0 is degraded" test_27y() { [ "$OSTCOUNT" -lt "2" ] && skip_env "$OSTCOUNT < 2 OSTs -- skipping" && return remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return local mdtosc=$(get_mdtosc_proc_path $SINGLEMDS $FSNAME-OST0000) local last_id=$(do_facet $SINGLEMDS lctl get_param -n \ @@ -1279,14 +1304,14 @@ test_27y() { } fi done - OSTIDX=$(lfs osts | grep ${OST} | awk '{print $1}' | sed -e 's/://') + OSTIDX=$(index_from_ostuuid $OST) mkdir -p $DIR/$tdir $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe / file - do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 1 + do_facet ost$((OSTIDX+1)) lctl set_param -n obdfilter.$OST.degraded 1 sleep_maxage createmany -o $DIR/$tdir/$tfile $fcount - do_facet ost$OSTIDX lctl set_param -n obdfilter.$OST.degraded 0 + do_facet ost$((OSTIDX+1)) lctl set_param -n obdfilter.$OST.degraded 0 for i in `seq 0 $OFFSET`; do [ `$GETSTRIPE $DIR/$tdir/$tfile$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "$OSTIDX"` ] || \ @@ -1328,7 +1353,7 @@ check_seq_oid() local objid=${lmm[$((j+1))]} local group=${lmm[$((j+3))]} local dev=$(ostdevname $devnum) - local dir=${MOUNT%/*}/ost$devnum + local dir=$(facet_mntpt ost$devnum) stop ost$devnum do_facet ost$devnum mount -t $FSTYPE $dev $dir $OST_MOUNT_OPTS || @@ -1348,12 +1373,13 @@ check_seq_oid() [ $stripe -eq $i ] || { error "stripe mismatch"; return 6; } echo -e "\t\tost $obdidx, objid $objid, group $group" - do_facet ost$devnum umount -d $dev + do_facet ost$devnum umount -d $dir start ost$devnum $dev $OST_MOUNT_OPTS done } test_27z() { + remote_ost_nodsh && skip "remote OST with nodsh" && return mkdir -p $DIR/$tdir $SETSTRIPE $DIR/$tdir/$tfile-1 -c 1 -o 0 -s 1m || { error "setstripe -c -1 failed"; return 1; } @@ -1855,6 +1881,7 @@ test_33c() { local write_bytes local all_zeros + remote_ost_nodsh && skip "remote OST with nodsh" && return all_zeros=: rm -fr $DIR/d33 mkdir -p $DIR/d33 @@ -1864,7 +1891,7 @@ test_33c() { for ostnum in $(seq $OSTCOUNT); do # test-framework's OST numbering is one-based, while Lustre's # is zero-based - ostname=$(printf "lustre-OST%.4d" $((ostnum - 1))) + ostname=$(printf "$FSNAME-OST%.4d" $((ostnum - 1))) # Parsing llobdstat's output sucks; we could grep the /proc # path, but that's likely to not be as portable as using the # llobdstat utility. So we parse lctl output instead. @@ -1888,7 +1915,7 @@ test_33c() { # Total up write_bytes after writing. We'd better find non-zeros. for ostnum in $(seq $OSTCOUNT); do - ostname=$(printf "lustre-OST%.4d" $((ostnum - 1))) + ostname=$(printf "$FSNAME-OST%.4d" $((ostnum - 1))) write_bytes=$(do_facet ost$ostnum lctl get_param -n \ obdfilter/$ostname/stats | awk '/^write_bytes/ {print $7}' ) @@ -1903,7 +1930,7 @@ test_33c() { if $all_zeros then for ostnum in $(seq $OSTCOUNT); do - ostname=$(printf "lustre-OST%.4d" $((ostnum - 1))) + ostname=$(printf "$FSNAME-OST%.4d" $((ostnum - 1))) echo "Check that write_bytes is present in obdfilter/*/stats:" do_facet ost$ostnum lctl get_param -n \ obdfilter/$ostname/stats @@ -2390,6 +2417,7 @@ run_test 39k "write, utime, close, stat ========================" TEST_39_ATIME=`date -d "1 year" +%s` test_39l() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return local atime_diff=$(do_facet $SINGLEMDS lctl get_param -n mdd.*.atime_diff) mkdir -p $DIR/$tdir @@ -2613,10 +2641,6 @@ test_42d() { } run_test 42d "test complete truncate of file with cached dirty data" -page_size() { - getconf PAGE_SIZE -} - test_42e() { # bug22074 local TDIR=$DIR/${tdir}e local pagesz=$(page_size) @@ -3241,7 +3265,7 @@ test_56a() { # was test_56 [ "$OSTCOUNT" -lt 2 ] && \ skip_env "skipping other lfs getstripe --obd test" && return OSTIDX=1 - OBDUUID=$(lfs osts | grep ${OSTIDX}": " | awk '{print $2}') + OBDUUID=$(ostuuid_from_index $OSTIDX) FILENUM=`$GETSTRIPE -ir $DIR/d56 | grep -x $OSTIDX | wc -l` FOUND=`$GETSTRIPE -r --obd $OBDUUID $DIR/d56 | grep obdidx | wc -l` [ $FOUND -eq $FILENUM ] || \ @@ -3495,6 +3519,7 @@ test_57a() { run_test 57a "verify MDS filesystem created with large inodes ==" test_57b() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return local dir=$DIR/d57b local FILECOUNT=100 @@ -3566,9 +3591,10 @@ run_test 59 "verify cancellation of llog records async =========" TEST60_HEAD="test_60 run $RANDOM" test_60a() { + remote_mgs_nodsh && skip "remote MGS with nodsh" && return [ ! -f run-llog.sh ] && skip_env "missing subtest run-llog.sh" && return log "$TEST60_HEAD - from kernel mode" - sh run-llog.sh + do_facet mgs sh run-llog.sh } run_test 60a "llog sanity tests run from kernel module ==========" @@ -3878,6 +3904,8 @@ swap_used() { # test case for lloop driver, basic function test_68a() { [ "$UID" != 0 ] && skip_env "must run as root" && return + llite_lloop_enabled || \ + { skip_env "llite_lloop module disabled" && return; } trap cleanup_68 EXIT @@ -4151,13 +4179,12 @@ set_checksums() return 0 } -export ORIG_CSUM_TYPE="" +export ORIG_CSUM_TYPE="`lctl get_param -n osc/*osc-[^mM]*/checksum_type | + sed 's/.*\[\(.*\)\].*/\1/g' | head -n1`" CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"} +[ "$ORIG_CSUM_TYPE" = "crc32c" ] && CKSUM_TYPES="$CKSUM_TYPES crc32c" set_checksum_type() { - [ "$ORIG_CSUM_TYPE" ] || \ - ORIG_CSUM_TYPE=`lctl get_param -n osc/*osc-[^mM]*/checksum_type | - sed 's/.*\[\(.*\)\].*/\1/g' | head -n1` lctl set_param -n osc.*osc-[^mM]*.checksum_type $1 log "set checksum type to $1" return 0 @@ -4403,10 +4430,44 @@ test_80() { # bug 10718 error "elapsed for 1M@1T = $DIFF" fi true - rm -f $DIR/$tfile + rm -f $DIR/$tfile } run_test 80 "Page eviction is equally fast at high offsets too ====" +test_81a() { # LU-456 + remote_ost_nodsh && skip "remote OST with nodsh" && return + # define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 + # MUST OR with the OBD_FAIL_ONCE (0x80000000) + do_facet ost0 lctl set_param fail_loc=0x80000228 + + # write should trigger a retry and success + $SETSTRIPE -i 0 -c 1 $DIR/$tfile + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c + RC=$? + if [ $RC -ne 0 ] ; then + error "write should success, but failed for $RC" + fi +} +run_test 81a "OST should retry write when get -ENOSPC ===============" + +test_81b() { # LU-456 + remote_ost_nodsh && skip "remote OST with nodsh" && return + # define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 + # Don't OR with the OBD_FAIL_ONCE (0x80000000) + do_facet ost0 lctl set_param fail_loc=0x228 + + # write should retry several times and return -ENOSPC finally + $SETSTRIPE -i 0 -c 1 $DIR/$tfile + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c + RC=$? + ENOSPC=28 + if [ $RC -ne $ENOSPC ] ; then + error "dd should fail for -ENOSPC, but succeed." + fi +} +run_test 81b "OST should return -ENOSPC when retry still fails =======" + + test_99a() { [ -z "$(which cvs 2>/dev/null)" ] && skip_env "could not find cvs" && \ return @@ -5035,10 +5096,13 @@ test_104a() { lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" - OSC=`lctl get_param -n devices | awk '/-osc-/ {print $4}' | head -n 1` + OSC=`lctl dl |grep OST0000-osc-[^M] |awk '{print $4}'` lctl --device %$OSC deactivate lfs df || error "lfs df with deactivated OSC failed" - lctl --device %$OSC recover + lctl --device %$OSC activate + # wait the osc back to normal + wait_osc_import_state client ost FULL + lfs df || error "lfs df with reactivated OSC failed" rm -f $DIR/$tfile } @@ -5662,6 +5726,15 @@ test_118k() } run_test 118k "bio alloc -ENOMEM and IO TERM handling =========" +test_118l() +{ + # LU-646 + mkdir -p $DIR/$tdir + multiop $DIR/$tdir Dy || error "fsync dir failed" + rm -rf $DIR/$tdir +} +run_test 118l "fsync dir =========" + [ "$SLOW" = "no" ] && [ -n "$OLD_RESENDCOUNT" ] && set_resend_count $OLD_RESENDCOUNT test_119a() # bug 11737 @@ -6040,6 +6113,7 @@ test_124a() { LRU_SIZE=$(lctl get_param -n $PARAM) if [ $LRU_SIZE -gt $(default_lru_size) ]; then NSDIR=$(echo $PARAM | cut -d "." -f1-3) + log "NSDIR=$NSDIR" log "NS=$(basename $NSDIR)" break fi @@ -6057,6 +6131,7 @@ test_124a() { # for 10h. After that locks begin to be killed by client. local MAX_HRS=10 local LIMIT=`lctl get_param -n $NSDIR.pool.limit` + log "LIMIT=$LIMIT" # Make LVF so higher that sleeping for $SLEEP is enough to _start_ # killing locks. Some time was spent for creating locks. This means @@ -6071,6 +6146,7 @@ test_124a() { local LRU_SIZE_B=$LRU_SIZE log "LVF=$LVF" local OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` + log "OLD_LVF=$OLD_LVF" lctl set_param -n $NSDIR.pool.lock_volume_factor $LVF # Let's make sure that we really have some margin. Client checks @@ -6204,7 +6280,7 @@ test_126() { # bug 12829/13455 } run_test 126 "check that the fsgid provided by the client is taken into account" -test_127() { # bug 15521 +test_127a() { # bug 15521 $SETSTRIPE -i 0 -c 1 $DIR/$tfile || error "setstripe failed" $LCTL set_param osc.*.stats=0 FSIZE=$((2048 * 1024)) @@ -6240,7 +6316,47 @@ test_127() { # bug 15521 [ "$read_bytes" != 0 ] || error "no read done" [ "$write_bytes" != 0 ] || error "no write done" } -run_test 127 "verify the client stats are sane" +run_test 127a "verify the client stats are sane" + +test_127b() { # bug LU-333 + $LCTL set_param llite.*.stats=0 + FSIZE=65536 # sized fixed to match PAGE_SIZE for most clients + # perform 2 reads and writes so MAX is different from SUM. + dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1 + dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1 + cancel_lru_locks osc + dd if=$DIR/$tfile of=/dev/null bs=$FSIZE count=1 + dd if=$DIR/$tfile of=/dev/null bs=$FSIZE count=1 + + $LCTL get_param llite.*.stats | grep samples > $TMP/${tfile}.tmp + while read NAME COUNT SAMP UNIT MIN MAX SUM SUMSQ; do + echo "got $COUNT $NAME" + eval $NAME=$COUNT || error "Wrong proc format" + + case $NAME in + read_bytes) + [ $COUNT -ne 2 ] && error "count is not 2: $COUNT" + [ $MIN -ne $FSIZE ] && error "min is not $FSIZE: $MIN" + [ $MAX -ne $FSIZE ] && error "max is incorrect: $MAX" + [ $SUM -ne $((FSIZE * 2)) ] && error "sum is wrong: $SUM" + ;; + write_bytes) + [ $COUNT -ne 2 ] && error "count is not 2: $COUNT" + [ $MIN -ne $FSIZE ] && error "min is not $FSIZE: $MIN" + [ $MAX -ne $FSIZE ] && error "max is incorrect: $MAX" + [ $SUM -ne $((FSIZE * 2)) ] && error "sum is wrong: $SUM" + ;; + *) ;; + esac + done < $TMP/${tfile}.tmp + + #check that we actually got some stats + [ "$read_bytes" ] || error "Missing read_bytes stats" + [ "$write_bytes" ] || error "Missing write_bytes stats" + [ "$read_bytes" != 0 ] || error "no read done" + [ "$write_bytes" != 0 ] || error "no write done" +} +run_test 127b "verify the llite client stats are sane" test_128() { # bug 15212 touch $DIR/$tfile @@ -6633,6 +6749,7 @@ som_mode_switch() { } test_132() { #1028, SOM + remote_mds_nodsh && skip "remote MDS with nodsh" && return local num=$(get_mds_dir $DIR) local mymds=mds${num} local MOUNTOPT_SAVE=$MOUNTOPT @@ -6671,6 +6788,111 @@ test_132() { #1028, SOM } run_test 132 "som avoids glimpse rpc" +check_stats() { + local res + local count + case $1 in + $SINGLEMDS) res=`do_facet $SINGLEMDS $LCTL get_param mdt.$FSNAME-MDT0000.md_stats | grep "$2"` + ;; + ost) res=`do_facet ost1 $LCTL get_param obdfilter.$FSNAME-OST0000.stats | grep "$2"` + ;; + *) error "Wrong argument $1" ;; + esac + echo $res + count=`echo $res | awk '{print $2}'` + [ -z "$res" ] && error "The counter for $2 on $1 was not incremented" + # if the argument $3 is zero, it means any stat increment is ok. + if [ $3 -gt 0 ] ; then + [ $count -ne $3 ] && error "The $2 counter on $1 is wrong - expected $3" + fi +} + +test_133a() { + remote_ost_nodsh && skip "remote OST with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + local testdir=$DIR/${tdir}/stats_testdir + mkdir -p $DIR/${tdir} + + # clear stats. + do_facet $SINGLEMDS $LCTL set_param mdt.*.md_stats=clear + do_facet ost1 $LCTL set_param obdfilter.*.stats=clear + + # verify mdt stats first. + mkdir ${testdir} || error "mkdir failed" + check_stats $SINGLEMDS "mkdir" 1 + touch ${testdir}/${tfile} || "touch failed" + check_stats $SINGLEMDS "open" 1 + check_stats $SINGLEMDS "close" 1 + mknod ${testdir}/${tfile}-pipe p || "mknod failed" + check_stats $SINGLEMDS "mknod" 1 + rm -f ${testdir}/${tfile}-pipe || "pipe remove failed" + check_stats $SINGLEMDS "unlink" 1 + rm -f ${testdir}/${tfile} || error "file remove failed" + check_stats $SINGLEMDS "unlink" 2 + + # remove working dir and check mdt stats again. + rmdir ${testdir} || error "rmdir failed" + check_stats $SINGLEMDS "rmdir" 1 + + rm -rf $DIR/${tdir} +} +run_test 133a "Verifying MDT stats ========================================" + +test_133b() { + remote_ost_nodsh && skip "remote OST with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + local testdir=$DIR/${tdir}/stats_testdir + mkdir -p ${testdir} || error "mkdir failed" + touch ${testdir}/${tfile} || "touch failed" + cancel_lru_locks mdc + + # clear stats. + do_facet $SINGLEMDS $LCTL set_param mdt.*.md_stats=clear + do_facet ost1 $LCTL set_param obdfilter.*.stats=clear + + # extra mdt stats verification. + chmod 444 ${testdir}/${tfile} || error "chmod failed" + check_stats $SINGLEMDS "setattr" 1 + $LFS df || error "lfs failed" + check_stats $SINGLEMDS "statfs" 1 + + rm -rf $DIR/${tdir} +} +run_test 133b "Verifying extra MDT stats ==================================" + +test_133c() { + remote_ost_nodsh && skip "remote OST with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + local testdir=$DIR/${tdir}/stats_testdir + mkdir -p ${testdir} || error "mkdir failed" + + # verify obdfilter stats. + $LFS setstripe -c 1 -o 0 ${testdir}/${tfile} + sync + cancel_lru_locks osc + + # clear stats. + do_facet $SINGLEMDS $LCTL set_param mdt.*.md_stats=clear + do_facet ost1 $LCTL set_param obdfilter.*.stats=clear + + dd if=/dev/zero of=${testdir}/${tfile} conv=notrunc bs=1024k count=1 || error "dd failed" + sync + cancel_lru_locks osc + check_stats ost "write" 1 + + dd if=${testdir}/${tfile} of=/dev/null bs=1k count=1 || error "dd failed" + check_stats ost "read" 1 + + > ${testdir}/${tfile} || error "truncate failed" + check_stats ost "punch" 1 + + rm -f ${testdir}/${tfile} || error "file remove failed" + check_stats ost "destroy" 1 + + rm -rf $DIR/${tdir} +} +run_test 133c "Verifying OST stats ========================================" + test_140() { #bug-17379 mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir" cd $DIR/$tdir || error "Changing to $DIR/$tdir" @@ -6843,23 +7065,9 @@ test_154() { } run_test 154 "Opening a file by FID" -test_155_load() { +test_155_small_load() { local temp=$TMP/$tfile local file=$DIR/$tfile - local list=$(comma_list $(osts_nodes)) - local big=$(do_nodes $list grep "cache" /proc/cpuinfo | \ - awk '{sum+=$4} END{print sum}') - local min_avail=$(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail | \ - sort -n | head -1) - local large_file_size=$((big * 2)) - - log "cache size on OSS is $big KB" - log "large file size is $large_file_size KB" - log "min available OST size is $min_avail KB" - - [ $min_avail -le $large_file_size ] && \ - skip "the minimum available OST size needs > $large_file_size KB" && \ - return 0 dd if=/dev/urandom of=$temp bs=6096 count=1 || \ error "dd of=$temp bs=6096 count=1 failed" @@ -6879,6 +7087,29 @@ test_155_load() { echo "12345" >>$file cmp $temp $file || error "$temp $file differ (append2)" + rm -f $temp $file + true +} + +test_155_big_load() { + remote_ost_nodsh && skip "remote OST with nodsh" && return + local temp=$TMP/$tfile + local file=$DIR/$tfile + + free_min_max + local cache_size=$(do_facet ost$((MAXI+1)) \ + "awk '/cache/ {sum+=\\\$4} END {print sum}' /proc/cpuinfo") + local large_file_size=$((cache_size * 2)) + + echo "OSS cache size: $cache_size KB" + echo "Large file size: $large_file_size KB" + + [ $MAXV -le $large_file_size ] && \ + skip_env "max available OST size needs > $large_file_size KB" && \ + return 0 + + $SETSTRIPE $file -c 1 -i $MAXI || error "$SETSTRIPE $file failed" + dd if=/dev/urandom of=$temp bs=$large_file_size count=1k || \ error "dd of=$temp bs=$large_file_size count=1k failed" cp $temp $file @@ -6893,30 +7124,58 @@ test_155_load() { test_155a() { set_cache read on set_cache writethrough on - test_155_load + test_155_small_load } -run_test 155a "Verification of correctness: read cache:on write_cache:on" +run_test 155a "Verify small file correctness: read cache:on write_cache:on" test_155b() { set_cache read on set_cache writethrough off - test_155_load + test_155_small_load } -run_test 155b "Verification of correctness: read cache:on write_cache:off" +run_test 155b "Verify small file correctness: read cache:on write_cache:off" test_155c() { set_cache read off set_cache writethrough on - test_155_load + test_155_small_load } -run_test 155c "Verification of correctness: read cache:off write_cache:on" +run_test 155c "Verify small file correctness: read cache:off write_cache:on" test_155d() { set_cache read off set_cache writethrough off - test_155_load + test_155_small_load +} +run_test 155d "Verify small file correctness: read cache:off write_cache:off" + +test_155e() { + set_cache read on + set_cache writethrough on + test_155_big_load +} +run_test 155e "Verify big file correctness: read cache:on write_cache:on" + +test_155f() { + set_cache read on + set_cache writethrough off + test_155_big_load +} +run_test 155f "Verify big file correctness: read cache:on write_cache:off" + +test_155g() { + set_cache read off + set_cache writethrough on + test_155_big_load +} +run_test 155g "Verify big file correctness: read cache:off write_cache:on" + +test_155h() { + set_cache read off + set_cache writethrough off + test_155_big_load } -run_test 155d "Verification of correctness: read cache:off write_cache:off " +run_test 155h "Verify big file correctness: read cache:off write_cache:off" test_156() { local CPAGES=3 @@ -7065,6 +7324,7 @@ err17935 () { fi } test_160() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) echo "Registered as changelog user $USER" do_facet $SINGLEMDS lctl get_param -n mdd.$MDT0.changelog_users | \ @@ -7310,7 +7570,9 @@ test_170() { local expected_good=$((good_line1 + good_line2*2)) rm -f $TMP/${tfile}* - if [ $bad_line -ne $bad_line_new ]; then + # LU-231, short malformed line may not be counted into bad lines + if [ $bad_line -ne $bad_line_new ] && + [ $bad_line -ne $((bad_line_new - 1)) ]; then error "expected $bad_line bad lines, but got $bad_line_new" return 1 fi @@ -7347,6 +7609,9 @@ setup_obdecho_osc () { local ost_nid=$1 local obdfilter_name=$2 echo "Creating new osc for $obdfilter_name on $ost_nid" + # make sure we can find loopback nid + $LCTL add_uuid $ost_nid $ost_nid >/dev/null 2>&1 + [ $rc -eq 0 ] && { $LCTL attach osc ${obdfilter_name}_osc \ ${obdfilter_name}_osc_UUID || rc=2; } [ $rc -eq 0 ] && { $LCTL --device ${obdfilter_name}_osc setup \ @@ -7385,6 +7650,7 @@ obdecho_create_test() { } test_180a() { + remote_ost_nodsh && skip "remote OST with nodsh" && return local rc=0 local rmmod_local=0 @@ -7394,8 +7660,10 @@ test_180a() { fi local osc=$($LCTL dl | grep -v mdt | awk '$3 == "osc" {print $4; exit}') - local host=$(awk '/current_connection:/ {print $2}' /proc/fs/lustre/osc/$osc/import) - local target=$(awk '/target:/ {print $2}' /proc/fs/lustre/osc/$osc/import) + local host=$(lctl get_param -n osc.$osc.import | + awk '/current_connection:/ {print $2}' ) + local target=$(lctl get_param -n osc.$osc.import | + awk '/target:/ {print $2}' ) target=${target%_UUID} [[ -n $target ]] && { setup_obdecho_osc $host $target || rc=1; } || rc=1 @@ -7407,15 +7675,16 @@ test_180a() { run_test 180a "test obdecho on osc" test_180b() { + remote_ost_nodsh && skip "remote OST with nodsh" && return local rc=0 local rmmod_remote=0 - do_facet ost "lsmod | grep -q obdecho || " \ - "{ insmod ${LUSTRE}/obdecho/obdecho.ko || " \ - "modprobe obdecho; }" && rmmod_remote=1 - target=$(do_facet ost $LCTL dl | awk '/obdfilter/ {print $4;exit}') - [[ -n $target ]] && { obdecho_create_test $target ost || rc=1; } - [ $rmmod_remote -eq 1 ] && do_facet ost "rmmod obdecho" + do_facet ost1 "lsmod | grep -q obdecho || " \ + "{ insmod ${LUSTRE}/obdecho/obdecho.ko || " \ + "modprobe obdecho; }" && rmmod_remote=1 + target=$(do_facet ost1 $LCTL dl | awk '/obdfilter/ {print $4;exit}') + [[ -n $target ]] && { obdecho_create_test $target ost1 || rc=1; } + [ $rmmod_remote -eq 1 ] && do_facet ost1 "rmmod obdecho" return $rc } run_test 180b "test obdecho directly on obdfilter" @@ -7624,6 +7893,136 @@ run_test 201c "Remove a pool ============================================" cleanup_pools $FSNAME +# usage: default_attr +default_attr() { + $LCTL get_param -n lov.$FSNAME-clilov-\*.stripe${1} +} + +# usage: trim +# Trims leading and trailing whitespace from the parameter string +trim() { + echo $@ +} + +# usage: check_default_stripe_attr +check_default_stripe_attr() { + # $GETSTRIPE returns trailing whitespace which needs to be trimmed off + ACTUAL=$(trim $($GETSTRIPE --$1 $DIR/$tdir)) + if [ $1 = "count" -o $1 = "size" ]; then + EXPECTED=`default_attr $1`; + else + # the 'stripeoffset' parameter prints as an unsigned int, so + # until this is fixed we hard-code -1 here + EXPECTED=-1; + fi + [ "x$ACTUAL" != "x$EXPECTED" ] && + error "$DIR/$tdir has stripe $1 '$ACTUAL', not '$EXPECTED'" +} + +# usage: check_raw_stripe_attr +check_raw_stripe_attr() { + # $GETSTRIPE returns trailing whitespace which needs to be trimmed off + ACTUAL=$(trim $($GETSTRIPE --raw --$1 $DIR/$tdir)) + if [ $1 = "count" -o $1 = "size" ]; then + EXPECTED=0; + else + EXPECTED=-1; + fi + [ "x$ACTUAL" != "x$EXPECTED" ] && + error "$DIR/$tdir has raw stripe $1 '$ACTUAL', not '$EXPECTED'" +} + + +test_204a() { + mkdir -p $DIR/$tdir + $SETSTRIPE --count 0 --size 0 --offset -1 $DIR/$tdir + + check_default_stripe_attr count + check_default_stripe_attr size + check_default_stripe_attr offset + + return 0 +} +run_test 204a "Print default stripe attributes =================" + +test_204b() { + mkdir -p $DIR/$tdir + $SETSTRIPE --count 1 $DIR/$tdir + + check_default_stripe_attr size + check_default_stripe_attr offset + + return 0 +} +run_test 204b "Print default stripe size and offset ===========" + +test_204c() { + mkdir -p $DIR/$tdir + $SETSTRIPE --size 65536 $DIR/$tdir + + check_default_stripe_attr count + check_default_stripe_attr offset + + return 0 +} +run_test 204c "Print default stripe count and offset ===========" + +test_204d() { + mkdir -p $DIR/$tdir + $SETSTRIPE --offset 0 $DIR/$tdir + + check_default_stripe_attr count + check_default_stripe_attr size + + return 0 +} +run_test 204d "Print default stripe count and size =============" + +test_204e() { + mkdir -p $DIR/$tdir + $SETSTRIPE -d $DIR/$tdir + + check_raw_stripe_attr count + check_raw_stripe_attr size + check_raw_stripe_attr offset + + return 0 +} +run_test 204e "Print raw stripe attributes =================" + +test_204f() { + mkdir -p $DIR/$tdir + $SETSTRIPE --count 1 $DIR/$tdir + + check_raw_stripe_attr size + check_raw_stripe_attr offset + + return 0 +} +run_test 204f "Print raw stripe size and offset ===========" + +test_204g() { + mkdir -p $DIR/$tdir + $SETSTRIPE --size 65536 $DIR/$tdir + + check_raw_stripe_attr count + check_raw_stripe_attr offset + + return 0 +} +run_test 204g "Print raw stripe count and offset ===========" + +test_204h() { + mkdir -p $DIR/$tdir + $SETSTRIPE --offset 0 $DIR/$tdir + + check_raw_stripe_attr count + check_raw_stripe_attr size + + return 0 +} +run_test 204h "Print raw stripe count and size =============" + test_212() { size=`date +%s` size=$((size % 8192 + 1)) @@ -7787,6 +8186,7 @@ test_215() { # for bugs 18102, 21079, 21517 run_test 215 "/proc/sys/lnet exists and has proper content - bugs 18102, 21079, 21517" test_216() { # bug 20317 + remote_ost_nodsh && skip "remote OST with nodsh" && return local node local p="$TMP/sanityN-$TESTNAME.parameters" save_lustre_params $HOSTNAME "osc.*.contention_seconds" > $p @@ -7824,12 +8224,15 @@ run_test 216 "check lockless direct write works and updates file size and kms co test_217() { # bug 22430 local node + local nid + for node in $(nodes_list); do - if [[ $node = *-* ]] ; then - echo "lctl ping $node@$NETTYPE" - lctl ping $node@$NETTYPE + nid=$(host_nids_address $node $NETTYPE) + if [[ $nid = *-* ]] ; then + echo "lctl ping $nid@$NETTYPE" + lctl ping $nid@$NETTYPE else - echo "skipping $node (no hiphen detected)" + echo "skipping $node (no hyphen detected)" fi done } @@ -7852,6 +8255,80 @@ test_218() { } run_test 218 "parallel read and truncate should not deadlock =======================" +test_219() { + # write one partial page + dd if=/dev/zero of=$DIR/$tfile bs=1024 count=1 + # set no grant so vvp_io_commit_write will do sync write + $LCTL set_param fail_loc=0x411 + # write a full page at the end of file + dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 seek=1 conv=notrunc + + $LCTL set_param fail_loc=0 + dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 seek=3 + $LCTL set_param fail_loc=0x411 + dd if=/dev/zero of=$DIR/$tfile bs=1024 count=1 seek=2 conv=notrunc +} +run_test 219 "LU-394: Write partial won't cause uncontiguous pages vec at LND" + +test_220() { #LU-325 + local OSTIDX=0 + + mkdir -p $DIR/$tdir + local OST=$(lfs osts | grep ${OSTIDX}": " | \ + awk '{print $2}' | sed -e 's/_UUID$//') + + # on the mdt's osc + local mdtosc_proc1=$(get_mdtosc_proc_path $SINGLEMDS $OST) + local last_id=$(do_facet $SINGLEMDS lctl get_param -n \ + osc.$mdtosc_proc1.prealloc_last_id) + local next_id=$(do_facet $SINGLEMDS lctl get_param -n \ + osc.$mdtosc_proc1.prealloc_next_id) + + $LFS df -i + + do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME || return 1 + do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $OST || return 2 + + $SETSTRIPE $DIR/$tdir -i $OSTIDX -c 1 -p $FSNAME.$TESTNAME + + echo "preallocated objects in MDS is $((last_id - next_id))" \ + "($last_id - $next_id)" + + count=$($LFS df -i $MOUNT | grep ^$OST | awk '{print $4}') + echo "OST still has $count objects" + + free=$((count + last_id - next_id)) + echo "create $((free - next_id)) files @next_id..." + createmany -o $DIR/$tdir/f $next_id $free || return 3 + + local last_id2=$(do_facet mds${MDSIDX} lctl get_param -n \ + osc.$mdtosc_proc1.prealloc_last_id) + local next_id2=$(do_facet mds${MDSIDX} lctl get_param -n \ + osc.$mdtosc_proc1.prealloc_next_id) + + echo "after creation, last_id=$last_id2, next_id=$next_id2" + $LFS df -i + + echo "cleanup..." + + do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST || return 4 + do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || return 5 + echo "unlink $((free - next_id)) files @ $next_id..." + unlinkmany $DIR/$tdir/f $next_id $free || return 3 +} +run_test 220 "the preallocated objects in MDS still can be used if ENOSPC is returned by OST with enough disk space" + +test_221() { + cp `which date` $MOUNT + + #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401 + $LCTL set_param fail_loc=0x80001401 + + $MOUNT/date > /dev/null + rm -f $MOUNT/date +} +run_test 221 "make sure fault and truncate race to not cause OOM" + # # tests that do cleanup/setup should be run at the end #