X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=63506ad9853103857d93fda39ddcac93bf013d3a;hp=5aabec9914403df2d6159e37c5df99c3867a0541;hb=8dcf444f507f3dd7786ba940100419d476963d31;hpb=d16655be0693de265dd2fce6317a1c93ecdbd497 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 5aabec9..63506ad 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,14 +7,12 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 4900 4900 2108 9789 3637 9789 3561 13310 10764 -ALWAYS_EXCEPT=" 27o 27q 42a 42b 42c 42d 45 74b 75 $SANITY_EXCEPT" +# bug number for skipped test: 2108 9789 3637 9789 3561 12622 12653 12653 10764 +ALWAYS_EXCEPT=" 42a 42b 42c 42d 45 51d 65a 65e 75 $SANITY_EXCEPT" # bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443 #ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 36f 36g 51b 51c 63 64b 68 71 73 101 115" - # Tests that fail on uml CPU=`awk '/model/ {print $4}' /proc/cpuinfo` # buffer i/o errs sock spc runas @@ -29,8 +27,8 @@ case `uname -r` in *) error "unsupported kernel" ;; esac -SRCDIR=`dirname $0` -export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin +SRCDIR=$(cd $(dirname $0); echo $PWD) +export PATH=$PATH:/sbin TMP=${TMP:-/tmp} @@ -63,21 +61,6 @@ STRIPES_PER_OBJ=-1 CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -if [ $UID -ne 0 ]; then - echo "Warning: running as non-root uid $UID" - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} - - # $RUNAS_ID may get set incorrectly somewhere else - if [ $RUNAS_ID -eq 0 ]; then - echo "Error: \$RUNAS_ID set to 0, but \$UID is also 0!" - exit 1 - fi -fi - export NAME=${NAME:-local} SAVE_PWD=$PWD @@ -85,21 +68,19 @@ SAVE_PWD=$PWD CLEANUP=${CLEANUP:-:} SETUP=${SETUP:-:} TRACE=${TRACE:-""} -LUSTRE=${LUSTRE:-`dirname $0`/..} +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} +. ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} -if $GSS_KRB5; then - $RUNAS krb5_login.sh || exit 1 - $RUNAS -u $(($RUNAS_ID + 1)) krb5_login.sh || exit 1 -fi +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false cleanup() { echo -n "cln.." + pgrep ll_sa > /dev/null && { echo "There are ll_sa thread not exit!"; exit 20; } cleanupall ${FORCE} $* || { echo "FAILed to clean up"; exit 20; } } setup() { @@ -110,10 +91,9 @@ setup() { } check_kernel_version() { - VERSION_FILE=$LPROC/version + VERSION_FILE=version WANT_VER=$1 - [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1 - GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE) + GOT_VER=$(lctl get_param -n $VERSION_FILE | awk '/kernel:/ {print $2}') [ $GOT_VER == "patchless" ] && return 0 [ $GOT_VER -ge $WANT_VER ] && return 0 log "test needs at least kernel version $WANT_VER, running $GOT_VER" @@ -130,18 +110,23 @@ fi check_and_setup_lustre DIR=${DIR:-$MOUNT} -[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 +assert_DIR -LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` -OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` -STRIPECOUNT=`cat $LPROC/lov/$LOVNAME/stripecount` -STRIPESIZE=`cat $LPROC/lov/$LOVNAME/stripesize` -ORIGFREE=`cat $LPROC/lov/$LOVNAME/kbytesavail` +LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` +OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` +STRIPECOUNT=`lctl get_param -n lov.$LOVNAME.stripecount` +STRIPESIZE=`lctl get_param -n lov.$LOVNAME.stripesize` +ORIGFREE=`lctl get_param -n lov.$LOVNAME.kbytesavail` MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo -rm -rf $DIR/[Rdfs][1-9]* +rm -rf $DIR/[Rdfs][0-9]* + +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" + +check_runas_id $RUNAS_ID $RUNAS build_test_filter @@ -158,6 +143,8 @@ echo # add a newline after mke2fs. umask 077 +OLDDEBUG="`lctl get_param -n debug 2> /dev/null`" +lctl set_param debug=-1 2> /dev/null || true test_0() { touch $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || error @@ -528,7 +515,7 @@ run_test 21 "write to dangling link ============================" test_22() { WDIR=$DIR/$tdir - mkdir $WDIR + mkdir -p $WDIR chown $RUNAS_ID $WDIR (cd $WDIR || error "cd $WDIR failed"; $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \ @@ -699,9 +686,8 @@ run_test 24p "mkdir .../R12{a,b}; rename .../R12a .../R12b" test_24q() { mkdir $DIR/R13{a,b} DIRINO=`ls -lid $DIR/R13a | awk '{ print $1 }'` - multiop $DIR/R13b D_c & + multiop_bg_pause $DIR/R13b D_c || return 1 MULTIPID=$! - usleep 500 mrename $DIR/R13a $DIR/R13b $CHECKSTAT -a $DIR/R13a || error @@ -796,15 +782,16 @@ run_test 26e "unlink multiple component recursive symlink ======" # recursive symlinks (bug 7022) test_26f() { - mkdir $DIR/$tfile || error "mkdir $DIR/$tfile failed" - cd $DIR/$tfile || error "cd $DIR/$tfile failed" - mkdir -p $tdir/bar1 || error "mkdir $tdir/bar1 failed" + mkdir -p $DIR/$tdir + mkdir $DIR/$tdir/$tfile || error "mkdir $DIR/$tdir/$tfile failed" + cd $DIR/$tdir/$tfile || error "cd $DIR/$tdir/$tfile failed" + mkdir -p lndir/bar1 || error "mkdir lndir/bar1 failed" mkdir $tfile || error "mkdir $tfile failed" cd $tfile || error "cd $tfile failed" ln -s .. dotdot || error "ln dotdot failed" - ln -s dotdot/$tdir $tdir || error "ln $tdir failed" - cd ../.. || error "cd ../.. failed" - output=`ls $tfile/$tfile/$tdir/bar1` + ln -s dotdot/lndir lndir || error "ln lndir failed" + cd $DIR/$tdir || error "cd $DIR/$tdir failed" + output=`ls $tfile/$tfile/lndir/bar1` [ "$output" = bar1 ] && error "unexpected output" rm -r $tfile || error "rm $tfile failed" $CHECKSTAT -a $DIR/$tfile || error "$tfile not gone" @@ -814,7 +801,7 @@ run_test 26f "rm -r of a directory which has recursive symlink =" test_27a() { echo '== stripe sanity ==============================================' mkdir -p $DIR/d27 || error "mkdir failed" - $SETSTRIPE $DIR/d27/f0 65536 0 1 || error "lstripe failed" + $SETSTRIPE $DIR/d27/f0 -c 1 || error "lstripe failed" $CHECKSTAT -t file $DIR/d27/f0 || error "checkstat failed" pass log "== test_27a: write to one stripe file =========================" @@ -825,11 +812,11 @@ run_test 27a "one stripe file ==================================" test_27c() { [ "$OSTCOUNT" -lt "2" ] && skip "skipping 2-stripe test" && return mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/f01 65536 0 2 || error "lstripe failed" + $SETSTRIPE $DIR/d27/f01 -c 2 || error "lstripe failed" [ `$GETSTRIPE $DIR/d27/f01 | grep -A 10 obdidx | wc -l` -eq 4 ] || error "two-stripe file doesn't have two stripes" pass - log "== test_27d: write to two stripe file file f01 ================" + log "== test_27c: write to two stripe file file f01 ================" dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 || error "dd failed" } run_test 27c "create two stripe file f01 =======================" @@ -844,15 +831,15 @@ run_test 27d "create file with default settings ================" test_27e() { mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/f12 65536 0 2 || error "lstripe failed" - $SETSTRIPE $DIR/d27/f12 65536 0 2 && error "lstripe succeeded twice" + $SETSTRIPE $DIR/d27/f12 -c 2 || error "lstripe failed" + $SETSTRIPE $DIR/d27/f12 -c 2 && error "lstripe succeeded twice" $CHECKSTAT -t file $DIR/d27/f12 || error "checkstat failed" } run_test 27e "lstripe existing file (should return error) ======" test_27f() { mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/fbad 100 0 1 && error "lstripe failed" + $SETSTRIPE $DIR/d27/fbad -s 100 -i 0 -c 1 && error "lstripe failed" dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 || error "dd failed" $GETSTRIPE $DIR/d27/fbad || error "lfs getstripe failed" } @@ -873,7 +860,7 @@ run_test 27g "test lfs getstripe ===========================================" test_27j() { mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/f27j 65536 $OSTCOUNT 1 && error "lstripe failed"||true + $SETSTRIPE $DIR/d27/f27j -i $OSTCOUNT && error "lstripe failed"||true } run_test 27j "lstripe with bad stripe offset (should return error)" @@ -882,7 +869,7 @@ test_27k() { # bug 2844 FILE=$DIR/d27/f27k LL_MAX_BLKSIZE=$((4 * 1024 * 1024)) [ ! -d $DIR/d27 ] && mkdir -p $DIR/d27 - $SETSTRIPE $FILE 67108864 -1 0 || error "lstripe failed" + $SETSTRIPE $FILE -s 67108864 || error "lstripe failed" BLKSIZE=`stat $FILE | awk '/IO Block:/ { print $7 }'` [ $BLKSIZE -le $LL_MAX_BLKSIZE ] || error "$BLKSIZE > $LL_MAX_BLKSIZE" dd if=/dev/zero of=$FILE bs=4k count=1 @@ -894,7 +881,7 @@ run_test 27k "limit i_blksize for broken user apps =============" test_27l() { mkdir -p $DIR/d27 mcreate $DIR/f27l || error "creating file" - $RUNAS $SETSTRIPE $DIR/f27l 65536 -1 1 && \ + $RUNAS $SETSTRIPE $DIR/f27l -c 1 && \ error "lstripe should have failed" || true } run_test 27l "check setstripe permissions (should return error)" @@ -906,11 +893,11 @@ test_27m() { return fi mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/f27m_1 0 0 1 + $SETSTRIPE $DIR/d27/f27m_1 -i 0 -c 1 dd if=/dev/zero of=$DIR/d27/f27m_1 bs=1024 count=$MAXFREE && \ error "dd should fill OST0" i=2 - while $SETSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do + while $SETSTRIPE $DIR/d27/f27m_$i -i 0 -c 1 ; do i=`expr $i + 1` [ $i -gt 256 ] && break done @@ -932,25 +919,25 @@ reset_enospc() { [ "$1" ] && FAIL_LOC=$1 || FAIL_LOC=0 mkdir -p $DIR/d27/nospc rmdir $DIR/d27/nospc - sysctl -w lustre.fail_loc=$FAIL_LOC + lctl set_param fail_loc=$FAIL_LOC } exhaust_precreations() { OSTIDX=$1 - OST=$(grep ${OSTIDX}": " $LPROC/lov/${LOVNAME}/target_obd | \ - awk '{print $2}' | sed -e 's/_UUID$//') + OST=$(lctl get_param -n lov.${LOVNAME}.target_obd | + grep ${OSTIDX}": " | \ + awk '{print $2}' | sed -e 's/_UUID$//') # on the mdt's osc - OSC=$(ls $LPROC/osc | grep "${OST}-osc-MDT0000") - last_id=$(cat $LPROC/osc/${OSC}/prealloc_last_id) - next_id=$(cat $LPROC/osc/${OSC}/prealloc_next_id) + last_id=$(lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc_last_id) + next_id=$(lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc_next_id) mkdir -p $DIR/d27/${OST} - $SETSTRIPE $DIR/d27/${OST} 0 $OSTIDX 1 + $SETSTRIPE $DIR/d27/${OST} -i $OSTIDX -c 1 #define OBD_FAIL_OST_ENOSPC 0x215 - sysctl -w lustre.fail_loc=0x215 + lctl set_param fail_loc=0x215 echo "Creating to objid $last_id on ost $OST..." createmany -o $DIR/d27/${OST}/f $next_id $((last_id - next_id + 2)) - grep '[0-9]' $LPROC/osc/${OSC}/prealloc* + lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc* | grep '[0-9]' reset_enospc $2 } @@ -1039,7 +1026,7 @@ test_27r() { rm -f $DIR/d27/f27r exhaust_precreations 0 0x80000215 - $SETSTRIPE $DIR/d27/f27r 0 0 2 # && error + $SETSTRIPE $DIR/d27/f27r -i 0 -c 2 # && error reset_enospc } @@ -1068,16 +1055,17 @@ test_27u() { # bug 4900 #define OBD_FAIL_MDS_OSC_PRECREATE 0x13d - sysctl -w lustre.fail_loc=0x13d + lctl set_param fail_loc=0x13d mkdir -p $DIR/d27u createmany -o $DIR/d27u/t- 1000 - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 - $LFS getstripe $DIR/d27u > $TMP/files - OBJS=`cat $TMP/files | awk -vobjs=0 '($1 == 0) { objs += 1 } END { print objs;}'` + TLOG=$DIR/$tfile.getstripe + $GETSTRIPE $DIR/d27u > $TLOG + OBJS=`awk -vobj=0 '($1 == 0) { obj += 1 } END { print obj;}' $TLOG` unlinkmany $DIR/d27u/t- 1000 [ $OBJS -gt 0 ] && \ - error "Found $OBJS objects were created on OST-0" || pass + error "$OBJS objects created on OST-0. See $TLOG" || pass } run_test 27u "skip object creation on OSC w/o objects ==========" @@ -1088,19 +1076,19 @@ test_27v() { # bug 4900 exhaust_all_precreations mkdir -p $DIR/$tdir - lfs setstripe $DIR/$tdir 0 -1 1 # 1 stripe / file + $SETSTRIPE $DIR/$tdir -c 1 # 1 stripe / file touch $DIR/$tdir/$tfile #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 - sysctl -w lustre.fail_loc=0x705 + lctl set_param fail_loc=0x705 START=`date +%s` for F in `seq 1 32`; do touch $DIR/$tdir/$tfile.$F done - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 FINISH=`date +%s` - TIMEOUT=`sysctl -n lustre.timeout` + TIMEOUT=`lctl get_param -n timeout` [ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \ error "$FINISH - $START >= $TIMEOUT / 2" @@ -1138,17 +1126,16 @@ test_29() { touch $DIR/d29/foo log 'first d29' ls -l $DIR/d29 - MDCDIR=${MDCDIR:-$LPROC/ldlm/namespaces/*-mdc-*} - LOCKCOUNTORIG=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` + LOCKCOUNTORIG=`lctl get_param -n ldlm.namespaces.*mdc*.lock_count` + LOCKUNUSEDCOUNTORIG=`lctl get_param -n ldlm.namespaces.*mdc*.lock_unused_count` [ -z $"LOCKCOUNTORIG" ] && echo "No mdc lock count" && return 1 log 'second d29' ls -l $DIR/d29 log 'done' - LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count` + LOCKCOUNTCURRENT=`lctl get_param -n ldlm.namespaces.*mdc*.lock_count` + LOCKUNUSEDCOUNTCURRENT=`lctl get_param -n ldlm.namespaces.*mdc*.lock_unused_count` if [ "$LOCKCOUNTCURRENT" -gt "$LOCKCOUNTORIG" ]; then - echo > $LPROC/ldlm/dump_namespaces + lctl set_param -n ldlm.dump_namespaces "" error "CURRENT: $LOCKCOUNTCURRENT > $LOCKCOUNTORIG" $LCTL dk | sort -k4 -t: > $TMP/test_29.dk log "dumped log to $TMP/test_29.dk (bug 5793)" @@ -1187,10 +1174,9 @@ run_test 31b "unlink file with multiple links while open =======" test_31c() { touch $DIR/f31 || error ln $DIR/f31 $DIR/f31c || error - multiop $DIR/f31 O_uc & + multiop_bg_pause $DIR/f31 O_uc || return 1 MULTIPID=$! multiop $DIR/f31c Ouc - usleep 500 kill -USR1 $MULTIPID wait $MULTIPID } @@ -1211,26 +1197,22 @@ run_test 31e "remove of open non-empty directory ===============" test_31f() { # bug 4554 set -vx mkdir $DIR/d31f - lfs setstripe $DIR/d31f 1048576 -1 1 + $SETSTRIPE $DIR/d31f -s 1048576 -c 1 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f - lfs getstripe $DIR/d31f/hosts - multiop $DIR/d31f D_c & + $GETSTRIPE $DIR/d31f/hosts + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID=$! - sleep 1 - rm -rv $DIR/d31f || error "first of $DIR/d31f" mkdir $DIR/d31f - lfs setstripe $DIR/d31f 1048576 -1 1 + $SETSTRIPE $DIR/d31f -s 1048576 -c 1 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f - lfs getstripe $DIR/d31f/hosts - multiop $DIR/d31f D_c & + $DIR/d31f/hosts + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID2=$! - sleep 6 - kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running" wait $MULTIPID || error "first opendir $MULTIPID failed" @@ -1549,6 +1531,14 @@ test_33a() { } run_test 33a "test open file(mode=0444) with O_RDWR (should return error)" +test_33b() { + rm -fr $DIR/d33 + mkdir -p $DIR/d33 + chown $RUNAS_ID $DIR/d33 + $RUNAS $OPENFILE -f 1286739555 $DIR/d33/f33 && error "create" || true +} +run_test 33b "test open file with malformed flags (No panic and return error)" + TEST_34_SIZE=${TEST_34_SIZE:-2000000000000} test_34a() { rm -f $DIR/f34 @@ -1661,7 +1651,7 @@ test_36f() { DATESTR="Dec 20 2000" mkdir -p $DIR/$tdir #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - sysctl -w lustre.fail_loc=0x80000214 + lctl set_param fail_loc=0x80000214 date; date +%s cp /etc/hosts $DIR/$tdir/$tfile sync & # write RPC generated with "current" inode timestamp, but delayed @@ -1679,9 +1669,9 @@ test_36f() { } run_test 36f "utime on file racing with OST BRW write ==========" -export FMD_MAX_AGE=`do_facet ost1 cat $LPROC/obdfilter/*/client_cache_seconds | head -n 1` test_36g() { - [ -z "$FMD_MAX_AGE" ] && skip "skip test for remote OST" && return + remote_ost && skip "remote OST" && return + export FMD_MAX_AGE=`do_facet ost1 lctl get_param -n obdfilter.*.client_cache_seconds 2> /dev/null | head -n 1` FMD_BEFORE="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`" touch $DIR/d36/$tfile sleep $((FMD_MAX_AGE + 12)) @@ -1741,7 +1731,7 @@ test_41() { run_test 41 "test small file write + fstat =====================" count_ost_writes() { - cat $LPROC/osc/*/stats | + lctl get_param -n osc.*.stats | awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' } @@ -1757,9 +1747,9 @@ start_writeback() { # in 2.6, restore /proc/sys/vm/dirty_writeback_centisecs, # dirty_ratio, dirty_background_ratio if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then - echo $WRITEBACK_SAVE > /proc/sys/vm/dirty_writeback_centisecs - echo $BG_DIRTY_RATIO_SAVE > /proc/sys/vm/dirty_background_ratio - echo $DIRTY_RATIO_SAVE > /proc/sys/vm/dirty_ratio + sysctl -w vm.dirty_writeback_centisecs=$WRITEBACK_SAVE + sysctl -w vm.dirty_background_ratio=$BG_DIRTY_RATIO_SAVE + sysctl -w vm.dirty_ratio=$DIRTY_RATIO_SAVE else # if file not here, we are a 2.4 kernel kill -CONT `pidof kupdated` @@ -1772,15 +1762,15 @@ stop_writeback() { trap start_writeback EXIT # in 2.6, save and 0 /proc/sys/vm/dirty_writeback_centisecs if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then - WRITEBACK_SAVE=`cat /proc/sys/vm/dirty_writeback_centisecs` - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo 0 > /proc/sys/vm/dirty_writeback_centisecs + WRITEBACK_SAVE=`sysctl -n vm.dirty_writeback_centisecs` + sysctl -w vm.dirty_writeback_centisecs=0 + sysctl -w vm.dirty_writeback_centisecs=0 # save and increase /proc/sys/vm/dirty_ratio - DIRTY_RATIO_SAVE=`cat /proc/sys/vm/dirty_ratio` - echo $MAX_DIRTY_RATIO > /proc/sys/vm/dirty_ratio + DIRTY_RATIO_SAVE=`sysctl -n vm.dirty_ratio` + sysctl -w vm.dirty_ratio=$MAX_DIRTY_RATIO # save and increase /proc/sys/vm/dirty_background_ratio - BG_DIRTY_RATIO_SAVE=`cat /proc/sys/vm/dirty_background_ratio` - echo $MAX_BG_DIRTY_RATIO > /proc/sys/vm/dirty_background_ratio + BG_DIRTY_RATIO_SAVE=`sysctl -n vm.dirty_background_ratio` + sysctl -w vm.dirty_background_ratio=$MAX_BG_DIRTY_RATIO else # if file not here, we are a 2.4 kernel kill -STOP `pidof kupdated` @@ -1805,7 +1795,7 @@ test_42a() { stop_writeback sync; sleep 1; sync # just to be safe BEFOREWRITES=`count_ost_writes` - grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur_grant_bytes + lctl get_param -n osc.*[oO][sS][cC][_-]*.cur_grant_bytes | grep "[0-9]" dd if=/dev/zero of=$DIR/f42a bs=1024 count=100 AFTERWRITES=`count_ost_writes` [ $BEFOREWRITES -eq $AFTERWRITES ] || \ @@ -1887,23 +1877,26 @@ test_42d() { run_test 42d "test complete truncate of file with cached dirty data" test_43() { - mkdir $DIR/$tdir + mkdir -p $DIR/$tdir cp -p /bin/ls $DIR/$tdir/$tfile - exec 9>> $DIR/$tdir/$tfile + multiop $DIR/$tdir/$tfile Ow_c & + pid=$! + # give multiop a chance to open + sleep 1 + $DIR/$tdir/$tfile && error || true - exec 9<&- + kill -USR1 $pid } run_test 43 "execution of file opened for write should return -ETXTBSY" test_43a() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! multiop $DIR/d43/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" @@ -1911,12 +1904,11 @@ run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" test_43b() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! truncate $DIR/d43/multiop 0 && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43b "truncate of file being executed should return -ETXTBSY" @@ -1944,7 +1936,7 @@ test_44a() { [ "$nstripe" -gt "$OSTCOUNT" ] && skip "Wrong default_stripe_count: $nstripe (OSTCOUNT: $OSTCOUNT)" && return local stride=`$LCTL lov_getconfig $DIR | grep default_stripe_size: | \ awk '{print $2}'` - if [ $nstripe -eq 0 -o $nstripe -gt 1024 ] ; then + if [ $nstripe -eq 0 -o $nstripe -eq -1 ] ; then nstripe=`$LCTL lov_getconfig $DIR | grep obd_count: | awk '{print $2}'` fi @@ -1975,8 +1967,8 @@ run_test 44a "test sparse pwrite ===============================" dirty_osc_total() { tot=0 - for d in $LPROC/osc/*/cur_dirty_bytes; do - tot=$(($tot + `cat $d`)) + for d in `lctl get_param -n osc.*.cur_dirty_bytes`; do + tot=$(($tot + $d)) done echo $tot } @@ -2081,7 +2073,7 @@ run_test 48b "Access removed working dir (should return errors)=" test_48c() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48c/dir cd $DIR/d48c/dir @@ -2104,7 +2096,7 @@ run_test 48c "Access removed working subdir (should return errors)" test_48d() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48d/dir cd $DIR/d48d/dir @@ -2128,7 +2120,7 @@ run_test 48d "Access removed parent subdir (should return errors)" test_48e() { # bug 4134 check_kernel_version 41 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48e/dir cd $DIR/d48e/dir @@ -2152,7 +2144,7 @@ test_50() { } run_test 50 "special situations: /proc symlinks ===============" -test_51() { +test_51a() { # was test_51 # bug 1516 - create an empty entry right after ".." then split dir mkdir $DIR/d51 touch $DIR/d51/foo @@ -2168,7 +2160,7 @@ test_51() { echo ls -l $DIR/d51 > /dev/null || error } -run_test 51 "special situations: split htree with empty entry ==" +run_test 51a "special situations: split htree with empty entry ==" #export NUMTEST=70000 # FIXME: I select a relatively small number to do basic test. @@ -2266,10 +2258,11 @@ test_53() { remote_mds && skip "remote MDS" && return # only test MDT0000 - for i in `ls -d $LPROC/osc/*-osc-MDT0000 2> /dev/null` ; do - ostname=`basename $i | cut -d - -f 1-2` - ost_last=`cat $LPROC/obdfilter/$ostname/last_id` - mds_last=`cat $i/prealloc_last_id` + for value in `lctl get_param osc.*-osc-MDT0000.prealloc_last_id` ; do + param=`echo ${value[0]} | cut -d "=" -f1` + ostname=`echo $param | cut -d "." -f2 | cut -d - -f 1-2` + ost_last=`lctl get_param -n obdfilter.$ostname.last_id` + mds_last=`lctl get_param -n $param` echo "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last" if [ $ost_last != $mds_last ]; then error "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last" @@ -2343,7 +2336,7 @@ test_54e() { check_kernel_version 46 || return 0 f="$DIR/f54e" string="aaaaaa" - mknod $f c 4 0 + cp -aL /dev/console $f echo $string > $f || error } run_test 54e "console/tty device works in lustre ======================" @@ -2373,7 +2366,7 @@ test_55() { } run_test 55 "check iopen_connect_dentry() ======================" -test_56() { +test_56a() { # was test_56 rm -rf $DIR/d56 $SETSTRIPE -d $DIR mkdir $DIR/d56 @@ -2423,7 +2416,7 @@ test_56() { error "lfs getstripe --obd wrong: should not show file on other obd" echo "lfs getstripe --obd passed." } -run_test 56 "check lfs getstripe ====================================" +run_test 56a "check lfs getstripe ====================================" NUMFILES=3 NUMDIRS=3 @@ -2444,6 +2437,25 @@ setup_56() { fi } +setup_56_special() { + LOCAL_NUMFILES=$1 + LOCAL_NUMDIRS=$2 + TDIR=$DIR/${tdir}g + setup_56 $1 $2 + if [ ! -e "$TDIR/loop1b" ] ; then + for i in `seq 1 $LOCAL_NUMFILES` ; do + mknod $TDIR/loop${i}b b 7 $i + mknod $TDIR/null${i}c c 1 3 + ln -s $TDIR/file1 $TDIR/link${i}l + done + for i in `seq 1 $LOCAL_NUMDIRS` ; do + mknod $TDIR/dir$i/loop${i}b b 7 $i + mknod $TDIR/dir$i/null${i}c c 1 3 + ln -s $TDIR/dir$i/file1 $TDIR/dir$i/link${i}l + done + fi +} + test_56g() { $LSTRIPE -d $DIR @@ -2485,11 +2497,80 @@ test_56i() { } run_test 56i "check 'lfs find -ost UUID' skips directories =======" +test_56j() { + setup_56_special $NUMFILES $NUMDIRS + + EXPECTED=$((NUMDIRS+1)) + NUMS=`$LFIND -type d $DIR/${tdir}g | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -type d $DIR/${tdir}g wrong: found $NUMS, expected $EXPECTED" +} +run_test 56j "check lfs find -type d =============================" + +test_56k() { + setup_56_special $NUMFILES $NUMDIRS + + EXPECTED=$(((NUMDIRS+1) * NUMFILES)) + NUMS=`$LFIND -type f $DIR/${tdir}g | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -type f $DIR/${tdir}g wrong: found $NUMS, expected $EXPECTED" +} +run_test 56k "check lfs find -type f =============================" + +test_56l() { + setup_56_special $NUMFILES $NUMDIRS + + EXPECTED=$((NUMDIRS + NUMFILES)) + NUMS=`$LFIND -type b $DIR/${tdir}g | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -type b $DIR/${tdir}g wrong: found $NUMS, expected $EXPECTED" +} +run_test 56l "check lfs find -type b =============================" + +test_56m() { + setup_56_special $NUMFILES $NUMDIRS + + EXPECTED=$((NUMDIRS + NUMFILES)) + NUMS=`$LFIND -type c $DIR/${tdir}g | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -type c $DIR/${tdir}g wrong: found $NUMS, expected $EXPECTED" +} +run_test 56m "check lfs find -type c =============================" + +test_56n() { + setup_56_special $NUMFILES $NUMDIRS + + EXPECTED=$((NUMDIRS + NUMFILES)) + NUMS=`$LFIND -type l $DIR/${tdir}g | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -type l $DIR/${tdir}g wrong: found $NUMS, expected $EXPECTED" +} +run_test 56n "check lfs find -type l =============================" + +test_56o() { + setup_56 $NUMFILES $NUMDIRS + TDIR=$DIR/${tdir}g + + utime $TDIR/file1 > /dev/null || error + utime $TDIR/file2 > /dev/null || error + utime $TDIR/dir1 > /dev/null || error + utime $TDIR/dir2 > /dev/null || error + utime $TDIR/dir1/file1 > /dev/null || error + + EXPECTED=5 + NUMS=`$LFIND -mtime +1 $TDIR | wc -l` + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -mtime $TDIR wrong: found $NUMS, expected $EXPECTED" +} +run_test 56o "check lfs find -mtime for old files ==========================" + test_57a() { # note test will not do anything if MDS is not local remote_mds && skip "remote MDS" && return - - for DEV in `cat $LPROC/mds/*/mntdev`; do + local MNTDEV="osd.*MDT*.mntdev" + DEV=$(lctl get_param -n $MNTDEV) + [ -z "$DEV" ] && error "can't access $MNTDEV" + for DEV in `lctl get_param -n $MNTDEV`; do dumpe2fs -h $DEV > $TMP/t57a.dump || error "can't access $DEV" DEVISIZE=`awk '/Inode size:/ { print $3 }' $TMP/t57a.dump` [ "$DEVISIZE" -gt 128 ] || error "inode size $DEVISIZE" @@ -2512,8 +2593,8 @@ test_57b() { $GETSTRIPE $FILE1 2>&1 | grep -q "no stripe" || error "$FILE1 has an EA" $GETSTRIPE $FILEN 2>&1 | grep -q "no stripe" || error "$FILEN has an EA" - MDSFREE="`cat $LPROC/mds/*/kbytesfree 2> /dev/null`" - MDCFREE="`cat $LPROC/mdc/*/kbytesfree | head -n 1`" + MDSFREE="`lctl get_param -n osd.*MDT*.kbytesfree 2> /dev/null`" + MDCFREE="`lctl get_param -n mdc.*.kbytesfree | head -n 1`" echo "opening files to create objects/EAs" for FILE in `seq -f $DIR/d57b/f%g 1 $FILECOUNT`; do $OPENFILE -f O_RDWR $FILE > /dev/null || error "opening $FILE" @@ -2524,8 +2605,8 @@ test_57b() { $GETSTRIPE $FILEN | grep -q "obdidx" || error "$FILEN missing EA" sleep 1 # make sure we get new statfs data -# MDSFREE2="`cat $LPROC/mds/*/kbytesfree`" -# MDCFREE2="`cat $LPROC/mdc/*/kbytesfree`" +# MDSFREE2="`lctl get_param -n mds.*.kbytesfree`" +# MDCFREE2="`lctl get_param -n mdc.*.kbytesfree`" # if [ "$MDCFREE2" -lt "$((MDCFREE - 8))" ]; then # if [ "$MDSFREE" != "$MDSFREE2" ]; then # error "MDC before $MDCFREE != after $MDCFREE2" @@ -2565,8 +2646,19 @@ run_test 60a "llog sanity tests run from kernel module ==========" test_60b() { # bug 6411 dmesg > $DIR/$tfile - LLOG_COUNT=`dmesg | grep -A 1000 "$TEST60_HEAD" | grep -c llog_test` - [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages"|| true + LLOG_COUNT=`dmesg | awk "/$TEST60_HEAD/{marker = 1; from_marker = 0;} + /llog.test/ { + if (marker) + from_marker++ + from_begin++ + } + END { + if (marker) + print from_marker + else + print from_begin + }"` + [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages ($LLOG_COUNT)"|| true } run_test 60b "limit repeated messages from CERROR/CWARN ========" @@ -2574,12 +2666,31 @@ test_60c() { echo "create 5000 files" createmany -o $DIR/f60c- 5000 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x13c - sysctl -w lustre.fail_loc=0x8000013c + lctl set_param fail_loc=0x8000013c unlinkmany $DIR/f60c- 5000 - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 } run_test 60c "unlink file when mds full" +test_60d() { + SAVEPRINTK=$(lctl get_param -n printk) + + # verify "lctl mark" is even working" + MESSAGE="test message ID $RANDOM $$" + $LCTL mark "$MESSAGE" || error "$LCTL mark failed" + dmesg | grep -q "$MESSAGE" || error "didn't find debug marker in log" + + lctl set_param printk=0 || error "set lnet.printk failed" + lctl get_param -n printk | grep emerg || error "lnet.printk dropped emerg" + MESSAGE="new test message ID $RANDOM $$" + # Assume here that libcfs_debug_mark_buffer() uses D_WARNING + $LCTL mark "$MESSAGE" || error "$LCTL mark failed" + dmesg | grep -q "$MESSAGE" && error "D_WARNING wasn't masked" || true + + lctl set_param -n printk="$SAVEPRINTK" +} +run_test 60d "test printk console message masking" + test_61() { f="$DIR/f61" dd if=/dev/zero of=$f bs=`page_size` count=1 @@ -2594,18 +2705,18 @@ test_62() { f="$DIR/f62" echo foo > $f cancel_lru_locks osc - sysctl -w lustre.fail_loc=0x405 + lctl set_param fail_loc=0x405 cat $f && error "cat succeeded, expect -EIO" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 } -run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)" +# This test is now irrelevant (as of bug 10718 inclusion), we no longer +# match every page all of the time. +#run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)" # bug 2319 - oig_wait() interrupted causes crash because of invalid waitq. -test_63() { - MAX_DIRTY_MB=`cat $LPROC/osc/*/max_dirty_mb | head -n 1` - for i in $LPROC/osc/*/max_dirty_mb ; do - echo 0 > $i - done +test_63a() { # was test_63 + MAX_DIRTY_MB=`lctl get_param -n osc.*.max_dirty_mb | head -n 1` + lctl set_param -n osc.*.max_dirty_mb 0 for i in `seq 10` ; do dd if=/dev/zero of=$DIR/f63 bs=8k & sleep 5 @@ -2613,29 +2724,27 @@ test_63() { sleep 1 done - for i in $LPROC/osc/*/max_dirty_mb ; do - echo $MAX_DIRTY_MB > $i - done + lctl set_param -n osc.*.max_dirty_mb $MAX_DIRTY_MB rm -f $DIR/f63 || true } -run_test 63 "Verify oig_wait interruption does not crash =======" +run_test 63a "Verify oig_wait interruption does not crash =======" # bug 2248 - async write errors didn't return to application on sync # bug 3677 - async write errors left page locked test_63b() { debugsave - sysctl -w lnet.debug=-1 + lctl set_param debug=-1 # ensure we have a grant to do async writes dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 rm $DIR/$tfile #define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 - sysctl -w lustre.fail_loc=0x80000406 + lctl set_param fail_loc=0x80000406 multiop $DIR/$tfile Owy && \ error "sync didn't return ENOMEM" sync; sleep 2; sync # do a real sync this time to flush page - grep locked $LPROC/llite/*/dump_page_cache && \ + lctl get_param -n llite.*.dump_page_cache | grep locked && \ error "locked page left in cache after async error" || true debugrestore } @@ -2643,7 +2752,7 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { df $DIR - grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur* + lctl get_param -n osc.*[oO][sS][cC][_-]*.cur* | grep "[0-9]" } run_test 64a "verify filter grant calculations (in kernel) =====" @@ -2663,7 +2772,7 @@ run_test 65a "directory with no stripe info ====================" test_65b() { mkdir -p $DIR/d65 - $SETSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + $SETSTRIPE $DIR/d65 -s $(($STRIPESIZE * 2)) -i 0 -c 1 || error "setstripe" touch $DIR/d65/f2 $LVERIFY $DIR/d65 $DIR/d65/f2 || error "lverify failed" } @@ -2672,8 +2781,8 @@ run_test 65b "directory setstripe $(($STRIPESIZE * 2)) 0 1 ===============" test_65c() { if [ $OSTCOUNT -gt 1 ]; then mkdir -p $DIR/d65 - $SETSTRIPE $DIR/d65 $(($STRIPESIZE * 4)) 1 \ - $(($OSTCOUNT - 1)) || error "setstripe" + $SETSTRIPE $DIR/d65 -s $(($STRIPESIZE * 4)) -i 1 \ + -c $(($OSTCOUNT - 1)) || error "setstripe" touch $DIR/d65/f3 $LVERIFY $DIR/d65 $DIR/d65/f3 || error "lverify failed" fi @@ -2690,7 +2799,7 @@ test_65d() { else sc=$(($STRIPECOUNT - 1)) fi - $SETSTRIPE $DIR/d65 $STRIPESIZE -1 $sc || error "setstripe" + $SETSTRIPE $DIR/d65 -s $STRIPESIZE -c $sc || error "setstripe" touch $DIR/d65/f4 $DIR/d65/f5 $LVERIFY $DIR/d65 $DIR/d65/f4 $DIR/d65/f5 || error "lverify failed" } @@ -2699,8 +2808,8 @@ run_test 65d "directory setstripe $STRIPESIZE -1 stripe_count ==============" test_65e() { mkdir -p $DIR/d65 - $SETSTRIPE $DIR/d65 0 -1 0 || error "setstripe" - $GETSTRIPE -v $DIR/d65 | grep "has no stripe info" || error "no stripe info failed" + $SETSTRIPE $DIR/d65 || error "setstripe" + $GETSTRIPE -v $DIR/d65 | grep "Default" || error "no stripe info failed" touch $DIR/d65/f6 $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed" } @@ -2708,22 +2817,22 @@ run_test 65e "directory setstripe 0 -1 0 =======================" test_65f() { mkdir -p $DIR/d65f - $RUNAS $SETSTRIPE $DIR/d65f 0 -1 0 && error "setstripe succeeded" || true + $RUNAS $SETSTRIPE $DIR/d65f && error "setstripe succeeded" || true } run_test 65f "dir setstripe permission (should return error) ===" test_65g() { mkdir -p $DIR/d65 - $SETSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + $SETSTRIPE $DIR/d65 -s $(($STRIPESIZE * 2)) -i 0 -c 1 || error "setstripe" $SETSTRIPE -d $DIR/d65 || error "setstripe" - $GETSTRIPE -v $DIR/d65 | grep "has no stripe info" || \ + $GETSTRIPE -v $DIR/d65 | grep "Default" || \ error "delete default stripe failed" } run_test 65g "directory setstripe -d ===========================" test_65h() { mkdir -p $DIR/d65 - $SETSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + $SETSTRIPE $DIR/d65 -s $(($STRIPESIZE * 2)) -i 0 -c 1 || error "setstripe" mkdir -p $DIR/d65/dd1 [ "`$GETSTRIPE -v $DIR/d65 | grep "^count"`" == \ "`$GETSTRIPE -v $DIR/d65/dd1 | grep "^count"`" ] || error "stripe info inherit failed" @@ -2731,14 +2840,15 @@ test_65h() { run_test 65h "directory stripe info inherit ====================" test_65i() { # bug6367 - $SETSTRIPE $MOUNT 65536 -1 -1 + $SETSTRIPE $MOUNT -s 65536 -c -1 } run_test 65i "set non-default striping on root directory (bug 6367)=" test_65j() { # bug6367 + sync; sleep 1 # if we aren't already remounting for each test, do so for this test if [ "$CLEANUP" = ":" -a "$I_MOUNTED" = "yes" ]; then - cleanup -f || error "failed to unmount" + cleanup || error "failed to unmount" setup fi $SETSTRIPE -d $MOUNT || error "setstripe failed" @@ -2747,6 +2857,7 @@ run_test 65j "set default striping on root directory (bug 6367)=" test_65k() { # bug11679 [ "$OSTCOUNT" -lt 2 ] && skip "too few OSTs" && return + remote_mds_nodsh && skip "remote MDS" && return echo "Check OST status: " MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` @@ -2760,10 +2871,10 @@ test_65k() { # bug11679 do_facet mds lctl --device %$INACTIVE_OSC deactivate for STRIPE_OSC in $MDS_OSCS; do STRIPE_OST=`osc_to_ost $STRIPE_OSC` - STRIPE_INDEX=`do_facet mds cat $LPROC/lov/*md*/target_obd | + STRIPE_INDEX=`do_facet mds lctl get_param -n lov.*md*.target_obd | grep $STRIPE_OST | awk -F: '{print $1}'` - echo "$SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} 0 ${STRIPE_INDEX} 1" - do_facet client $SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} 0 ${STRIPE_INDEX} 1 + echo "$SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} -i ${STRIPE_INDEX} -c 1" + do_facet client $SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} -i ${STRIPE_INDEX} -c 1 RC=$? [ $RC -ne 0 ] && error "setstripe should have succeeded" done @@ -2776,7 +2887,7 @@ run_test 65k "validate manual striping works properly with deactivated OSCs" test_65l() { # bug 12836 mkdir -p $DIR/$tdir/test_dir - $LFS setstripe $DIR/$tdir/test_dir 65536 -1 -1 + $SETSTRIPE $DIR/$tdir/test_dir -c -1 $LFS find -mtime -1 $DIR/$tdir >/dev/null } run_test 65l "lfs find on -1 stripe dir ========================" @@ -2791,12 +2902,6 @@ test_66() { } run_test 66 "update inode blocks count on client ===============" -test_67() { - [ ! -f sanity-sec.sh ] && skip "missing subtest sanity-sec.sh" && return - sh sanity-sec.sh -} -run_test 67 "security test =====================================" - LLOOP= cleanup_68() { trap 0 @@ -2822,12 +2927,15 @@ swap_used() { # and then consuming memory until it is used. test_68() { [ "$UID" != 0 ] && skip "must run as root" && return - grep -q obdfilter $LPROC/devices && \ + lctl get_param -n devices | grep -q obdfilter && \ skip "local OST" && return grep -q llite_lloop /proc/modules [ $? -ne 0 ] && skip "can't find module llite_lloop" && return + [ -z "`$LCTL list_nids | grep -v tcp`" ] && \ + skip "can't reliably test swap with TCP" && return + MEMTOTAL=`meminfo MemTotal` NR_BLOCKS=$((MEMTOTAL>>8)) [[ $NR_BLOCKS -le 2048 ]] && NR_BLOCKS=2048 @@ -2856,38 +2964,34 @@ run_test 68 "support swapping to Lustre ========================" # bug5265, obdfilter oa2dentry return -ENOENT # #define OBD_FAIL_OST_ENOENT 0x217 test_69() { - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ + [ $(lctl get_param -n devices | grep -c obdfilter) -eq 0 ] && \ skip "skipping test for remote OST" && return - $GSS && skip "gss with bulk security will triger oops. re-enable this after b10091 get fixed" && return f="$DIR/$tfile" touch $f - if ! $DIRECTIO write ${f}.2 0 1; then - skip "O_DIRECT not implemented" - return 0 - fi + $DIRECTIO write ${f}.2 0 1 || error "directio write error" - sysctl -w lustre.fail_loc=0x217 + lctl set_param fail_loc=0x217 truncate $f 1 # vmtruncate() will ignore truncate() error. $DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 $DIRECTIO write $f 0 2 || error "write error" cancel_lru_locks osc $DIRECTIO read $f 0 1 || error "read error" - sysctl -w lustre.fail_loc=0x217 + lctl set_param fail_loc=0x217 $DIRECTIO read $f 1 1 && error "read succeeded, expect -ENOENT" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 rm -f $f } run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======" test_71() { - which dbench > /dev/null 2>&1 || skip "dbench not installed, skip this test" && return 0 + which dbench > /dev/null 2>&1 || { skip "dbench not installed, skip this test" && return 0; } DBENCH_LIB=${DBENCH_LIB:-/usr/lib/dbench} PATH=${DBENCH_LIB}:${PATH} cp `which dbench` $DIR @@ -2897,18 +3001,15 @@ test_71() { [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT SRC=$DBENCH_LIB/client_plain.txt [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT - - echo "copying necessary lib to $DIR" - [ -d /lib64 ] && LIB71=/lib64 || LIB71=/lib - mkdir -p $DIR$LIB71 || error "can't create $DIR$LIB71" - cp $LIB71/libc* $DIR$LIB71 || error "can't copy $LIB71/libc*" - cp $LIB71/ld-* $DIR$LIB71 || error "can't create $LIB71/ld-*" - + echo "copying necessary libs to $DIR" + LIBS71=$(ldd $DIR/dbench|sed -e 's/\t*//' -e 's/.*=> //' -e 's/ .*//' -e 's/^\///') + (cd / && tar chf - $LIBS71) | (cd $DIR && tar xvf -) + [ $? = 0 ] || error "can't copy libs $LIBS71 to $DIR" echo "chroot $DIR /dbench -c client.txt 2" chroot $DIR /dbench -c client.txt 2 RC=$? - rm -rf $DIR/dbench $TGT $DIR$LIB71 + rm -rf $DIR/dbench $DIR/lib $DIR/lib64 return $RC } @@ -2917,12 +3018,18 @@ run_test 71 "Running dbench on lustre (don't segment fault) ====" test_72() { # bug 5695 - Test that on 2.6 remove_suid works properly check_kernel_version 43 || return 0 [ "$RUNAS_ID" = "$UID" ] && skip "RUNAS_ID = UID = $UID -- skipping" && return + + # Check that testing environment is properly set up. Skip if not + FAIL_ON_ERROR=false check_runas_id_ret $RUNAS_ID $RUNAS || { + skip "User $RUNAS_ID does not exist - skipping" + return 0 + } # We had better clear the $DIR to get enough space for dd rm -rf $DIR/* touch $DIR/f72 chmod 777 $DIR/f72 chmod ug+s $DIR/f72 - $RUNAS -u $(($RUNAS_ID + 1)) dd if=/dev/zero of=$DIR/f72 bs=512 count=1 || error + $RUNAS dd if=/dev/zero of=$DIR/f72 bs=512 count=1 || error # See if we are still setuid/sgid test -u $DIR/f72 -o -g $DIR/f72 && error "S/gid is not dropped on write" # Now test that MDS is updated too @@ -2936,15 +3043,13 @@ run_test 72 "Test that remove suid works properly (bug5695) ====" test_73() { mkdir $DIR/d73-1 mkdir $DIR/d73-2 - multiop $DIR/d73-1/f73-1 O_c & + multiop_bg_pause $DIR/d73-1/f73-1 O_c || return 1 pid1=$! - #give multiop a chance to open - usleep 500 - echo 0x80000129 > /proc/sys/lustre/fail_loc + lctl set_param fail_loc=0x80000129 multiop $DIR/d73-1/f73-2 Oc & sleep 1 - echo 0 > /proc/sys/lustre/fail_loc + lctl set_param fail_loc=0 multiop $DIR/d73-2/f73-3 Oc & pid3=$! @@ -2968,10 +3073,10 @@ test_74a() { # bug 6149, 6184 # very important to OR with OBD_FAIL_ONCE (0x80000000) -- otherwise it # will spin in a tight reconnection loop touch $DIR/f74a - sysctl -w lustre.fail_loc=0x8000030e + lctl set_param fail_loc=0x8000030e # get any lock that won't be difficult - lookup works. ls $DIR/f74a - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 true } run_test 74a "ldlm_enqueue freed-export error path, ls (shouldn't LBUG)" @@ -2981,10 +3086,10 @@ test_74b() { # bug 13310 # # very important to OR with OBD_FAIL_ONCE (0x80000000) -- otherwise it # will spin in a tight reconnection loop - sysctl -w lustre.fail_loc=0x8000030e + lctl set_param fail_loc=0x8000030e # get a "difficult" lock touch $DIR/f74b - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 true } run_test 74b "ldlm_enqueue freed-export error path, touch (shouldn't LBUG)" @@ -3126,7 +3231,9 @@ test_76() { # bug 1443 [ $DETH -eq 0 ] && skip "No _iget." && return 0 BEFORE_INODES=`num_inodes` echo "before inodes: $BEFORE_INODES" - for i in `seq 1000`; do + local COUNT=1000 + [ "$SLOW" = "no" ] && COUNT=100 + for i in `seq $COUNT`; do touch $DIR/$tfile rm -f $DIR/$tfile done @@ -3141,14 +3248,29 @@ run_test 76 "destroy duplicate inodes in client inode cache ====" export ORIG_CSUM="" set_checksums() { - [ "$ORIG_CSUM" ]||ORIG_CSUM=`cat $LPROC/llite/*/checksum_pages|head -n1` - for f in $LPROC/llite/*/checksum_pages; do - echo $1 >> $f - done - + # Note: in sptlrpc modes which enable its own bulk checksum, the + # original crc32_le bulk checksum will be automatically disabled, + # and the OBD_FAIL_OSC_CHECKSUM_SEND/OBD_FAIL_OSC_CHECKSUM_RECEIVE + # will be checked by sptlrpc code against sptlrpc bulk checksum. + # In this case set_checksums() will not be no-op, because sptlrpc + # bulk checksum will be enabled all through the test. + + [ "$ORIG_CSUM" ] || ORIG_CSUM=`lctl get_param -n osc.*.checksums | head -n1` + lctl set_param -n osc.*.checksums $1 return 0 } +export ORIG_CSUM_TYPE="" +CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"} +set_checksum_type() +{ + [ "$ORIG_CSUM_TYPE" ] || \ + ORIG_CSUM_TYPE=`lctl get_param -n osc/*osc-[^mM]*/checksum_type | + sed 's/.*\[\(.*\)\].*/\1/g' | head -n1` + lctl set_param -n osc.*osc-[^mM]*.checksum_type $1 + log "set checksum type to $1" + return 0 +} F77_TMP=$TMP/f77-temp F77SZ=8 setup_f77() { @@ -3167,34 +3289,38 @@ run_test 77a "normal checksum read/write operation =============" test_77b() { # bug 10889 [ ! -f $F77_TMP ] && setup_f77 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x80000409 + lctl set_param fail_loc=0x80000409 set_checksums 1 dd if=$F77_TMP of=$DIR/f77b bs=1M count=$F77SZ conv=sync || \ error "dd error: $?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77b "checksum error on client write ====================" test_77c() { # bug 10889 [ ! -f $DIR/f77b ] && skip "requires 77b - skipping" && return - cancel_lru_locks osc - #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 - sysctl -w lustre.fail_loc=0x80000408 set_checksums 1 - cmp $F77_TMP $DIR/f77b || error "file compare failed" - sysctl -w lustre.fail_loc=0 + for algo in $CKSUM_TYPES; do + cancel_lru_locks osc + set_checksum_type $algo + #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 + lctl set_param fail_loc=0x80000408 + cmp $F77_TMP $DIR/f77b || error "file compare failed" + lctl set_param fail_loc=0 + done set_checksums 0 + set_checksum_type $ORIG_CSUM_TYPE } run_test 77c "checksum error on client read ===================" test_77d() { # bug 10889 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x80000409 + lctl set_param fail_loc=0x80000409 set_checksums 1 directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ error "direct write: rc=$?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77d "checksum error on OST direct write ===============" @@ -3202,55 +3328,88 @@ run_test 77d "checksum error on OST direct write ===============" test_77e() { # bug 10889 [ ! -f $DIR/f77 ] && skip "requires 77d - skipping" && return #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 - sysctl -w lustre.fail_loc=0x80000408 + lctl set_param fail_loc=0x80000408 set_checksums 1 cancel_lru_locks osc directio read $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ error "direct read: rc=$?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77e "checksum error on OST direct read ================" test_77f() { # bug 10889 - #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x409 set_checksums 1 - directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \ - error "direct write succeeded" - sysctl -w lustre.fail_loc=0 + for algo in $CKSUM_TYPES; do + cancel_lru_locks osc + set_checksum_type $algo + #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 + lctl set_param fail_loc=0x409 + directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \ + error "direct write succeeded" + lctl set_param fail_loc=0 + done + set_checksum_type $ORIG_CSUM_TYPE set_checksums 0 } run_test 77f "repeat checksum error on write (expect error) ====" test_77g() { # bug 10889 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ + [ $(lctl get_param -n devices | grep -c obdfilter) -eq 0 ] && \ skip "remote OST" && return [ ! -f $F77_TMP ] && setup_f77 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a - sysctl -w lustre.fail_loc=0x8000021a + lctl set_param fail_loc=0x8000021a set_checksums 1 dd if=$F77_TMP of=$DIR/f77 bs=1M count=$F77SZ || \ error "write error: rc=$?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77g "checksum error on OST write ======================" test_77h() { # bug 10889 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ + [ $(lctl get_param -n devices | grep -c obdfilter) -eq 0 ] && \ skip "remote OST" && return [ ! -f $DIR/f77 ] && skip "requires 77g - skipping" && return cancel_lru_locks osc #define OBD_FAIL_OST_CHECKSUM_SEND 0x21b - sysctl -w lustre.fail_loc=0x8000021b + lctl set_param fail_loc=0x8000021b set_checksums 1 cmp $F77_TMP $DIR/f77 || error "file compare failed" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77h "checksum error on OST read =======================" +test_77i() { # bug 13805 + #define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b + lctl set_param fail_loc=0x40b + remount_client $MOUNT + lctl set_param fail_loc=0 + for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do + PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'` + [ "$algo" = "crc32" ] || error "algo set to $algo instead of crc32" + done + remount_client $MOUNT +} +run_test 77i "client not supporting OSD_CONNECT_CKSUM ==========" + +test_77j() { # bug 13805 + #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c + lctl set_param fail_loc=0x40c + remount_client $MOUNT + lctl set_param fail_loc=0 + for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do + PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'` + [ "$algo" = "adler" ] || error "algo set to $algo instead of adler" + done + remount_client $MOUNT +} +run_test 77j "client only supporting ADLER32 ====================" + [ "$ORIG_CSUM" ] && set_checksums $ORIG_CSUM || true rm -f $F77_TMP unset F77_TMP @@ -3258,16 +3417,31 @@ unset F77_TMP test_78() { # bug 10901 NSEQ=5 F78SIZE=$(($(awk '/MemFree:/ { print $2 }' /proc/meminfo) / 1024)) + echo "MemFree: $F78SIZE, Max file size: $MAXFREE" + MEMTOTAL=$(($(awk '/MemTotal:/ { print $2 }' /proc/meminfo) / 1024)) + echo "MemTotal: $MEMTOTAL" +# reserve 256MB of memory for the kernel and other running processes, +# and then take 1/2 of the remaining memory for the read/write buffers. + MEMTOTAL=$(((MEMTOTAL - 256 ) / 2)) + echo "Mem to use for directio: $MEMTOTAL" + [ $F78SIZE -gt $MEMTOTAL ] && F78SIZE=$MEMTOTAL [ $F78SIZE -gt 512 ] && F78SIZE=512 [ $F78SIZE -gt $((MAXFREE / 1024)) ] && F78SIZE=$((MAXFREE / 1024)) SMALLESTOST=`lfs df $DIR |grep OST | awk '{print $4}' |sort -n |head -1` - [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024)) ] && \ - F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024)) - $SETSTRIPE $DIR/$tfile 0 -1 -1 || error "setstripe failed" + echo "Smallest OST: $SMALLESTOST" + [ $SMALLESTOST -lt 10240 ] && \ + skip "too small OSTSIZE, useless to run large O_DIRECT test" && return 0 + + [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 5)) ] && \ + F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 5)) + [ "$SLOW" = "no" ] && NSEQ=1 && [ $F78SIZE -gt 32 ] && F78SIZE=32 + echo "File size: $F78SIZE" + $SETSTRIPE $DIR/$tfile -c -1 || error "setstripe failed" for i in `seq 1 $NSEQ` do + FSIZE=$(($F78SIZE / ($NSEQ - $i + 1))) echo directIO rdwr round $i of $NSEQ - $DIRECTIO rdwr $DIR/$tfile 0 $F78SIZE 1048576 || error "rdwr failed" + $DIRECTIO rdwr $DIR/$tfile 0 $FSIZE 1048576||error "rdwr failed" done rm -f $DIR/$tfile @@ -3275,17 +3449,14 @@ test_78() { # bug 10901 run_test 78 "handle large O_DIRECT writes correctly ============" test_79() { # bug 12743 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && + [ $(lctl get_param -n devices | grep -c obdfilter) -eq 0 ] && skip "skipping test for remote OST" && return wait_delete_completed - BKTOTAL=`awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \ - $LPROC/obdfilter/*/kbytestotal` - BKFREE=`awk 'BEGIN{free=0}; {free+=$1}; END{print free}' \ - $LPROC/obdfilter/*/kbytesfree` - BKAVAIL=`awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}' \ - $LPROC/obdfilter/*/kbytesavail` + BKTOTAL=`lctl get_param -n obdfilter.*.kbytestotal | awk 'BEGIN{total=0}; {total+=$1}; END{print total}'` + BKFREE=`lctl get_param -n obdfilter.*.kbytesfree | awk 'BEGIN{free=0}; {free+=$1}; END{print free}'` + BKAVAIL=`lctl get_param -n obdfilter.*.kbytesavail | awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}'` STRING=`df -P $MOUNT | tail -n 1 | awk '{print $2","$3","$4}'` DFTOTAL=`echo $STRING | cut -d, -f1` DFUSED=`echo $STRING | cut -d, -f2` @@ -3309,6 +3480,20 @@ test_79() { # bug 12743 } run_test 79 "df report consistency check =======================" +test_80() { # bug 10718 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1M + sync; sleep 1; sync + BEFORE=`date +%s` + cancel_lru_locks OSC + AFTER=`date +%s` + DIFF=$((AFTER-BEFORE)) + if [ $DIFF -gt 1 ] ; then + error "elapsed for 1M@1T = $DIFF" + fi + true +} +run_test 80 "Page eviction is equally fast at high offsets too ====" + # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS # error message with a corrupt path string (CVS bug) and panic. @@ -3402,11 +3587,9 @@ function get_named_value() done } -export CACHE_MAX=`cat $LPROC/llite/*/max_cached_mb | head -n 1` +export CACHE_MAX=`lctl get_param -n llite.*.max_cached_mb | head -n 1` cleanup_101() { - for s in $LPROC/llite/*/max_cached_mb; do - echo $CACHE_MAX > $s - done + lctl set_param -n llite.*.max_cached_mb $CACHE_MAX trap 0 } @@ -3417,14 +3600,10 @@ test_101() { [ "$CPU" = "UML" ] && nreads=1000 local cache_limit=32 - for s in $LPROC/osc/*-osc*/rpc_stats; do - echo 0 > $s - done + lctl set_param -n osc.*-osc*.rpc_stats 0 trap cleanup_101 EXIT - for s in $LPROC/llite/*; do - echo 0 > $s/read_ahead_stats - echo $cache_limit > $s/max_cached_mb - done + lctl set_param -n llite.*.read_ahead_stats 0 + lctl set_param -n llite.*.max_cached_mb $cache_limit # # randomly read 10000 of 64K chunks from file 3x 32MB in size @@ -3433,18 +3612,14 @@ test_101() { $RANDOM_READS -f $DIR/$tfile -s$((cache_limit * 3192 * 1024)) -b65536 -C -n$nreads -t 180 discard=0 - for s in $LPROC/llite/*; do - discard=$(($discard + $(cat $s/read_ahead_stats | get_named_value 'read but discarded'))) + for s in `lctl get_param -n llite.*.read_ahead_stats | get_named_value 'read but discarded'`; do + discard=$(($discard + $s)) done cleanup_101 if [ $(($discard * 10)) -gt $nreads ] ;then - for s in $LPROC/osc/*-osc*/rpc_stats; do - echo $s; cat $s - done - for s in $LPROC/llite/*/read_ahead_stats; do - echo $s; cat $s - done + lctl get_param osc.*-osc*.rpc_stats + lctl get_param llite.*.read_ahead_stats error "too many ($discard) discarded pages" fi rm -f $DIR/$tfile || true @@ -3461,7 +3636,7 @@ setup_test102() { trap cleanup_test102 EXIT cd $DIR - $SETSTRIPE $tdir $STRIPE_SIZE $STRIPE_OFFSET $STRIPE_COUNT + $SETSTRIPE $tdir -s $STRIPE_SIZE -i $STRIPE_OFFSET -c $STRIPE_COUNT cd $DIR/$tdir for num in 1 2 3 4 do @@ -3471,12 +3646,12 @@ setup_test102() { do local stripe_size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" - $SETSTRIPE $file $stripe_size $offset $count + $SETSTRIPE $file -s $stripe_size -i $offset -c $count done done done - cd .. + cd $DIR star -c f=$TMP/f102.tar $tdir SETUP_TEST102=yes } @@ -3496,7 +3671,7 @@ test_102a() { touch $testfile [ "$UID" != 0 ] && skip "must run as root" && return - [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && skip "must have user_xattr" && return + [ -z "`lctl get_param -n mdc.*-mdc-*.connect_flags | grep xattr`" ] && skip "must have user_xattr" && return [ -z "$(which setfattr 2>/dev/null)" ] && skip "could not find setfattr" && return @@ -3543,7 +3718,7 @@ test_102b() { echo "get/set/list trusted.lov xattr ..." [ "$OSTCOUNT" -lt "2" ] && skip "skipping 2-stripe test" && return local testfile=$DIR/$tfile - $SETSTRIPE $testfile 65536 1 2 + $SETSTRIPE $testfile -s 65536 -i 1 -c 2 getfattr -d -m "^trusted" $testfile 2> /dev/null | \ grep "trusted.lov" || error "can't get trusted.lov from $testfile" @@ -3569,7 +3744,7 @@ test_102c() { mkdir -p $DIR/$tdir chown $RUNAS_ID $DIR/$tdir local testfile=$DIR/$tdir/$tfile - $RUNAS $SETSTRIPE $testfile 65536 1 2 + $RUNAS $SETSTRIPE $testfile -s 65536 -i 1 -c 2 $RUNAS getfattr -d -m "^lustre" $testfile 2> /dev/null | \ grep "lustre.lov" || error "can't get lustre.lov from $testfile" @@ -3679,10 +3854,7 @@ run_test 102d "star restore stripe info from tarfile,not keep osts ===========" test_102e() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102e @@ -3695,10 +3867,7 @@ run_test 102e "star restore stripe info from tarfile, keep osts ===========" test_102f() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102f @@ -3712,10 +3881,7 @@ run_test 102f "star copy files, not keep osts ===========" test_102g() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102g @@ -3727,6 +3893,50 @@ test_102g() { } run_test 102g "star copy files, keep osts ===========" +test_102h() { # bug 15777 + [ -z $(lctl get_param -n mdc.*.connect_flags | grep xattr) ] && + skip "must have user_xattr" && return + [ -z "$(which setfattr 2>/dev/null)" ] && + skip "could not find setfattr" && return + + XBIG=trusted.big + XSIZE=1024 + touch $DIR/$tfile + VALUE=datadatadatadatadatadatadatadata + while [ $(echo $VALUE | wc -c) -lt $XSIZE ]; do + VALUE="$VALUE$VALUE" + done + log "save $XBIG on $DIR/$tfile" + setfattr -n $XBIG -v "$VALUE" $DIR/$tfile || + error "saving $XBIG on $DIR/$tfile failed" + ORIG=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + OSIZE=$(echo $ORIG | wc -c) + [ $OSIZE -lt $XSIZE ] && error "set $XBIG too small ($OSIZE < $XSIZE)" + + XSML=trusted.sml + log "save $XSML on $DIR/$tfile" + setfattr -n $XSML -v val $DIR/$tfile || + error "saving $XSML on $DIR/$tfile failed" + NEW=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + if [ "$NEW" != "$ORIG" ]; then + log "orig: $ORIG" + log "new: $NEW" + error "$XBIG different after saving $XSML" + fi + + log "grow $XSML on $DIR/$tfile" + setfattr -n $XSML -v "$VALUE" $DIR/$tfile || + error "growing $XSML on $DIR/$tfile failed" + NEW=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + if [ "$NEW" != "$ORIG" ]; then + log "orig: $ORIG" + log "new: $NEW" + error "$XBIG different after growing $XSML" + fi + log "$XBIG still valid after growing $XSML" +} +run_test 102h "grow xattr from inside inode to external block" + run_acl_subtest() { $LUSTRE/tests/acl/run $LUSTRE/tests/acl/$1.test @@ -3735,10 +3945,16 @@ run_acl_subtest() test_103 () { [ "$UID" != 0 ] && skip "must run as root" && return - [ -z "$(grep acl $LPROC/mdc/*-mdc-*/connect_flags)" ] && skip "must have acl enabled" && return + [ -z "$(lctl get_param -n mdc.*-mdc-*.connect_flags | grep acl)" ] && skip "must have acl enabled" && return [ -z "$(which setfacl 2>/dev/null)" ] && skip "could not find setfacl" && return $GSS && skip "could not run under gss" && return + declare -a identity_old + + for num in `seq $MDSCOUNT`; do + switch_identity $num true || identity_old[$num]=$? + done + SAVE_UMASK=`umask` umask 0022 cd $DIR @@ -3749,9 +3965,8 @@ test_103 () { run_acl_subtest getfacl-noacl || error echo "performing misc..." run_acl_subtest misc || error -# XXX add back permission test when we support supplementary groups. -# echo "performing permissions..." -# run_acl_subtest permissions || error + echo "performing permissions..." + run_acl_subtest permissions || error echo "performing setfacl..." run_acl_subtest setfacl || error @@ -3764,6 +3979,12 @@ test_103 () { cd $SAVE_PWD umask $SAVE_UMASK + + for num in `seq $MDSCOUNT`; do + if [ "${identity_old[$num]}" = 1 ]; then + switch_identity $num false || identity_old[$num]=$? + fi + done } run_test 103 "acl test =========================================" @@ -3776,7 +3997,7 @@ test_104() { lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" - OSC=`awk '/-osc-/ {print $4}' $LPROC/devices | head -n 1` + OSC=`lctl get_param -n devices | awk '/-osc-/ {print $4}' | head -n 1` lctl --device %$OSC deactivate lfs df || error "lfs df with deactivated OSC failed" lctl --device %$OSC recover @@ -3819,7 +4040,7 @@ test_105c() { run_test 105c "lockf when mounted without -o flock test ========" test_106() { #bug 10921 - mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed" + mkdir -p $DIR/$tdir $DIR/$tdir && error "exec $DIR/$tdir succeeded" chmod 777 $DIR/$tdir || error "chmod $DIR/$tdir failed" } @@ -3832,8 +4053,8 @@ test_107() { sleep 60 & SLEEPPID=$! - file=`cat /proc/sys/kernel/core_pattern` - core_pid=`cat /proc/sys/kernel/core_uses_pid` + file=`sysctl -n kernel.core_pattern` + core_pid=`sysctl -n kernel.core_uses_pid` [ $core_pid -eq 1 ] && file=$file.$SLEEPPID rm -f $file sleep 1 @@ -3891,8 +4112,9 @@ test_115() { run_test 115 "verify dynamic thread creation====================" free_min_max () { - AVAIL=($(cat $LPROC/osc/*[oO][sS][cC]-[^M]*/kbytesavail)) - echo OST kbytes available: ${AVAIL[@]} + wait_delete_completed + AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail)) + echo OST kbytes available: ${AVAIL[@]} MAXI=0; MAXV=${AVAIL[0]} MINI=0; MINV=${AVAIL[0]} for ((i = 0; i < ${#AVAIL[@]}; i++)); do @@ -3913,8 +4135,8 @@ test_116() { remote_mds && skip "remote MDS" && return echo -n "Free space priority " - cat $LPROC/lov/*-clilov-*/qos_prio_free - DELAY=$(cat $LPROC/lov/*-clilov-*/qos_maxage | head -1 | awk '{print $1}') + lctl get_param -n lov.*-clilov-*.qos_prio_free + DELAY=$(lctl get_param -n lov.*-clilov-*.qos_maxage | head -1 | awk '{print $1}') declare -a AVAIL free_min_max [ $MINV -gt 960000 ] && skip "too much free space in OST$MINI, skip" &&\ @@ -3925,7 +4147,7 @@ test_116() { declare -i FILL FILL=$(($MINV / 4)) echo "Filling 25% remaining space in OST${MINI} with ${FILL}Kb" - $SETSTRIPE $DIR/$tdir/OST${MINI} 0 $MINI 1 + $SETSTRIPE $DIR/$tdir/OST${MINI} -i $MINI -c 1 i=0 while [ $FILL -gt 0 ]; do i=$(($i + 1)) @@ -3954,7 +4176,7 @@ test_116() { # now fill using QOS echo writing a bunch of files to QOS-assigned OSTs - $SETSTRIPE $DIR/$tdir 0 -1 1 + $SETSTRIPE $DIR/$tdir -c 1 i=0 while [ $FILL -gt 0 ]; do i=$(($i + 1)) @@ -3978,13 +4200,15 @@ test_116() { [ $DIFF -gt 0 ] && echo "Wrote $(($DIFF2 * 100 / $DIFF - 100))% more data to larger OST $MAXI1" # Figure out which files were written where - UUID=$(awk '/'$MINI1': / {print $2; exit}' $LPROC/lov/${FSNAME}-clilov-*/target_obd) + UUID=$(lctl get_param -n lov.${FSNAME}-clilov-*.target_obd | + awk '/'$MINI1': / {print $2; exit}') echo $UUID - MINC=$($LFS getstripe --obd $UUID $DIR/$tdir | wc -l) + MINC=$($GETSTRIPE --obd $UUID $DIR/$tdir | wc -l) echo "$MINC files created on smaller OST $MINI1" - UUID=$(awk '/'$MAXI1': / {print $2; exit}' $LPROC/lov/${FSNAME}-clilov-*/target_obd) + UUID=$(lctl get_param -n lov.${FSNAME}-clilov-*.target_obd | + awk '/'$MAXI1': / {print $2; exit}') echo $UUID - MAXC=$($LFS getstripe --obd $UUID $DIR/$tdir | wc -l) + MAXC=$($GETSTRIPE --obd $UUID $DIR/$tdir | wc -l) echo "$MAXC files created on larger OST $MAXI1" [ $MINC -gt 0 ] && echo "Wrote $(($MAXC * 100 / $MINC - 100))% more files to larger OST $MAXI1" [ $MAXC -gt $MINC ] || error "stripe QOS didn't balance free space" @@ -3995,13 +4219,23 @@ test_117() # bug 10891 { dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 #define OBD_FAIL_OST_SETATTR_CREDITS 0x21e - sysctl -w lustre.fail_loc=0x21e + lctl set_param fail_loc=0x21e > $DIR/$tfile || error "truncate failed" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 echo "Truncate succeeded." } run_test 117 "verify fsfilt_extend ==========" +export OLD_RESENDCOUNT="" +set_resend_count () { + local PROC_RESENDCOUNT="osc.${FSNAME}-OST*-osc-*.resend_count" + OLD_RESENDCOUNT=$(lctl get_param -n $PROC_RESENDCOUNT | head -1) + lctl set_param -n $PROC_RESENDCOUNT $1 + echo resend_count is set to $(lctl get_param -n $PROC_RESENDCOUNT) +} + +[ "$SLOW" = "no" ] && set_resend_count 4 # for reduce test_118* time (bug 14842) + # Reset async IO behavior after error case reset_async() { FILE=$DIR/reset_async @@ -4018,8 +4252,8 @@ test_118a() #bug 11710 reset_async multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" @@ -4030,15 +4264,18 @@ run_test 118a "verify O_SYNC works ==========" test_118b() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_ENOENT 0x217 - sysctl -w lustre.fail_loc=0x217 + set_nodes_failloc "$(osts_nodes)" 0x217 multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - sysctl -w lustre.fail_loc=0 - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + set_nodes_failloc "$(osts_nodes)" 0 + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" @@ -4063,10 +4300,12 @@ run_test 118b "Reclaim dirty pages on fatal error ==========" test_118c() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_EROFS 0x216 - sysctl -w lustre.fail_loc=0x216 + set_nodes_failloc "$(osts_nodes)" 0x216 # multiop should block due to fsync until pages are written multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & @@ -4077,20 +4316,22 @@ test_118c() error "Multiop failed to block on fsync, pid=$MULTIPID" fi - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $WRITEBACK -eq 0 ]]; then error "No page in writeback, writeback=$WRITEBACK" fi - sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 wait $MULTIPID RC=$? if [[ $RC -ne 0 ]]; then error "Multiop fsync failed, rc=$RC" fi - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4103,10 +4344,12 @@ run_test 118c "Fsync blocks on EROFS until dirty pages are flushed ==========" test_118d() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_PAUSE_BULK - sysctl -w lustre.fail_loc=0x214 + set_nodes_failloc "$(osts_nodes)" 0x214 # multiop should block due to fsync until pages are written multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & MULTIPID=$! @@ -4116,15 +4359,18 @@ test_118d() error "Multiop failed to block on fsync, pid=$MULTIPID" fi - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $WRITEBACK -eq 0 ]]; then error "No page in writeback, writeback=$WRITEBACK" fi wait $MULTIPID || error "Multiop fsync failed, rc=$?" + set_nodes_failloc "$(osts_nodes)" 0 - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4139,19 +4385,21 @@ test_118f() { reset_async #define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a - sysctl -w lustre.fail_loc=0x8000040a + lctl set_param fail_loc=0x8000040a # Should simulate EINVAL error which is fatal multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + + lctl set_param fail_loc=0x0 + + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4169,23 +4417,24 @@ test_118f() { run_test 118f "Simulate unrecoverable OSC side error ==========" test_118g() { - reset_async + reset_async #define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 - sysctl -w lustre.fail_loc=0x406 + lctl set_param fail_loc=0x406 # simulate local -ENOMEM - multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c - RC=$? + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c + RC=$? - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4197,28 +4446,31 @@ test_118g() { rm -f $DIR/$tfile echo "No pages locked after fsync" - reset_async + reset_async return 0 } run_test 118g "Don't stay in wait if we got local -ENOMEM ==========" test_118h() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e - sysctl -w lustre.fail_loc=0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4235,16 +4487,18 @@ test_118h() { run_test 118h "Verify timeout in handling recoverables errors ==========" test_118i() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e - sysctl -w lustre.fail_loc=0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & PID=$! sleep 5 - sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 wait $PID RC=$? @@ -4252,9 +4506,9 @@ test_118i() { error "got error, but should be not, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4271,22 +4525,24 @@ test_118i() { run_test 118i "Fix error before timeout in recoverable error ==========" test_118j() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 - sysctl -w lustre.fail_loc=0x220 + set_nodes_failloc "$(osts_nodes)" 0x220 # return -EIO from OST multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - sysctl -w lustre.fail_loc=0x0 + set_nodes_failloc "$(osts_nodes)" 0x0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4303,6 +4559,27 @@ test_118j() { } run_test 118j "Simulate unrecoverable OST side error ==========" +test_118k() +{ + #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e + mkdir -p $DIR/$tdir + + for ((i=0;i<10;i++)); do + (dd if=/dev/zero of=$DIR/$tdir/$tfile-$i bs=1M count=10 || \ + error "dd to $DIR/$tdir/$tfile-$i failed" )& + SLEEPPID=$! + sleep 0.500s + kill $SLEEPPID + wait $SLEEPPID + done + + set_nodes_failloc "$(osts_nodes)" 0 +} +run_test 118k "bio alloc -ENOMEM and IO TERM handling =========" + +[ "$SLOW" = "no" ] && [ -n "$OLD_RESENDCOUNT" ] && set_resend_count $OLD_RESENDCOUNT + test_119a() # bug 11737 { BSIZE=$((512 * 1024)) @@ -4323,181 +4600,140 @@ test_119b() # bug 11737 { [ "$OSTCOUNT" -lt "2" ] && skip "skipping 2-stripe test" && return - lfs setstripe $DIR/$tfile 0 -1 2 + $SETSTRIPE $DIR/$tfile -c 2 dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1 || error "dd failed" sync multiop $DIR/$tfile oO_RDONLY:O_DIRECT:r$((2048 * 1024)) || \ error "direct read failed" + rm -f $DIR/$tfile } run_test 119b "Sparse directIO read must return actual read amount" -LDLM_POOL_CTL_RECALC=1 -LDLM_POOL_CTL_SHRINK=2 - -disable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool() { - disable_pool_shrink $1 - disable_pool_recalc $1 -} - -enable_pool() { - enable_pool_shrink $1 - enable_pool_recalc $1 -} - -lru_resize_enable() -{ - enable_pool osc - enable_pool "filter-$FSNAME" - enable_pool mdc - enable_pool "mds-$FSNAME" -} - -lru_resize_disable() +test_119c() # bug 13099 { - disable_pool osc - disable_pool "filter-$FSNAME" - disable_pool mdc - disable_pool "mds-$FSNAME" + BSIZE=1048576 + directio write $DIR/$tfile 3 1 $BSIZE || error "direct write failed" + directio readhole $DIR/$tfile 0 2 $BSIZE || error "reading hole failed" + rm -f $DIR/$tfile } +run_test 119c "Testing for direct read hitting hole" test_120a() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable - mkdir $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` mkdir $DIR/$tdir/d1 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120a "Early Lock Cancel: mkdir test" test_120b() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable - mkdir $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` touch $DIR/$tdir/f1 - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120b "Early Lock Cancel: create test" test_120c() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 touch $DIR/$tdir/d1/f1 cancel_lru_locks mdc stat $DIR/$tdir/d1 $DIR/$tdir/d2 $DIR/$tdir/d1/f1 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` ln $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120c "Early Lock Cancel: link test" test_120d() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc touch $DIR/$tdir cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` chmod a+x $DIR/$tdir - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120d "Early Lock Cancel: setattr test" test_120e() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable - mkdir $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc dd if=/dev/zero of=$DIR/$tdir/f1 count=1 cancel_lru_locks mdc cancel_lru_locks osc dd if=$DIR/$tdir/f1 of=/dev/null stat $DIR/$tdir $DIR/$tdir/f1 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` unlink $DIR/$tdir/f1 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120e "Early Lock Cancel: unlink test" test_120f() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + mkdir -p $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 dd if=/dev/zero of=$DIR/$tdir/d1/f1 count=1 dd if=/dev/zero of=$DIR/$tdir/d2/f2 count=1 @@ -4506,21 +4742,27 @@ test_120f() { dd if=$DIR/$tdir/d1/f1 of=/dev/null dd if=$DIR/$tdir/d2/f2 of=/dev/null stat $DIR/$tdir/d1 $DIR/$tdir/d2 $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` mv $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120f "Early Lock Cancel: rename test" test_120g() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc count=10000 echo create $count files mkdir -p $DIR/$tdir @@ -4528,69 +4770,159 @@ test_120g() { cancel_lru_locks osc t0=`date +%s` - can0=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk0=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can0=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk0=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` createmany -o $DIR/$tdir/f $count sync - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` t1=`date +%s` echo total: $((can1-can0)) cancels, $((blk1-blk0)) blockings echo rm $count files rm -r $DIR/$tdir sync - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` t2=`date +%s` echo total: $count removes in $((t2-t1)) echo total: $((can2-can1)) cancels, $((blk2-blk1)) blockings sleep 2 # wait for commitment of removal - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120g "Early Lock Cancel: performance test" test_121() { #bug #10589 rm -rf $DIR/$tfile - writes=`dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } /out/ {print $1}'` + writes=$(LANG=C dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk -F '+' '/out/ {print $1}') #define OBD_FAIL_LDLM_CANCEL_RACE 0x310 - sysctl -w lustre.fail_loc=0x310 + lctl set_param fail_loc=0x310 cancel_lru_locks osc > /dev/null - reads=`dd if=$DIR/$tfile of=/dev/null 2>&1 | awk 'BEGIN { FS="+" } /in/ {print $1}'` - sysctl -w lustre.fail_loc=0 + reads=$(LANG=C dd if=$DIR/$tfile of=/dev/null 2>&1 | awk -F '+' '/in/ {print $1}') + lctl set_param fail_loc=0 [ "$reads" -eq "$writes" ] || error "read" $reads "blocks, must be" $writes } run_test 121 "read cancel race =========" -test_124a() { - [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ - skip "no lru resize on server" && return 0 +test_123a() { # was test 123, statahead(bug 11401) + if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then + log "testing on UP system. Performance may be not as good as expected." + fi + + remount_client $MOUNT + mkdir -p $DIR/$tdir + error=0 + NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + [ $NUMFREE -gt 100000 ] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) + MULT=10 + for ((i=1, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do + createmany -o $DIR/$tdir/$tfile $j $((i - j)) + + lctl get_param -n llite.*.statahead_max | grep '[0-9]' + cancel_lru_locks mdc + cancel_lru_locks osc + stime=`date +%s` + ls -l $DIR/$tdir > /dev/null + etime=`date +%s` + delta_sa=$((etime - stime)) + log "ls $i files with statahead: $delta_sa sec" + lctl get_param -n llite.*.statahead_stats + + max=`lctl get_param -n llite.*.statahead_max | head -n 1` + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=`date +%s` + ls -l $DIR/$tdir > /dev/null + etime=`date +%s` + delta=$((etime - stime)) + log "ls $i files without statahead: $delta sec" + + lctl set_param llite.*.statahead_max=$max + if [ $delta_sa -gt $(($delta + 2)) ]; then + log "ls $i files is slower with statahead!" + error=1 + fi + + [ $delta -gt 20 ] && break + [ $delta -gt 8 ] && MULT=$((50 / delta)) + [ "$SLOW" = "no" -a $delta -gt 3 ] && break + done + log "ls done" + + stime=`date +%s` + rm -r $DIR/$tdir + sync + etime=`date +%s` + delta=$((etime - stime)) + log "rm -r $DIR/$tdir/: $delta seconds" + log "rm done" + lctl get_param -n llite.*.statahead_stats + # wait for commitment of removal + sleep 2 + [ $error -ne 0 ] && error "statahead is slow!" + return 0 +} +run_test 123a "verify statahead work" + +test_123b () { # statahead(bug 15027) + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 1000 + cancel_lru_locks mdc - lru_resize_enable - NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` + cancel_lru_locks osc + +#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803 + lctl set_param fail_loc=0x80000803 + ls -lR $DIR/$tdir > /dev/null + log "ls done" + lctl set_param fail_loc=0x0 + lctl get_param -n llite.*.statahead_stats + rm -r $DIR/$tdir + sync - # we want to test main pool functionality, that is cancel based on SLV - # this is why shrinkers are disabled - disable_pool_shrink "mds-$FSNAME" - disable_pool_shrink mdc +} +run_test 123b "not panic with network error in statahead enqueue (bug 15027)" +test_124a() { + [ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \ + skip "no lru resize on server" && return 0 NR=2000 mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" - LRU_SIZE=`cat $NSDIR/lru_size` - # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" - for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done + createmany -o $DIR/$tdir/f $NR || + error "failed to create $NR files in $DIR/$tdir" + + cancel_lru_locks mdc + ls -l $DIR/$tdir > /dev/null + + NSDIR="" + LRU_SIZE=0 + for VALUE in `lctl get_param ldlm.namespaces.*mdc-*.lru_size`; do + PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + LRU_SIZE=$(lctl get_param -n $PARAM) + if [ $LRU_SIZE -gt $(default_lru_size) ]; then + NSDIR=$(echo $PARAM | cut -d "." -f1-3) + log "using $(basename $NSDIR) namespace" + break + fi + done - LRU_SIZE_B=`cat $NSDIR/lru_size` - if test $LRU_SIZE -ge $LRU_SIZE_B; then - skip "No cached locks created!" - cat $NSDIR/pool/state + if [ -z "$NSDIR" -o $LRU_SIZE -lt $(default_lru_size) ]; then + skip "Not enough cached locks created!" return 0 fi - LRU_SIZE_B=$((LRU_SIZE_B-LRU_SIZE)) - log "created $LRU_SIZE_B lock(s)" + log "created $LRU_SIZE lock(s)" # we want to sleep 30s to not make test too long SLEEP=30 @@ -4600,91 +4932,209 @@ test_124a() { MAX_HRS=10 # get the pool limit - LIMIT=`cat $NSDIR/pool/limit` + LIMIT=`lctl get_param -n $NSDIR.pool.limit` # calculate lock volume factor taking into account data set size and the # rule that number of locks will be getting smaller durring sleep interval # and we need to additionally enforce LVF to take this into account. # Use $LRU_SIZE_B here to take into account real number of locks created # in the case of CMD, LRU_SIZE_B != $NR in most of cases - LVF=$(($LRU_SIZE_B * $MAX_HRS * 60 * 60)) - log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)" - OLD_LVF=`cat $NSDIR/pool/lock_volume_factor` - echo "$LVF" > $NSDIR/pool/lock_volume_factor + LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) + LRU_SIZE_B=$LRU_SIZE + log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE lock(s)" + OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` + lctl set_param -n $NSDIR.pool.lock_volume_factor $LVF log "sleep for $((SLEEP+SLEEP_ADD))s" sleep $((SLEEP+SLEEP_ADD)) - echo "$OLD_LVF" > $NSDIR/pool/lock_volume_factor - LRU_SIZE_A=`cat $NSDIR/lru_size` + lctl set_param -n $NSDIR.pool.lock_volume_factor $OLD_LVF + LRU_SIZE_A=`lctl get_param -n $NSDIR.lru_size` - [ $LRU_SIZE_B -ge $LRU_SIZE_A ] || { + [ $LRU_SIZE_B -gt $LRU_SIZE_A ] || { error "No locks dropped in "$((SLEEP+SLEEP_ADD))"s. LRU size: $LRU_SIZE_A" - lru_resize_enable unlinkmany $DIR/$tdir/f $NR return } log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in "$((SLEEP+SLEEP_ADD))"s" - lru_resize_enable log "unlink $NR files at $DIR/$tdir" unlinkmany $DIR/$tdir/f $NR } run_test 124a "lru resize =======================================" test_124b() { - [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \ skip "no lru resize on server" && return 0 - cleanup -f || error "failed to unmount" - MOUNTOPT="$MOUNTOPT,nolruresize" - setup - NR=3000 - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + # even for cmd no matter what metadata namespace to use for getting + # the limit, we use appropriate. + LIMIT=`lctl get_param -n ldlm.namespaces.*mdc*.pool.limit` - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize disabled)" + NR=$(($(default_lru_size)*20)) + if [ $NR -gt $LIMIT ]; then + NR=$LIMIT + fi + lru_resize_disable mdc + mkdir -p $DIR/$tdir/disable_lru_resize || + error "failed to create $DIR/$tdir/disable_lru_resize" + + createmany -o $DIR/$tdir/disable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + PID="" + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + wait $PID etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" + log "lru_size = $(lctl get_param -n ldlm.namespaces.*mdc*.lru_size)" + unlinkmany $DIR/$tdir/disable_lru_resize/f $NR - cleanup -f || error "failed to unmount" - MOUNTOPT=`echo $MOUNTOPT | sed "s/nolruresize/lruresize/"` - setup + lru_resize_enable mdc + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" - createmany -o $DIR/$tdir/f $NR - log "doing ls -la $DIR/$tdir 3 times (lru resize enabled)" + createmany -o $DIR/$tdir/enable_lru_resize/f $NR + log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null - ls -la $DIR/$tdir > /dev/null + PID="" + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + wait $PID etime=`date +%s` lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" + log "lru_size = $(lctl get_param -n ldlm.namespaces.*mdc*.lru_size)" - if test $lruresize_delta -gt $nolruresize_delta; then - log "ls -la is $((lruresize_delta - $nolruresize_delta))s slower with lru resize enabled" - elif test $nolruresize_delta -gt $lruresize_delta; then - log "ls -la is $((nolruresize_delta - $lruresize_delta))s faster with lru resize enabled" + if [ $lruresize_delta -gt $nolruresize_delta ]; then + log "ls -la is $(((lruresize_delta - $nolruresize_delta) * 100 / $nolruresize_delta))% slower with lru resize enabled" + elif [ $nolruresize_delta -gt $lruresize_delta ]; then + log "ls -la is $(((nolruresize_delta - $lruresize_delta) * 100 / $nolruresize_delta))% faster with lru resize enabled" else log "lru resize performs the same with no lru resize" fi - - unlinkmany $DIR/$tdir/f $NR + unlinkmany $DIR/$tdir/enable_lru_resize/f $NR } run_test 124b "lru resize (performance test) =======================" test_125() { # 13358 - [ -z "$(grep acl $LPROC/mdc/*-mdc-*/connect_flags)" ] && skip "must have acl enabled" && return + [ -z "$(lctl get_param -n llite.*.client_type | grep local)" ] && skip "must run as local client" && return + [ -z "$(lctl get_param -n mdc.*-mdc-*.connect_flags | grep acl)" ] && skip "must have acl enabled" && return mkdir -p $DIR/d125 || error "mkdir failed" - $SETSTRIPE $DIR/d125 65536 -1 -1 || error "setstripe failed" + $SETSTRIPE $DIR/d125 -s 65536 -c -1 || error "setstripe failed" setfacl -R -m u:bin:rwx $DIR/d125 || error "setfacl $DIR/d125 failed" ls -ld $DIR/d125 || error "cannot access $DIR/d125" } run_test 125 "don't return EPROTO when a dir has a non-default striping and ACLs" +test_126() { # bug 12829/13455 + [ -z "$(lctl get_param -n llite.*.client_type | grep local)" ] && skip "must run as local client" && return + [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return + $RUNAS -u 0 -g 1 touch $DIR/$tfile || error "touch failed" + gid=`ls -n $DIR/$tfile | awk '{print $4}'` + rm -f $DIR/$tfile + [ $gid -eq "1" ] || error "gid is set to" $gid "instead of 1" +} +run_test 126 "check that the fsgid provided by the client is taken into account" + +test_127() { # bug 15521 + $LSTRIPE -i 0 -c 1 $DIR/$tfile + $LCTL set_param osc.*.stats=0 + FSIZE=$((2048 * 1024)) + dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1 + cancel_lru_locks osc + dd if=$DIR/$tfile of=/dev/null bs=$FSIZE + + $LCTL get_param osc.*0000-osc-*.stats | grep samples > $DIR/${tfile}.tmp + while read NAME COUNT SAMP UNIT MIN MAX SUM SUMSQ; do + eval $NAME=$COUNT + echo "got $COUNT $NAME" + + case $NAME in + ost_read|ost_write) + [ $MIN -lt 4096 ] && error "min is too small: $MIN" + [ $MIN -gt $FSIZE ] && error "min is too big: $MIN" + [ $MAX -lt 4096 ] && error "max is too small: $MAX" + [ $MAX -gt $FSIZE ] && error "max is too big: $MAX" + [ $SUM -ne $FSIZE ] && error "sum is wrong: $SUM" + [ $SUMSQ -lt $(((FSIZE /4096) * (4096 * 4096))) ] && + error "sumsquare is too small: $SUMSQ" + [ $SUMSQ -gt $((FSIZE * FSIZE)) ] && + error "sumsquare is too big: $SUMSQ" + ;; + *) ;; + esac + done < $DIR/${tfile}.tmp + + #check that we actually got some stats + [ "$ost_read" ] || error "no read done" + [ "$ost_write" ] || error "no write done" +} +run_test 127 "verify the client stats are sane" + +test_128() { # bug 15212 + touch $DIR/$tfile + $LFS 2>&1 <<-EOF | tee $TMP/$tfile.log + find $DIR/$tfile + find $DIR/$tfile + EOF + + result=$(grep error $TMP/$tfile.log) + rm -f $DIR/$tfile + [ -z "$result" ] || error "consecutive find's under interactive lfs failed" +} +run_test 128 "interactive lfs for 2 consecutive find's" + +test_129() { + [ "$FSTYPE" != "ldiskfs" ] && skip "not needed for FSTYPE=$FSTYPE" && return 0 + + DEV=$(basename $(do_facet mds lctl get_param -n osd.*MDT*.mntdev)) + [ -z "$DEV" ] && error "can't access mds mntdev" + EFBIG=27 + LDPROC=/proc/fs/ldiskfs/$DEV/max_dir_size + MAX=16384 + + do_facet mds "echo $MAX > $LDPROC" + + mkdir -p $DIR/$tdir + + I=0 + J=0 + while [ ! $I -gt $MAX ]; do + multiop $DIR/$tdir/$J Oc + rc=$? + if [ $rc -eq $EFBIG ]; then + do_facet mds "echo 0 >$LDPROC" + echo "return code $rc received as expected" + return 0 + elif [ $rc -ne 0 ]; then + do_facet mds "echo 0 >$LDPROC" + error_exit "return code $rc received instead of expected $EFBIG" + fi + J=$((J+1)) + I=$(stat -c%s "$DIR/$tdir") + done + + error "exceeded dir size limit: $I bytes" + do_facet mds "echo 0 >$LDPROC" +} +run_test 129 "test directory size limit ========================" + + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME @@ -4692,7 +5142,7 @@ HOME=$OLDHOME log "cleanup: ======================================================" check_and_cleanup_lustre if [ "$I_MOUNTED" != "yes" ]; then - sysctl -w lnet.debug="$OLDDEBUG" 2> /dev/null || true + lctl set_param debug="$OLDDEBUG" 2> /dev/null || true fi echo '=========================== finished ==============================='