X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=4ad3f1435f89cb7b91a25d499761565092de6b74;hb=b924164398e939986e20506ab5d004e64f0b004e;hp=caa0a7bfba3941a95f7445b9db3f3788bd68a222;hpb=7925d28ec175f30b09818d425a0886373e79734e;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index caa0a7b..4ad3f14 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,14 +7,12 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 4900 4900 2108 9789 3637 9789 3561 13310 10764 -ALWAYS_EXCEPT=" 27o 27q 42a 42b 42c 42d 45 74b 75 $SANITY_EXCEPT" +# bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 12653 12653 5188 10764 16260 +ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68 75 119d $SANITY_EXCEPT" # bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443 #ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" - # Tests that fail on uml CPU=`awk '/model/ {print $4}' /proc/cpuinfo` # buffer i/o errs sock spc runas @@ -29,8 +27,8 @@ case `uname -r` in *) error "unsupported kernel" ;; esac -SRCDIR=`dirname $0` -export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin +SRCDIR=$(cd $(dirname $0); echo $PWD) +export PATH=$PATH:/sbin TMP=${TMP:-/tmp} @@ -47,7 +45,7 @@ LCTL=${LCTL:-lctl} MCREATE=${MCREATE:-mcreate} OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} -RANDOM_READS=${RANDOM_READS:-"random-reads"} +READS=${READS:-"reads"} TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} MUNLINK=${MUNLINK:-munlink} @@ -63,21 +61,6 @@ STRIPES_PER_OBJ=-1 CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -if [ $UID -ne 0 ]; then - echo "Warning: running as non-root uid $UID" - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} - - # $RUNAS_ID may get set incorrectly somewhere else - if [ $RUNAS_ID -eq 0 ]; then - echo "Error: \$RUNAS_ID set to 0, but \$UID is also 0!" - exit 1 - fi -fi - export NAME=${NAME:-local} SAVE_PWD=$PWD @@ -85,21 +68,19 @@ SAVE_PWD=$PWD CLEANUP=${CLEANUP:-:} SETUP=${SETUP:-:} TRACE=${TRACE:-""} -LUSTRE=${LUSTRE:-`dirname $0`/..} +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/local.sh} +. ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} -if $GSS_KRB5; then - $RUNAS krb5_login.sh || exit 1 - $RUNAS -u $(($RUNAS_ID + 1)) krb5_login.sh || exit 1 -fi +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false cleanup() { echo -n "cln.." 
+ pgrep ll_sa > /dev/null && { echo "There are ll_sa thread not exit!"; exit 20; } cleanupall ${FORCE} $* || { echo "FAILed to clean up"; exit 20; } } setup() { @@ -110,10 +91,9 @@ setup() { } check_kernel_version() { - VERSION_FILE=$LPROC/version + VERSION_FILE=version WANT_VER=$1 - [ ! -f $VERSION_FILE ] && echo "can't find kernel version" && return 1 - GOT_VER=$(awk '/kernel:/ {print $2}' $VERSION_FILE) + GOT_VER=$(lctl get_param -n $VERSION_FILE | awk '/kernel:/ {print $2}') [ $GOT_VER == "patchless" ] && return 0 [ $GOT_VER -ge $WANT_VER ] && return 0 log "test needs at least kernel version $WANT_VER, running $GOT_VER" @@ -130,24 +110,27 @@ fi check_and_setup_lustre DIR=${DIR:-$MOUNT} -[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 +assert_DIR -LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` -OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` -STRIPECOUNT=`cat $LPROC/lov/$LOVNAME/stripecount` -STRIPESIZE=`cat $LPROC/lov/$LOVNAME/stripesize` -ORIGFREE=`cat $LPROC/lov/$LOVNAME/kbytesavail` +LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` +OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` +STRIPECOUNT=`lctl get_param -n lov.$LOVNAME.stripecount` +STRIPESIZE=`lctl get_param -n lov.$LOVNAME.stripesize` +ORIGFREE=`lctl get_param -n lov.$LOVNAME.kbytesavail` MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo -rm -rf $DIR/[Rdfs][1-9]* +rm -rf $DIR/[Rdfs][0-9]* + +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" check_runas_id $RUNAS_ID $RUNAS build_test_filter -if [ "${ONLY}" = "MOUNT" ] ; then +if [ "${ONLY}" = "MOUNT" ] ; then echo "Lustre is up, please go on" exit fi @@ -160,6 +143,8 @@ echo # add a newline after mke2fs. 
umask 077 +OLDDEBUG="`lctl get_param -n debug 2> /dev/null`" +lctl set_param debug=-1 2> /dev/null || true test_0() { touch $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || error @@ -174,6 +159,12 @@ test_0b() { } run_test 0b "chmod 0755 $DIR =============================" +test_0c() { + $LCTL get_param mdc.*.import | grep "state: FULL" || error "import not FULL" + $LCTL get_param mdc.*.import | grep "target: $FSNAME-MDT" || error "bad target" +} +run_test 0c "check import proc =============================" + test_1a() { mkdir $DIR/d1 mkdir $DIR/d1/d2 @@ -473,6 +464,14 @@ test_17f() { } run_test 17f "symlinks: long and very long symlink name ========================" +test_17g() { + mkdir -p $DIR/$tdir + LONGSYMLINK="$(dd if=/dev/zero bs=4095 count=1 | tr '\0' 'x')" + ln -s $LONGSYMLINK $DIR/$tdir/$tfile + ls -l $DIR/$tdir +} +run_test 17g "symlinks: really long symlink name ===============================" + test_18() { touch $DIR/f ls $DIR || error @@ -530,6 +529,7 @@ run_test 21 "write to dangling link ============================" test_22() { WDIR=$DIR/$tdir + mkdir -p $WDIR chown $RUNAS_ID $WDIR (cd $WDIR || error "cd $WDIR failed"; $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \ @@ -585,7 +585,7 @@ test_24d() { run_test 24d "mkdir .../R4/{f,g}; rename .../R4/f .../R4/g =====" test_24e() { - echo '-- cross directory renames --' + echo '-- cross directory renames --' mkdir $DIR/R5{a,b} touch $DIR/R5a/f mv $DIR/R5a/f $DIR/R5b/g @@ -640,7 +640,7 @@ test_24j() { $CHECKSTAT -a $DIR/R10/f || error $CHECKSTAT -a $DIR/R10/g || error } -run_test 24j "source does not exist ============================" +run_test 24j "source does not exist ============================" test_24k() { mkdir $DIR/R11a $DIR/R11a/d @@ -700,9 +700,8 @@ run_test 24p "mkdir .../R12{a,b}; rename .../R12a .../R12b" test_24q() { mkdir $DIR/R13{a,b} DIRINO=`ls -lid $DIR/R13a | awk '{ print $1 }'` - multiop $DIR/R13b D_c & + multiop_bg_pause $DIR/R13b D_c || return 1 MULTIPID=$! 
- usleep 500 mrename $DIR/R13a $DIR/R13b $CHECKSTAT -a $DIR/R13a || error @@ -797,6 +796,7 @@ run_test 26e "unlink multiple component recursive symlink ======" # recursive symlinks (bug 7022) test_26f() { + mkdir -p $DIR/$tdir mkdir $DIR/$tdir/$tfile || error "mkdir $DIR/$tdir/$tfile failed" cd $DIR/$tdir/$tfile || error "cd $DIR/$tdir/$tfile failed" mkdir -p lndir/bar1 || error "mkdir lndir/bar1 failed" @@ -837,7 +837,7 @@ run_test 27c "create two stripe file f01 =======================" test_27d() { mkdir -p $DIR/d27 - $SETSTRIPE $DIR/d27/fdef 0 -1 0 || error "lstripe failed" + $SETSTRIPE -c0 -i-1 -s0 $DIR/d27/fdef || error "lstripe failed" $CHECKSTAT -t file $DIR/d27/fdef || error "checkstat failed" dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 || error } @@ -933,25 +933,25 @@ reset_enospc() { [ "$1" ] && FAIL_LOC=$1 || FAIL_LOC=0 mkdir -p $DIR/d27/nospc rmdir $DIR/d27/nospc - sysctl -w lustre.fail_loc=$FAIL_LOC + do_nodes $(comma_list $(osts_nodes)) lctl set_param fail_loc=$FAIL_LOC } exhaust_precreations() { OSTIDX=$1 - OST=$(grep ${OSTIDX}": " $LPROC/lov/${LOVNAME}/target_obd | \ + + OST=$(lfs osts | grep ${OSTIDX}": " | \ awk '{print $2}' | sed -e 's/_UUID$//') # on the mdt's osc - OSC=$(ls $LPROC/osc | grep "${OST}-osc-MDT0000") - last_id=$(cat $LPROC/osc/${OSC}/prealloc_last_id) - next_id=$(cat $LPROC/osc/${OSC}/prealloc_next_id) + last_id=$(do_facet $SINGLEMDS lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc_last_id) + next_id=$(do_facet $SINGLEMDS lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc_next_id) - mkdir -p $DIR/d27/${OST} + mkdir -p $DIR/d27/${OST} $SETSTRIPE $DIR/d27/${OST} -i $OSTIDX -c 1 #define OBD_FAIL_OST_ENOSPC 0x215 - sysctl -w lustre.fail_loc=0x215 + do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0x215 echo "Creating to objid $last_id on ost $OST..." 
createmany -o $DIR/d27/${OST}/f $next_id $((last_id - next_id + 2)) - grep '[0-9]' $LPROC/osc/${OSC}/prealloc* + do_facet $SINGLEMDS lctl get_param -n osc.*${OST}-osc-MDT0000.prealloc* | grep '[0-9]' reset_enospc $2 } @@ -965,7 +965,8 @@ exhaust_all_precreations() { test_27n() { [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_enospc rm -f $DIR/d27/f27n @@ -979,7 +980,8 @@ run_test 27n "create file with some full OSTs ==================" test_27o() { [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_enospc rm -f $DIR/d27/f27o @@ -995,18 +997,19 @@ run_test 27o "create file with all full OSTs (should error) ====" test_27p() { [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_enospc rm -f $DIR/d27/f27p - $MCREATE $DIR/d27/f27p || error - $TRUNCATE $DIR/d27/f27p 80000000 || error - $CHECKSTAT -s 80000000 $DIR/d27/f27p || error + $MCREATE $DIR/d27/f27p || error "mcreate failed" + $TRUNCATE $DIR/d27/f27p 80000000 || error "truncate failed" + $CHECKSTAT -s 80000000 $DIR/d27/f27p || error "checkstat failed" exhaust_precreations 0 0x80000215 - echo foo >> $DIR/d27/f27p || error - $CHECKSTAT -s 80000004 $DIR/d27/f27p || error + echo foo >> $DIR/d27/f27p || error "append failed" + $CHECKSTAT -s 80000004 $DIR/d27/f27p || error "checkstat failed" reset_enospc } @@ -1014,7 +1017,8 @@ run_test 27p "append to a truncated file with some full OSTs ===" test_27q() { [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" 
&& return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_enospc rm -f $DIR/d27/f27q @@ -1034,7 +1038,8 @@ run_test 27q "append to truncated file with all OSTs full (should error) ===" test_27r() { [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_enospc rm -f $DIR/d27/f27r @@ -1064,18 +1069,18 @@ test_27t() { # bug 10864 run_test 27t "check that utils parse path correctly" test_27u() { # bug 4900 - [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return #define OBD_FAIL_MDS_OSC_PRECREATE 0x13d - sysctl -w lustre.fail_loc=0x13d + do_facet $SINGLEMDS lctl set_param fail_loc=0x13d mkdir -p $DIR/d27u createmany -o $DIR/d27u/t- 1000 - sysctl -w lustre.fail_loc=0 + do_facet $SINGLEMDS lctl set_param fail_loc=0 TLOG=$DIR/$tfile.getstripe - $GETSTRIPE $DIR/d27u > $TLOG + $GETSTRIPE $DIR/d27u > $TLOG OBJS=`awk -vobj=0 '($1 == 0) { obj += 1 } END { print obj;}' $TLOG` unlinkmany $DIR/d27u/t- 1000 [ $OBJS -gt 0 ] && \ @@ -1085,7 +1090,8 @@ run_test 27u "skip object creation on OSC w/o objects ==========" test_27v() { # bug 4900 [ "$OSTCOUNT" -lt "2" ] && skip "too few OSTs" && return - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return exhaust_all_precreations @@ -1094,15 +1100,15 @@ test_27v() { # bug 4900 touch $DIR/$tdir/$tfile #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 - sysctl -w lustre.fail_loc=0x705 + lctl set_param fail_loc=0x705 START=`date +%s` for F in `seq 1 32`; do touch $DIR/$tdir/$tfile.$F done - sysctl -w lustre.fail_loc=0 + lctl 
set_param fail_loc=0 FINISH=`date +%s` - TIMEOUT=`sysctl -n lustre.timeout` + TIMEOUT=`lctl get_param -n timeout` [ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \ error "$FINISH - $START >= $TIMEOUT / 2" @@ -1140,17 +1146,34 @@ test_29() { touch $DIR/d29/foo log 'first d29' ls -l $DIR/d29 - MDCDIR=${MDCDIR:-$LPROC/ldlm/namespaces/*-mdc-*} - LOCKCOUNTORIG=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` - [ -z $"LOCKCOUNTORIG" ] && echo "No mdc lock count" && return 1 + + declare -i LOCKCOUNTORIG=0 + for lock_count in $(lctl get_param -n ldlm.namespaces.*mdc*.lock_count); do + let LOCKCOUNTORIG=$LOCKCOUNTORIG+$lock_count + done + [ $LOCKCOUNTORIG -eq 0 ] && echo "No mdc lock count" && return 1 + + declare -i LOCKUNUSEDCOUNTORIG=0 + for unused_count in $(lctl get_param -n ldlm.namespaces.*mdc*.lock_unused_count); do + let LOCKUNUSEDCOUNTORIG=$LOCKUNUSEDCOUNTORIG+$unused_count + done + log 'second d29' ls -l $DIR/d29 log 'done' - LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count` + + declare -i LOCKCOUNTCURRENT=0 + for lock_count in $(lctl get_param -n ldlm.namespaces.*mdc*.lock_count); do + let LOCKCOUNTCURRENT=$LOCKCOUNTCURRENT+$lock_count + done + + declare -i LOCKUNUSEDCOUNTCURRENT=0 + for unused_count in $(lctl get_param -n ldlm.namespaces.*mdc*.lock_unused_count); do + let LOCKUNUSEDCOUNTCURRENT=$LOCKUNUSEDCOUNTCURRENT+$unused_count + done + if [ "$LOCKCOUNTCURRENT" -gt "$LOCKCOUNTORIG" ]; then - echo > $LPROC/ldlm/dump_namespaces + lctl set_param -n ldlm.dump_namespaces "" error "CURRENT: $LOCKCOUNTCURRENT > $LOCKCOUNTORIG" $LCTL dk | sort -k4 -t: > $TMP/test_29.dk log "dumped log to $TMP/test_29.dk (bug 5793)" @@ -1189,10 +1212,9 @@ run_test 31b "unlink file with multiple links while open =======" test_31c() { touch $DIR/f31 || error ln $DIR/f31 $DIR/f31c || error - multiop $DIR/f31 O_uc & + multiop_bg_pause $DIR/f31 O_uc || return 1 MULTIPID=$! 
multiop $DIR/f31c Ouc - usleep 500 kill -USR1 $MULTIPID wait $MULTIPID } @@ -1217,22 +1239,18 @@ test_31f() { # bug 4554 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f $GETSTRIPE $DIR/d31f/hosts - multiop $DIR/d31f D_c & + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID=$! - sleep 1 - rm -rv $DIR/d31f || error "first of $DIR/d31f" mkdir $DIR/d31f $SETSTRIPE $DIR/d31f -s 1048576 -c 1 cp /etc/hosts $DIR/d31f ls -l $DIR/d31f - $DIR/d31f/hosts - multiop $DIR/d31f D_c & + $DIR/d31f/hosts + multiop_bg_pause $DIR/d31f D_c || return 1 MULTIPID2=$! - sleep 6 - kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running" wait $MULTIPID || error "first opendir $MULTIPID failed" @@ -1328,27 +1346,27 @@ run_test 31m "link to file: the same, non-existing, dir===============" test_32a() { echo "== more mountpoints and symlinks =================" [ -e $DIR/d32a ] && rm -fr $DIR/d32a - mkdir -p $DIR/d32a/ext2-mountpoint + mkdir -p $DIR/d32a/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32a/ext2-mountpoint || error - $CHECKSTAT -t dir $DIR/d32a/ext2-mountpoint/.. || error + $CHECKSTAT -t dir $DIR/d32a/ext2-mountpoint/.. || error $UMOUNT $DIR/d32a/ext2-mountpoint || error } run_test 32a "stat d32a/ext2-mountpoint/.. =====================" test_32b() { [ -e $DIR/d32b ] && rm -fr $DIR/d32b - mkdir -p $DIR/d32b/ext2-mountpoint + mkdir -p $DIR/d32b/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32b/ext2-mountpoint || error ls -al $DIR/d32b/ext2-mountpoint/.. || error $UMOUNT $DIR/d32b/ext2-mountpoint || error } run_test 32b "open d32b/ext2-mountpoint/.. 
=====================" - + test_32c() { [ -e $DIR/d32c ] && rm -fr $DIR/d32c - mkdir -p $DIR/d32c/ext2-mountpoint + mkdir -p $DIR/d32c/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32c/ext2-mountpoint || error - mkdir -p $DIR/d32c/d2/test_dir + mkdir -p $DIR/d32c/d2/test_dir $CHECKSTAT -t dir $DIR/d32c/ext2-mountpoint/../d2/test_dir || error $UMOUNT $DIR/d32c/ext2-mountpoint || error } @@ -1356,9 +1374,9 @@ run_test 32c "stat d32c/ext2-mountpoint/../d2/test_dir =========" test_32d() { [ -e $DIR/d32d ] && rm -fr $DIR/d32d - mkdir -p $DIR/d32d/ext2-mountpoint + mkdir -p $DIR/d32d/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32d/ext2-mountpoint || error - mkdir -p $DIR/d32d/d2/test_dir + mkdir -p $DIR/d32d/d2/test_dir ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error $UMOUNT $DIR/d32d/ext2-mountpoint || error } @@ -1366,10 +1384,10 @@ run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========" test_32e() { [ -e $DIR/d32e ] && rm -fr $DIR/d32e - mkdir -p $DIR/d32e/tmp - TMP_DIR=$DIR/d32e/tmp - ln -s $DIR/d32e $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + mkdir -p $DIR/d32e/tmp + TMP_DIR=$DIR/d32e/tmp + ln -s $DIR/d32e $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error $CHECKSTAT -t link $DIR/d32e/symlink01 || error } @@ -1377,20 +1395,20 @@ run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====" test_32f() { [ -e $DIR/d32f ] && rm -fr $DIR/d32f - mkdir -p $DIR/d32f/tmp - TMP_DIR=$DIR/d32f/tmp - ln -s $DIR/d32f $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + mkdir -p $DIR/d32f/tmp + TMP_DIR=$DIR/d32f/tmp + ln -s $DIR/d32f $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 ls $DIR/d32f/tmp/symlink11 || error ls $DIR/d32f/symlink01 || error } run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====" test_32g() { - TMP_DIR=$DIR/$tdir/tmp + TMP_DIR=$DIR/$tdir/tmp mkdir -p $TMP_DIR $DIR/${tdir}2 - 
ln -s $DIR/${tdir}2 $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + ln -s $DIR/${tdir}2 $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 $CHECKSTAT -t link $TMP_DIR/symlink12 || error $CHECKSTAT -t link $DIR/$tdir/symlink02 || error $CHECKSTAT -t dir -f $TMP_DIR/symlink12 || error @@ -1400,10 +1418,10 @@ run_test 32g "stat d32g/symlink->tmp/symlink->lustre-subdir/${tdir}2" test_32h() { rm -fr $DIR/$tdir $DIR/${tdir}2 - TMP_DIR=$DIR/$tdir/tmp - mkdir -p $TMP_DIR $DIR/${tdir}2 - ln -s $DIR/${tdir}2 $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + TMP_DIR=$DIR/$tdir/tmp + mkdir -p $TMP_DIR $DIR/${tdir}2 + ln -s $DIR/${tdir}2 $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 ls $TMP_DIR/symlink12 || error ls $DIR/$tdir/symlink02 || error } @@ -1411,17 +1429,17 @@ run_test 32h "open d32h/symlink->tmp/symlink->lustre-subdir/${tdir}2" test_32i() { [ -e $DIR/d32i ] && rm -fr $DIR/d32i - mkdir -p $DIR/d32i/ext2-mountpoint + mkdir -p $DIR/d32i/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32i/ext2-mountpoint || error touch $DIR/d32i/test_file - $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error + $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error $UMOUNT $DIR/d32i/ext2-mountpoint || error } run_test 32i "stat d32i/ext2-mountpoint/../test_file ===========" test_32j() { [ -e $DIR/d32j ] && rm -fr $DIR/d32j - mkdir -p $DIR/d32j/ext2-mountpoint + mkdir -p $DIR/d32j/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32j/ext2-mountpoint || error touch $DIR/d32j/test_file cat $DIR/d32j/ext2-mountpoint/../test_file || error @@ -1431,8 +1449,8 @@ run_test 32j "open d32j/ext2-mountpoint/../test_file ===========" test_32k() { rm -fr $DIR/d32k - mkdir -p $DIR/d32k/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint + mkdir -p $DIR/d32k/ext2-mountpoint + mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint mkdir -p $DIR/d32k/d2 touch 
$DIR/d32k/d2/test_file || error $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error @@ -1442,7 +1460,7 @@ run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========" test_32l() { rm -fr $DIR/d32l - mkdir -p $DIR/d32l/ext2-mountpoint + mkdir -p $DIR/d32l/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error mkdir -p $DIR/d32l/d2 touch $DIR/d32l/d2/test_file @@ -1453,10 +1471,10 @@ run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========" test_32m() { rm -fr $DIR/d32m - mkdir -p $DIR/d32m/tmp - TMP_DIR=$DIR/d32m/tmp - ln -s $DIR $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + mkdir -p $DIR/d32m/tmp + TMP_DIR=$DIR/d32m/tmp + ln -s $DIR $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error $CHECKSTAT -t link $DIR/d32m/symlink01 || error } @@ -1464,10 +1482,10 @@ run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======" test_32n() { rm -fr $DIR/d32n - mkdir -p $DIR/d32n/tmp - TMP_DIR=$DIR/d32n/tmp - ln -s $DIR $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 + mkdir -p $DIR/d32n/tmp + TMP_DIR=$DIR/d32n/tmp + ln -s $DIR $TMP_DIR/symlink11 + ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 ls -l $DIR/d32n/tmp/symlink11 || error ls -l $DIR/d32n/symlink01 || error } @@ -1475,11 +1493,11 @@ run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======" test_32o() { rm -fr $DIR/d32o $DIR/$tfile - touch $DIR/$tfile - mkdir -p $DIR/d32o/tmp - TMP_DIR=$DIR/d32o/tmp - ln -s $DIR/$tfile $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + touch $DIR/$tfile + mkdir -p $DIR/d32o/tmp + TMP_DIR=$DIR/d32o/tmp + ln -s $DIR/$tfile $TMP_DIR/symlink12 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 $CHECKSTAT -t link $DIR/d32o/tmp/symlink12 || error $CHECKSTAT -t link $DIR/d32o/symlink02 || error $CHECKSTAT -t file -f $DIR/d32o/tmp/symlink12 || error @@ -1493,15 +1511,15 @@ test_32p() { log 32p_2 
rm -f $DIR/$tfile log 32p_3 - touch $DIR/$tfile + touch $DIR/$tfile log 32p_4 - mkdir -p $DIR/d32p/tmp + mkdir -p $DIR/d32p/tmp log 32p_5 - TMP_DIR=$DIR/d32p/tmp + TMP_DIR=$DIR/d32p/tmp log 32p_6 - ln -s $DIR/$tfile $TMP_DIR/symlink12 + ln -s $DIR/$tfile $TMP_DIR/symlink12 log 32p_7 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 + ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 log 32p_8 cat $DIR/d32p/tmp/symlink12 || error log 32p_9 @@ -1580,7 +1598,7 @@ test_34b() { run_test 34b "O_RDONLY opening file doesn't create objects =====" test_34c() { - [ ! -f $DIR/f34 ] && test_34a + [ ! -f $DIR/f34 ] && test_34a $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error $OPENFILE -f O_RDWR $DIR/f34 $GETSTRIPE $DIR/f34 2>&1 | grep -q "no stripe info" && error @@ -1589,7 +1607,7 @@ test_34c() { run_test 34c "O_RDWR opening file-with-size works ==============" test_34d() { - [ ! -f $DIR/f34 ] && test_34a + [ ! -f $DIR/f34 ] && test_34a dd if=/dev/zero of=$DIR/f34 conv=notrunc bs=4k count=1 || error $CHECKSTAT -s $TEST_34_SIZE $DIR/f34 || error rm $DIR/f34 @@ -1671,7 +1689,7 @@ test_36f() { DATESTR="Dec 20 2000" mkdir -p $DIR/$tdir #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - sysctl -w lustre.fail_loc=0x80000214 + lctl set_param fail_loc=0x80000214 date; date +%s cp /etc/hosts $DIR/$tdir/$tfile sync & # write RPC generated with "current" inode timestamp, but delayed @@ -1690,10 +1708,12 @@ test_36f() { run_test 36f "utime on file racing with OST BRW write ==========" test_36g() { - remote_ost && skip "remote OST" && return - export FMD_MAX_AGE=`do_facet ost1 cat $LPROC/obdfilter/*/client_cache_seconds 2> /dev/null | head -n 1` + remote_ost_nodsh && skip "remote OST with nodsh" && return + + mkdir -p $DIR/$tdir + export FMD_MAX_AGE=`do_facet ost1 lctl get_param -n obdfilter.*.client_cache_seconds 2> /dev/null | head -n 1` FMD_BEFORE="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`" - touch $DIR/d36/$tfile + touch $DIR/$tdir/$tfile sleep $((FMD_MAX_AGE + 12)) FMD_AFTER="`awk 
'/ll_fmd_cache/ { print $2 }' /proc/slabinfo`" [ "$FMD_AFTER" -gt "$FMD_BEFORE" ] && \ @@ -1751,7 +1771,7 @@ test_41() { run_test 41 "test small file write + fstat =====================" count_ost_writes() { - cat $LPROC/osc/*/stats | + lctl get_param -n osc.*.stats | awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' } @@ -1767,9 +1787,9 @@ start_writeback() { # in 2.6, restore /proc/sys/vm/dirty_writeback_centisecs, # dirty_ratio, dirty_background_ratio if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then - echo $WRITEBACK_SAVE > /proc/sys/vm/dirty_writeback_centisecs - echo $BG_DIRTY_RATIO_SAVE > /proc/sys/vm/dirty_background_ratio - echo $DIRTY_RATIO_SAVE > /proc/sys/vm/dirty_ratio + sysctl -w vm.dirty_writeback_centisecs=$WRITEBACK_SAVE + sysctl -w vm.dirty_background_ratio=$BG_DIRTY_RATIO_SAVE + sysctl -w vm.dirty_ratio=$DIRTY_RATIO_SAVE else # if file not here, we are a 2.4 kernel kill -CONT `pidof kupdated` @@ -1782,15 +1802,15 @@ stop_writeback() { trap start_writeback EXIT # in 2.6, save and 0 /proc/sys/vm/dirty_writeback_centisecs if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then - WRITEBACK_SAVE=`cat /proc/sys/vm/dirty_writeback_centisecs` - echo 0 > /proc/sys/vm/dirty_writeback_centisecs - echo 0 > /proc/sys/vm/dirty_writeback_centisecs + WRITEBACK_SAVE=`sysctl -n vm.dirty_writeback_centisecs` + sysctl -w vm.dirty_writeback_centisecs=0 + sysctl -w vm.dirty_writeback_centisecs=0 # save and increase /proc/sys/vm/dirty_ratio - DIRTY_RATIO_SAVE=`cat /proc/sys/vm/dirty_ratio` - echo $MAX_DIRTY_RATIO > /proc/sys/vm/dirty_ratio + DIRTY_RATIO_SAVE=`sysctl -n vm.dirty_ratio` + sysctl -w vm.dirty_ratio=$MAX_DIRTY_RATIO # save and increase /proc/sys/vm/dirty_background_ratio - BG_DIRTY_RATIO_SAVE=`cat /proc/sys/vm/dirty_background_ratio` - echo $MAX_BG_DIRTY_RATIO > /proc/sys/vm/dirty_background_ratio + BG_DIRTY_RATIO_SAVE=`sysctl -n vm.dirty_background_ratio` + sysctl -w vm.dirty_background_ratio=$MAX_BG_DIRTY_RATIO else # if file not 
here, we are a 2.4 kernel kill -STOP `pidof kupdated` @@ -1815,7 +1835,7 @@ test_42a() { stop_writeback sync; sleep 1; sync # just to be safe BEFOREWRITES=`count_ost_writes` - grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur_grant_bytes + lctl get_param -n osc.*[oO][sS][cC][_-]*.cur_grant_bytes | grep "[0-9]" dd if=/dev/zero of=$DIR/f42a bs=1024 count=100 AFTERWRITES=`count_ost_writes` [ $BEFOREWRITES -eq $AFTERWRITES ] || \ @@ -1897,6 +1917,7 @@ test_42d() { run_test 42d "test complete truncate of file with cached dirty data" test_43() { + mkdir -p $DIR/$tdir cp -p /bin/ls $DIR/$tdir/$tfile multiop $DIR/$tdir/$tfile Ow_c & pid=$! @@ -1911,12 +1932,11 @@ run_test 43 "execution of file opened for write should return -ETXTBSY" test_43a() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! multiop $DIR/d43/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" @@ -1924,12 +1944,11 @@ run_test 43a "open(RDWR) of file being executed should return -ETXTBSY" test_43b() { mkdir -p $DIR/d43 cp -p `which multiop` $DIR/d43/multiop || cp -p multiop $DIR/d43/multiop - $DIR/d43/multiop $TMP/test43.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR/d43/multiop multiop_bg_pause $TMP/test43.junk O_c || return 1 + MULTIOP_PID=$! 
truncate $DIR/d43/multiop 0 && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test43.junk } run_test 43b "truncate of file being executed should return -ETXTBSY" @@ -1988,8 +2007,8 @@ run_test 44a "test sparse pwrite ===============================" dirty_osc_total() { tot=0 - for d in $LPROC/osc/*/cur_dirty_bytes; do - tot=$(($tot + `cat $d`)) + for d in `lctl get_param -n osc.*.cur_dirty_bytes`; do + tot=$(($tot + $d)) done echo $tot } @@ -2028,7 +2047,7 @@ page_size() { # in a 2 stripe file (lov.sh), page 1023 maps to page 511 in its object. this # test tickles a bug where re-dirtying a page was failing to be mapped to the -# objects offset and an assert hit when an rpc was built with 1023's mapped +# objects offset and an assert hit when an rpc was built with 1023's mapped # offset 511 and 511's raw 511 offset. it also found general redirtying bugs. test_46() { f="$DIR/f46" @@ -2094,7 +2113,7 @@ run_test 48b "Access removed working dir (should return errors)=" test_48c() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48c/dir cd $DIR/d48c/dir @@ -2117,7 +2136,7 @@ run_test 48c "Access removed working subdir (should return errors)" test_48d() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48d/dir cd $DIR/d48d/dir @@ -2141,7 +2160,7 @@ run_test 48d "Access removed parent subdir (should return errors)" test_48e() { # bug 4134 check_kernel_version 41 || return 0 - #sysctl -w lnet.debug=-1 + #lctl set_param debug=-1 #set -vx mkdir -p $DIR/d48e/dir cd $DIR/d48e/dir @@ -2165,7 +2184,7 @@ test_50() { } run_test 50 "special situations: /proc symlinks ===============" -test_51() { +test_51a() { # was test_51 # bug 1516 - create an empty entry right after ".." 
then split dir mkdir $DIR/d51 touch $DIR/d51/foo @@ -2181,7 +2200,7 @@ test_51() { echo ls -l $DIR/d51 > /dev/null || error } -run_test 51 "special situations: split htree with empty entry ==" +run_test 51a "special situations: split htree with empty entry ==" #export NUMTEST=70000 # FIXME: I select a relatively small number to do basic test. @@ -2201,6 +2220,57 @@ test_51b() { } run_test 51b "mkdir .../t-0 --- .../t-$NUMTEST ====================" +test_51bb() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + local ndirs=${TEST51BB_NDIRS:-10} + local nfiles=${TEST51BB_NFILES:-100} + + local numfree=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + + [ $numfree -lt $(( ndirs * nfiles)) ] && \ + nfiles=$(( numfree / ndirs - 10 )) + + local dir=$DIR/d51bb + mkdir -p $dir + local savePOLICY=$(lctl get_param -n lmv.*.placement) + lctl set_param -n lmv.*.placement=CHAR + + lfs df -i $dir + local IUSED=$(lfs df -i $dir | grep MDT | awk '{print $3}') + OLDUSED=($IUSED) + + declare -a dirs + for ((i=0; i < $ndirs; i++)); do + dirs[i]=$dir/$RANDOM + echo Creating directory ${dirs[i]} + mkdir -p ${dirs[i]} + ls $dir + echo Creating $nfiles in dir ${dirs[i]} ... + echo "createmany -o ${dirs[i]}/$tfile- $nfiles" + createmany -o ${dirs[i]}/$tfile- $nfiles + done + ls $dir + + sleep 1 + + IUSED=$(lfs df -i $dir | grep MDT | awk '{print $3}') + NEWUSED=($IUSED) + + local rc=0 + for ((i=0; i<${#NEWUSED[@]}; i++)); do + echo "mds $i: inodes count OLD ${OLDUSED[$i]} NEW ${NEWUSED[$i]}" + [ ${OLDUSED[$i]} -lt ${NEWUSED[$i]} ] || rc=$((rc + 1)) + done + + lctl set_param -n lmv.*.placement=$savePOLICY + + [ $rc -ne $MDSCOUNT ] || \ + error "Objects/inodes are not distributed over all mds servers" +} +run_test 51bb "mkdir createmany CMD $MDSCOUNT ====================" + + test_51c() { [ ! 
-d $DIR/d51b ] && skip "$DIR/51b missing" && \ return @@ -2227,7 +2297,7 @@ test_51d() { error "OST $N has less objects vs OST $NLAST (${OBJS[$N]} < ${OBJS[$NLAST]}" [ ${OBJS[$N]} -gt $((${OBJS[$NLAST]} + 20)) ] && \ error "OST $N has less objects vs OST $NLAST (${OBJS[$N]} < ${OBJS[$NLAST]}" - + [ ${OBJS0[$N]} -lt $((${OBJS0[$NLAST]} - 20)) ] && \ error "OST $N has less #0 objects vs OST $NLAST (${OBJS0[$N]} < ${OBJS0[$NLAST]}" [ ${OBJS0[$N]} -gt $((${OBJS0[$NLAST]} + 20)) ] && \ @@ -2276,13 +2346,22 @@ test_52b() { run_test 52b "immutable flag test (should return errors) =======" test_53() { - remote_mds && skip "remote MDS" && return - - # only test MDT0000 - for i in `ls -d $LPROC/osc/*-osc-MDT0000 2> /dev/null` ; do - ostname=`basename $i | cut -d - -f 1-2` - ost_last=`cat $LPROC/obdfilter/$ostname/last_id` - mds_last=`cat $i/prealloc_last_id` + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return + + local param + local ostname + local mds_last + local ost_last + local ostnum + + # only test MDT0000 + for value in $(do_facet $SINGLEMDS lctl get_param osc.*-osc-MDT0000.prealloc_last_id) ; do + param=`echo ${value[0]} | cut -d "=" -f1` + ostname=`echo $param | cut -d "." 
-f2 | cut -d - -f 1-2` + mds_last=$(do_facet $SINGLEMDS lctl get_param -n $param) + ostnum=$(echo $ostname | sed "s/${FSNAME}-OST//g" | awk '{print ($1+1)}' ) + ost_last=$(do_facet ost$ostnum lctl get_param -n obdfilter.$ostname.last_id | head -n 1) echo "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last" if [ $ost_last != $mds_last ]; then error "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last" @@ -2304,7 +2383,7 @@ test_54b() { f="$DIR/f54b" mknod $f c 1 3 chmod 0666 $f - dd if=/dev/zero of=$f bs=`page_size` count=1 + dd if=/dev/zero of=$f bs=`page_size` count=1 } run_test 54b "char device works in lustre ======================" @@ -2326,7 +2405,7 @@ test_54c() { tdir="$DIR/d54c" loopdev="$DIR/loop54c" - find_loop_dev + find_loop_dev [ -z "$LOOPNUM" ] && echo "couldn't find empty loop device" && return mknod $loopdev b 7 $LOOPNUM echo "make a loop file system with $tfile on $loopdev ($LOOPNUM)..." @@ -2386,7 +2465,7 @@ test_55() { } run_test 55 "check iopen_connect_dentry() ======================" -test_56() { +test_56a() { # was test_56 rm -rf $DIR/d56 $SETSTRIPE -d $DIR mkdir $DIR/d56 @@ -2436,7 +2515,7 @@ test_56() { error "lfs getstripe --obd wrong: should not show file on other obd" echo "lfs getstripe --obd passed." 
} -run_test 56 "check lfs getstripe ====================================" +run_test 56a "check lfs getstripe ====================================" NUMFILES=3 NUMDIRS=3 @@ -2584,12 +2663,61 @@ test_56o() { } run_test 56o "check lfs find -mtime for old files ==========================" +test_56p() { + [ $RUNAS_ID -eq $UID ] && skip "RUNAS_ID = UID = $UID -- skipping" && return + + TDIR=$DIR/${tdir}g + rm -rf $TDIR + + setup_56 $NUMFILES $NUMDIRS + + chown $RUNAS_ID $TDIR/file* || error "chown $DIR/${tdir}g/file$i failed" + EXPECTED=$NUMFILES + NUMS="`$LFIND -uid $RUNAS_ID $TDIR | wc -l`" + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -uid $TDIR wrong: found $NUMS, expected $EXPECTED" + + EXPECTED=$(( ($NUMFILES+1) * $NUMDIRS + 1)) + NUMS="`$LFIND ! -uid $RUNAS_ID $TDIR | wc -l`" + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find ! -uid $TDIR wrong: found $NUMS, expected $EXPECTED" + + echo "lfs find -uid and ! -uid passed." +} +run_test 56p "check lfs find -uid and ! -uid ===============================" + +test_56q() { + [ $RUNAS_ID -eq $UID ] && skip "RUNAS_ID = UID = $UID -- skipping" && return + + TDIR=$DIR/${tdir}g + rm -rf $TDIR + + setup_56 $NUMFILES $NUMDIRS + + chgrp $RUNAS_ID $TDIR/file* || error "chown $DIR/${tdir}g/file$i failed" + EXPECTED=$NUMFILES + NUMS="`$LFIND -gid $RUNAS_ID $TDIR | wc -l`" + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find -gid $TDIR wrong: found $NUMS, expected $EXPECTED" + + EXPECTED=$(( ($NUMFILES+1) * $NUMDIRS + 1)) + NUMS="`$LFIND ! -gid $RUNAS_ID $TDIR | wc -l`" + [ $NUMS -eq $EXPECTED ] || \ + error "lfs find ! -gid $TDIR wrong: found $NUMS, expected $EXPECTED" + + echo "lfs find -gid and ! -gid passed." +} +run_test 56q "check lfs find -gid and ! 
-gid ===============================" + test_57a() { # note test will not do anything if MDS is not local - remote_mds && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return - for DEV in `cat $LPROC/mds/*/mntdev`; do - dumpe2fs -h $DEV > $TMP/t57a.dump || error "can't access $DEV" + local MNTDEV="osd.*MDT*.mntdev" + DEV=$(do_facet $SINGLEMDS lctl get_param -n $MNTDEV) + [ -z "$DEV" ] && error "can't access $MNTDEV" + for DEV in $(do_facet $SINGLEMDS lctl get_param -n $MNTDEV); do + do_facet $SINGLEMDS dumpe2fs -h $DEV > $TMP/t57a.dump || error "can't access $DEV" DEVISIZE=`awk '/Inode size:/ { print $3 }' $TMP/t57a.dump` [ "$DEVISIZE" -gt 128 ] || error "inode size $DEVISIZE" rm $TMP/t57a.dump @@ -2611,8 +2739,8 @@ test_57b() { $GETSTRIPE $FILE1 2>&1 | grep -q "no stripe" || error "$FILE1 has an EA" $GETSTRIPE $FILEN 2>&1 | grep -q "no stripe" || error "$FILEN has an EA" - MDSFREE="`cat $LPROC/mds/*/kbytesfree 2> /dev/null`" - MDCFREE="`cat $LPROC/mdc/*/kbytesfree | head -n 1`" + MDSFREE="`lctl get_param -n osd.*MDT0000.kbytesfree 2> /dev/null`" + MDCFREE="`lctl get_param -n mdc.*.kbytesfree | head -n 1`" echo "opening files to create objects/EAs" for FILE in `seq -f $DIR/d57b/f%g 1 $FILECOUNT`; do $OPENFILE -f O_RDWR $FILE > /dev/null || error "opening $FILE" @@ -2623,8 +2751,8 @@ test_57b() { $GETSTRIPE $FILEN | grep -q "obdidx" || error "$FILEN missing EA" sleep 1 # make sure we get new statfs data -# MDSFREE2="`cat $LPROC/mds/*/kbytesfree`" -# MDCFREE2="`cat $LPROC/mdc/*/kbytesfree`" +# MDSFREE2="`lctl get_param -n mds.*.kbytesfree`" +# MDCFREE2="`lctl get_param -n mdc.*.kbytesfree`" # if [ "$MDCFREE2" -lt "$((MDCFREE - 8))" ]; then # if [ "$MDSFREE" != "$MDSFREE2" ]; then # error "MDC before $MDCFREE != after $MDCFREE2" @@ -2658,7 +2786,7 @@ TEST60_HEAD="test_60 run $RANDOM" test_60a() { [ ! 
-f run-llog.sh ] && skip "missing subtest run-llog.sh" && return log "$TEST60_HEAD - from kernel mode" -# sh run-llog.sh + sh run-llog.sh } run_test 60a "llog sanity tests run from kernel module ==========" @@ -2679,34 +2807,33 @@ test_60b() { # bug 6411 [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages ($LLOG_COUNT)"|| true } run_test 60b "limit repeated messages from CERROR/CWARN ========" - + test_60c() { - echo "create 5000 files" + echo "create 5000 files" createmany -o $DIR/f60c- 5000 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x13c - sysctl -w lustre.fail_loc=0x8000013c + lctl set_param fail_loc=0x8000013c unlinkmany $DIR/f60c- 5000 - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 } run_test 60c "unlink file when mds full" test_60d() { - SAVEPRINTK=$(sysctl -n lnet.printk) + SAVEPRINTK=$(lctl get_param -n printk) # verify "lctl mark" is even working" MESSAGE="test message ID $RANDOM $$" $LCTL mark "$MESSAGE" || error "$LCTL mark failed" dmesg | grep -q "$MESSAGE" || error "didn't find debug marker in log" - sysctl -w lnet.printk=0 || error "set lnet.printk failed" - sysctl -n lnet.printk | grep emerg || error "lnet.printk dropped emerg" - + lctl set_param printk=0 || error "set lnet.printk failed" + lctl get_param -n printk | grep emerg || error "lnet.printk dropped emerg" MESSAGE="new test message ID $RANDOM $$" # Assume here that libcfs_debug_mark_buffer() uses D_WARNING $LCTL mark "$MESSAGE" || error "$LCTL mark failed" dmesg | grep -q "$MESSAGE" && error "D_WARNING wasn't masked" || true - sysctl -w lnet.printk="$SAVEPRINTK" + lctl set_param -n printk="$SAVEPRINTK" } run_test 60d "test printk console message masking" @@ -2724,18 +2851,18 @@ test_62() { f="$DIR/f62" echo foo > $f cancel_lru_locks osc - sysctl -w lustre.fail_loc=0x405 + lctl set_param fail_loc=0x405 cat $f && error "cat succeeded, expect -EIO" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 } -run_test 62 "verify obd_match failure doesn't LBUG (should 
-EIO)" +# This test is now irrelevant (as of bug 10718 inclusion), we no longer +# match every page all of the time. +#run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)" # bug 2319 - oig_wait() interrupted causes crash because of invalid waitq. -test_63() { - MAX_DIRTY_MB=`cat $LPROC/osc/*/max_dirty_mb | head -n 1` - for i in $LPROC/osc/*/max_dirty_mb ; do - echo 0 > $i - done +test_63a() { # was test_63 + MAX_DIRTY_MB=`lctl get_param -n osc.*.max_dirty_mb | head -n 1` + lctl set_param -n osc.*.max_dirty_mb 0 for i in `seq 10` ; do dd if=/dev/zero of=$DIR/f63 bs=8k & sleep 5 @@ -2743,29 +2870,27 @@ test_63() { sleep 1 done - for i in $LPROC/osc/*/max_dirty_mb ; do - echo $MAX_DIRTY_MB > $i - done + lctl set_param -n osc.*.max_dirty_mb $MAX_DIRTY_MB rm -f $DIR/f63 || true } -run_test 63 "Verify oig_wait interruption does not crash =======" +run_test 63a "Verify oig_wait interruption does not crash =======" # bug 2248 - async write errors didn't return to application on sync # bug 3677 - async write errors left page locked test_63b() { debugsave - sysctl -w lnet.debug=-1 + lctl set_param debug=-1 # ensure we have a grant to do async writes dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 rm $DIR/$tfile #define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 - sysctl -w lustre.fail_loc=0x80000406 + lctl set_param fail_loc=0x80000406 multiop $DIR/$tfile Owy && \ error "sync didn't return ENOMEM" sync; sleep 2; sync # do a real sync this time to flush page - grep locked $LPROC/llite/*/dump_page_cache && \ + lctl get_param -n llite.*.dump_page_cache | grep locked && \ error "locked page left in cache after async error" || true debugrestore } @@ -2773,7 +2898,7 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { df $DIR - grep "[0-9]" $LPROC/osc/*[oO][sS][cC][_-]*/cur* + lctl get_param -n osc.*[oO][sS][cC][_-]*.cur* | grep "[0-9]" } run_test 64a "verify filter grant calculations (in kernel) =====" @@ -2834,7 +2959,7 @@ test_65e() { touch 
$DIR/d65/f6 $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed" } -run_test 65e "directory setstripe 0 -1 0 =======================" +run_test 65e "directory setstripe defaults =======================" test_65f() { mkdir -p $DIR/d65f @@ -2859,12 +2984,27 @@ test_65h() { "`$GETSTRIPE -v $DIR/d65/dd1 | grep "^count"`" ] || error "stripe info inherit failed" } run_test 65h "directory stripe info inherit ====================" - + test_65i() { # bug6367 - $SETSTRIPE $MOUNT -s 65536 -c -1 + $SETSTRIPE $MOUNT -s 65536 -c -1 } run_test 65i "set non-default striping on root directory (bug 6367)=" +test_65ia() { # bug12836 + $LFS getstripe $MOUNT || error "getstripe $MOUNT failed" +} +run_test 65ia "getstripe on -1 default directory striping" + +test_65ib() { # bug12836 + $LFS getstripe -v $MOUNT || error "getstripe -v $MOUNT failed" +} +run_test 65ib "getstripe -v on -1 default directory striping" + +test_65ic() { # bug12836 + $LFS find -mtime -1 $MOUNT || error "find $MOUNT failed" +} +run_test 65ic "new find on -1 default directory striping" + test_65j() { # bug6367 sync; sleep 1 # if we aren't already remounting for each test, do so for this test @@ -2878,22 +3018,24 @@ run_test 65j "set default striping on root directory (bug 6367)=" test_65k() { # bug11679 [ "$OSTCOUNT" -lt 2 ] && skip "too few OSTs" && return - remote_mds_nodsh && skip "remote MDS" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return echo "Check OST status: " - MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` + MDS_OSCS=`do_facet $SINGLEMDS lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` for OSC in $MDS_OSCS; do echo $OSC "is activate" - do_facet mds lctl --device %$OSC activate + do_facet $SINGLEMDS lctl --device %$OSC activate done do_facet client mkdir -p $DIR/$tdir for INACTIVE_OSC in $MDS_OSCS; do echo $INACTIVE_OSC "is Deactivate:" - do_facet mds lctl --device %$INACTIVE_OSC deactivate + do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC 
deactivate for STRIPE_OSC in $MDS_OSCS; do STRIPE_OST=`osc_to_ost $STRIPE_OSC` - STRIPE_INDEX=`do_facet mds cat $LPROC/lov/*md*/target_obd | - grep $STRIPE_OST | awk -F: '{print $1}'` + STRIPE_INDEX=`do_facet $SINGLEMDS lctl get_param -n lov.*md*.target_obd | + grep $STRIPE_OST | awk -F: '{print $1}' | head -n 1` + + [ -f $DIR/$tdir/${STRIPE_INDEX} ] && continue echo "$SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} -i ${STRIPE_INDEX} -c 1" do_facet client $SETSTRIPE $DIR/$tdir/${STRIPE_INDEX} -i ${STRIPE_INDEX} -c 1 RC=$? @@ -2901,7 +3043,7 @@ test_65k() { # bug11679 done do_facet client rm -f $DIR/$tdir/* echo $INACTIVE_OSC "is Activate." - do_facet mds lctl --device %$INACTIVE_OSC activate + do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC activate done } run_test 65k "validate manual striping works properly with deactivated OSCs" @@ -2948,7 +3090,7 @@ swap_used() { # and then consuming memory until it is used. test_68() { [ "$UID" != 0 ] && skip "must run as root" && return - grep -q obdfilter $LPROC/devices && \ + lctl get_param -n devices | grep -q obdfilter && \ skip "local OST" && return grep -q llite_lloop /proc/modules @@ -2985,95 +3127,72 @@ run_test 68 "support swapping to Lustre ========================" # bug5265, obdfilter oa2dentry return -ENOENT # #define OBD_FAIL_OST_ENOENT 0x217 test_69() { - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ - skip "skipping test for remote OST" && return - $GSS && skip "gss with bulk security will triger oops. re-enable this after b10091 get fixed" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return f="$DIR/$tfile" - touch $f + $SETSTRIPE $f -c 1 -i 0 - if ! $DIRECTIO write ${f}.2 0 1; then - skip "O_DIRECT not implemented" - return 0 - fi + $DIRECTIO write ${f}.2 0 1 || error "directio write error" - sysctl -w lustre.fail_loc=0x217 + do_facet ost1 lctl set_param fail_loc=0x217 truncate $f 1 # vmtruncate() will ignore truncate() error. 
$DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT" - sysctl -w lustre.fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 $DIRECTIO write $f 0 2 || error "write error" cancel_lru_locks osc $DIRECTIO read $f 0 1 || error "read error" - sysctl -w lustre.fail_loc=0x217 + do_facet ost1 lctl set_param fail_loc=0x217 $DIRECTIO read $f 1 1 && error "read succeeded, expect -ENOENT" - sysctl -w lustre.fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 rm -f $f } run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======" test_71() { - which dbench > /dev/null 2>&1 || skip "dbench not installed, skip this test" && return 0 - DBENCH_LIB=${DBENCH_LIB:-/usr/lib/dbench} - PATH=${DBENCH_LIB}:${PATH} - cp `which dbench` $DIR - - TGT=$DIR/client.txt - SRC=${SRC:-$DBENCH_LIB/client.txt} - [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT - SRC=$DBENCH_LIB/client_plain.txt - [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT - - echo "copying necessary lib to $DIR" - [ -d /lib64 ] && LIB71=/lib64 || LIB71=/lib - mkdir -p $DIR$LIB71 || error "can't create $DIR$LIB71" - cp $LIB71/libc* $DIR$LIB71 || error "can't copy $LIB71/libc*" - cp $LIB71/ld-* $DIR$LIB71 || error "can't create $LIB71/ld-*" - - echo "chroot $DIR /dbench -c client.txt 2" - chroot $DIR /dbench -c client.txt 2 - RC=$? - - rm -rf $DIR/dbench $TGT $DIR$LIB71 - - return $RC + sh rundbench -C -D $DIR 2 || error "dbench failed!" } run_test 71 "Running dbench on lustre (don't segment fault) ====" test_72() { # bug 5695 - Test that on 2.6 remove_suid works properly check_kernel_version 43 || return 0 [ "$RUNAS_ID" = "$UID" ] && skip "RUNAS_ID = UID = $UID -- skipping" && return + + # Check that testing environment is properly set up. 
Skip if not + FAIL_ON_ERROR=false check_runas_id_ret $RUNAS_ID $RUNAS || { + skip "User $RUNAS_ID does not exist - skipping" + return 0 + } # We had better clear the $DIR to get enough space for dd rm -rf $DIR/* touch $DIR/f72 chmod 777 $DIR/f72 chmod ug+s $DIR/f72 - $RUNAS -u $(($RUNAS_ID + 1)) dd if=/dev/zero of=$DIR/f72 bs=512 count=1 || error + $RUNAS dd if=/dev/zero of=$DIR/f72 bs=512 count=1 || error # See if we are still setuid/sgid test -u $DIR/f72 -o -g $DIR/f72 && error "S/gid is not dropped on write" # Now test that MDS is updated too cancel_lru_locks mdc test -u $DIR/f72 -o -g $DIR/f72 && error "S/gid is not dropped on MDS" true + rm -f $DIR/f72 } run_test 72 "Test that remove suid works properly (bug5695) ====" # bug 3462 - multiple simultaneous MDC requests test_73() { - mkdir $DIR/d73-1 + mkdir $DIR/d73-1 mkdir $DIR/d73-2 - multiop $DIR/d73-1/f73-1 O_c & + multiop_bg_pause $DIR/d73-1/f73-1 O_c || return 1 pid1=$! - #give multiop a chance to open - usleep 500 - echo 0x80000129 > /proc/sys/lustre/fail_loc + lctl set_param fail_loc=0x80000129 multiop $DIR/d73-1/f73-2 Oc & sleep 1 - echo 0 > /proc/sys/lustre/fail_loc + lctl set_param fail_loc=0 multiop $DIR/d73-2/f73-3 Oc & pid3=$! @@ -3084,8 +3203,8 @@ test_73() { sleep 25 $CHECKSTAT -t file $DIR/d73-1/f73-1 || return 4 - $CHECKSTAT -t file $DIR/d73-1/f73-2 || return 5 - $CHECKSTAT -t file $DIR/d73-2/f73-3 || return 6 + $CHECKSTAT -t file $DIR/d73-1/f73-2 || return 5 + $CHECKSTAT -t file $DIR/d73-2/f73-3 || return 6 rm -rf $DIR/d73-* } @@ -3097,11 +3216,12 @@ test_74a() { # bug 6149, 6184 # very important to OR with OBD_FAIL_ONCE (0x80000000) -- otherwise it # will spin in a tight reconnection loop touch $DIR/f74a - sysctl -w lustre.fail_loc=0x8000030e + lctl set_param fail_loc=0x8000030e # get any lock that won't be difficult - lookup works. 
ls $DIR/f74a - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 true + rm -f $DIR/f74a } run_test 74a "ldlm_enqueue freed-export error path, ls (shouldn't LBUG)" @@ -3110,11 +3230,12 @@ test_74b() { # bug 13310 # # very important to OR with OBD_FAIL_ONCE (0x80000000) -- otherwise it # will spin in a tight reconnection loop - sysctl -w lustre.fail_loc=0x8000030e + lctl set_param fail_loc=0x8000030e # get a "difficult" lock touch $DIR/f74b - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 true + rm -f $DIR/f74b } run_test 74b "ldlm_enqueue freed-export error path, touch (shouldn't LBUG)" @@ -3127,25 +3248,22 @@ export T75_PREP=no test75_prep() { [ $T75_PREP = "yes" ] && return echo "using F75=$F75, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL" - + dd if=/dev/urandom of=${F75}_128k bs=128k count=1 || error "dd failed" log "finished dd" chmod 777 ${F128k} T75_PREP=yes } - + test_75a() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep - + cp -p ${F128k} ${FHEAD} log "finished cp to $FHEAD" cp -p ${F128k} ${FTAIL} log "finished cp to $FTAIL" cat ${F128k} ${F128k} > ${F75}_sim_sim - + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" log "finished join $FHEAD to ${F75}_sim_sim" cmp ${FHEAD} ${F75}_sim_sim || error "${FHEAD} ${F75}_sim_sim differ" @@ -3153,13 +3271,10 @@ test_75a() { $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join" } run_test 75a "TEST join file ====================================" - + test_75b() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep - + cp -p ${F128k} ${FTAIL} cat ${F75}_sim_sim >> ${F75}_join_sim cat ${F128k} >> ${F75}_join_sim @@ -3169,13 +3284,10 @@ test_75b() { $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join" } run_test 75b "TEST join file 2 ==================================" - + test_75c() { -# skipped temporarily: we do not 
have join file currently -# please remove this when ready - huanghua - return test75_prep - + cp -p ${F128k} ${FTAIL} cat ${F128k} >> ${F75}_sim_join cat ${F75}_join_sim >> ${F75}_sim_join @@ -3185,13 +3297,10 @@ test_75c() { $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join" } run_test 75c "TEST join file 3 ==================================" - + test_75d() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep - + cp -p ${F128k} ${FHEAD} cp -p ${F128k} ${FHEAD}_tmp cat ${F75}_sim_sim >> ${F75}_join_join @@ -3202,23 +3311,17 @@ test_75d() { $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)" } run_test 75d "TEST join file 4 ==================================" - + test_75e() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep - + rm -rf ${FHEAD} || "delete join file error" } run_test 75e "TEST join file 5 (remove joined file) =============" - + test_75f() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return test75_prep - + cp -p ${F128k} ${F75}_join_10_compare cp -p ${F128k} ${F75}_join_10 for ((i = 0; i < 10; i++)); do @@ -3232,16 +3335,13 @@ test_75f() { error "files ${F75}_join_10 ${F75}_join_10_compare differ" } run_test 75f "TEST join file 6 (join 10 files) ==================" - + test_75g() { -# skipped temporarily: we do not have join file currently -# please remove this when ready - huanghua - return [ ! -f ${F75}_join_10 ] && echo "${F75}_join_10 missing" && return $LFS getstripe ${F75}_join_10 - + $OPENUNLINK ${F75}_join_10 ${F75}_join_10 || error "files unlink open" - + ls -l $F75* } run_test 75g "TEST join file 7 (open unlink) ====================" @@ -3255,7 +3355,9 @@ test_76() { # bug 1443 [ $DETH -eq 0 ] && skip "No _iget." 
&& return 0 BEFORE_INODES=`num_inodes` echo "before inodes: $BEFORE_INODES" - for i in `seq 1000`; do + local COUNT=1000 + [ "$SLOW" = "no" ] && COUNT=100 + for i in `seq $COUNT`; do touch $DIR/$tfile rm -f $DIR/$tfile done @@ -3277,14 +3379,22 @@ set_checksums() # In this case set_checksums() will not be no-op, because sptlrpc # bulk checksum will be enabled all through the test. - [ "$ORIG_CSUM" ] || ORIG_CSUM=`cat $LPROC/osc/*/checksums | head -n1` - for f in $LPROC/osc/*/checksums; do - echo $1 >> $f - done - + [ "$ORIG_CSUM" ] || ORIG_CSUM=`lctl get_param -n osc.*.checksums | head -n1` + lctl set_param -n osc.*.checksums $1 return 0 } +export ORIG_CSUM_TYPE="" +CKSUM_TYPES=${CKSUM_TYPES:-"crc32 adler"} +set_checksum_type() +{ + [ "$ORIG_CSUM_TYPE" ] || \ + ORIG_CSUM_TYPE=`lctl get_param -n osc/*osc-[^mM]*/checksum_type | + sed 's/.*\[\(.*\)\].*/\1/g' | head -n1` + lctl set_param -n osc.*osc-[^mM]*.checksum_type $1 + log "set checksum type to $1" + return 0 +} F77_TMP=$TMP/f77-temp F77SZ=8 setup_f77() { @@ -3297,96 +3407,138 @@ test_77a() { # bug 10889 set_checksums 1 dd if=$F77_TMP of=$DIR/$tfile bs=1M count=$F77SZ || error "dd error" set_checksums 0 + rm -f $DIR/$tfile } run_test 77a "normal checksum read/write operation =============" test_77b() { # bug 10889 [ ! -f $F77_TMP ] && setup_f77 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x80000409 + lctl set_param fail_loc=0x80000409 set_checksums 1 dd if=$F77_TMP of=$DIR/f77b bs=1M count=$F77SZ conv=sync || \ error "dd error: $?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 + rm -f $DIR/f77b } run_test 77b "checksum error on client write ====================" test_77c() { # bug 10889 - [ ! -f $DIR/f77b ] && skip "requires 77b - skipping" && return - cancel_lru_locks osc - #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 - sysctl -w lustre.fail_loc=0x80000408 + [ ! 
-f $DIR/f77b ] && skip "requires 77b - skipping" && return set_checksums 1 - cmp $F77_TMP $DIR/f77b || error "file compare failed" - sysctl -w lustre.fail_loc=0 + for algo in $CKSUM_TYPES; do + cancel_lru_locks osc + set_checksum_type $algo + #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 + lctl set_param fail_loc=0x80000408 + cmp $F77_TMP $DIR/f77b || error "file compare failed" + lctl set_param fail_loc=0 + done set_checksums 0 + set_checksum_type $ORIG_CSUM_TYPE } run_test 77c "checksum error on client read ===================" test_77d() { # bug 10889 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x80000409 + lctl set_param fail_loc=0x80000409 set_checksums 1 directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ error "direct write: rc=$?" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77d "checksum error on OST direct write ===============" test_77e() { # bug 10889 - [ ! -f $DIR/f77 ] && skip "requires 77d - skipping" && return + [ ! -f $DIR/f77 ] && skip "requires 77d - skipping" && return #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 - sysctl -w lustre.fail_loc=0x80000408 + lctl set_param fail_loc=0x80000408 set_checksums 1 cancel_lru_locks osc directio read $DIR/f77 0 $F77SZ $((1024 * 1024)) || \ error "direct read: rc=$?" 
- sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 set_checksums 0 } run_test 77e "checksum error on OST direct read ================" test_77f() { # bug 10889 - #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 - sysctl -w lustre.fail_loc=0x409 set_checksums 1 - directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \ - error "direct write succeeded" - sysctl -w lustre.fail_loc=0 + for algo in $CKSUM_TYPES; do + cancel_lru_locks osc + set_checksum_type $algo + #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 + lctl set_param fail_loc=0x409 + directio write $DIR/f77 0 $F77SZ $((1024 * 1024)) && \ + error "direct write succeeded" + lctl set_param fail_loc=0 + done + set_checksum_type $ORIG_CSUM_TYPE set_checksums 0 } run_test 77f "repeat checksum error on write (expect error) ====" test_77g() { # bug 10889 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ - skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return + [ ! -f $F77_TMP ] && setup_f77 + + $SETSTRIPE $DIR/f77g -c 1 -i 0 #define OBD_FAIL_OST_CHECKSUM_RECEIVE 0x21a - sysctl -w lustre.fail_loc=0x8000021a + do_facet ost1 lctl set_param fail_loc=0x8000021a set_checksums 1 - dd if=$F77_TMP of=$DIR/f77 bs=1M count=$F77SZ || \ + dd if=$F77_TMP of=$DIR/f77g bs=1M count=$F77SZ || \ error "write error: rc=$?" - sysctl -w lustre.fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 set_checksums 0 } run_test 77g "checksum error on OST write ======================" test_77h() { # bug 10889 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && \ - skip "remote OST" && return - [ ! -f $DIR/f77 ] && skip "requires 77g - skipping" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return + + [ ! 
-f $DIR/f77g ] && skip "requires 77g - skipping" && return cancel_lru_locks osc #define OBD_FAIL_OST_CHECKSUM_SEND 0x21b - sysctl -w lustre.fail_loc=0x8000021b + do_facet ost1 lctl set_param fail_loc=0x8000021b set_checksums 1 - cmp $F77_TMP $DIR/f77 || error "file compare failed" - sysctl -w lustre.fail_loc=0 + cmp $F77_TMP $DIR/f77g || error "file compare failed" + do_facet ost1 lctl set_param fail_loc=0 set_checksums 0 } run_test 77h "checksum error on OST read =======================" +test_77i() { # bug 13805 + #define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b + lctl set_param fail_loc=0x40b + remount_client $MOUNT + lctl set_param fail_loc=0 + for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do + PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'` + [ "$algo" = "crc32" ] || error "algo set to $algo instead of crc32" + done + remount_client $MOUNT +} +run_test 77i "client not supporting OSD_CONNECT_CKSUM ==========" + +test_77j() { # bug 13805 + #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c + lctl set_param fail_loc=0x40c + remount_client $MOUNT + lctl set_param fail_loc=0 + sleep 2 # wait async osc connect to finish + for VALUE in `lctl get_param osc.*osc-[^mM]*.checksum_type`; do + PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + algo=`lctl get_param -n $PARAM | sed 's/.*\[\(.*\)\].*/\1/g'` + [ "$algo" = "adler" ] || error "algo set to $algo instead of adler" + done + remount_client $MOUNT +} +run_test 77j "client only supporting ADLER32 ====================" + [ "$ORIG_CSUM" ] && set_checksums $ORIG_CSUM || true rm -f $F77_TMP unset F77_TMP @@ -3395,8 +3547,17 @@ test_78() { # bug 10901 NSEQ=5 F78SIZE=$(($(awk '/MemFree:/ { print $2 }' /proc/meminfo) / 1024)) echo "MemFree: $F78SIZE, Max file size: $MAXFREE" - MEMTOTAL=$(($(awk '/MemTotal:/ { print $2 }' /proc/meminfo) / 2048)) - echo "MemTotal: $((MEMTOTAL * 2))" + MEMTOTAL=$(($(awk '/MemTotal:/ { print $2 }' /proc/meminfo) / 1024)) + echo "MemTotal: 
$MEMTOTAL" +# reserve 256MB of memory for the kernel and other running processes, +# and then take 1/2 of the remaining memory for the read/write buffers. + if [ $MEMTOTAL -gt 512 ] ;then + MEMTOTAL=$(((MEMTOTAL - 256 ) / 2)) + else + # for those poor memory-starved high-end clusters... + MEMTOTAL=$((MEMTOTAL / 2)) + fi + echo "Mem to use for directio: $MEMTOTAL" [ $F78SIZE -gt $MEMTOTAL ] && F78SIZE=$MEMTOTAL [ $F78SIZE -gt 512 ] && F78SIZE=512 [ $F78SIZE -gt $((MAXFREE / 1024)) ] && F78SIZE=$((MAXFREE / 1024)) @@ -3405,11 +3566,12 @@ test_78() { # bug 10901 [ $SMALLESTOST -lt 10240 ] && \ skip "too small OSTSIZE, useless to run large O_DIRECT test" && return 0 - [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 5)) ] && \ - F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 5)) + [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 80)) ] && \ + F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 80)) + [ "$SLOW" = "no" ] && NSEQ=1 && [ $F78SIZE -gt 32 ] && F78SIZE=32 echo "File size: $F78SIZE" - $SETSTRIPE $DIR/$tfile -c -1 || error "setstripe failed" + $SETSTRIPE $DIR/$tfile -c $OSTCOUNT || error "setstripe failed" for i in `seq 1 $NSEQ` do FSIZE=$(($F78SIZE / ($NSEQ - $i + 1))) @@ -3422,17 +3584,12 @@ test_78() { # bug 10901 run_test 78 "handle large O_DIRECT writes correctly ============" test_79() { # bug 12743 - [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] && - skip "skipping test for remote OST" && return - wait_delete_completed - BKTOTAL=`awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \ - $LPROC/obdfilter/*/kbytestotal` - BKFREE=`awk 'BEGIN{free=0}; {free+=$1}; END{print free}' \ - $LPROC/obdfilter/*/kbytesfree` - BKAVAIL=`awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}' \ - $LPROC/obdfilter/*/kbytesavail` + BKTOTAL=$(calc_osc_kbytes kbytestotal) + BKFREE=$(calc_osc_kbytes kbytesfree) + BKAVAIL=$(calc_osc_kbytes kbytesavail) + STRING=`df -P $MOUNT | tail -n 1 | awk '{print $2","$3","$4}'` DFTOTAL=`echo $STRING | cut -d, -f1` DFUSED=`echo $STRING | cut -d, -f2` 
@@ -3441,21 +3598,36 @@ test_79() { # bug 12743 ALLOWANCE=$((64 * $OSTCOUNT)) - if [ $DFTOTAL -lt $(($BKTOTAL - $ALLOWANCE)) ] || + if [ $DFTOTAL -lt $(($BKTOTAL - $ALLOWANCE)) ] || [ $DFTOTAL -gt $(($BKTOTAL + $ALLOWANCE)) ] ; then error "df total($DFTOTAL) mismatch OST total($BKTOTAL)" fi - if [ $DFFREE -lt $(($BKFREE - $ALLOWANCE)) ] || + if [ $DFFREE -lt $(($BKFREE - $ALLOWANCE)) ] || [ $DFFREE -gt $(($BKFREE + $ALLOWANCE)) ] ; then error "df free($DFFREE) mismatch OST free($BKFREE)" fi - if [ $DFAVAIL -lt $(($BKAVAIL - $ALLOWANCE)) ] || + if [ $DFAVAIL -lt $(($BKAVAIL - $ALLOWANCE)) ] || [ $DFAVAIL -gt $(($BKAVAIL + $ALLOWANCE)) ] ; then error "df avail($DFAVAIL) mismatch OST avail($BKAVAIL)" fi } run_test 79 "df report consistency check =======================" +test_80() { # bug 10718 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1M + sync; sleep 1; sync + BEFORE=`date +%s` + cancel_lru_locks osc + AFTER=`date +%s` + DIFF=$((AFTER-BEFORE)) + if [ $DIFF -gt 1 ] ; then + error "elapsed for 1M@1T = $DIFF" + fi + true + rm -f $DIR/$tfile +} +run_test 80 "Page eviction is equally fast at high offsets too ====" + # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS # error message with a corrupt path string (CVS bug) and panic. @@ -3475,6 +3647,7 @@ test_99a() { run_test 99a "cvs init =========================================" test_99b() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return [ ! -d $DIR/d99cvsroot ] && test_99a cd /etc/init.d # some versions of cvs import exit(1) when asked to import links or @@ -3487,6 +3660,7 @@ test_99b() { run_test 99b "cvs import =======================================" test_99c() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return [ ! 
-d $DIR/d99cvsroot ] && test_99b cd $DIR mkdir -p $DIR/d99reposname @@ -3496,6 +3670,7 @@ test_99c() { run_test 99c "cvs checkout =====================================" test_99d() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return [ ! -d $DIR/d99cvsroot ] && test_99c cd $DIR/d99reposname $RUNAS touch foo99 @@ -3504,6 +3679,7 @@ test_99d() { run_test 99d "cvs add ==========================================" test_99e() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return [ ! -d $DIR/d99cvsroot ] && test_99c cd $DIR/d99reposname $RUNAS cvs update @@ -3511,17 +3687,30 @@ test_99e() { run_test 99e "cvs update =======================================" test_99f() { + [ -z "$(which cvs 2>/dev/null)" ] && skip "could not find cvs" && return [ ! -d $DIR/d99cvsroot ] && test_99d cd $DIR/d99reposname $RUNAS cvs commit -m 'nomsg' foo99 + rm -fr $DIR/d99cvsroot } run_test 99f "cvs commit =======================================" test_100() { - netstat -tna | while read PROT SND RCV LOCAL REMOTE STAT; do + [ "$NETTYPE" = tcp ] || \ + { skip "TCP secure port test, not useful for NETTYPE=$NETTYPE" && \ + return ; } + + remote_ost_nodsh && skip "remote OST with nodsh" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_servers || \ + { skip "useless for local single node setup" && return; } + + netstat -tna | ( rc=1; while read PROT SND RCV LOCAL REMOTE STAT; do [ "$PROT" != "tcp" ] && continue - RPORT=`echo $REMOTE | cut -d: -f2` + RPORT=$(echo $REMOTE | cut -d: -f2) [ "$RPORT" != "$ACCEPTOR_PORT" ] && continue + + rc=0 LPORT=`echo $LOCAL | cut -d: -f2` if [ $LPORT -ge 1024 ]; then echo "bad: $PROT $SND $RCV $LOCAL $REMOTE $STAT" @@ -3529,7 +3718,7 @@ test_100() { error "local: $LPORT > 1024, remote: $RPORT" fi done - true + [ "$rc" = 0 ] || error "privileged port not found" ) } run_test 100 "check local port using privileged port ===========" @@ -3549,11 +3738,9 @@ function get_named_value() done } -export 
CACHE_MAX=`cat $LPROC/llite/*/max_cached_mb | head -n 1` +export CACHE_MAX=`lctl get_param -n llite.*.max_cached_mb | head -n 1` cleanup_101() { - for s in $LPROC/llite/*/max_cached_mb; do - echo $CACHE_MAX > $s - done + lctl set_param -n llite.*.max_cached_mb $CACHE_MAX trap 0 } @@ -3564,57 +3751,124 @@ test_101() { [ "$CPU" = "UML" ] && nreads=1000 local cache_limit=32 - for s in $LPROC/osc/*-osc*/rpc_stats; do - echo 0 > $s - done + lctl set_param -n osc.*-osc*.rpc_stats 0 trap cleanup_101 EXIT - for s in $LPROC/llite/*; do - echo 0 > $s/read_ahead_stats - echo $cache_limit > $s/max_cached_mb - done + lctl set_param -n llite.*.read_ahead_stats 0 + lctl set_param -n llite.*.max_cached_mb $cache_limit # # randomly read 10000 of 64K chunks from file 3x 32MB in size # echo "nreads: $nreads file size: $((cache_limit * 3))MB" - $RANDOM_READS -f $DIR/$tfile -s$((cache_limit * 3192 * 1024)) -b65536 -C -n$nreads -t 180 + $READS -f $DIR/$tfile -s$((cache_limit * 3192 * 1024)) -b65536 -C -n$nreads -t 180 discard=0 - for s in $LPROC/llite/*; do - discard=$(($discard + $(cat $s/read_ahead_stats | get_named_value 'read but discarded'))) + for s in `lctl get_param -n llite.*.read_ahead_stats | \ + get_named_value 'read but discarded' | cut -d" " -f1`; do + discard=$(($discard + $s)) done cleanup_101 if [ $(($discard * 10)) -gt $nreads ] ;then - for s in $LPROC/osc/*-osc*/rpc_stats; do - echo $s; cat $s - done - for s in $LPROC/llite/*/read_ahead_stats; do - echo $s; cat $s - done - error "too many ($discard) discarded pages" + lctl get_param osc.*-osc*.rpc_stats + lctl get_param llite.*.read_ahead_stats + error "too many ($discard) discarded pages" fi rm -f $DIR/$tfile || true } run_test 101 "check read-ahead for random reads ================" +export SETUP_TEST101=no +setup_test101() { + [ "$SETUP_TEST101" = "yes" ] && return + mkdir -p $DIR/$tdir + STRIPE_SIZE=1048576 + STRIPE_COUNT=$OSTCOUNT + STRIPE_OFFSET=0 + + trap cleanup_test101 EXIT + # prepare the read-ahead file + 
$SETSTRIPE $DIR/$tfile -s $STRIPE_SIZE -i $STRIPE_OFFSET -c $OSTCOUNT + + dd if=/dev/zero of=$DIR/$tfile bs=1024k count=100 2> /dev/null + SETUP_TEST101=yes +} + +cleanup_test101() { + [ "$SETUP_TEST101" = "yes" ] || return + trap 0 + rm -rf $DIR/$tdir + rm -f $DIR/$tfile + SETUP_TEST101=no +} + +calc_total() { + awk 'BEGIN{total=0}; {total+=$1}; END{print total}' +} + +ra_check_101() { + local READ_SIZE=$1 + local STRIPE_SIZE=1048576 + local RA_INC=1048576 + local STRIDE_LENGTH=$((STRIPE_SIZE/READ_SIZE)) + local FILE_LENGTH=$((64*100)) + local discard_limit=$((((STRIDE_LENGTH - 1)*3/(STRIDE_LENGTH*OSTCOUNT))* \ + (STRIDE_LENGTH*OSTCOUNT - STRIDE_LENGTH))) + DISCARD=`$LCTL get_param -n llite.*.read_ahead_stats | \ + get_named_value 'read but discarded' | \ + cut -d" " -f1 | calc_total` + + if [ $DISCARD -gt $discard_limit ]; then + lctl get_param llite.*.read_ahead_stats + error "Too many ($DISCARD) discarded pages with size (${READ_SIZE})" + else + echo "Read-ahead success for size ${READ_SIZE}" + fi +} + +test_101b() { + [ "$OSTCOUNT" -lt "2" ] && skip "skipping stride IO stride-ahead test" && return + local STRIPE_SIZE=1048576 + local STRIDE_SIZE=$((STRIPE_SIZE*OSTCOUNT)) + local FILE_LENGTH=$((STRIPE_SIZE*100)) + local ITERATION=$((FILE_LENGTH/STRIDE_SIZE)) + # prepare the read-ahead file + setup_test101 + cancel_lru_locks osc + for BIDX in 2 4 8 16 32 64 128 256 + do + local BSIZE=$((BIDX*4096)) + local READ_COUNT=$((STRIPE_SIZE/BSIZE)) + local STRIDE_LENGTH=$((STRIDE_SIZE/BSIZE)) + local OFFSET=$((STRIPE_SIZE/BSIZE*(OSTCOUNT - 1))) + $LCTL set_param -n llite.*.read_ahead_stats 0 + $READS -f $DIR/$tfile -l $STRIDE_LENGTH -o $OFFSET \ + -s $FILE_LENGTH -b $STRIPE_SIZE -a $READ_COUNT -n $ITERATION + cancel_lru_locks osc + ra_check_101 $BSIZE + done + cleanup_test101 + true +} +run_test 101b "check stride-io mode read-ahead =================" + export SETUP_TEST102=no setup_test102() { [ "$SETUP_TEST102" = "yes" ] && return mkdir -p $DIR/$tdir STRIPE_SIZE=65536 - 
STRIPE_COUNT=4 + STRIPE_COUNT=4 STRIPE_OFFSET=2 trap cleanup_test102 EXIT cd $DIR $SETSTRIPE $tdir -s $STRIPE_SIZE -i $STRIPE_OFFSET -c $STRIPE_COUNT - cd $DIR/$tdir + cd $DIR/$tdir for num in 1 2 3 4 do for count in 1 2 3 4 do - for offset in 0 1 2 3 + for offset in 0 1 2 3 do local stripe_size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" @@ -3624,7 +3878,7 @@ setup_test102() { done cd $DIR - star -c f=$TMP/f102.tar $tdir + star -c f=$TMP/f102.tar $tdir SETUP_TEST102=yes } @@ -3643,7 +3897,7 @@ test_102a() { touch $testfile [ "$UID" != 0 ] && skip "must run as root" && return - [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && skip "must have user_xattr" && return + [ -z "`lctl get_param -n mdc.*-mdc-*.connect_flags | grep xattr`" ] && skip "must have user_xattr" && return [ -z "$(which setfattr 2>/dev/null)" ] && skip "could not find setfattr" && return @@ -3651,7 +3905,7 @@ test_102a() { setfattr -n trusted.name1 -v value1 $testfile || error [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \ grep "trusted.name1"`" == "trusted.name1=\"value1\"" ] || error - + setfattr -n user.author1 -v author1 $testfile || error [ "`getfattr -n user.author1 $testfile 2> /dev/null | \ grep "user.author1"`" == "user.author1=\"author1\"" ] || error @@ -3662,7 +3916,7 @@ test_102a() { [ `getfattr -d -m "^trusted" $testfile 2> /dev/null | \ grep "trusted.name" | wc -l` -eq 3 ] || error - + setfattr -n user.author2 -v author2 $testfile || error setfattr -n user.author3 -v author3 $testfile || error [ `getfattr -d -m "^user" $testfile 2> /dev/null | \ @@ -3706,6 +3960,7 @@ test_102b() { local stripe_count=`grep "count" $tmp_file| awk '{print $2}'` [ "$stripe_size" -eq 65536 ] || error "stripe size $stripe_size != 65536" [ "$stripe_count" -eq 2 ] || error "stripe count $stripe_count != 2" + rm -f $DIR/$tfile } run_test 102b "getfattr/setfattr for trusted.lov EAs ============" @@ -3735,42 +3990,25 @@ test_102c() { } run_test 102c "non-root 
getfattr/setfattr for lustre.lov EAs ===========" -get_stripe_info() { - stripe_size=0 - stripe_count=0 - stripe_offset=0 - local lines=`sed -n '/obdidx/=' $1` - stripe_size=`awk '{if($1~/size/) print $2}' $1` - stripe_count=`awk '{if($1~/count/) print $2}' $1` - lines=`expr $lines + 1` - stripe_offset=`sed -n ${lines}p $1 |awk '{print $1}'` -} - compare_stripe_info1() { for num in 1 2 3 4 do for count in 1 2 3 4 do - for offset in 0 1 2 3 + for offset in 0 1 2 3 do local size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" - local tmp_file=out - $GETSTRIPE -v $file > $tmp_file - get_stripe_info $tmp_file - if test $stripe_size -ne $size - then + get_stripe_info client $file + if [ $stripe_size -ne $size ]; then error "$file: different stripe size" && return fi - if test $stripe_count -ne $count - then + if [ $stripe_count -ne $count ]; then error "$file: different stripe count" && return fi - if test $stripe_offset -ne 0 - then + if [ $stripe_index -ne 0 ]; then error "$file: different stripe offset" && return fi - rm -f $tmp_file done done done @@ -3781,26 +4019,20 @@ compare_stripe_info2() { do for count in 1 2 3 4 do - for offset in 0 1 2 3 + for offset in 0 1 2 3 do local size=`expr $STRIPE_SIZE \* $num` local file=file"$num-$offset-$count" - local tmp_file=out - $GETSTRIPE -v $file > $tmp_file - get_stripe_info $tmp_file - if test $stripe_size -ne $size - then + get_stripe_info client $file + if [ $stripe_size -ne $size ]; then error "$file: different stripe size" && return fi - if test $stripe_count -ne $count - then + if [ $stripe_count -ne $count ]; then error "$file: different stripe count" && return fi - if test $stripe_offset -ne $offset - then + if [ $stripe_index -ne $offset ]; then error "$file: different stripe offset" && return fi - rm -f $tmp_file done done done @@ -3808,7 +4040,7 @@ compare_stripe_info2() { test_102d() { # b10930: star test for trusted.lov xattr - star --xhelp 2>&1 | grep -q nolustre + star --xhelp 2>&1 | grep -q 
nolustre if [ $? -ne 0 ] then skip "being skipped because a lustre-aware star is not installed." && return @@ -3825,7 +4057,7 @@ run_test 102d "star restore stripe info from tarfile,not keep osts ===========" test_102e() { # b10930: star test for trusted.lov xattr - star --xhelp 2>&1 | grep -q nolustre + star --xhelp 2>&1 | grep -q nolustre [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 @@ -3838,7 +4070,7 @@ run_test 102e "star restore stripe info from tarfile, keep osts ===========" test_102f() { # b10930: star test for trusted.lov xattr - star --xhelp 2>&1 | grep -q nolustre + star --xhelp 2>&1 | grep -q nolustre [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 @@ -3852,7 +4084,7 @@ run_test 102f "star copy files, not keep osts ===========" test_102g() { # b10930: star test for trusted.lov xattr - star --xhelp 2>&1 | grep -q nolustre + star --xhelp 2>&1 | grep -q nolustre [ $? 
-ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 @@ -3865,6 +4097,60 @@ test_102g() { } run_test 102g "star copy files, keep osts ===========" +test_102h() { # bug 15777 + [ -z $(lctl get_param -n mdc.*.connect_flags | grep xattr) ] && + skip "must have user_xattr" && return + [ -z "$(which setfattr 2>/dev/null)" ] && + skip "could not find setfattr" && return + + XBIG=trusted.big + XSIZE=1024 + touch $DIR/$tfile + VALUE=datadatadatadatadatadatadatadata + while [ $(echo $VALUE | wc -c) -lt $XSIZE ]; do + VALUE="$VALUE$VALUE" + done + log "save $XBIG on $DIR/$tfile" + setfattr -n $XBIG -v "$VALUE" $DIR/$tfile || + error "saving $XBIG on $DIR/$tfile failed" + ORIG=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + OSIZE=$(echo $ORIG | wc -c) + [ $OSIZE -lt $XSIZE ] && error "set $XBIG too small ($OSIZE < $XSIZE)" + + XSML=trusted.sml + log "save $XSML on $DIR/$tfile" + setfattr -n $XSML -v val $DIR/$tfile || + error "saving $XSML on $DIR/$tfile failed" + NEW=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + if [ "$NEW" != "$ORIG" ]; then + log "orig: $ORIG" + log "new: $NEW" + error "$XBIG different after saving $XSML" + fi + + log "grow $XSML on $DIR/$tfile" + setfattr -n $XSML -v "$VALUE" $DIR/$tfile || + error "growing $XSML on $DIR/$tfile failed" + NEW=$(getfattr -n $XBIG $DIR/$tfile 2> /dev/null | grep $XBIG) + if [ "$NEW" != "$ORIG" ]; then + log "orig: $ORIG" + log "new: $NEW" + error "$XBIG different after growing $XSML" + fi + log "$XBIG still valid after growing $XSML" + rm -f $file +} +run_test 102h "grow xattr from inside inode to external block" + +test_102i() { # bug 17038 + touch $DIR/$tfile + ln -s $DIR/$tfile $DIR/${tfile}link + getfattr -n trusted.lov $DIR/$tfile || error "lgetxattr on $DIR/$tfile failed" + getfattr -h -n trusted.lov $DIR/${tfile}link 2>&1 | grep -i "no such attr" || error "error for lgetxattr on $DIR/${tfile}link is 
not ENODATA" + rm -f $DIR/$tfile $DIR/${tfile}link +} +run_test 102i "lgetxattr test on symbolic link ============" + run_acl_subtest() { $LUSTRE/tests/acl/run $LUSTRE/tests/acl/$1.test @@ -3873,7 +4159,7 @@ run_acl_subtest() test_103 () { [ "$UID" != 0 ] && skip "must run as root" && return - [ -z "$(grep acl $LPROC/mdc/*-mdc-*/connect_flags)" ] && skip "must have acl enabled" && return + [ -z "$(lctl get_param -n mdc.*-mdc-*.connect_flags | grep acl)" ] && skip "must have acl enabled" && return [ -z "$(which setfacl 2>/dev/null)" ] && skip "could not find setfacl" && return $GSS && skip "could not run under gss" && return @@ -3925,11 +4211,12 @@ test_104() { lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" - OSC=`awk '/-osc-/ {print $4}' $LPROC/devices | head -n 1` + OSC=`lctl get_param -n devices | awk '/-osc-/ {print $4}' | head -n 1` lctl --device %$OSC deactivate lfs df || error "lfs df with deactivated OSC failed" lctl --device %$OSC recover lfs df || error "lfs df with reactivated OSC failed" + rm -f $DIR/$tfile } run_test 104 "lfs df [-ih] [path] test =========================" @@ -3938,10 +4225,11 @@ test_105a() { touch $DIR/$tfile if [ -n "`mount | grep \"$DIR.*flock\" | grep -v noflock`" ]; then - flocks_test on -f $DIR/$tfile || error "fail flock on" + flocks_test 1 on -f $DIR/$tfile || error "fail flock on" else - flocks_test off -f $DIR/$tfile || error "fail flock off" + flocks_test 1 off -f $DIR/$tfile || error "fail flock off" fi + rm -f $DIR/$tfile } run_test 105a "flock when mounted without -o flock test ========" @@ -3949,10 +4237,11 @@ test_105b() { touch $DIR/$tfile if [ -n "`mount | grep \"$DIR.*flock\" | grep -v noflock`" ]; then - flocks_test on -c $DIR/$tfile || error "fail flock on" + flocks_test 1 on -c $DIR/$tfile || error "fail flock on" else - flocks_test off -c $DIR/$tfile || error "fail flock off" + flocks_test 1 off -c $DIR/$tfile || error "fail flock off" fi + 
rm -f $DIR/$tfile } run_test 105b "fcntl when mounted without -o flock test ========" @@ -3960,14 +4249,26 @@ test_105c() { touch $DIR/$tfile if [ -n "`mount | grep \"$DIR.*flock\" | grep -v noflock`" ]; then - flocks_test on -l $DIR/$tfile || error "fail flock on" + flocks_test 1 on -l $DIR/$tfile || error "fail flock on" else - flocks_test off -l $DIR/$tfile || error "fail flock off" + flocks_test 1 off -l $DIR/$tfile || error "fail flock off" fi + rm -f $DIR/$tfile } run_test 105c "lockf when mounted without -o flock test ========" +test_105d() { # bug 15924 + mkdir -p $DIR/$tdir + [ -z "`mount | grep \"$DIR.*flock\" | grep -v noflock`" ] && \ + skip "mount w/o flock enabled" && return + #define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 + $LCTL set_param fail_loc=0x80000315 + flocks_test 2 $DIR/$tdir +} +run_test 105d "flock race (should not freeze) ========" + test_106() { #bug 10921 + mkdir -p $DIR/$tdir $DIR/$tdir && error "exec $DIR/$tdir succeeded" chmod 777 $DIR/$tdir || error "chmod $DIR/$tdir failed" } @@ -3976,14 +4277,19 @@ run_test 106 "attempt exec of dir followed by chown of that dir" test_107() { CDIR=`pwd` cd $DIR + + local file=core + rm -f $file + + local save_pattern=$(sysctl -n kernel.core_pattern) + local save_uses_pid=$(sysctl -n kernel.core_uses_pid) + sysctl -w kernel.core_pattern=$file + sysctl -w kernel.core_uses_pid=0 + ulimit -c unlimited sleep 60 & SLEEPPID=$! 
- file=`cat /proc/sys/kernel/core_pattern` - core_pid=`cat /proc/sys/kernel/core_uses_pid` - [ $core_pid -eq 1 ] && file=$file.$SLEEPPID - rm -f $file sleep 1 kill -s 11 $SLEEPPID @@ -3995,6 +4301,8 @@ test_107() { error "Fail to create core file $file" fi rm -f $file + sysctl -w kernel.core_pattern=$save_pattern + sysctl -w kernel.core_uses_pid=$save_uses_pid cd $CDIR } run_test 107 "Coredump on SIG" @@ -4007,6 +4315,7 @@ test_110() { touch $DIR/d110/yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy && error ""create with 256 char should fail, but not ls -l $DIR/d110 + rm -fr $DIR/d110 } run_test 110 "filename length checking" @@ -4030,7 +4339,7 @@ test_115() { # don't return an error [ $OSTIO_post -eq $OSTIO_pre ] && echo \ - "FAIL: No addition ll_ost_io threads were created ($OSTIO_pre)" &&\ + "WARNING: No new ll_ost_io threads were created ($OSTIO_pre)" &&\ echo "This may be fine, depending on what ran before this test" &&\ echo "and how fast this system is." 
&& return @@ -4040,7 +4349,7 @@ run_test 115 "verify dynamic thread creation====================" free_min_max () { wait_delete_completed - AVAIL=($(cat $LPROC/osc/*[oO][sS][cC]-[^M]*/kbytesavail)) + AVAIL=($(lctl get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail)) echo OST kbytes available: ${AVAIL[@]} MAXI=0; MAXV=${AVAIL[0]} MINI=0; MINV=${AVAIL[0]} @@ -4053,17 +4362,16 @@ free_min_max () { MINV=${AVAIL[i]}; MINI=$i fi done - echo Min free space: OST $MINI: $MINV - echo Max free space: OST $MAXI: $MAXV + echo Min free space: OST $MINI: $MINV + echo Max free space: OST $MAXI: $MAXV } test_116() { [ "$OSTCOUNT" -lt "2" ] && skip "$OSTCOUNT < 2 OSTs" && return - remote_mds && skip "remote MDS" && return echo -n "Free space priority " - cat $LPROC/lov/*-clilov-*/qos_prio_free - DELAY=$(cat $LPROC/lov/*-clilov-*/qos_maxage | head -1 | awk '{print $1}') + lctl get_param -n lov.*-clilov-*.qos_prio_free + DELAY=$(lctl get_param -n lov.*-clilov-*.qos_maxage | head -1 | awk '{print $1}') declare -a AVAIL free_min_max [ $MINV -gt 960000 ] && skip "too much free space in OST$MINI, skip" &&\ @@ -4094,7 +4402,7 @@ test_116() { echo "ok" else echo "failed - QOS mode won't be used" - error "QOS imbalance criteria not met" + error_ignore "QOS imbalance criteria not met" return fi @@ -4119,24 +4427,26 @@ test_116() { free_min_max DIFF2=$(($MAXV - $MINV)) echo "free space delta: orig $DIFF final $DIFF2" - [ $DIFF2 -gt $DIFF ] && echo "delta got worse!" + [ $DIFF2 -gt $DIFF ] && echo "delta got worse!" 
DIFF=$(($MINV1 - ${AVAIL[$MINI1]})) echo "Wrote $DIFF to smaller OST $MINI1" DIFF2=$(($MAXV1 - ${AVAIL[$MAXI1]})) echo "Wrote $DIFF2 to larger OST $MAXI1" [ $DIFF -gt 0 ] && echo "Wrote $(($DIFF2 * 100 / $DIFF - 100))% more data to larger OST $MAXI1" - # Figure out which files were written where - UUID=$(awk '/'$MINI1': / {print $2; exit}' $LPROC/lov/${FSNAME}-clilov-*/target_obd) + # Figure out which files were written where + UUID=$(lctl get_param -n lov.${FSNAME}-clilov-*.target_obd | + awk '/'$MINI1': / {print $2; exit}') echo $UUID MINC=$($GETSTRIPE --obd $UUID $DIR/$tdir | wc -l) echo "$MINC files created on smaller OST $MINI1" - UUID=$(awk '/'$MAXI1': / {print $2; exit}' $LPROC/lov/${FSNAME}-clilov-*/target_obd) + UUID=$(lctl get_param -n lov.${FSNAME}-clilov-*.target_obd | + awk '/'$MAXI1': / {print $2; exit}') echo $UUID MAXC=$($GETSTRIPE --obd $UUID $DIR/$tdir | wc -l) echo "$MAXC files created on larger OST $MAXI1" [ $MINC -gt 0 ] && echo "Wrote $(($MAXC * 100 / $MINC - 100))% more files to larger OST $MAXI1" - [ $MAXC -gt $MINC ] || error "stripe QOS didn't balance free space" + [ $MAXC -gt $MINC ] || error_ignore "stripe QOS didn't balance free space" } run_test 116 "stripe QOS: free space balance ===================" @@ -4144,13 +4454,24 @@ test_117() # bug 10891 { dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 #define OBD_FAIL_OST_SETATTR_CREDITS 0x21e - sysctl -w lustre.fail_loc=0x21e + lctl set_param fail_loc=0x21e > $DIR/$tfile || error "truncate failed" - sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 echo "Truncate succeeded." 
+ rm -f $DIR/$tfile } run_test 117 "verify fsfilt_extend ==========" +export OLD_RESENDCOUNT="" +set_resend_count () { + local PROC_RESENDCOUNT="osc.${FSNAME}-OST*-osc-*.resend_count" + OLD_RESENDCOUNT=$(lctl get_param -n $PROC_RESENDCOUNT | head -1) + lctl set_param -n $PROC_RESENDCOUNT $1 + echo resend_count is set to $(lctl get_param -n $PROC_RESENDCOUNT) +} + +[ "$SLOW" = "no" ] && set_resend_count 4 # for reduce test_118* time (bug 14842) + # Reset async IO behavior after error case reset_async() { FILE=$DIR/reset_async @@ -4167,29 +4488,31 @@ test_118a() #bug 11710 reset_async multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" return 1; fi + rm -f $DIR/$tfile } run_test 118a "verify O_SYNC works ==========" test_118b() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_ENOENT 0x217 - do_facet ost sysctl -w lustre.fail_loc=0x217 + set_nodes_failloc "$(osts_nodes)" 0x217 multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? 
- do_facet ost sysctl -w lustre.fail_loc=0 - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + set_nodes_failloc "$(osts_nodes)" 0 + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" @@ -4214,12 +4537,12 @@ run_test 118b "Reclaim dirty pages on fatal error ==========" test_118c() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_EROFS 0x216 - do_facet ost sysctl -w lustre.fail_loc=0x216 + set_nodes_failloc "$(osts_nodes)" 0x216 # multiop should block due to fsync until pages are written multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & @@ -4230,20 +4553,22 @@ test_118c() error "Multiop failed to block on fsync, pid=$MULTIPID" fi - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $WRITEBACK -eq 0 ]]; then error "No page in writeback, writeback=$WRITEBACK" fi - do_facet ost sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 wait $MULTIPID RC=$? 
if [[ $RC -ne 0 ]]; then error "Multiop fsync failed, rc=$RC" fi - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4256,12 +4581,12 @@ run_test 118c "Fsync blocks on EROFS until dirty pages are flushed ==========" test_118d() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_BRW_PAUSE_BULK - do_facet ost sysctl -w lustre.fail_loc=0x214 + set_nodes_failloc "$(osts_nodes)" 0x214 # multiop should block due to fsync until pages are written multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & MULTIPID=$! @@ -4271,16 +4596,18 @@ test_118d() error "Multiop failed to block on fsync, pid=$MULTIPID" fi - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $WRITEBACK -eq 0 ]]; then error "No page in writeback, writeback=$WRITEBACK" fi wait $MULTIPID || error "Multiop fsync failed, rc=$?" 
- do_facet ost sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4295,7 +4622,7 @@ test_118f() { reset_async #define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a - sysctl -w lustre.fail_loc=0x8000040a + lctl set_param fail_loc=0x8000040a # Should simulate EINVAL error which is fatal multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c @@ -4304,11 +4631,12 @@ test_118f() { error "Must return error due to dropped pages, rc=$RC" fi - sysctl -w lustre.fail_loc=0x0 - - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + lctl set_param fail_loc=0x0 + + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4326,23 +4654,24 @@ test_118f() { run_test 118f "Simulate unrecoverable OSC side error ==========" test_118g() { - reset_async + reset_async #define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 - sysctl -w lustre.fail_loc=0x406 + lctl set_param fail_loc=0x406 # simulate local -ENOMEM - multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c - RC=$? + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c + RC=$? 
- sysctl -w lustre.fail_loc=0 + lctl set_param fail_loc=0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4354,30 +4683,31 @@ test_118g() { rm -f $DIR/$tfile echo "No pages locked after fsync" - reset_async + reset_async return 0 } run_test 118g "Don't stay in wait if we got local -ENOMEM ==========" test_118h() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e - do_facet ost sysctl -w lustre.fail_loc=0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? 
- do_facet ost sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | + grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4394,18 +4724,18 @@ test_118h() { run_test 118h "Verify timeout in handling recoverables errors ==========" test_118i() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e - do_facet ost sysctl -w lustre.fail_loc=0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & PID=$! sleep 5 - do_facet ost sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 wait $PID RC=$? 
@@ -4413,9 +4743,9 @@ test_118i() { error "got error, but should be not, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4432,24 +4762,24 @@ test_118i() { run_test 118i "Fix error before timeout in recoverable error ==========" test_118j() { - remote_ost_nodsh && skip "remote OST" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 - do_facet ost sysctl -w lustre.fail_loc=0x220 + set_nodes_failloc "$(osts_nodes)" 0x220 # return -EIO from OST multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? 
- do_facet ost sysctl -w lustre.fail_loc=0x0 + set_nodes_failloc "$(osts_nodes)" 0x0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - LOCKED=$(grep -c locked $LPROC/llite/*/dump_page_cache) - DIRTY=$(grep -c dirty $LPROC/llite/*/dump_page_cache) - WRITEBACK=$(grep -c writeback $LPROC/llite/*/dump_page_cache) + LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) + DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) + WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi @@ -4468,8 +4798,10 @@ run_test 118j "Simulate unrecoverable OST side error ==========" test_118k() { + remote_ost_nodsh && skip "remote OSTs with nodsh" && return + #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e - do_facet ost sysctl -w lustre.fail_loc=0x20e + set_nodes_failloc "$(osts_nodes)" 0x20e mkdir -p $DIR/$tdir for ((i=0;i<10;i++)); do @@ -4481,10 +4813,12 @@ test_118k() wait $SLEEPPID done - sysctl -w lustre.fail_loc=0 + set_nodes_failloc "$(osts_nodes)" 0 } run_test 118k "bio alloc -ENOMEM and IO TERM handling =========" +[ "$SLOW" = "no" ] && [ -n "$OLD_RESENDCOUNT" ] && set_resend_count $OLD_RESENDCOUNT + test_119a() # bug 11737 { BSIZE=$((512 * 1024)) @@ -4523,170 +4857,147 @@ test_119c() # bug 13099 } run_test 119c "Testing for direct read hitting hole" -LDLM_POOL_CTL_RECALC=1 -LDLM_POOL_CTL_SHRINK=2 - -disable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool_shrink() { - for NSD in 
$LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool() { - disable_pool_shrink $1 - disable_pool_recalc $1 -} - -enable_pool() { - enable_pool_shrink $1 - enable_pool_recalc $1 -} - -lru_resize_enable() -{ - enable_pool osc - enable_pool "filter-$FSNAME" - enable_pool mdc - enable_pool "mds-$FSNAME" -} - -lru_resize_disable() +test_119d() # bug 15950 { - disable_pool osc - disable_pool "filter-$FSNAME" - disable_pool mdc - disable_pool "mds-$FSNAME" + MAX_RPCS_IN_FLIGHT=`$LCTL get_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight` + $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight 1 + BSIZE=1048576 + $SETSTRIPE $DIR/$tfile -i 0 -c 1 || error "setstripe failed" + $DIRECTIO write $DIR/$tfile 0 1 $BSIZE || error "first directio failed" + #define OBD_FAIL_OSC_DIO_PAUSE 0x40d + lctl set_param fail_loc=0x40d + $DIRECTIO write $DIR/$tfile 1 4 $BSIZE & + pid_dio=$! + sleep 1 + cat $DIR/$tfile > /dev/null & + lctl set_param fail_loc=0 + pid_reads=$! 
+ wait $pid_dio + log "the DIO writes have completed, now wait for the reads (should not block very long)" + sleep 2 + [ -n "`ps h -p $pid_reads -o comm`" ] && \ + error "the read rpcs have not completed in 2s" + rm -f $DIR/$tfile + $LCTL set_param -n osc.*OST0000-osc-[^mM]*.max_rpcs_in_flight $MAX_RPCS_IN_FLIGHT } +run_test 119d "The DIO path should try to send a new rpc once one is completed" test_120a() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` mkdir $DIR/$tdir/d1 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120a "Early Lock Cancel: mkdir test" test_120b() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` touch $DIR/$tdir/f1 - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120b "Early Lock Cancel: create test" test_120c() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 touch $DIR/$tdir/d1/f1 cancel_lru_locks mdc stat $DIR/$tdir/d1 $DIR/$tdir/d2 $DIR/$tdir/d1/f1 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` ln $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120c "Early Lock Cancel: link test" test_120d() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc touch $DIR/$tdir cancel_lru_locks mdc stat $DIR/$tdir > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` chmod a+x $DIR/$tdir - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120d "Early Lock Cancel: setattr test" test_120e() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + mkdir -p $DIR/$tdir + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc dd if=/dev/zero of=$DIR/$tdir/f1 count=1 cancel_lru_locks mdc cancel_lru_locks osc dd if=$DIR/$tdir/f1 of=/dev/null stat $DIR/$tdir $DIR/$tdir/f1 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` unlink $DIR/$tdir/f1 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120e "Early Lock Cancel: unlink test" test_120f() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + mkdir -p $DIR/$tdir + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 dd if=/dev/zero of=$DIR/$tdir/d1/f1 count=1 dd if=/dev/zero of=$DIR/$tdir/d2/f2 count=1 @@ -4695,21 +5006,27 @@ test_120f() { dd if=$DIR/$tdir/d1/f1 of=/dev/null dd if=$DIR/$tdir/d2/f2 of=/dev/null stat $DIR/$tdir/d1 $DIR/$tdir/d2 $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 > /dev/null - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` mv $DIR/$tdir/d1/f1 $DIR/$tdir/d2/f2 - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." 
- lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120f "Early Lock Cancel: rename test" test_120g() { - [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc count=10000 echo create $count files mkdir -p $DIR/$tdir @@ -4717,220 +5034,297 @@ test_120g() { cancel_lru_locks osc t0=`date +%s` - can0=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk0=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can0=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk0=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` createmany -o $DIR/$tdir/f $count sync - can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk1=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can1=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` t1=`date +%s` echo total: $((can1-can0)) cancels, $((blk1-blk0)) blockings echo rm $count files rm -r $DIR/$tdir sync - can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` - blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` + can2=`lctl get_param -n ldlm.services.ldlm_canceld.stats | + awk '/ldlm_cancel/ {print $2}'` + blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | + awk '/ldlm_bl_callback/ {print $2}'` t2=`date +%s` echo total: $count removes in $((t2-t1)) echo total: $((can2-can1)) cancels, $((blk2-blk1)) blockings sleep 2 # wait for commitment of removal - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120g "Early Lock 
Cancel: performance test" test_121() { #bug #10589 rm -rf $DIR/$tfile - writes=`dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } /out/ {print $1}'` + writes=$(LANG=C dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk -F '+' '/out/ {print $1}') #define OBD_FAIL_LDLM_CANCEL_RACE 0x310 - sysctl -w lustre.fail_loc=0x310 + lctl set_param fail_loc=0x310 cancel_lru_locks osc > /dev/null - reads=`dd if=$DIR/$tfile of=/dev/null 2>&1 | awk 'BEGIN { FS="+" } /in/ {print $1}'` - sysctl -w lustre.fail_loc=0 + reads=$(LANG=C dd if=$DIR/$tfile of=/dev/null 2>&1 | awk -F '+' '/in/ {print $1}') + lctl set_param fail_loc=0 [ "$reads" -eq "$writes" ] || error "read" $reads "blocks, must be" $writes } run_test 121 "read cancel race =========" -cmd_cancel_lru_locks() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - cancel_lru_locks $d +test_123a() { # was test 123, statahead(bug 11401) + SLOWOK=0 + if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then + log "testing on UP system. Performance may be not as good as expected." 
+ SLOWOK=1 + fi + + mkdir -p $DIR/$tdir + rm -rf $DIR/$tdir/* + cancel_lru_locks mdc + cancel_lru_locks osc + error=0 + NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + [ $NUMFREE -gt 100000 ] && NUMFREE=100000 || NUMFREE=$((NUMFREE-1000)) + MULT=10 + for ((i=1, j=0; i<=$NUMFREE; j=$i, i=$((i * MULT)) )); do + createmany -o $DIR/$tdir/$tfile $j $((i - j)) + + lctl get_param -n llite.*.statahead_max | grep '[0-9]' + cancel_lru_locks mdc + cancel_lru_locks osc + stime=`date +%s` + ls -l $DIR/$tdir > /dev/null + etime=`date +%s` + delta_sa=$((etime - stime)) + log "ls $i files with statahead: $delta_sa sec" + lctl get_param -n llite.*.statahead_stats + + max=`lctl get_param -n llite.*.statahead_max | head -n 1` + lctl set_param -n llite.*.statahead_max 0 + lctl get_param llite.*.statahead_max + cancel_lru_locks mdc + cancel_lru_locks osc + stime=`date +%s` + ls -l $DIR/$tdir > /dev/null + etime=`date +%s` + delta=$((etime - stime)) + log "ls $i files without statahead: $delta sec" + + lctl set_param llite.*.statahead_max=$max + if [ $delta_sa -gt $(($delta + 2)) ]; then + log "ls $i files is slower with statahead!" + error=1 fi + + [ $delta -gt 20 ] && break + [ $delta -gt 8 ] && MULT=$((50 / delta)) + [ "$SLOW" = "no" -a $delta -gt 3 ] && break done + log "ls done" + + stime=`date +%s` + rm -r $DIR/$tdir + sync + etime=`date +%s` + delta=$((etime - stime)) + log "rm -r $DIR/$tdir/: $delta seconds" + log "rm done" + lctl get_param -n llite.*.statahead_stats + # wait for commitment of removal + sleep 2 + [ $error -ne 0 -a $SLOWOK -eq 0 ] && error "statahead is slow!" 
+ return 0 } +run_test 123a "verify statahead work" -test_124a() { - [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ - skip "no lru resize on server" && return 0 - cmd_cancel_lru_locks "mdc" - lru_resize_enable +test_123b () { # statahead(bug 15027) + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 1000 + + cancel_lru_locks mdc + cancel_lru_locks osc + +#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803 + lctl set_param fail_loc=0x80000803 + ls -lR $DIR/$tdir > /dev/null + log "ls done" + lctl set_param fail_loc=0x0 + lctl get_param -n llite.*.statahead_stats + rm -r $DIR/$tdir + sync - # we want to test main pool functionality, that is cancel based on SLV - # this is why shrinkers are disabled - disable_pool_shrink "mds-$FSNAME" - disable_pool_shrink mdc +} +run_test 123b "not panic with network error in statahead enqueue (bug 15027)" - NR=2000 +test_124a() { + [ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \ + skip "no lru resize on server" && return 0 + local NR=2000 mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" - # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" - for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done - - NSDIR="" - LRU_SIZE=0 - for d in `find $LPROC/ldlm/namespaces | grep mdc-`; do - if test -f $d/lru_size; then - LRU_SIZE=`cat $d/lru_size` - if test $LRU_SIZE -gt 0; then - log "using $d namespace" - NSDIR=$d - break - fi + createmany -o $DIR/$tdir/f $NR || + error "failed to create $NR files in $DIR/$tdir" + + cancel_lru_locks mdc + ls -l $DIR/$tdir > /dev/null + + local NSDIR="" + local LRU_SIZE=0 + for VALUE in `lctl get_param ldlm.namespaces.*mdc-*.lru_size`; do + local PARAM=`echo ${VALUE[0]} | cut -d "=" -f1` + LRU_SIZE=$(lctl get_param -n $PARAM) + if [ $LRU_SIZE -gt $(default_lru_size) ]; then + NSDIR=$(echo $PARAM | cut -d "." -f1-3) + log "NS=$(basename $NSDIR)" + break fi done - if test -z $NSDIR; then - skip "No cached locks created!" 
- return 0 - fi - - if test $LRU_SIZE -lt 100; then + if [ -z "$NSDIR" -o $LRU_SIZE -lt $(default_lru_size) ]; then skip "Not enough cached locks created!" return 0 fi - log "created $LRU_SIZE lock(s)" - - # we want to sleep 30s to not make test too long - SLEEP=30 - SLEEP_ADD=2 - - # we know that lru resize allows one client to hold $LIMIT locks for 10h - MAX_HRS=10 - - # get the pool limit - LIMIT=`cat $NSDIR/pool/limit` - - # calculate lock volume factor taking into account data set size and the - # rule that number of locks will be getting smaller durring sleep interval - # and we need to additionally enforce LVF to take this into account. - # Use $LRU_SIZE_B here to take into account real number of locks created - # in the case of CMD, LRU_SIZE_B != $NR in most of cases - LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) - LRU_SIZE_B=$LRU_SIZE - log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)" - OLD_LVF=`cat $NSDIR/pool/lock_volume_factor` - echo "$LVF" > $NSDIR/pool/lock_volume_factor - log "sleep for $((SLEEP+SLEEP_ADD))s" - sleep $((SLEEP+SLEEP_ADD)) - echo "$OLD_LVF" > $NSDIR/pool/lock_volume_factor - LRU_SIZE_A=`cat $NSDIR/lru_size` + log "LRU=$LRU_SIZE" + + local SLEEP=30 + + # We know that lru resize allows one client to hold $LIMIT locks + # for 10h. After that locks begin to be killed by client. + local MAX_HRS=10 + local LIMIT=`lctl get_param -n $NSDIR.pool.limit` + + # Make LVF so higher that sleeping for $SLEEP is enough to _start_ + # killing locks. Some time was spent for creating locks. This means + # that up to the moment of sleep finish we must have killed some of + # them (10-100 locks). This depends on how fast ther were created. + # Many of them were touched in almost the same moment and thus will + # be killed in groups. 
+ local LVF=$(($MAX_HRS * 60 * 60 / $SLEEP)) + + # Use $LRU_SIZE_B here to take into account real number of locks + # created in the case of CMD, LRU_SIZE_B != $NR in most of cases + local LRU_SIZE_B=$LRU_SIZE + log "LVF=$LVF" + local OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` + lctl set_param -n $NSDIR.pool.lock_volume_factor $LVF + + # Let's make sure that we really have some margin. Client checks + # cached locks every 10 sec. + SLEEP=$((SLEEP+20)) + log "Sleep ${SLEEP} sec" + local SEC=0 + while ((SEC<$SLEEP)); do + echo -n "..." + sleep 5 + SEC=$((SEC+5)) + LRU_SIZE=`lctl get_param -n $NSDIR/lru_size` + echo -n "$LRU_SIZE" + done + echo "" + lctl set_param -n $NSDIR.pool.lock_volume_factor $OLD_LVF + local LRU_SIZE_A=`lctl get_param -n $NSDIR/lru_size` [ $LRU_SIZE_B -gt $LRU_SIZE_A ] || { - error "No locks dropped in "$((SLEEP+SLEEP_ADD))"s. LRU size: $LRU_SIZE_A" - lru_resize_enable + error "No locks dropped in ${SLEEP}s. LRU size: $LRU_SIZE_A" unlinkmany $DIR/$tdir/f $NR return } - log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in "$((SLEEP+SLEEP_ADD))"s" - lru_resize_enable + log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in ${SLEEP}s" log "unlink $NR files at $DIR/$tdir" unlinkmany $DIR/$tdir/f $NR } run_test 124a "lru resize =======================================" -set_lru_size() { - NS=$1 - SIZE=$2 - test "x$NS" = "x" && NS="mdc" - test "x$SIZE" = "x" && SIZE="0" - test $SIZE -lt 0 && SIZE="0" - test $SIZE -gt 0 && ACTION="disabled" || ACTION="enabled" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru resize $ACTION" - log " lru_size=$SIZE" - echo $SIZE > $d/lru_size - fi - done -} - -get_lru_size() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru_size=$(cat $d/lru_size)" +get_max_pool_limit() +{ + local limit=`lctl get_param -n 
ldlm.namespaces.*-MDT0000-mdc-*.pool.limit` + local max=0 + for l in $limit; do + if test $l -gt $max; then + max=$l fi done + echo $max } test_124b() { - [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ + [ -z "`lctl get_param -n mdc.*.connect_flags | grep lru_resize`" ] && \ skip "no lru resize on server" && return 0 - NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` - LIMIT=`cat $NSDIR/pool/limit` - - NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l) - # 100 locks here is default value for non-shrinkable lru as well - # as the order to switch to static lru managing policy - # define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) - LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU)) + LIMIT=`get_max_pool_limit` - NR=$((LIMIT-(LIMIT/3))) - log "starting lru resize disable cycle" - set_lru_size "mdc-" $LDLM_DEFAULT_LRU_SIZE - - mkdir -p $DIR/$tdir/disable_lru_resize || - error "failed to create $DIR/$tdir/disable_lru_resize" + NR=$(($(default_lru_size)*20)) + if [ $NR -gt $LIMIT ]; then + log "Limit lock number by $LIMIT locks" + NR=$LIMIT + fi + lru_resize_disable mdc + mkdir -p $DIR/$tdir/disable_lru_resize || + error "failed to create $DIR/$tdir/disable_lru_resize" createmany -o $DIR/$tdir/disable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" 
+ wait $PID etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" - get_lru_size "mdc-" - - log "starting lru resize enable cycle" - mkdir -p $DIR/$tdir/enable_lru_resize || - error "failed to create $DIR/$tdir/enable_lru_resize" + log "lru_size = $(lctl get_param -n ldlm.namespaces.*mdc*.lru_size)" + unlinkmany $DIR/$tdir/disable_lru_resize/f $NR - # 0 locks means here flush lru and switch to lru resize policy - set_lru_size "mdc-" 0 + lru_resize_enable mdc + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" createmany -o $DIR/$tdir/enable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" 
+ wait $PID etime=`date +%s` lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" - get_lru_size "mdc-" + log "lru_size = $(lctl get_param -n ldlm.namespaces.*mdc*.lru_size)" - if test $lruresize_delta -gt $nolruresize_delta; then + if [ $lruresize_delta -gt $nolruresize_delta ]; then log "ls -la is $(((lruresize_delta - $nolruresize_delta) * 100 / $nolruresize_delta))% slower with lru resize enabled" - elif test $nolruresize_delta -gt $lruresize_delta; then + elif [ $nolruresize_delta -gt $lruresize_delta ]; then log "ls -la is $(((nolruresize_delta - $lruresize_delta) * 100 / $nolruresize_delta))% faster with lru resize enabled" else log "lru resize performs the same with no lru resize" fi + unlinkmany $DIR/$tdir/enable_lru_resize/f $NR } run_test 124b "lru resize (performance test) =======================" test_125() { # 13358 - [ -z "$(grep acl $LPROC/mdc/*-mdc-*/connect_flags)" ] && skip "must have acl enabled" && return + [ -z "$(lctl get_param -n llite.*.client_type | grep local)" ] && skip "must run as local client" && return + [ -z "$(lctl get_param -n mdc.*-mdc-*.connect_flags | grep acl)" ] && skip "must have acl enabled" && return mkdir -p $DIR/d125 || error "mkdir failed" $SETSTRIPE $DIR/d125 -s 65536 -c -1 || error "setstripe failed" setfacl -R -m u:bin:rwx $DIR/d125 || error "setfacl $DIR/d125 failed" @@ -4939,7 +5333,10 @@ test_125() { # 13358 run_test 125 "don't return EPROTO when a dir has a non-default striping and ACLs" test_126() { # bug 12829/13455 + [ -z "$(lctl get_param -n llite.*.client_type | grep local)" ] && skip "must run as local client" && return [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return + $GSS && skip "must run as gss disabled" && return + $RUNAS -u 0 -g 1 touch $DIR/$tfile || error "touch failed" gid=`ls -n $DIR/$tfile | awk '{print $4}'` rm -f $DIR/$tfile @@ -4947,6 +5344,656 @@ test_126() { # bug 12829/13455 } run_test 126 "check that the fsgid provided by the client is 
taken into account"
+# Verify that the client OSC stats are sane after writing and reading back a
+# single 2MB file striped on OST index 0 (bug 15521).
+# Every stats line must carry a min value; for read_bytes/write_bytes the
+# min, max, sum and sum-of-squares are range-checked against the file size.
+test_127() { # bug 15521
+	# Keep the parsed fields local: NAME in particular is also the exported
+	# configuration name and must not be overwritten by the read loop.
+	local NAME COUNT SAMP UNIT MIN MAX SUM SUMSQ
+	# Start empty so that stale values cannot satisfy the final checks.
+	local read_bytes= write_bytes=
+
+	$LSTRIPE -i 0 -c 1 $DIR/$tfile
+	$LCTL set_param osc.*.stats=0
+	FSIZE=$((2048 * 1024))
+	dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1
+	cancel_lru_locks osc
+	dd if=$DIR/$tfile of=/dev/null bs=$FSIZE
+
+	$LCTL get_param osc.*0000-osc-*.stats | grep samples > $DIR/${tfile}.tmp
+	while read NAME COUNT SAMP UNIT MIN MAX SUM SUMSQ; do
+		echo "got $COUNT $NAME"
+		[ -z "$MIN" ] && error "Missing min value for $NAME proc entry"
+		# Record the sample count in a variable named after the stat.
+		eval $NAME=$COUNT || error "Wrong proc format"
+
+		case $NAME in
+		read_bytes|write_bytes)
+			[ $MIN -lt 4096 ] && error "min is too small: $MIN"
+			[ $MIN -gt $FSIZE ] && error "min is too big: $MIN"
+			[ $MAX -lt 4096 ] && error "max is too small: $MAX"
+			[ $MAX -gt $FSIZE ] && error "max is too big: $MAX"
+			[ $SUM -ne $FSIZE ] && error "sum is wrong: $SUM"
+			[ $SUMSQ -lt $(((FSIZE /4096) * (4096 * 4096))) ] &&
+				error "sumsquare is too small: $SUMSQ"
+			[ $SUMSQ -gt $((FSIZE * FSIZE)) ] &&
+				error "sumsquare is too big: $SUMSQ"
+			;;
+		*) ;;
+		esac
+	done < $DIR/${tfile}.tmp
+
+	#check that we actually got some stats
+	[ "$read_bytes" ] || error "Missing read_bytes stats"
+	[ "$write_bytes" ] || error "Missing write_bytes stats"
+	[ "$read_bytes" != 0 ] || error "no read done"
+	[ "$write_bytes" != 0 ] || error "no write done"
+}
+run_test 127 "verify the client stats are sane"
+
+# Feed two consecutive "find" commands to one lfs session on stdin and fail
+# if the captured output contains "error" (bug 15212).
+test_128() { # bug 15212
+	touch $DIR/$tfile
+	# The here-document is flush-left ("<<", not "<<-") so that it does not
+	# depend on leading tabs surviving whitespace changes to this file.
+	$LFS 2>&1 <<EOF | tee $TMP/$tfile.log
+find $DIR/$tfile
+find $DIR/$tfile
+EOF
+
+	result=$(grep error $TMP/$tfile.log)
+	rm -f $DIR/$tfile
+	[ -z "$result" ] || error "consecutive find's under interactive lfs failed"
+}
+run_test 128 "interactive lfs for 2 consecutive find's"
+
+# Write $1 to max_dir_size under /proc/fs/ldiskfs for the device backing each
+# MDT on each node returned by mdts_nodes.
+# test_129 passes 0 when it is done - presumably "no limit"; TODO confirm.
+set_dir_limits () {
+	local mntdev
+	local node
+	local devs
+	local dev
+
+	local LDPROC=/proc/fs/ldiskfs
+
+	for node in $(mdts_nodes); do
+		devs=$(do_node $node "lctl get_param -n devices" | awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }')
+		for dev in $devs; do
+			mntdev=$(do_node $node "lctl get_param -n osd.$dev.mntdev")
+			do_node $node "echo $1 >$LDPROC/\\\$(basename $mntdev)/max_dir_size"
+		done
+	done
+}
+# Limit the directory size to MAX, then create files in one directory until a
+# create returns EFBIG.  Growing past MAX * MDSCOUNT bytes without EFBIG, or
+# any other non-zero return code, is a failure.
+test_129() {
+	[ "$FSTYPE" != "ldiskfs" ] && skip "not needed for FSTYPE=$FSTYPE" && return 0
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
+	EFBIG=27
+	MAX=16384
+
+	set_dir_limits $MAX
+
+	mkdir -p $DIR/$tdir
+
+	I=0
+	J=0
+	while [ ! $I -gt $((MAX * MDSCOUNT)) ]; do
+		multiop $DIR/$tdir/$J Oc
+		rc=$?
+		if [ $rc -eq $EFBIG ]; then
+			set_dir_limits 0
+			echo "return code $rc received as expected"
+			return 0
+		elif [ $rc -ne 0 ]; then
+			set_dir_limits 0
+			error_exit "return code $rc received instead of expected $EFBIG"
+		fi
+		J=$((J+1))
+		I=$(stat -c%s "$DIR/$tdir")
+	done
+
+	# Reset the limit before reporting the failure, as the other exit paths
+	# do.  LDPROC is local to set_dir_limits, so it cannot be used here.
+	set_dir_limits 0
+	error "exceeded dir size limit $MAX x $MDSCOUNT $((MAX * MDSCOUNT)) : $I bytes"
+}
+run_test 129 "test directory size limit ========================"
+
+# FIEMAP on a 1-stripe file: write one 64KB extent and check that every
+# extent reported by filefrag is on the file's OST, that the first extent
+# starts at logical block 0 and that the extent lengths add up to 64.
+test_130a() {
+	filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+	[ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return
+
+	local fm_file=$DIR/$tfile
+	lfs setstripe -s 65536 -c 1 $fm_file || error "setstripe failed on $fm_file"
+	dd if=/dev/zero of=$fm_file bs=65536 count=1 || error "dd failed for $fm_file"
+
+	filefrag -ves $fm_file || error "filefrag $fm_file failed"
+	filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+	lun=`$GETSTRIPE $fm_file | grep -A 10 obdidx | awk '{print $1}' | grep -v "obdidx"`
+
+	start_blk=`echo $filefrag_op | cut -d: -f2 | cut -d. -f1`
+	# Split the filefrag output on newlines only; "local" keeps the changed
+	# IFS from leaking into the tests that run after this one.
+	local IFS=$'\n'
+	tot_len=0
+	for line in $filefrag_op
+	do
+		frag_lun=`echo $line | cut -d: -f5`
+		ext_len=`echo $line | cut -d: -f4`
+		if (( $frag_lun != $lun )); then
+			error "FIEMAP on 1-stripe file($fm_file) failed"
+			return
+		fi
+		(( tot_len += ext_len ))
+	done
+
+	if (( lun != frag_lun || start_blk != 0 || tot_len != 64 )); then
+		error "FIEMAP on 1-stripe file($fm_file) failed;"
+		return
+	fi
+	echo "FIEMAP on single striped file succeeded"
+}
+run_test 130a "FIEMAP (1-stripe file)"
+
+# FIEMAP on a 2-stripe file: write 2MB and check that filefrag reports
+# exactly two OSTs, each with extents adding up to 1024.
+test_130b() {
+	[ "$OSTCOUNT" -lt "2" ] && skip "skipping FIEMAP on 2-stripe file test" && return
+
+	filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+	[ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return
+
+	local fm_file=$DIR/$tfile
+	lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+	dd if=/dev/zero of=$fm_file bs=1M count=2 || error "dd failed on $fm_file"
+
+	filefrag -ves $fm_file || error "filefrag $fm_file failed"
+	filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+	last_lun=`echo $filefrag_op | cut -d: -f5`
+
+	# Newline-only splitting, scoped to this function (see test_130a).
+	local IFS=$'\n'
+	tot_len=0
+	num_luns=1
+	for line in $filefrag_op
+	do
+		frag_lun=`echo $line | cut -d: -f5`
+		ext_len=`echo $line | cut -d: -f4`
+		if (( $frag_lun != $last_lun )); then
+			# The OST changed: the extents of the previous OST must be complete.
+			if (( tot_len != 1024 )); then
+				error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024"
+				return
+			else
+				(( num_luns += 1 ))
+				tot_len=0
+			fi
+		fi
+		(( tot_len += ext_len ))
+		last_lun=$frag_lun
+	done
+	if (( num_luns != 2 || tot_len != 1024 )); then
+		error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+		return
+	fi
+
+	echo "FIEMAP on 2-stripe file succeeded"
+}
+run_test 130b "FIEMAP (2-stripe file)"
+
+# FIEMAP on a 2-stripe file with a hole: write 1MB at offset 1MB and check
+# that filefrag reports two OSTs with extents adding up to 512 each, and
+# that the first extent of the second OST starts at logical block 512.
+test_130c() {
+	[ "$OSTCOUNT" -lt "2" ] && skip "skipping FIEMAP on 2-stripe file with hole test" && return
+
+	filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+	[ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return
+
+	local fm_file=$DIR/$tfile
+	lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+	dd if=/dev/zero of=$fm_file seek=1 bs=1M count=1 || error "dd failed on $fm_file"
+
+	filefrag -ves $fm_file || error "filefrag $fm_file failed"
+	filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+	last_lun=`echo $filefrag_op | cut -d: -f5`
+
+	# Newline-only splitting, scoped to this function (see test_130a).
+	local IFS=$'\n'
+	tot_len=0
+	num_luns=1
+	for line in $filefrag_op
+	do
+		frag_lun=`echo $line | cut -d: -f5`
+		ext_len=`echo $line | cut -d: -f4`
+		if (( $frag_lun != $last_lun )); then
+			logical=`echo $line | cut -d: -f2 | cut -d. -f1`
+			if (( logical != 512 )); then
+				error "FIEMAP on $fm_file failed; returned logical start for lun $logical instead of 512"
+				return
+			fi
+			if (( tot_len != 512 )); then
+				error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 512"
+				return
+			else
+				(( num_luns += 1 ))
+				tot_len=0
+			fi
+		fi
+		(( tot_len += ext_len ))
+		last_lun=$frag_lun
+	done
+	if (( num_luns != 2 || tot_len != 512 )); then
+		error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+		return
+	fi
+
+	echo "FIEMAP on 2-stripe file with hole succeeded"
+}
+run_test 130c "FIEMAP (2-stripe file with hole)"
+
+# FIEMAP on a file striped over all OSTs: write OSTCOUNT MB and check that
+# filefrag reports OSTCOUNT OSTs, each with extents adding up to 1024.
+test_130d() {
+	[ "$OSTCOUNT" -lt "3" ] && skip "skipping FIEMAP on N-stripe file test" && return
+
+	filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+	[ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return
+
+	local fm_file=$DIR/$tfile
+	lfs setstripe -s 65536 -c $OSTCOUNT $fm_file || error "setstripe failed on $fm_file"
+	dd if=/dev/zero of=$fm_file bs=1M count=$OSTCOUNT || error "dd failed on $fm_file"
+
+	filefrag -ves $fm_file || error "filefrag $fm_file failed"
+	filefrag_op=`filefrag -ve $fm_file | grep -A 100 "ext:" | grep -v "ext:" | grep -v "found"`
+
+	last_lun=`echo $filefrag_op | cut -d: -f5`
+
+	# Newline-only splitting, scoped to this function (see test_130a).
+	local IFS=$'\n'
+	tot_len=0
+	num_luns=1
+	for line in $filefrag_op
+	do
+		frag_lun=`echo $line | cut -d: -f5`
+		ext_len=`echo $line | cut -d: -f4`
+		if (( $frag_lun != $last_lun )); then
+			if (( tot_len != 1024 )); then
+				error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of 1024"
+				return
+			else
+				(( num_luns += 1 ))
+				tot_len=0
+			fi
+		fi
+		(( tot_len += ext_len ))
+		last_lun=$frag_lun
+	done
+	if (( num_luns != OSTCOUNT || tot_len != 1024 )); then
+		error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+		return
+	fi
+
+	echo "FIEMAP on N-stripe file succeeded"
+}
+run_test 130d "FIEMAP (N-stripe file)"
+
+# FIEMAP with many extents: write NUM_BLKS 4KB blocks at every other 4KB
+# offset of a 2-stripe file and check that filefrag reports two OSTs, each
+# with extents adding up to EXPECTED_LEN.
+test_130e() {
+	[ "$OSTCOUNT" -lt "2" ] && skip "skipping continuation FIEMAP test" && return
+
+	filefrag_op=$(filefrag -e 2>&1 | grep "invalid option")
+	[ -n "$filefrag_op" ] && skip "filefrag does not support FIEMAP" && return
+
+	local fm_file=$DIR/$tfile
+	local i
+	lfs setstripe -s 65536 -c 2 $fm_file || error "setstripe failed on $fm_file"
+	NUM_BLKS=512
+	EXPECTED_LEN=$(( (NUM_BLKS / 2) * 4 ))
+	for ((i = 0; i < $NUM_BLKS; i++))
+	do
+		dd if=/dev/zero of=$fm_file count=1 bs=4096 seek=$((2*$i)) conv=notrunc > /dev/null 2>&1
+	done
+
+	filefrag -ves $fm_file || error "filefrag $fm_file failed"
+	filefrag_op=`filefrag -ve $fm_file | grep -A 750 "ext:" | grep -v "ext:" | grep -v "found"`
+
+	last_lun=`echo $filefrag_op | cut -d: -f5`
+
+	# Newline-only splitting, scoped to this function (see test_130a).
+	local IFS=$'\n'
+	tot_len=0
+	num_luns=1
+	for line in $filefrag_op
+	do
+		frag_lun=`echo $line | cut -d: -f5`
+		ext_len=`echo $line | cut -d: -f4`
+		if (( $frag_lun != $last_lun )); then
+			if (( tot_len != $EXPECTED_LEN )); then
+				error "FIEMAP on $fm_file failed; returned len $tot_len for OST $last_lun instead of $EXPECTED_LEN"
+				return
+			else
+				(( num_luns += 1 ))
+				tot_len=0
+			fi
+		fi
+		(( tot_len += ext_len ))
+		last_lun=$frag_lun
+	done
+	if (( num_luns != 2 || tot_len != $EXPECTED_LEN )); then
+		echo "$num_luns $tot_len"
+		error "FIEMAP on $fm_file failed; returned wrong number of luns or wrong len for OST $last_lun"
+		return
+	fi
+
+	echo "FIEMAP with continuation calls succeeded"
+}
+run_test 130e "FIEMAP (test continuation FIEMAP calls)"
+
+# Test for writev/readv
+# Write with three iovecs, then read the file back with two.
+test_131a() {
+	rwv -f $DIR/$tfile -w -n 3 524288 1048576 1572864 || \
+		error "writev test failed"
+	rwv -f $DIR/$tfile -r -v -n 2 1572864 1048576 || \
+		error "readv failed"
+	rm -f $DIR/$tfile
+}
+run_test 131a "test iov's crossing stripe boundary for writev/readv"
+
+# Two appending (-a) writev calls against the same file.
+test_131b() {
+	rwv -f $DIR/$tfile -w -a -n 3 524288 1048576 1572864 || \
+		error "append writev test failed"
+	rwv -f $DIR/$tfile -w -a -n 2 1572864 1048576 || \
+		error "append writev test failed"
+	rm -f $DIR/$tfile
+}
+run_test 131b "test append writev"
+
+# The rwv call with -d is expected to fail; success is the error case.
+test_131c() {
+	rwv -f $DIR/$tfile -w -d -n 1 1048576 || return 0
+	error "NOT PASS"
+}
+run_test 131c "test read/write on file w/o objects"
+
+# Write 1572864 bytes, ask readv for more, and check that the byte count
+# taken from rwv's "error" line is exactly what was written.
+test_131d() {
+	rwv -f $DIR/$tfile -w -n 1 1572864
+	NOB=`rwv -f $DIR/$tfile -r -n 3 524288 524288 1048576 | awk '/error/ {print $6}'`
+	if [ "$NOB" != 1572864 ]; then
+		error "Short read failed: read $NOB bytes instead of 1572864"
+	fi
+	rm -f $DIR/$tfile
+}
+run_test 131d "test short read"
+
+# Write 1MB at offset 1MB, then read from offset 0, i.e. from the hole.
+test_131e() {
+	rwv -f $DIR/$tfile -w -s 1048576 -n 1 1048576
+	rwv -f $DIR/$tfile -r -z -s 0 -n 1 524288 || \
+		error "read hitting hole failed"
+	rm -f $DIR/$tfile
+}
+run_test 131e "test read hitting hole"
+
+# Build an ever deeper chain of directories, each holding a "stat" symlink
+# to the parent's "stat", until opening the link fails with ELOOP (40).
+# The depth reached must be 5 or 8.
+test_140() { #bug-17379
+	mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir"
+	cd $DIR/$tdir || error "Changing to $DIR/$tdir"
+	cp /usr/bin/stat . || error "Copying stat to $DIR/$tdir"
+
+	# VFS limits max symlink depth to 5(4KSTACK) or 8
+	local i=0
+	while i=$((i + 1)); do
+		mkdir -p $i || error "Creating dir $i"
+		cd $i || error "Changing to $i"
+		ln -s ../stat stat || error "Creating stat symlink"
+		# Read the symlink until ELOOP present,
+		# not LBUGing the system is considered success,
+		# we didn't overrun the stack.
+		$OPENFILE -f O_RDONLY stat >/dev/null 2>&1; ret=$?
+		[ $ret -ne 0 ] && {
+			if [ $ret -eq 40 ]; then
+				break  # -ELOOP
+			else
+				error "Open stat symlink"
+				return
+			fi
+		}
+	done
+	i=$((i - 1))
+	[ $i -eq 5 -o $i -eq 8 ] || error "Invalid symlink depth"
+	echo "The symlink depth = $i"
+}
+run_test 140 "Check reasonable stack depth (shouldn't LBUG) ===="
+
+# Set fail_loc 0x903, clear the MGC lock LRUs, then run cleanup and setup
+# with FAIL_ON_ERROR=true.
+test_141() {
+	local ls
+	#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903
+	$LCTL set_param fail_loc=0x903
+	# cancel_lru_locks mgc - does not work due to lctl set_param syntax
+	for ls in /proc/fs/lustre/ldlm/namespaces/MGC*/lru_size; do
+		# An unmatched glob stays literal; do not write to that path.
+		[ -e "$ls" ] || continue
+		echo "clear" > $ls
+	done
+	FAIL_ON_ERROR=true cleanup
+	FAIL_ON_ERROR=true setup
+}
+run_test 141 "umount should not race with any mgc requeue thread"
+
+# Keep a reference copy in $TMP and compare it with the Lustre file after a
+# lock cancel, a client remount, a truncate and two appends.
+test_150() {
+	local TF="$TMP/$tfile"
+
+	dd if=/dev/urandom of=$TF bs=6096 count=1 || error "dd failed"
+	cp $TF $DIR/$tfile
+	cancel_lru_locks osc
+	cmp $TF $DIR/$tfile || error "$TMP/$tfile $DIR/$tfile differ"
+	remount_client $MOUNT
+	cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (remount)"
+
+	$TRUNCATE $TF 6000
+	$TRUNCATE $DIR/$tfile 6000
+	cancel_lru_locks osc
+	cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (truncate1)"
+
+	echo "12345" >>$TF
+	echo "12345" >>$DIR/$tfile
+	cancel_lru_locks osc
+	cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append1)"
+
+	echo "12345" >>$TF
+	echo "12345" >>$DIR/$tfile
+	cancel_lru_locks osc
+	cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append2)"
+
+	rm -f $TF
+	true
+}
+run_test 150 "truncate/append tests"
+
+# Print the sum of the second column of all obdfilter "cache_access" stats
+# lines.  A single awk pass does the filtering and the summing, so the
+# column that is summed is the same one that is selected.
+function roc_access() {
+	ACCNUM=`$LCTL get_param -n obdfilter.*.stats | \
+		awk '/cache_access/ {sum=sum+$2} END{print sum}'`
+	echo $ACCNUM
+}
+
+# Print the sum of the second column of all obdfilter "cache_hit" stats lines.
+function roc_hit() {
+	ACCNUM=`$LCTL get_param -n obdfilter.*.stats | \
+		awk '/cache_hit/ {sum=sum+$2} END{print sum}'`
+	echo $ACCNUM
+}
+
+# Check the OSS read cache controls: with the cache enabled, re-reading
+# CPAGES freshly written pages must add CPAGES cache hits; with
+# read_cache_enable=0 a re-read must add none.
+test_151() {
+	local CPAGES=3
+	local BEFORE
+	local AFTER
+
+	# check whether obdfilter is cache capable at all
+	if ! $LCTL get_param -n obdfilter.*.read_cache_enable; then
+		echo "not cache-capable obdfilter"
+		return 0
+	fi
+
+	# check cache is enabled on all obdfilters
+	if $LCTL get_param -n obdfilter.*.read_cache_enable | grep 0 >&/dev/null; then
+		echo "oss cache is disabled"
+		return 0
+	fi
+
+	$LCTL set_param -n obdfilter.*.writethrough_cache_enable 1
+
+	# pages should be in the cache right after write
+	dd if=/dev/urandom of=$DIR/$tfile bs=4k count=$CPAGES || error "dd failed"
+	BEFORE=`roc_hit`
+	cancel_lru_locks osc
+	cat $DIR/$tfile >/dev/null
+	AFTER=`roc_hit`
+	if ! let "AFTER - BEFORE == CPAGES"; then
+		error "NOT IN CACHE: before: $BEFORE, after: $AFTER"
+	fi
+
+	# the following read invalidates the cache
+	cancel_lru_locks osc
+	$LCTL set_param -n obdfilter.*.read_cache_enable 0
+	cat $DIR/$tfile >/dev/null
+
+	# now data shouldn't be found in the cache
+	BEFORE=`roc_hit`
+	cancel_lru_locks osc
+	cat $DIR/$tfile >/dev/null
+	AFTER=`roc_hit`
+
+	# Re-enable the read cache before judging the result, so that a failure
+	# does not leave it disabled.
+	$LCTL set_param -n obdfilter.*.read_cache_enable 1
+	if let "AFTER - BEFORE != 0"; then
+		error "IN CACHE: before: $BEFORE, after: $AFTER"
+	fi
+
+	rm -f $DIR/$tfile
+}
+run_test 151 "test cache on oss and controls ==============================="
+
+# Write and then read back a file with fail_loc 0x80000226 set around each
+# phase; the data must still match the reference copy in $TMP.
+test_152() {
+	local TF="$TMP/$tfile"
+
+	# simulate ENOMEM during write
+#define OBD_FAIL_OST_NOMEM               0x226
+	$LCTL set_param fail_loc=0x80000226
+	dd if=/dev/urandom of=$TF bs=6096 count=1 || error "dd failed"
+	cp $TF $DIR/$tfile
+	sync || error "sync failed"
+	$LCTL set_param fail_loc=0
+
+	# discard client's cache
+	cancel_lru_locks osc
+
+	# simulate ENOMEM during read
+	$LCTL set_param fail_loc=0x80000226
+	cmp $TF $DIR/$tfile || error "cmp failed"
+	$LCTL set_param fail_loc=0
+
+	rm -f $TF
+}
+run_test 152 "test read/write with enomem ============================"
+
+# Pool test settings: the pool name, and the OST indices
+# TGTPOOL_FIRST..TGTPOOL_MAX in steps of TGTPOOL_STEP.
+POOL=${POOL:-cea1}
+TGT_COUNT=$OSTCOUNT
+TGTPOOL_FIRST=1
+TGTPOOL_MAX=$(($TGT_COUNT - 1))
+TGTPOOL_STEP=2
+TGTPOOL_LIST=`seq $TGTPOOL_FIRST $TGTPOOL_STEP $TGTPOOL_MAX`
+POOL_ROOT=${POOL_ROOT:-$DIR/d200.pools}
+POOL_DIR=$POOL_ROOT/dir_tst
+POOL_FILE=$POOL_ROOT/file_tst
+
+# Check that every value in the second tab-separated field of the "0x" lines
+# printed by $GETSTRIPE for file $1 (presumably the OST index - TODO confirm)
+# is listed in TGTPOOL_LIST.
+# Returns 0 when all of them are; otherwise reports an error and returns 1.
+check_file_in_pool()
+{
+	# All locals: "i" in particular is also the callers' loop variable.
+	local file=$1
+	local res
+	local i
+	local found
+
+	res=$($GETSTRIPE $file | grep 0x | cut -f2)
+	for i in $res
+	do
+		found=$(echo :$TGTPOOL_LIST: | tr " " ":" | grep :$i:)
+		if [[ "$found" == "" ]]
+		then
+			echo "pool list: $TGTPOOL_LIST"
+			echo "striping: $res"
+			error "$file not allocated in $POOL"
+			return 1
+		fi
+	done
+	return 0
+}
+
+# Create the pool, then read its parameter back to confirm that it exists.
+test_200a() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	do_facet mgs $LCTL pool_new $FSNAME.$POOL
+	do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL ||
+		error "Pool creation of $POOL failed"
+}
+run_test 200a "Create new pool =========================================="
+
+# Add the OST range to the pool and compare the sorted pool content with the
+# expected list of target UUIDs.
+test_200b() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	TGT=$(seq -f $FSNAME-OST%04g_UUID $TGTPOOL_FIRST $TGTPOOL_STEP \
+		$TGTPOOL_MAX | tr '\n' ' ')
+	# Quoted: the [first-last/step] range must reach lctl verbatim and not be
+	# treated as a glob pattern by the local shell.
+	do_facet mgs $LCTL pool_add $FSNAME.$POOL \
+		"$FSNAME-OST[$TGTPOOL_FIRST-$TGTPOOL_MAX/$TGTPOOL_STEP]_UUID"
+	res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | sort \
+		| tr '\n' ' ')
+	[ "$res" = "$TGT" ] || error "Pool content ($res) do not match requested ($TGT)"
+}
+run_test 200b "Add targets to a pool ===================================="
+
+# Assign the pool (with stripe count 2) to the test directory.
+test_200c() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	mkdir -p $POOL_DIR
+	$SETSTRIPE -c 2 -p $POOL $POOL_DIR ||
+		error "Cannot set pool $POOL to $POOL_DIR"
+}
+run_test 200c "Set pool on a directory ================================="
+
+# Read the pool name back from the directory's striping information.
+test_200d() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	res=$($GETSTRIPE $POOL_DIR | grep pool: | cut -f8 -d " ")
+	[ "$res" = "$POOL" ] || error "Pool on $POOL_DIR is not $POOL"
+}
+run_test 200d "Check pool on a directory ==============================="
+
+# Create 3 * TGT_COUNT files in the pool directory and count those that
+# check_file_in_pool rejects.
+test_200e() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	failed=0
+	for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+	do
+		file=$POOL_DIR/file-$i
+		touch $file
+		check_file_in_pool $file || failed=$(($failed + 1))
+	done
+	[ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 200e "Check files allocation from directory pool =============="
+
+# Create 3 * TGT_COUNT files directly with "setstripe -p" and count those
+# that check_file_in_pool rejects.
+test_200f() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	mkdir -p $POOL_FILE
+	failed=0
+	for i in $(seq -w 1 $(($TGT_COUNT * 3)))
+	do
+		file=$POOL_FILE/spoo-$i
+		$SETSTRIPE -p $POOL $file
+		check_file_in_pool $file || failed=$(($failed + 1))
+	done
+	[ "$failed" = 0 ] || error "$failed files not allocated in $POOL"
+}
+run_test 200f "Create files in a pool ==================================="
+
+# Remove the first target listed in the pool and check that it is gone.
+test_200g() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	TGT=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | head -1)
+	# With an empty pool there is nothing to remove, and both pool_remove and
+	# grep below would be called without their argument.
+	if [ -z "$TGT" ]; then
+		error "Pool $FSNAME.$POOL has no target to remove"
+		return
+	fi
+	do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+	res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL | grep "$TGT")
+	[ "$res" = "" ] || error "$TGT not removed from $FSNAME.$POOL"
+}
+run_test 200g "Remove a target from a pool ============================="
+
+# Remove every remaining target and check that the pool is empty.
+test_200h() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	for TGT in $(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+	do
+		do_facet mgs $LCTL pool_remove $FSNAME.$POOL $TGT
+	done
+	res=$(do_facet mgs $LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL)
+	[ "$res" = "" ] || error "Pool $FSNAME.$POOL cannot be drained"
+}
+run_test 200h "Remove all targets from a pool =========================="
+
+# Destroy the pool and check that its parameter no longer returns anything.
+test_200i() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	do_facet mgs $LCTL pool_destroy $FSNAME.$POOL
+	res=$(do_facet mgs "$LCTL get_param -n lov.$FSNAME-MDT0000-mdtlov.pools.$POOL 2>/dev/null")
+	[ "$res" = "" ] || error "Pool $FSNAME.$POOL is not destroyed"
+}
+run_test 200i "Remove a pool ============================================"
+
+# Copy a random-sized file (1..8192 KB, size derived from the current time)
+# with the sendfile helper.
+test_212() {
+	local size
+
+	size=`date +%s`
+	size=$((size % 8192 + 1))
+	# Check dd so that a failed write is not reported as a sendfile failure.
+	dd if=/dev/urandom of=$DIR/f212 bs=1k count=$size || error "dd failed"
+	sendfile $DIR/f212 $DIR/f212.xyz || error "sendfile wrong"
+	rm -f $DIR/f212 $DIR/f212.xyz
+}
+run_test 212 "Sendfile test ============================================"
+
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME
@@ -4954,7 +6001,7 @@ HOME=$OLDHOME
 log "cleanup: ======================================================"
 check_and_cleanup_lustre
 if [ "$I_MOUNTED" != "yes" ]; then
-	sysctl -w lnet.debug="$OLDDEBUG" 2> /dev/null || true
+	lctl set_param debug="$OLDDEBUG" 2> /dev/null || true
 fi

 echo '=========================== finished ==============================='