X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2FsanityN.sh;h=777dec8e048e0daaeb2bea511251e6a96db68884;hp=beeeba3823a357c356f8c39ffae596b6d6a47ca5;hb=e628a59ade003f281800faf3553ac5930e05cb8c;hpb=d81556d5d57f2880f4571b46c1639bab154e60f4 diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index beeeba3..777dec8 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -4,10 +4,11 @@ set -e ONLY=${ONLY:-"$*"} # bug number for skipped test: 3192 9977 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b 28"} +ALWAYS_EXCEPT="14b 28 $SANITYN_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 16" +# bug number for skipped test: 12652 12652 +grep -q 'Enterprise Server 10' /etc/SuSE-release && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 14" || true # Tests that fail on uml [ "$UML" = "true" ] && EXCEPT="$EXCEPT 7" @@ -34,14 +35,6 @@ MOUNT_2=${MOUNT_2:-"yes"} CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -if [ $UID -ne 0 ]; then - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} -fi - SAVE_PWD=$PWD export NAME=${NAME:-local} @@ -53,13 +46,11 @@ SETUP=${SETUP:-:} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16" + SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false -if [ ! -z "$USING_KRB5" ]; then - $RUNAS krb5_login.sh || exit 1 -fi - SETUP=${SETUP:-:} TRACE=${TRACE:-""} @@ -70,11 +61,16 @@ LPROC=/proc/fs/lustre check_and_setup_lustre LPROC=/proc/fs/lustre -LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` -OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` +LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` +OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` rm -rf $DIR1/[df][0-9]* $DIR1/lnk +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" + +check_runas_id $RUNAS_ID $RUNAS + build_test_filter test_1a() { @@ -166,6 +162,9 @@ test_6() { run_test 6 "remove of open file on other node ==================" test_7() { + # run_one creates uniq $tdir (bug 13798) + # opendirunlink failes if it exists + rmdir $DIR1/$tdir || true opendirunlink $DIR1/$tdir $DIR2/$tdir || \ error "opendirunlink $DIR1/$tdir $DIR2/$tdir" } @@ -222,9 +221,8 @@ run_test 10b "write of file with sub-page size on multiple mounts " test_11() { mkdir $DIR1/d11 - multiop $DIR1/d11/f O_c & + multiop_bg_pause $DIR1/d11/f O_c || return 1 MULTIPID=$! - usleep 200 cp -p /bin/ls $DIR1/d11/f $DIR2/d11/f RC=$? @@ -235,7 +233,7 @@ test_11() { run_test 11 "execution of file opened for write should return error ====" test_12() { - sh lockorder.sh + DIR=$DIR DIR2=$DIR2 sh lockorder.sh } run_test 12 "test lock ordering (link, stat, unlink) ===========" @@ -268,12 +266,11 @@ run_test 14 "execution of file open for write returns -ETXTBSY =" test_14a() { mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! multiop $DIR2/d14/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" @@ -281,13 +278,12 @@ run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" test_14b() { # bug 3192, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 - truncate $DIR2/d14/multiop 0 && kill -9 $MULTIPID && \ + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! + truncate $DIR2/d14/multiop 0 && kill -9 $MULTIOP_PID && \ error "expected truncate error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -296,12 +292,11 @@ run_test 14b "truncate of executing file returns -ETXTBSY ======" test_14c() { # bug 3430, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! cp /etc/hosts $DIR2/d14/multiop && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -310,13 +305,12 @@ run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" test_14d() { # bug 10921 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! log chmod chmod 600 $DIR1/d14/multiop || error "chmod failed" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -332,7 +326,7 @@ run_test 15 "test out-of-space with multiple writers ===========" test_16() { rm -f $MOUNT1/fsxfile - lfs setstripe $MOUNT1/fsxfile 0 -1 -1 # b=10919 + lfs setstripe $MOUNT1/fsxfile -c -1 # b=10919 fsx -c 50 -p 100 -N 2500 -l $((SIZE * 256)) -S 0 $MOUNT1/fsxfile $MOUNT2/fsxfile } run_test 16 "2500 iterations of dual-mount fsx =================" @@ -446,10 +440,9 @@ test_23() { # Bug 5972 time1=`date +%s` sleep 2 - multiop $DIR1/f23 or20_c & + multiop_bg_pause $DIR1/f23 or20_c || return 1 MULTIPID=$! - sleep 2 time2=`stat -c "%X" $DIR2/f23` if (( $time2 <= $time1 )); then @@ -482,10 +475,9 @@ test_24() { run_test 24 "lfs df [-ih] [path] test =========================" test_25() { - [ `cat $LPROC/mdc/*-mdc-*/connect_flags | grep -c acl` -lt 2 ] && \ + [ `lctl get_param -n mdc.*-mdc-*.connect_flags | grep -c acl` -lt 2 ] && \ skip "must have acl, skipping" && return - mkdir $DIR1/$tdir || error "mkdir $DIR1/$tdir" touch $DIR1/$tdir/f1 || error "touch $DIR1/$tdir/f1" chmod 0755 $DIR1/$tdir/f1 || error "chmod 0755 $DIR1/$tdir/f1" @@ -538,7 +530,7 @@ test_27() { sleep 1 dd if=/dev/zero of=$DIR1/$tfile bs=8k conv=notrunc count=1 seek=0 log "dd 3 finished" - echo > $LPROC/ldlm/dump_namespaces + lctl set_param -n ldlm.dump_namespaces "" wait $DD1_PID $DD2_PID [ $? -ne 0 ] && lctl dk $TMP/debug || true } @@ -548,7 +540,7 @@ test_28() { # bug 9977 ECHO_UUID="ECHO_osc1_UUID" tOST=`$LCTL dl | | awk '/-osc-|OSC.*MNT/ { print $4 }' | head -1` - lfs setstripe $DIR1/$tfile 1048576 0 2 + lfs setstripe $DIR1/$tfile -s 1048576 -i 0 -c 2 tOBJID=`lfs getstripe $DIR1/$tfile |grep "^[[:space:]]\+1" |awk '{print $2}'` dd if=/dev/zero of=$DIR1/$tfile bs=1024k count=2 @@ -594,18 +586,107 @@ test_29() { # bug 10999 #run_test 29 "lock put race between glimpse and enqueue =========" test_30() { #bug #11110 - rm -rf $DIR1/$tdir - mkdir -p $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & err=$($DIR1/$tdir/bash -c 'sleep 2; openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?') wait - [ $err -ne 116 ] && error "return code ($err) != -ESTALE" && return + [ $err -ne 116 ] && error_ignore 12900 "return code ($err) != -ESTALE" && return true } run_test 30 "recreate file race =========" +test_31() { + mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" + writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 | + awk 'BEGIN { FS="+" } /out/ {print $1}'` + #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 + sysctl -w lustre.fail_loc=0x314 + reads=`LANG=C dd if=$DIR2/$tdir/$tfile of=/dev/null 2>&1 | + awk 'BEGIN { FS="+" } /in/ {print $1}'` + [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes +} +run_test 31 "voluntary cancel / blocking ast race==============" + +# enable/disable lockless truncate feature, depending on the arg 0/1 +enable_lockless_truncate() { + lctl set_param -n llite.*.lockless_truncate $1 +} + +test_32a() { # bug 11270 + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME llite.*.lockless_truncate > $p + cancel_lru_locks osc + clear_llite_stats + enable_lockless_truncate 1 + dd if=/dev/zero of=$DIR1/$tfile count=10 bs=1M > /dev/null 2>&1 + + log "checking cached lockless truncate" + $TRUNCATE $DIR1/$tfile 8000000 + $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" + [ $(calc_llite_stats lockless_truncate) -eq 0 ] || + error "lockless truncate doesn't use cached locks" + + log "checking not cached lockless truncate" + $TRUNCATE $DIR2/$tfile 5000000 + $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_llite_stats lockless_truncate) -ne 0 ] || + error "not cached trancate isn't lockless" + + log "disabled lockless truncate" + enable_lockless_truncate 0 + clear_llite_stats + $TRUNCATE $DIR2/$tfile 3000000 + $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_llite_stats lockless_truncate) -eq 0 ] || + error "lockless truncate disabling failed" + rm $DIR1/$tfile + # restore lockless_truncate default values + restore_lustre_params < $p + rm -f $p +} +run_test 32a "lockless truncate" + +test_32b() { # bug 11270 + local node + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME "llite.*.contention_seconds" > $p + for node in $(osts_nodes); do + save_lustre_params $node "ldlm.namespaces.filter-*.max_nolock_bytes" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contended_locks" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contention_seconds" >> $p + done + clear_llite_stats + # agressive lockless i/o settings + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60' + done + lctl set_param -n llite.*.contention_seconds 60 + for i in $(seq 5); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_llite_stats lockless_write_bytes) -ne 0 ] || error "lockless i/o was not triggered" + # disable lockless i/o (it is disabled by default) + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 0; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 32; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 0' + done + # set contention_seconds to 0 at client too, otherwise Lustre still + # remembers lock contention + lctl set_param -n llite.*.contention_seconds 0 + clear_llite_stats + for i in $(seq 5); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_llite_stats lockless_write_bytes) -eq 0 ] || + error "lockless i/o works when disabled" + rm -f $DIR1/$tfile + restore_lustre_params <$p + rm -f $p +} +run_test 32b "lockless i/o" + log "cleanup: ======================================================" check_and_cleanup_lustre