X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2FsanityN.sh;h=6e5e024b70d5394fc2d060d81859517f7def3cec;hp=5af953bbc98d1e735e97cf3a65facedda1065a1f;hb=4f23f96b21ea63119f0ff983a68e56630ade06f8;hpb=bd8b65394f0a1ffd0c7f8dd0d875c55ba219d9ec diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 5af953b..6e5e024 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -3,11 +3,12 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 3192 9977 -ALWAYS_EXCEPT="14b 28 $SANITYN_EXCEPT" +# bug number for skipped test: 3192 15528/3811 16929 9977 15528/11549 18080 +ALWAYS_EXCEPT=" 14b 19 22 28 29 35 $SANITYN_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 12 16" +# bug number for skipped test: 12652 12652 +grep -q 'Enterprise Server 10' /etc/SuSE-release && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 14" || true # Tests that fail on uml [ "$UML" = "true" ] && EXCEPT="$EXCEPT 7" @@ -21,27 +22,17 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH SIZE=${SIZE:-40960} CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -CREATETEST=${CREATETEST:-createtest} GETSTRIPE=${GETSTRIPE:-lfs getstripe} SETSTRIPE=${SETSTRIPE:-lstripe} MCREATE=${MCREATE:-mcreate} OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} -TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} export TMP=${TMP:-/tmp} MOUNT_2=${MOUNT_2:-"yes"} CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -if [ $UID -ne 0 ]; then - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} -fi - SAVE_PWD=$PWD export NAME=${NAME:-local} @@ -53,29 +44,28 @@ SETUP=${SETUP:-:} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16 23 33a" + SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false -if $GSS_KRB5; then - $RUNAS krb5_login.sh || exit 1 -fi - SETUP=${SETUP:-:} TRACE=${TRACE:-""} -LPROC=/proc/fs/lustre - [ "$SANITYLOG" ] && rm -f $SANITYLOG || true check_and_setup_lustre -LPROC=/proc/fs/lustre -LOVNAME=`cat $LPROC/llite/*/lov/common_name | tail -n 1` -OSTCOUNT=`cat $LPROC/lov/$LOVNAME/numobd` +LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` +OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` +assert_DIR rm -rf $DIR1/[df][0-9]* $DIR1/lnk -check_runas_id $RUNAS_ID $RUNAS +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" + +check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS build_test_filter @@ -142,9 +132,9 @@ test_2e() { run_test 2e "check chmod on root is propagated to others" test_3() { - ( cd $DIR1 ; ln -s this/is/good lnk ) - [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \ - error + ( cd $DIR1 ; ln -s this/is/good $tfile ) + [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/$tfile'");'`" ] || + error "link $DIR2/$tfile not as expected" } run_test 3 "symlink on one mtpt, readlink on another ===========" @@ -168,11 +158,9 @@ test_6() { run_test 6 "remove of open file on other node ==================" test_7() { - # run_one creates uniq $tdir (bug 13798) - # opendirunlink failes if it exists - rmdir $DIR1/$tdir || true - opendirunlink $DIR1/$tdir $DIR2/$tdir || \ - error "opendirunlink $DIR1/$tdir $DIR2/$tdir" + local dir=d7 + opendirunlink $DIR1/$dir $DIR2/$dir || \ + error "opendirunlink $DIR1/$dir $DIR2/$dir" } run_test 7 "remove of open directory on other node =============" @@ -184,10 +172,11 @@ run_test 8 "remove of open special file on other node ==========" test_9() { MTPT=1 + local dir > $DIR2/f9 for C in a b c d e f g h i j k l; do - DIR=`eval echo \\$DIR$MTPT` - echo -n $C >> $DIR/f9 + dir=`eval echo \\$DIR$MTPT` + echo -n $C >> $dir/f9 [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 done [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || \ @@ -197,11 +186,12 @@ run_test 9 "append of file with sub-page size on multiple mounts" test_10a() { MTPT=1 + local dir OFFSET=0 > $DIR2/f10 for C in a b c d e f g h i j k l; do - DIR=`eval echo \\$DIR$MTPT` - echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1 + dir=`eval echo \\$DIR$MTPT` + echo -n $C | dd of=$dir/f10 bs=1 seek=$OFFSET count=1 [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 OFFSET=`expr $OFFSET + 1` done @@ -211,25 +201,26 @@ test_10a() { run_test 10a "write of file with sub-page size on multiple mounts " test_10b() { - yes "R" | dd of=$DIR1/f10b bs=3k count=1 || error "dd $DIR1" + # create a seed file + yes "R" | head -c 4000 >$TMP/f10b-seed + dd if=$TMP/f10b-seed of=$DIR1/f10b bs=3k count=1 || error "dd $DIR1" truncate $DIR1/f10b 4096 || error "truncate 4096" dd if=$DIR2/f10b of=$TMP/f10b-lustre bs=4k count=1 || error "dd $DIR2" # create a test file locally to compare - yes "R" | dd of=$TMP/f10b bs=3k count=1 || error "dd random" + dd if=$TMP/f10b-seed of=$TMP/f10b bs=3k count=1 || error "dd random" truncate $TMP/f10b 4096 || error "truncate 4096" cmp $TMP/f10b $TMP/f10b-lustre || error "file miscompare" - rm $TMP/f10b $TMP/f10b-lustre + rm $TMP/f10b $TMP/f10b-lustre $TMP/f10b-seed } run_test 10b "write of file with sub-page size on multiple mounts " test_11() { mkdir $DIR1/d11 - multiop $DIR1/d11/f O_c & + multiop_bg_pause $DIR1/d11/f O_c || return 1 MULTIPID=$! - usleep 200 cp -p /bin/ls $DIR1/d11/f $DIR2/d11/f RC=$? @@ -262,23 +253,25 @@ test_13() { # bug 2451 - directory coherency run_test 13 "test directory page revocation ====================" test_14() { - mkdir $DIR1/d14 - cp -p /bin/ls $DIR1/d14/ls - exec 100>> $DIR1/d14/ls - $DIR2/d14/ls && error || true - exec 100<&- + mkdir -p $DIR1/$tdir + cp -p /bin/ls $DIR1/$tdir/$tfile + multiop_bg_pause $DIR1/$tdir/$tfile Ow_c || return 1 + MULTIPID=$! + + $DIR2/$tdir/$tfile && error || true + kill -USR1 $MULTIPID + wait $MULTIPID || return 2 } run_test 14 "execution of file open for write returns -ETXTBSY =" test_14a() { mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! multiop $DIR2/d14/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" @@ -286,13 +279,12 @@ run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" test_14b() { # bug 3192, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 - truncate $DIR2/d14/multiop 0 && kill -9 $MULTIPID && \ + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! + truncate $DIR2/d14/multiop 0 && kill -9 $MULTIOP_PID && \ error "expected truncate error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -301,12 +293,11 @@ run_test 14b "truncate of executing file returns -ETXTBSY ======" test_14c() { # bug 3430, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! cp /etc/hosts $DIR2/d14/multiop && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -315,13 +306,12 @@ run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" test_14d() { # bug 10921 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! log chmod chmod 600 $DIR1/d14/multiop || error "chmod failed" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -337,27 +327,28 @@ run_test 15 "test out-of-space with multiple writers ===========" test_16() { rm -f $MOUNT1/fsxfile - lfs setstripe $MOUNT1/fsxfile 0 -1 -1 # b=10919 + lfs setstripe $MOUNT1/fsxfile -c -1 # b=10919 fsx -c 50 -p 100 -N 2500 -l $((SIZE * 256)) -S 0 $MOUNT1/fsxfile $MOUNT2/fsxfile } run_test 16 "2500 iterations of dual-mount fsx =================" test_17() { # bug 3513, 3667 - [ ! -d /proc/fs/lustre/ost ] && skip "remote OST, skipping OST-only test" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return - cp /etc/termcap $DIR1/f17 + lfs setstripe $DIR1/$tfile -i 0 -c 1 + cp /etc/termcap $DIR1/$tfile cancel_lru_locks osc > /dev/null #define OBD_FAIL_ONCE|OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a - sysctl -w lustre.fail_loc=0x8000030a - ls -ls $DIR1/f17 | awk '{ print $1,$6 }' > $DIR1/f17-1 & \ - ls -ls $DIR2/f17 | awk '{ print $1,$6 }' > $DIR2/f17-2 + do_facet ost1 lctl set_param fail_loc=0x8000030a + ls -ls $DIR1/$tfile | awk '{ print $1,$6 }' > $DIR1/$tfile-1 & \ + ls -ls $DIR2/$tfile | awk '{ print $1,$6 }' > $DIR2/$tfile-2 wait - diff -u $DIR1/f17-1 $DIR2/f17-2 || error "files are different" + diff -u $DIR1/$tfile-1 $DIR2/$tfile-2 || error "files are different" } run_test 17 "resource creation/LVB creation race ===============" test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" @@ -365,10 +356,8 @@ run_test 18 "mmap sanity check =================================" test_19() { # bug3811 [ -d /proc/fs/lustre/obdfilter ] || return 0 - MAX=`cat /proc/fs/lustre/obdfilter/*/readcache_max_filesize | head -n 1` - for O in /proc/fs/lustre/obdfilter/*OST*; do - echo 4096 > $O/readcache_max_filesize - done + MAX=`lctl get_param -n obdfilter.*.readcache_max_filesize | head -n 1` + lctl set_param -n obdfilter.*OST*.readcache_max_filesize=4096 dd if=/dev/urandom of=$TMP/f19b bs=512k count=32 SUM=`cksum $TMP/f19b | cut -d" " -f 1,2` cp $TMP/f19b $DIR1/f19b @@ -383,22 +372,20 @@ test_19() { # bug3811 [ "`cat $TMP/sum2`" = "$SUM" ] || \ error "$DIR2/f19b `cat $TMP/sum2` != $SUM" done - for O in /proc/fs/lustre/obdfilter/*OST*; do - echo $MAX > $O/readcache_max_filesize - done + lctl set_param -n obdfilter.*OST*.readcache_max_filesize=$MAX rm $DIR1/f19b } -#run_test 19 "test concurrent uncached read races ===============" +run_test 19 "test concurrent uncached read races ===============" test_20() { mkdir $DIR1/d20 cancel_lru_locks osc - CNT=$((`cat /proc/fs/lustre/llite/*/dump_page_cache | wc -l`)) + CNT=$((`lctl get_param -n llite.*.dump_page_cache | wc -l`)) multiop $DIR1/f20 Ow8190c multiop $DIR2/f20 Oz8194w8190c multiop $DIR1/f20 Oz0r8190c cancel_lru_locks osc - CNTD=$((`cat /proc/fs/lustre/llite/*/dump_page_cache | wc -l` - $CNT)) + CNTD=$((`lctl get_param -n llite.*.dump_page_cache | wc -l` - $CNT)) [ $CNTD -gt 0 ] && \ error $CNTD" page left in cache after lock cancel" || true } @@ -440,7 +427,7 @@ test_22() { # Bug 9926 cat $DIR2/d21/no_joined || error "cat error" rm -rf $DIR2/d21/no_joined || error "unlink normal file error" } -run_test 22 " After joining in one dir, open/close unlink file in anther dir" +run_test 22 " After joining in one dir, open/close unlink file in anther dir" test_23() { # Bug 5972 echo "others should see updated atime while another read" > $DIR1/f23 @@ -449,20 +436,21 @@ test_23() { # Bug 5972 cancel_lru_locks osc time1=`date +%s` - sleep 2 + #MAX_ATIME_DIFF 60, we update atime only if older than 60 seconds + sleep 61 - multiop $DIR1/f23 or20_c & - MULTIPID=$! + multiop_bg_pause $DIR1/f23 or20_c || return 1 + # with SOM and opencache enabled, we need to close a file and cancel + # open lock to get atime propogated to MDS + kill -USR1 $! + cancel_lru_locks mdc - sleep 2 time2=`stat -c "%X" $DIR2/f23` if (( $time2 <= $time1 )); then - kill -USR1 $MULTIPID error "atime doesn't update among nodes" fi - kill -USR1 $MULTIPID || return 1 rm -f $DIR1/f23 || error "rm -f $DIR1/f23 failed" true } @@ -487,9 +475,10 @@ test_24() { run_test 24 "lfs df [-ih] [path] test =========================" test_25() { - [ `cat $LPROC/mdc/*-mdc-*/connect_flags | grep -c acl` -lt 2 ] && \ + [ `lctl get_param -n mdc.*-mdc-*.connect_flags | grep -c acl` -lt 2 ] && \ skip "must have acl, skipping" && return + mkdir -p $DIR1/$tdir touch $DIR1/$tdir/f1 || error "touch $DIR1/$tdir/f1" chmod 0755 $DIR1/$tdir/f1 || error "chmod 0755 $DIR1/$tdir/f1" @@ -520,8 +509,8 @@ test_26b() { chmod a+x $DIR2/$tfile mt1=`stat -c %Y $DIR1/$tfile` mt2=`stat -c %Y $DIR2/$tfile` - - if [ x"$mt1" != x"$mt2" ]; then + + if [ x"$mt1" != x"$mt2" ]; then error "not equal mtime, client1: "$mt1", client2: "$mt2"." fi } @@ -542,7 +531,7 @@ test_27() { sleep 1 dd if=/dev/zero of=$DIR1/$tfile bs=8k conv=notrunc count=1 seek=0 log "dd 3 finished" - echo > $LPROC/ldlm/dump_namespaces + lctl set_param -n ldlm.dump_namespaces "" wait $DD1_PID $DD2_PID [ $? -ne 0 ] && lctl dk $TMP/debug || true } @@ -552,7 +541,7 @@ test_28() { # bug 9977 ECHO_UUID="ECHO_osc1_UUID" tOST=`$LCTL dl | | awk '/-osc-|OSC.*MNT/ { print $4 }' | head -1` - lfs setstripe $DIR1/$tfile 1048576 0 2 + lfs setstripe $DIR1/$tfile -s 1048576 -i 0 -c 2 tOBJID=`lfs getstripe $DIR1/$tfile |grep "^[[:space:]]\+1" |awk '{print $2}'` dd if=/dev/zero of=$DIR1/$tfile bs=1024k count=2 @@ -564,7 +553,7 @@ test_28() { # bug 9977 tECHOID=`$LCTL dl | grep $ECHO_UUID | awk '{print $1}'` $LCTL --device $tECHOID destroy "${tOBJID}:0" - + $LCTL <<-EOF cfg_device ECHO_osc1 cleanup @@ -588,16 +577,17 @@ run_test 28 "read/write/truncate file with lost stripes" test_29() { # bug 10999 touch $DIR1/$tfile #define OBD_FAIL_LDLM_GLIMPSE 0x30f - sysctl -w lustre.fail_loc=0x8000030f + lctl set_param fail_loc=0x8000030f ls -l $DIR2/$tfile & usleep 500 dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 wait } #bug 11549 - permanently turn test off in b1_5 -#run_test 29 "lock put race between glimpse and enqueue =========" +run_test 29 "lock put race between glimpse and enqueue =========" test_30() { #bug #11110 + mkdir -p $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & err=$($DIR1/$tdir/bash -c 'sleep 2; openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?') @@ -608,6 +598,340 @@ test_30() { #bug #11110 run_test 30 "recreate file race =========" +test_31a() { + mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" + writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 | + awk 'BEGIN { FS="+" } /out/ {print $1}'` + #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 + lctl set_param fail_loc=0x314 + reads=`LANG=C dd if=$DIR2/$tdir/$tfile of=/dev/null 2>&1 | + awk 'BEGIN { FS="+" } /in/ {print $1}'` + [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes +} +run_test 31a "voluntary cancel / blocking ast race==============" + +test_31b() { + remote_ost || { skip "local OST" && return 0; } + remote_ost_nodsh && skip "remote OST w/o dsh" && return 0 + mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" + lfs setstripe $DIR/$tdir/$tfile -i 0 -c 1 + cp /etc/hosts $DIR/$tdir/$tfile + #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 + lctl set_param fail_loc=0x314 + #define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 + do_facet ost1 lctl set_param fail_loc=0x316 + # Don't crash kernel + cat $DIR2/$tdir/$tfile > /dev/null 2>&1 + lctl set_param fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 +} +run_test 31b "voluntary OST cancel / blocking ast race==============" + +# enable/disable lockless truncate feature, depending on the arg 0/1 +enable_lockless_truncate() { + lctl set_param -n osc.*.lockless_truncate $1 +} + +test_32a() { # bug 11270 + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME osc.*.lockless_truncate > $p + cancel_lru_locks osc + enable_lockless_truncate 1 + rm -f $DIR1/$tfile + lfs setstripe -c -1 $DIR1/$tfile + dd if=/dev/zero of=$DIR1/$tfile count=10 bs=1M > /dev/null 2>&1 + clear_osc_stats + + log "checking cached lockless truncate" + $TRUNCATE $DIR1/$tfile 8000000 + $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -eq 0 ] || + error "lockless truncate doesn't use cached locks" + + log "checking not cached lockless truncate" + $TRUNCATE $DIR2/$tfile 5000000 + $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -ne 0 ] || + error "not cached trancate isn't lockless" + + log "disabled lockless truncate" + enable_lockless_truncate 0 + clear_osc_stats + $TRUNCATE $DIR2/$tfile 3000000 + $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -eq 0 ] || + error "lockless truncate disabling failed" + rm $DIR1/$tfile + # restore lockless_truncate default values + restore_lustre_params < $p + rm -f $p +} +run_test 32a "lockless truncate" + +test_32b() { # bug 11270 + remote_ost_nodsh && skip "remote OST with nodsh" && return + + local node + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME "osc.*.contention_seconds" > $p + for node in $(osts_nodes); do + save_lustre_params $node "ldlm.namespaces.filter-*.max_nolock_bytes" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contended_locks" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contention_seconds" >> $p + done + clear_osc_stats + # agressive lockless i/o settings + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60' + done + lctl set_param -n osc.*.contention_seconds 60 + for i in $(seq 5); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_osc_stats lockless_write_bytes) -ne 0 ] || error "lockless i/o was not triggered" + # disable lockless i/o (it is disabled by default) + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 0; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 32; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 0' + done + # set contention_seconds to 0 at client too, otherwise Lustre still + # remembers lock contention + lctl set_param -n osc.*.contention_seconds 0 + clear_osc_stats + for i in $(seq 1); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_osc_stats lockless_write_bytes) -eq 0 ] || + error "lockless i/o works when disabled" + rm -f $DIR1/$tfile + restore_lustre_params <$p + rm -f $p +} +run_test 32b "lockless i/o" + +print_jbd_stat () { + local dev + local mdts=$(get_facets MDS) + local varcvs + local mds + + local stat=0 + for mds in ${mdts//,/ }; do + varsvc=${mds}_svc + dev=$(basename $(do_facet $mds lctl get_param -n osd.${!varsvc}.mntdev)) + val=$(do_facet $mds cat /proc/fs/jbd/$dev/info | head -1 | cut -d" " -f1) + stat=$(( stat + val)) + done + echo $stat +} + +# commit on sharing tests +test_33a() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + + [ -n "$CLIENTS" ] || { skip "Need two or more clients" && return 0; } + [ $CLIENTCOUNT -ge 2 ] || \ + { skip "Need two or more clients, have $CLIENTCOUNT" && return 0; } + + zconf_mount_clients $CLIENT1,$CLIENT2 $DIR1 + zconf_mount_clients $CLIENT1,$CLIENT2 $DIR2 + + local nfiles=${TEST33_NFILES:-10000} + local param_file=$TMP/$tfile-params + + save_lustre_params $(comma_list $(mdts_nodes)) "mdt.*.commit_on_sharing" > $param_file + + local COS + local jbdold + local jbdnew + local jbd + + for COS in 0 1; do + do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=$COS + avgjbd=0 + avgtime=0 + for i in 1 2 3; do + do_nodes $CLIENT1,$CLIENT2 "mkdir -p $DIR1/$tdir-\\\$(hostname)-$i" + + jbdold=$(print_jbd_stat) + echo "=== START createmany old: $jbdold transaction" + local elapsed=$(do_and_time "do_nodes $CLIENT1,$CLIENT2 createmany -o $DIR1/$tdir-\\\$(hostname)-$i/f- -r $DIR2/$tdir-\\\$(hostname)-$i/f- $nfiles > /dev/null 2>&1") + jbdnew=$(print_jbd_stat) + jbd=$(( jbdnew - jbdold )) + echo "=== END createmany new: $jbdnew transaction : $jbd transactions nfiles $nfiles time $ELAPSED COS=$COS" + avgjbd=$(( avgjbd + jbd )) + avgtime=$(( avgtime + elapsed )) + done + eval cos${COS}_jbd=$((avgjbd / 3)) + eval cos${COS}_time=$((avgtime / 3)) + done + + echo "COS=0 transactions (avg): $cos0_jbd time (avg): $cos0_time" + echo "COS=1 transactions (avg): $cos1_jbd time (avg): $cos1_time" + [ "$cos0_jbd" != 0 ] && echo "COS=1 vs COS=0 jbd: $((((cos1_jbd/cos0_jbd - 1)) * 100 )) %" + [ "$cos0_time" != 0 ] && echo "COS=1 vs COS=0 time: $((((cos1_time/cos0_time - 1)) * 100 )) %" + + restore_lustre_params < $param_file + rm -f $param_file + return 0 +} +run_test 33a "commit on sharing, cross crete/delete, 2 clients, benchmark" + +# End commit on sharing tests + +test_34() { #16129 + local OPER + local lock_in + local lock_out + for OPER in notimeout timeout ; do + rm $DIR1/$tfile 2>/dev/null + lock_in=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum) + if [ $OPER == "timeout" ] ; then + for j in `seq $OSTCOUNT`; do + #define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 + do_facet ost$j lctl set_param fail_loc=0x511 + done + echo lock should expire + else + for j in `seq $OSTCOUNT`; do + #define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 + do_facet ost$j lctl set_param fail_loc=0x512 + done + echo lock should not expire + fi + echo writing on client1 + dd if=/dev/zero of=$DIR1/$tfile count=100 conv=notrunc > /dev/null 2>&1 + sync & + echo reading on client2 + dd of=/dev/null if=$DIR2/$tfile > /dev/null 2>&1 + # wait for a lock timeout + sleep 4 + lock_out=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum) + if [ $OPER == "timeout" ] ; then + if [ $lock_in == $lock_out ]; then + error "no lock timeout happened" + else + echo "success" + fi + else + if [ $lock_in != $lock_out ]; then + error "lock timeout happened" + else + echo "success" + fi + fi + done +} +run_test 34 "no lock timeout under IO" + +test_35() { # bug 17645 + local generation=[] + local count=0 + for imp in /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + generation[count]=$g + let count=count+1 + done + + mkdir -p $MOUNT1/$tfile + cancel_lru_locks mdc + + # Let's initiate -EINTR situation by setting fail_loc and take + # write lock on same file from same client. This will not cause + # bl_ast yet as lock is already in local cache. +#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 + do_facet client "lctl set_param fail_loc=0x80000317" + local timeout=`do_facet $SINGLEMDS lctl get_param -n timeout` + let timeout=timeout*3 + local nr=0 + while test $nr -lt 10; do + log "Race attempt $nr" + local blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` + test "x$blk1" = "x" && blk1=0 + createmany -o $MOUNT2/$tfile/a 4000 & + pid1=$! + sleep 1 + + # Let's make conflict and bl_ast + ls -la $MOUNT1/$tfile > /dev/null & + pid2=$! + + log "Wait for $pid1 $pid2 for $timeout sec..." + sleep $timeout + kill -9 $pid1 $pid2 > /dev/null 2>&1 + wait + local blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` + test "x$blk2" = "x" && blk2=0 + test $blk2 -gt $blk1 && break + rm -fr $MOUNT1/$tfile/* + cancel_lru_locks mdc + let nr=nr+1 + done + do_facet client "lctl set_param fail_loc=0x0" + df -h $MOUNT1 $MOUNT2 + count=0 + for imp in /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + if ! test "$g" -eq "${generation[count]}"; then + error "Eviction happened on import $(basename $imp)" + fi + let count=count+1 + done +} +run_test 35 "-EINTR cp_ast vs. bl_ast race does not evict client" + +test_36() { #bug 16417 + local SIZE + local SIZE_B + local i + + mkdir -p $DIR1/$tdir + $LFS setstripe -c -1 $DIR1/$tdir + i=0 + SIZE=50 + let SIZE_B=SIZE*1024*1024 + + while [ $i -le 10 ]; do + lctl mark "start test" + local before=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + dd if=/dev/zero of=$DIR1/$tdir/file000 bs=1M count=$SIZE + sync + sleep 1 + local after_dd=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + multiop_bg_pause $DIR2/$tdir/file000 O_r${SIZE_B}c || return 3 + read_pid=$! + rm -f $DIR1/$tdir/file000 + kill -USR1 $read_pid + wait $read_pid + sleep 1 + local after=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + echo "*** cycle($i) *** before($before):after_dd($after_dd):after($after)" + # this free space! not used + if [ $after_dd -ge $after ]; then + error "space leaked" + return 1; + fi + let i=i+1 + done +} +run_test 36 "handle ESTALE/open-unlink corectly" + +test_37() { # bug 18695 + mkdir -p $DIR1/$tdir + multiop_bg_pause $DIR1/$tdir D_c || return 1 + MULTIPID=$! + # create large directory (32kB seems enough from e2fsck, ~= 1000 files) + createmany -m $DIR2/$tdir/f 10000 + # set mtime/atime backward + touch -t 198001010000 $DIR2/$tdir + kill -USR1 $MULTIPID + nr_files=`lfs find $DIR1/$tdir -type f | wc -l` + [ $nr_files -eq 10000 ] || error "$nr_files != 10000 truncated directory?" + +} +run_test 37 "check i_size is not updated for directory on close (bug 18695) ==============" + log "cleanup: ======================================================" check_and_cleanup_lustre