X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2FsanityN.sh;h=9d4020945f5b1650e0b3e0e3dafc9d5263a6c86e;hp=9a55449ec9b5aa578f50d83369eda21294b026cb;hb=5c2e140ae225bdfcff679c95af00d3a98116df74;hpb=4813fd9021a11e24521e76b708ce322c224df586 diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 9a55449..9d40209 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -3,8 +3,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 3192 9977 -ALWAYS_EXCEPT="14b 28 $SANITYN_EXCEPT" +# bug number for skipped test: 3192 15528/3811 16929 9977 15528/11549 18080 +ALWAYS_EXCEPT=" 14b 19 22 28 29 35 $SANITYN_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! # bug number for skipped test: 12652 12652 @@ -22,13 +22,11 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH SIZE=${SIZE:-40960} CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -CREATETEST=${CREATETEST:-createtest} GETSTRIPE=${GETSTRIPE:-lfs getstripe} SETSTRIPE=${SETSTRIPE:-lstripe} MCREATE=${MCREATE:-mcreate} OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} -TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} export TMP=${TMP:-/tmp} MOUNT_2=${MOUNT_2:-"yes"} @@ -46,7 +44,7 @@ SETUP=${SETUP:-:} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16 23 33a" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false @@ -54,22 +52,20 @@ FAIL_ON_ERROR=false SETUP=${SETUP:-:} TRACE=${TRACE:-""} -LPROC=/proc/fs/lustre - [ "$SANITYLOG" ] && rm -f $SANITYLOG || true check_and_setup_lustre -LPROC=/proc/fs/lustre LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` +assert_DIR rm -rf $DIR1/[df][0-9]* $DIR1/lnk # $RUNAS_ID may get set incorrectly somewhere else [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" 
-check_runas_id $RUNAS_ID $RUNAS +check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS build_test_filter @@ -136,9 +132,9 @@ test_2e() { run_test 2e "check chmod on root is propagated to others" test_3() { - ( cd $DIR1 ; ln -s this/is/good lnk ) - [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \ - error + ( cd $DIR1 ; ln -s this/is/good $tfile ) + [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/$tfile'");'`" ] || + error "link $DIR2/$tfile not as expected" } run_test 3 "symlink on one mtpt, readlink on another ===========" @@ -162,11 +158,9 @@ test_6() { run_test 6 "remove of open file on other node ==================" test_7() { - # run_one creates uniq $tdir (bug 13798) - # opendirunlink failes if it exists - rmdir $DIR1/$tdir || true - opendirunlink $DIR1/$tdir $DIR2/$tdir || \ - error "opendirunlink $DIR1/$tdir $DIR2/$tdir" + local dir=d7 + opendirunlink $DIR1/$dir $DIR2/$dir || \ + error "opendirunlink $DIR1/$dir $DIR2/$dir" } run_test 7 "remove of open directory on other node =============" @@ -178,10 +172,11 @@ run_test 8 "remove of open special file on other node ==========" test_9() { MTPT=1 + local dir > $DIR2/f9 for C in a b c d e f g h i j k l; do - DIR=`eval echo \\$DIR$MTPT` - echo -n $C >> $DIR/f9 + dir=`eval echo \\$DIR$MTPT` + echo -n $C >> $dir/f9 [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 done [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || \ @@ -191,11 +186,12 @@ run_test 9 "append of file with sub-page size on multiple mounts" test_10a() { MTPT=1 + local dir OFFSET=0 > $DIR2/f10 for C in a b c d e f g h i j k l; do - DIR=`eval echo \\$DIR$MTPT` - echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1 + dir=`eval echo \\$DIR$MTPT` + echo -n $C | dd of=$dir/f10 bs=1 seek=$OFFSET count=1 [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 OFFSET=`expr $OFFSET + 1` done @@ -205,25 +201,26 @@ test_10a() { run_test 10a "write of file with sub-page size on multiple mounts " test_10b() { - yes "R" | dd of=$DIR1/f10b bs=3k count=1 || error "dd $DIR1" + # create a 
seed file + yes "R" | head -c 4000 >$TMP/f10b-seed + dd if=$TMP/f10b-seed of=$DIR1/f10b bs=3k count=1 || error "dd $DIR1" truncate $DIR1/f10b 4096 || error "truncate 4096" dd if=$DIR2/f10b of=$TMP/f10b-lustre bs=4k count=1 || error "dd $DIR2" # create a test file locally to compare - yes "R" | dd of=$TMP/f10b bs=3k count=1 || error "dd random" + dd if=$TMP/f10b-seed of=$TMP/f10b bs=3k count=1 || error "dd random" truncate $TMP/f10b 4096 || error "truncate 4096" cmp $TMP/f10b $TMP/f10b-lustre || error "file miscompare" - rm $TMP/f10b $TMP/f10b-lustre + rm $TMP/f10b $TMP/f10b-lustre $TMP/f10b-seed } run_test 10b "write of file with sub-page size on multiple mounts " test_11() { mkdir $DIR1/d11 - multiop $DIR1/d11/f O_c & + multiop_bg_pause $DIR1/d11/f O_c || return 1 MULTIPID=$! - usleep 200 cp -p /bin/ls $DIR1/d11/f $DIR2/d11/f RC=$? @@ -256,23 +253,25 @@ test_13() { # bug 2451 - directory coherency run_test 13 "test directory page revocation ====================" test_14() { - mkdir $DIR1/d14 - cp -p /bin/ls $DIR1/d14/ls - exec 100>> $DIR1/d14/ls - $DIR2/d14/ls && error || true - exec 100<&- + mkdir -p $DIR1/$tdir + cp -p /bin/ls $DIR1/$tdir/$tfile + multiop_bg_pause $DIR1/$tdir/$tfile Ow_c || return 1 + MULTIPID=$! + + $DIR2/$tdir/$tfile && error || true + kill -USR1 $MULTIPID + wait $MULTIPID || return 2 } run_test 14 "execution of file open for write returns -ETXTBSY =" test_14a() { mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! 
multiop $DIR2/d14/multiop Oc && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" @@ -280,13 +279,12 @@ run_test 14a "open(RDWR) of executing file returns -ETXTBSY ====" test_14b() { # bug 3192, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 - truncate $DIR2/d14/multiop 0 && kill -9 $MULTIPID && \ + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! + truncate $DIR2/d14/multiop 0 && kill -9 $MULTIOP_PID && \ error "expected truncate error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -295,12 +293,11 @@ run_test 14b "truncate of executing file returns -ETXTBSY ======" test_14c() { # bug 3430, 7040 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! 
cp /etc/hosts $DIR2/d14/multiop && error "expected error, got success" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -309,13 +306,12 @@ run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY ==" test_14d() { # bug 10921 mkdir -p $DIR1/d14 cp -p `which multiop` $DIR1/d14/multiop || error "cp failed" - $DIR1/d14/multiop $TMP/test14.junk O_c & - MULTIPID=$! - sleep 1 + MULTIOP_PROG=$DIR1/d14/multiop multiop_bg_pause $TMP/test14.junk O_c || return 1 + MULTIOP_PID=$! log chmod chmod 600 $DIR1/d14/multiop || error "chmod failed" - kill -USR1 $MULTIPID || return 2 - wait $MULTIPID || return 3 + kill -USR1 $MULTIOP_PID || return 2 + wait $MULTIOP_PID || return 3 cmp `which multiop` $DIR1/d14/multiop || error "binary changed" rm $TMP/test14.junk $DIR1/d14/multiop || error "removing multiop" } @@ -337,21 +333,22 @@ test_16() { run_test 16 "2500 iterations of dual-mount fsx =================" test_17() { # bug 3513, 3667 - [ ! 
-d /proc/fs/lustre/ost ] && skip "remote OST, skipping OST-only test" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return - cp /etc/termcap $DIR1/f17 + lfs setstripe $DIR1/$tfile -i 0 -c 1 + cp /etc/termcap $DIR1/$tfile cancel_lru_locks osc > /dev/null #define OBD_FAIL_ONCE|OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a - sysctl -w lustre.fail_loc=0x8000030a - ls -ls $DIR1/f17 | awk '{ print $1,$6 }' > $DIR1/f17-1 & \ - ls -ls $DIR2/f17 | awk '{ print $1,$6 }' > $DIR2/f17-2 + do_facet ost1 lctl set_param fail_loc=0x8000030a + ls -ls $DIR1/$tfile | awk '{ print $1,$6 }' > $DIR1/$tfile-1 & \ + ls -ls $DIR2/$tfile | awk '{ print $1,$6 }' > $DIR2/$tfile-2 wait - diff -u $DIR1/f17-1 $DIR2/f17-2 || error "files are different" + diff -u $DIR1/$tfile-1 $DIR2/$tfile-2 || error "files are different" } run_test 17 "resource creation/LVB creation race ===============" test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" @@ -359,10 +356,8 @@ run_test 18 "mmap sanity check =================================" test_19() { # bug3811 [ -d /proc/fs/lustre/obdfilter ] || return 0 - MAX=`cat /proc/fs/lustre/obdfilter/*/readcache_max_filesize | head -n 1` - for O in /proc/fs/lustre/obdfilter/*OST*; do - echo 4096 > $O/readcache_max_filesize - done + MAX=`lctl get_param -n obdfilter.*.readcache_max_filesize | head -n 1` + lctl set_param -n obdfilter.*OST*.readcache_max_filesize=4096 dd if=/dev/urandom of=$TMP/f19b bs=512k count=32 SUM=`cksum $TMP/f19b | cut -d" " -f 1,2` cp $TMP/f19b $DIR1/f19b @@ -377,22 +372,20 @@ test_19() { # bug3811 [ "`cat $TMP/sum2`" = "$SUM" ] || \ error "$DIR2/f19b `cat $TMP/sum2` != $SUM" done - for O in /proc/fs/lustre/obdfilter/*OST*; do - echo $MAX > $O/readcache_max_filesize - done + lctl set_param -n obdfilter.*OST*.readcache_max_filesize=$MAX rm $DIR1/f19b } -#run_test 19 "test concurrent uncached read races 
===============" +run_test 19 "test concurrent uncached read races ===============" test_20() { mkdir $DIR1/d20 cancel_lru_locks osc - CNT=$((`cat /proc/fs/lustre/llite/*/dump_page_cache | wc -l`)) + CNT=$((`lctl get_param -n llite.*.dump_page_cache | wc -l`)) multiop $DIR1/f20 Ow8190c multiop $DIR2/f20 Oz8194w8190c multiop $DIR1/f20 Oz0r8190c cancel_lru_locks osc - CNTD=$((`cat /proc/fs/lustre/llite/*/dump_page_cache | wc -l` - $CNT)) + CNTD=$((`lctl get_param -n llite.*.dump_page_cache | wc -l` - $CNT)) [ $CNTD -gt 0 ] && \ error $CNTD" page left in cache after lock cancel" || true } @@ -434,7 +427,7 @@ test_22() { # Bug 9926 cat $DIR2/d21/no_joined || error "cat error" rm -rf $DIR2/d21/no_joined || error "unlink normal file error" } -run_test 22 " After joining in one dir, open/close unlink file in anther dir" +run_test 22 " After joining in one dir, open/close unlink file in anther dir" test_23() { # Bug 5972 echo "others should see updated atime while another read" > $DIR1/f23 @@ -443,20 +436,21 @@ test_23() { # Bug 5972 cancel_lru_locks osc time1=`date +%s` - sleep 2 + #MAX_ATIME_DIFF 60, we update atime only if older than 60 seconds + sleep 61 - multiop $DIR1/f23 or20_c & - MULTIPID=$! + multiop_bg_pause $DIR1/f23 or20_c || return 1 + # with SOM and opencache enabled, we need to close a file and cancel + # open lock to get atime propagated to MDS + kill -USR1 $! 
+ cancel_lru_locks mdc - sleep 2 time2=`stat -c "%X" $DIR2/f23` if (( $time2 <= $time1 )); then - kill -USR1 $MULTIPID error "atime doesn't update among nodes" fi - kill -USR1 $MULTIPID || return 1 rm -f $DIR1/f23 || error "rm -f $DIR1/f23 failed" true } @@ -484,6 +478,7 @@ test_25() { [ `lctl get_param -n mdc.*-mdc-*.connect_flags | grep -c acl` -lt 2 ] && \ skip "must have acl, skipping" && return + mkdir -p $DIR1/$tdir touch $DIR1/$tdir/f1 || error "touch $DIR1/$tdir/f1" chmod 0755 $DIR1/$tdir/f1 || error "chmod 0755 $DIR1/$tdir/f1" @@ -514,8 +509,8 @@ test_26b() { chmod a+x $DIR2/$tfile mt1=`stat -c %Y $DIR1/$tfile` mt2=`stat -c %Y $DIR2/$tfile` - - if [ x"$mt1" != x"$mt2" ]; then + + if [ x"$mt1" != x"$mt2" ]; then error "not equal mtime, client1: "$mt1", client2: "$mt2"." fi } @@ -558,7 +553,7 @@ test_28() { # bug 9977 tECHOID=`$LCTL dl | grep $ECHO_UUID | awk '{print $1}'` $LCTL --device $tECHOID destroy "${tOBJID}:0" - + $LCTL <<-EOF cfg_device ECHO_osc1 cleanup @@ -582,16 +577,17 @@ run_test 28 "read/write/truncate file with lost stripes" test_29() { # bug 10999 touch $DIR1/$tfile #define OBD_FAIL_LDLM_GLIMPSE 0x30f - sysctl -w lustre.fail_loc=0x8000030f + lctl set_param fail_loc=0x8000030f ls -l $DIR2/$tfile & usleep 500 dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 wait } #bug 11549 - permanently turn test off in b1_5 -#run_test 29 "lock put race between glimpse and enqueue =========" +run_test 29 "lock put race between glimpse and enqueue =========" test_30() { #bug #11110 + mkdir -p $DIR1/$tdir cp -f /bin/bash $DIR1/$tdir/bash /bin/sh -c 'sleep 1; rm -f $DIR2/$tdir/bash; cp /bin/bash $DIR2/$tdir' & err=$($DIR1/$tdir/bash -c 'sleep 2; openfile -f O_RDONLY /proc/$$/exe >& /dev/null; echo $?') @@ -602,17 +598,324 @@ test_30() { #bug #11110 run_test 30 "recreate file race =========" -test_31() { +test_31a() { mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } 
/out/ {print $1}'` #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 - sysctl -w lustre.fail_loc=0x314 + lctl set_param fail_loc=0x314 reads=`LANG=C dd if=$DIR2/$tdir/$tfile of=/dev/null 2>&1 | awk 'BEGIN { FS="+" } /in/ {print $1}'` [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes } -run_test 31 "voluntary cancel / blocking ast race==============" +run_test 31a "voluntary cancel / blocking ast race==============" + +test_31b() { + remote_ost || { skip "local OST" && return 0; } + remote_ost_nodsh && skip "remote OST w/o dsh" && return 0 + mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir" + lfs setstripe $DIR/$tdir/$tfile -i 0 -c 1 + cp /etc/hosts $DIR/$tdir/$tfile + #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 + lctl set_param fail_loc=0x314 + #define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 + do_facet ost1 lctl set_param fail_loc=0x316 + # Don't crash kernel + cat $DIR2/$tdir/$tfile > /dev/null 2>&1 + lctl set_param fail_loc=0 + do_facet ost1 lctl set_param fail_loc=0 +} +run_test 31b "voluntary OST cancel / blocking ast race==============" + +# enable/disable lockless truncate feature, depending on the arg 0/1 +enable_lockless_truncate() { + lctl set_param -n osc.*.lockless_truncate $1 +} + +test_32a() { # bug 11270 + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME osc.*.lockless_truncate > $p + cancel_lru_locks osc + enable_lockless_truncate 1 + rm -f $DIR1/$tfile + lfs setstripe -c -1 $DIR1/$tfile + dd if=/dev/zero of=$DIR1/$tfile count=10 bs=1M > /dev/null 2>&1 + clear_osc_stats + + log "checking cached lockless truncate" + $TRUNCATE $DIR1/$tfile 8000000 + $CHECKSTAT -s 8000000 $DIR2/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -eq 0 ] || + error "lockless truncate doesn't use cached locks" + + log "checking not cached lockless truncate" + $TRUNCATE $DIR2/$tfile 5000000 + $CHECKSTAT -s 5000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -ne 0 ] || + 
error "not cached trancate isn't lockless" + + log "disabled lockless truncate" + enable_lockless_truncate 0 + clear_osc_stats + $TRUNCATE $DIR2/$tfile 3000000 + $CHECKSTAT -s 3000000 $DIR1/$tfile || error "wrong file size" + [ $(calc_osc_stats lockless_truncate) -eq 0 ] || + error "lockless truncate disabling failed" + rm $DIR1/$tfile + # restore lockless_truncate default values + restore_lustre_params < $p + rm -f $p +} +run_test 32a "lockless truncate" + +test_32b() { # bug 11270 + remote_ost_nodsh && skip "remote OST with nodsh" && return + + local node + local p="$TMP/sanityN-$TESTNAME.parameters" + save_lustre_params $HOSTNAME "osc.*.contention_seconds" > $p + for node in $(osts_nodes); do + save_lustre_params $node "ldlm.namespaces.filter-*.max_nolock_bytes" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contended_locks" >> $p + save_lustre_params $node "ldlm.namespaces.filter-*.contention_seconds" >> $p + done + clear_osc_stats + # aggressive lockless i/o settings + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60' + done + lctl set_param -n osc.*.contention_seconds 60 + for i in $(seq 5); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_osc_stats lockless_write_bytes) -ne 0 ] || error "lockless i/o was not triggered" + # disable lockless i/o (it is disabled by default) + for node in $(osts_nodes); do + do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 0; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 32; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 0' + done + # set contention_seconds to 0 at client too, otherwise Lustre still + # remembers lock contention + lctl set_param -n 
osc.*.contention_seconds 0 + clear_osc_stats + for i in $(seq 1); do + dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 + done + [ $(calc_osc_stats lockless_write_bytes) -eq 0 ] || + error "lockless i/o works when disabled" + rm -f $DIR1/$tfile + restore_lustre_params <$p + rm -f $p +} +run_test 32b "lockless i/o" + +print_jbd_stat () { + local dev + local mdts=$(get_facets MDS) + local varcvs + local mds + + local stat=0 + for mds in ${mdts//,/ }; do + varsvc=${mds}_svc + dev=$(basename $(do_facet $mds lctl get_param -n osd.${!varsvc}.mntdev)) + val=$(do_facet $mds cat /proc/fs/jbd/$dev/info | head -1 | cut -d" " -f1) + stat=$(( stat + val)) + done + echo $stat +} + +# commit on sharing tests +test_33a() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + + [ -n "$CLIENTS" ] || { skip "Need two or more clients" && return 0; } + [ $CLIENTCOUNT -ge 2 ] || \ + { skip "Need two or more clients, have $CLIENTCOUNT" && return 0; } + + zconf_mount_clients $CLIENT1,$CLIENT2 $DIR1 + zconf_mount_clients $CLIENT1,$CLIENT2 $DIR2 + + local nfiles=${TEST33_NFILES:-10000} + local param_file=$TMP/$tfile-params + + save_lustre_params $(comma_list $(mdts_nodes)) "mdt.*.commit_on_sharing" > $param_file + + local COS + local jbdold + local jbdnew + local jbd + + for COS in 0 1; do + do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=$COS + avgjbd=0 + avgtime=0 + for i in 1 2 3; do + do_nodes $CLIENT1,$CLIENT2 "mkdir -p $DIR1/$tdir-\\\$(hostname)-$i" + + jbdold=$(print_jbd_stat) + echo "=== START createmany old: $jbdold transaction" + local elapsed=$(do_and_time "do_nodes $CLIENT1,$CLIENT2 createmany -o $DIR1/$tdir-\\\$(hostname)-$i/f- -r $DIR2/$tdir-\\\$(hostname)-$i/f- $nfiles > /dev/null 2>&1") + jbdnew=$(print_jbd_stat) + jbd=$(( jbdnew - jbdold )) + echo "=== END createmany new: $jbdnew transaction : $jbd transactions nfiles $nfiles time $ELAPSED COS=$COS" 
+ avgjbd=$(( avgjbd + jbd )) + avgtime=$(( avgtime + elapsed )) + done + eval cos${COS}_jbd=$((avgjbd / 3)) + eval cos${COS}_time=$((avgtime / 3)) + done + + echo "COS=0 transactions (avg): $cos0_jbd time (avg): $cos0_time" + echo "COS=1 transactions (avg): $cos1_jbd time (avg): $cos1_time" + [ "$cos0_jbd" != 0 ] && echo "COS=1 vs COS=0 jbd: $((((cos1_jbd/cos0_jbd - 1)) * 100 )) %" + [ "$cos0_time" != 0 ] && echo "COS=1 vs COS=0 time: $((((cos1_time/cos0_time - 1)) * 100 )) %" + + restore_lustre_params < $param_file + rm -f $param_file + return 0 +} +run_test 33a "commit on sharing, cross crete/delete, 2 clients, benchmark" + +# End commit on sharing tests + +test_34() { #16129 + local OPER + local lock_in + local lock_out + for OPER in notimeout timeout ; do + rm $DIR1/$tfile 2>/dev/null + lock_in=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum) + if [ $OPER == "timeout" ] ; then + for j in `seq $OSTCOUNT`; do + #define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 + do_facet ost$j lctl set_param fail_loc=0x511 + done + echo lock should expire + else + for j in `seq $OSTCOUNT`; do + #define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 + do_facet ost$j lctl set_param fail_loc=0x512 + done + echo lock should not expire + fi + echo writing on client1 + dd if=/dev/zero of=$DIR1/$tfile count=100 conv=notrunc > /dev/null 2>&1 + sync & + echo reading on client2 + dd of=/dev/null if=$DIR2/$tfile > /dev/null 2>&1 + # wait for a lock timeout + sleep 4 + lock_out=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum) + if [ $OPER == "timeout" ] ; then + if [ $lock_in == $lock_out ]; then + error "no lock timeout happened" + else + echo "success" + fi + else + if [ $lock_in != $lock_out ]; then + error "lock timeout happened" + else + echo "success" + fi + fi + done +} +run_test 34 "no lock timeout under IO" + +test_35() { # bug 17645 + local generation=[] + local count=0 + for imp in 
/proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + generation[count]=$g + let count=count+1 + done + + mkdir -p $MOUNT1/$tfile + cancel_lru_locks mdc + + # Let's initiate -EINTR situation by setting fail_loc and take + # write lock on same file from same client. This will not cause + # bl_ast yet as lock is already in local cache. +#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 + do_facet client "lctl set_param fail_loc=0x80000317" + local timeout=`do_facet $SINGLEMDS lctl get_param -n timeout` + let timeout=timeout*3 + local nr=0 + while test $nr -lt 10; do + log "Race attempt $nr" + local blk1=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` + test "x$blk1" = "x" && blk1=0 + createmany -o $MOUNT2/$tfile/a 4000 & + pid1=$! + sleep 1 + + # Let's make conflict and bl_ast + ls -la $MOUNT1/$tfile > /dev/null & + pid2=$! + + log "Wait for $pid1 $pid2 for $timeout sec..." + sleep $timeout + kill -9 $pid1 $pid2 > /dev/null 2>&1 + wait + local blk2=`lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}'` + test "x$blk2" = "x" && blk2=0 + test $blk2 -gt $blk1 && break + rm -fr $MOUNT1/$tfile/* + cancel_lru_locks mdc + let nr=nr+1 + done + do_facet client "lctl set_param fail_loc=0x0" + df -h $MOUNT1 $MOUNT2 + count=0 + for imp in /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do + g=$(awk '/generation/{print $2}' $imp/import) + if ! test "$g" -eq "${generation[count]}"; then + error "Eviction happened on import $(basename $imp)" + fi + let count=count+1 + done +} +run_test 35 "-EINTR cp_ast vs. 
bl_ast race does not evict client" + +test_36() { #bug 16417 + local SIZE + local SIZE_B + local i + + mkdir -p $DIR1/$tdir + $LFS setstripe -c -1 $DIR1/$tdir + i=0 + SIZE=50 + let SIZE_B=SIZE*1024*1024 + + while [ $i -le 10 ]; do + lctl mark "start test" + local before=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + dd if=/dev/zero of=$DIR1/$tdir/file000 bs=1M count=$SIZE + sync + sleep 1 + local after_dd=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + multiop_bg_pause $DIR2/$tdir/file000 O_r${SIZE_B}c || return 3 + read_pid=$! + rm -f $DIR1/$tdir/file000 + kill -USR1 $read_pid + wait $read_pid + sleep 1 + local after=$($LFS df | awk '{if ($1 ~/^filesystem/) {print $5; exit} }') + echo "*** cycle($i) *** before($before):after_dd($after_dd):after($after)" + # these values are free space, not used space + if [ $after_dd -ge $after ]; then + error "space leaked" + return 1; + fi + let i=i+1 + done +} +run_test 36 "handle ESTALE/open-unlink corectly" log "cleanup: ======================================================"