X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2FsanityN.sh;h=6e5e024b70d5394fc2d060d81859517f7def3cec;hp=589573ce9114da530302b1bb5ad2a5ac1228c4d1;hb=4f23f96b21ea63119f0ff983a68e56630ade06f8;hpb=c34d5b580fcede4aeccdeab6618fe8b3146186e2 diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 589573c..6e5e024 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -3,8 +3,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 3192 15528/3811 16929 9977 15528/11549 -ALWAYS_EXCEPT=" 14b 19 22 28 29 $SANITYN_EXCEPT" +# bug number for skipped test: 3192 15528/3811 16929 9977 15528/11549 18080 +ALWAYS_EXCEPT=" 14b 19 22 28 29 35 $SANITYN_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! # bug number for skipped test: 12652 12652 @@ -22,13 +22,11 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH SIZE=${SIZE:-40960} CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -CREATETEST=${CREATETEST:-createtest} GETSTRIPE=${GETSTRIPE:-lfs getstripe} SETSTRIPE=${SETSTRIPE:-lstripe} MCREATE=${MCREATE:-mcreate} OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} -TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} export TMP=${TMP:-/tmp} MOUNT_2=${MOUNT_2:-"yes"} @@ -46,7 +44,7 @@ SETUP=${SETUP:-:} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16 33a" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16 23 33a" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false @@ -67,7 +65,7 @@ rm -rf $DIR1/[df][0-9]* $DIR1/lnk # $RUNAS_ID may get set incorrectly somewhere else [ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" 
-check_runas_id $RUNAS_ID $RUNAS +check_runas_id $RUNAS_ID $RUNAS_ID $RUNAS build_test_filter @@ -350,7 +348,7 @@ test_17() { # bug 3513, 3667 run_test 17 "resource creation/LVB creation race ===============" test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + $LUSTRE/tests/mmap_sanity -d $MOUNT1 -m $MOUNT2 sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" @@ -429,7 +427,7 @@ test_22() { # Bug 9926 cat $DIR2/d21/no_joined || error "cat error" rm -rf $DIR2/d21/no_joined || error "unlink normal file error" } -run_test 22 " After joining in one dir, open/close unlink file in anther dir" +run_test 22 " After joining in one dir, open/close unlink file in anther dir" test_23() { # Bug 5972 echo "others should see updated atime while another read" > $DIR1/f23 @@ -438,19 +436,21 @@ test_23() { # Bug 5972 cancel_lru_locks osc time1=`date +%s` - sleep 2 + #MAX_ATIME_DIFF 60, we update atime only if older than 60 seconds + sleep 61 multiop_bg_pause $DIR1/f23 or20_c || return 1 - MULTIPID=$! + # with SOM and opencache enabled, we need to close a file and cancel + # open lock to get atime propogated to MDS + kill -USR1 $! + cancel_lru_locks mdc time2=`stat -c "%X" $DIR2/f23` if (( $time2 <= $time1 )); then - kill -USR1 $MULTIPID error "atime doesn't update among nodes" fi - kill -USR1 $MULTIPID || return 1 rm -f $DIR1/f23 || error "rm -f $DIR1/f23 failed" true } @@ -509,8 +509,8 @@ test_26b() { chmod a+x $DIR2/$tfile mt1=`stat -c %Y $DIR1/$tfile` mt2=`stat -c %Y $DIR2/$tfile` - - if [ x"$mt1" != x"$mt2" ]; then + + if [ x"$mt1" != x"$mt2" ]; then error "not equal mtime, client1: "$mt1", client2: "$mt2"." 
fi } @@ -553,7 +553,7 @@ test_28() { # bug 9977 tECHOID=`$LCTL dl | grep $ECHO_UUID | awk '{print $1}'` $LCTL --device $tECHOID destroy "${tOBJID}:0" - + $LCTL <<-EOF cfg_device ECHO_osc1 cleanup @@ -636,9 +636,11 @@ test_32a() { # bug 11270 local p="$TMP/sanityN-$TESTNAME.parameters" save_lustre_params $HOSTNAME osc.*.lockless_truncate > $p cancel_lru_locks osc - clear_osc_stats enable_lockless_truncate 1 + rm -f $DIR1/$tfile + lfs setstripe -c -1 $DIR1/$tfile dd if=/dev/zero of=$DIR1/$tfile count=10 bs=1M > /dev/null 2>&1 + clear_osc_stats log "checking cached lockless truncate" $TRUNCATE $DIR1/$tfile 8000000 @@ -678,7 +680,7 @@ test_32b() { # bug 11270 save_lustre_params $node "ldlm.namespaces.filter-*.contention_seconds" >> $p done clear_osc_stats - # agressive lockless i/o settings + # agressive lockless i/o settings for node in $(osts_nodes); do do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 2000000; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 0; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 60' done @@ -687,7 +689,7 @@ test_32b() { # bug 11270 dd if=/dev/zero of=$DIR1/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 done - [ $(calc_osc_stats lockless_write_bytes) -ne 0 ] || error "lockless i/o was not triggered" + [ $(calc_osc_stats lockless_write_bytes) -ne 0 ] || error "lockless i/o was not triggered" # disable lockless i/o (it is disabled by default) for node in $(osts_nodes); do do_node $node 'lctl set_param -n ldlm.namespaces.filter-*.max_nolock_bytes 0; lctl set_param -n ldlm.namespaces.filter-*.contended_locks 32; lctl set_param -n ldlm.namespaces.filter-*.contention_seconds 0' @@ -701,7 +703,7 @@ test_32b() { # bug 11270 dd if=/dev/zero of=$DIR2/$tfile bs=4k count=1 conv=notrunc > /dev/null 2>&1 done [ $(calc_osc_stats lockless_write_bytes) -eq 0 ] || - error "lockless i/o works when disabled" + error "lockless i/o 
# Print one JBD statistic summed over every MDS facet.
#
# For each facet listed by "get_facets MDS" the backing device is looked
# up through osd.<service>.mntdev, where <service> is read by indirection
# from the variable <facet>_svc.  The first space-separated field of the
# first line of /proc/fs/jbd/<dev>/info on that facet is added to the
# running total.  test_33a reports the result as a transaction count.
#
# Globals:   <facet>_svc (read)
# Arguments: none
# Outputs:   the summed value on stdout
print_jbd_stat () {
	local dev
	local mdts=$(get_facets MDS)
	# This declaration used to be spelled "varcvs", so the variable that
	# is really assigned below (varsvc) escaped into the caller's scope;
	# val was not declared at all.
	local varsvc
	local val
	local mds

	local stat=0
	# mdts is a comma-separated list; the expansion is deliberately left
	# unquoted so that it splits into one word per facet
	for mds in ${mdts//,/ }; do
		varsvc=${mds}_svc
		dev=$(basename $(do_facet $mds lctl get_param -n osd.${!varsvc}.mntdev))
		val=$(do_facet $mds cat /proc/fs/jbd/$dev/info | head -1 | cut -d" " -f1)
		stat=$(( stat + val ))
	done
	echo $stat
}
# Bug 16129: a lock must not time out while I/O is in flight under it.
#
# Two passes are made, selected by OPER:
#   notimeout - fail_loc 0x512 is set on every OST and the summed
#               ldlm.namespaces.filter-*.lock_timeouts counter must not
#               change across the I/O
#   timeout   - fail_loc 0x511 is set on every OST and the counter must
#               change
# In each pass client1 writes $tfile, a sync is started in the background,
# client2 reads the same file, and the counter is sampled again after a
# 4 second pause.
test_34() { #16129
	local OPER
	local lock_in
	local lock_out
	local fail_loc
	local expect
	local j

	for OPER in notimeout timeout ; do
		# -f: the file may not exist yet, and a failing rm must not
		# abort a script that runs under "set -e"
		rm -f $DIR1/$tfile
		lock_in=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum)
		if [ "$OPER" == "timeout" ] ; then
			#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT    0x511
			fail_loc=0x511
			expect="lock should expire"
		else
			#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT  0x512
			fail_loc=0x512
			expect="lock should not expire"
		fi
		for j in $(seq $OSTCOUNT); do
			do_facet ost$j lctl set_param fail_loc=$fail_loc
		done
		echo "$expect"

		echo writing on client1
		dd if=/dev/zero of=$DIR1/$tfile count=100 conv=notrunc > /dev/null 2>&1
		# left running on purpose: the read below has to overlap the flush
		sync &
		echo reading on client2
		dd of=/dev/null if=$DIR2/$tfile > /dev/null 2>&1
		# wait for a lock timeout
		sleep 4
		lock_out=$(do_nodes $(osts_nodes) "lctl get_param -n ldlm.namespaces.filter-*.lock_timeouts" | calc_sum)

		# quoted so that an empty sample gives a failed comparison
		# instead of a "[" syntax error
		if [ "$OPER" == "timeout" ] ; then
			if [ "$lock_in" == "$lock_out" ]; then
				error "no lock timeout happened"
			else
				echo "success"
			fi
		else
			if [ "$lock_in" != "$lock_out" ]; then
				error "lock timeout happened"
			else
				echo "success"
			fi
		fi
	done

	# Disarm the fault injection; otherwise fail_loc 0x511 from the last
	# pass stays set on the OSTs for every test that runs afterwards
	# (test_35 resets its own fail_loc in the same way).
	for j in $(seq $OSTCOUNT); do
		do_facet ost$j lctl set_param fail_loc=0
	done
}
run_test 34 "no lock timeout under IO"
# Bug 17645: an interrupted completion AST racing with a blocking AST
# must not get the client evicted.
#
# The "generation" value of every MDC import under
# /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-* is recorded first.  With
# fail_loc 0x80000317 set on the client, up to 10 race attempts are made:
# createmany runs in the background on $MOUNT2 while "ls -la" lists the
# same directory through $MOUNT1; both are killed after 3 * timeout
# seconds.  The loop ends early once the ldlm_bl_callback counter has
# grown during an attempt.  Afterwards every import's generation must be
# unchanged, otherwise an eviction is reported.
test_35() { # bug 17645
	# a real (empty) array; the former "generation=[]" only stored the
	# literal string "[]" in a scalar
	local -a generation=()
	local count=0
	local imp
	local g
	local pid1
	local pid2

	for imp in /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do
		g=$(awk '/generation/{print $2}' $imp/import)
		generation[count]=$g
		count=$((count + 1))
	done

	mkdir -p $MOUNT1/$tfile
	cancel_lru_locks mdc

	# Let's initiate -EINTR situation by setting fail_loc and take
	# write lock on same file from same client. This will not cause
	# bl_ast yet as lock is already in local cache.
	#define OBD_FAIL_LDLM_INTR_CP_AST  0x317
	do_facet client "lctl set_param fail_loc=0x80000317"
	local timeout=$(do_facet $SINGLEMDS lctl get_param -n timeout)
	# $(( )) instead of "let": "let" returns status 1 when the result is 0
	timeout=$((timeout * 3))
	local nr=0
	local blk1
	local blk2
	while test $nr -lt 10; do
		log "Race attempt $nr"
		blk1=$(lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}')
		# no ldlm_bl_callback line in the stats: count it as 0
		[ -n "$blk1" ] || blk1=0
		createmany -o $MOUNT2/$tfile/a 4000 &
		pid1=$!
		sleep 1

		# Let's make conflict and bl_ast
		ls -la $MOUNT1/$tfile > /dev/null &
		pid2=$!

		log "Wait for $pid1 $pid2 for $timeout sec..."
		sleep $timeout
		kill -9 $pid1 $pid2 > /dev/null 2>&1
		wait
		blk2=$(lctl get_param -n ldlm.services.ldlm_cbd.stats | awk '/ldlm_bl_callback/ {print $2}')
		[ -n "$blk2" ] || blk2=0
		# the counter grew during this attempt: stop retrying
		test $blk2 -gt $blk1 && break
		rm -fr $MOUNT1/$tfile/*
		cancel_lru_locks mdc
		nr=$((nr + 1))
	done
	do_facet client "lctl set_param fail_loc=0x0"
	df -h $MOUNT1 $MOUNT2

	# compare each import's generation with the value saved at the start
	count=0
	for imp in /proc/fs/lustre/mdc/$FSNAME-MDT*-mdc-*; do
		g=$(awk '/generation/{print $2}' $imp/import)
		if ! test "$g" -eq "${generation[count]}"; then
			error "Eviction happened on import $(basename $imp)"
		fi
		count=$((count + 1))
	done
}
run_test 35 "-EINTR cp_ast vs. bl_ast race does not evict client"
# Bug 16417: space of a file unlinked while another client still has it
# open must be released once that client closes it.
#
# Eleven cycles (i = 0..10).  Each cycle writes a 50 MB file through
# $DIR1, opens and reads it through $DIR2 with a paused multiop, unlinks
# it through $DIR1, then lets multiop close it.  The fifth field of the
# "filesystem" line of "$LFS df" is sampled before the write, after the
# write and after the close.
# Returns 3 if multiop cannot be started, 1 if the check fails.
test_36() { #bug 16417
	local SIZE
	local SIZE_B
	local i
	local read_pid
	local before
	local after_dd
	local after
	# awk program shared by the three samples below
	local df_field='{if ($1 ~/^filesystem/) {print $5; exit} }'

	mkdir -p $DIR1/$tdir
	$LFS setstripe -c -1 $DIR1/$tdir
	i=0
	SIZE=50
	SIZE_B=$((SIZE * 1024 * 1024))

	while [ $i -le 10 ]; do
		lctl mark "start test"
		before=$($LFS df | awk "$df_field")
		dd if=/dev/zero of=$DIR1/$tdir/file000 bs=1M count=$SIZE
		sync
		sleep 1
		after_dd=$($LFS df | awk "$df_field")
		multiop_bg_pause $DIR2/$tdir/file000 O_r${SIZE_B}c || return 3
		read_pid=$!
		rm -f $DIR1/$tdir/file000
		kill -USR1 $read_pid
		wait $read_pid
		sleep 1
		after=$($LFS df | awk "$df_field")
		echo "*** cycle($i) *** before($before):after_dd($after_dd):after($after)"
		# Per the original note these figures are free space, not
		# used space: once the unlinked file is closed the value
		# must be larger than it was right after the dd.
		if [ $after_dd -ge $after ]; then
			error "space leaked"
			return 1;
		fi
		i=$((i + 1))
	done
}
run_test 36 "handle ESTALE/open-unlink corectly"

# Bug 18695: closing a directory must not push a stale size back for it.
#
# A multiop holds $DIR1/$tdir open (D_c) while 10000 files are created in
# it through $DIR2 and its timestamps are moved back to 1980.  After
# multiop is told to close the directory, "lfs find" through $DIR1 must
# still list all 10000 files.
test_37() { # bug 18695
	local multiop_pid
	local nr_files

	mkdir -p $DIR1/$tdir
	multiop_bg_pause $DIR1/$tdir D_c || return 1
	multiop_pid=$!
	# create large directory (32kB seems enough from e2fsck, ~= 1000 files)
	createmany -m $DIR2/$tdir/f 10000
	# set mtime/atime backward
	touch -t 198001010000 $DIR2/$tdir
	kill -USR1 $multiop_pid
	# The close is what this test is about, so make sure it has really
	# happened before counting; without the wait the listing below can
	# run while multiop still holds the directory open.
	wait $multiop_pid || error "multiop on $DIR1/$tdir exited with $?"
	nr_files=$(lfs find $DIR1/$tdir -type f | wc -l)
	[ $nr_files -eq 10000 ] || error "$nr_files != 10000 truncated directory?"

}
run_test 37 "check i_size is not updated for directory on close (bug 18695) =============="

log "cleanup: ======================================================"

check_and_cleanup_lustre