X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Frecovery-small.sh;h=891ac23294c38fb48d770bcbee429b749ee99448;hb=fba6abdb9818b01d02ac7663e4ac9881258c8ead;hp=125051b457c88c8628bd66ccd8b14f198341b18f;hpb=20472b4070495c1bb2796ce2682aefdf2fe8a3f6;p=fs%2Flustre-release.git diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 125051b..891ac23 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2,10 +2,6 @@ set -e -# bug number for skipped test: -ALWAYS_EXCEPT="$RECOVERY_SMALL_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - export MULTIOP=${MULTIOP:-multiop} PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-`dirname $0`/..} @@ -14,6 +10,11 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging +ALWAYS_EXCEPT="$RECOVERY_SMALL_EXCEPT " +# bug number for skipped test: +ALWAYS_EXCEPT+=" " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! + require_dsh_mds || exit 0 # also long tests: 19, 21a, 21e, 21f, 23, 27 @@ -172,8 +173,8 @@ test_10b() { local before=$(date +%s) local evict - [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.53) ]] && - skip "Need MDS version at least 2.6.53" && return + [[ "$MDS1_VERSION" -lt $(version_code 2.6.53) ]] && + skip "Need MDS version at least 2.6.53" do_facet client "stat $DIR > /dev/null" || error "failed to stat $DIR: $?" drop_bl_callback_once "chmod 0777 $DIR" || @@ -244,8 +245,8 @@ test_10d() { local before=$(date +%s) local evict - [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.90) ]] && - skip "Need MDS version at least 2.6.90" && return + [[ "$MDS1_VERSION" -lt $(version_code 2.6.90) ]] && + skip "Need MDS version at least 2.6.90" # sleep 1 is to make sure that BEFORE is not equal to EVICTED below sleep 1 @@ -285,11 +286,11 @@ run_test 10d "test failed blocking ast" test_10e() { - [[ $(lustre_version_code ost1) -le $(version_code 2.8.58) ]] && - skip "Need OST version at least 2.8.59" && return 0 - [ $CLIENTCOUNT -lt 2 ] && skip "need two clients" && return 0 + [[ "$OST1_VERSION" -le $(version_code 2.8.58) ]] && + skip "Need OST version at least 2.8.59" + [ $CLIENTCOUNT -lt 2 ] && skip "need two clients" [ $(facet_host client) == $(facet_host ost1) ] && - skip "need ost1 and client on different nodes" && return 0 + skip "need ost1 and client on different nodes" local -a clients=(${CLIENTS//,/ }) local client1=${clients[0]} local client2=${clients[1]} @@ -1295,15 +1296,16 @@ test_53() { run_test 53 "touch: drop rep" test_54() { - zconf_mount `hostname` $MOUNT2 - touch $DIR/$tfile - touch $DIR2/$tfile.1 - sleep 10 - cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd - fail $SINGLEMDS - umount $MOUNT2 - ERROR=`dmesg | egrep "(test 54|went back in time)" | tail -n1 | grep "went back in time"` - [ x"$ERROR" == x ] || error "back in time occured" + zconf_mount $(hostname) $MOUNT2 + touch $DIR/$tfile + touch $DIR2/$tfile.1 + sleep 10 + cat $DIR2/$tfile.missing # save transno = 0, rc != 0 into last_rcvd + fail $SINGLEMDS + umount $MOUNT2 + ERROR=$(dmesg | egrep "(test 54|went back in time)" | tail -n1 | + grep "went back in time") + [ x"$ERROR" == x ] || error "back in time occured" } run_test 54 "back in time" @@ -1323,7 +1325,7 @@ test_55() { # Minimum pass speed is 2MBps local ddtimeout=64 # LU-2887/LU-3089 - set min pass speed to 500KBps - [ "$(facet_fstype ost1)" = "zfs" ] && ddtimeout=256 + [ "$ost1_FSTYPE" = zfs ] && ddtimeout=256 # first dd should be finished quickly $LFS setstripe -c 1 -i 0 $DIR/$tdir/$tfile-1 @@ -1563,8 +1565,8 @@ run_test 65 "lock enqueue for destroyed export" test_66() { - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.7.51) ]] || - { skip "Need MDS version at least 2.7.51"; return 0; } + [[ "$MDS1_VERSION" -ge $(version_code 2.7.51) ]] || + skip "Need MDS version at least 2.7.51" local list=$(comma_list $(osts_nodes)) @@ -1581,7 +1583,8 @@ test_66() do_nodes $list $LCTL set_param fail_loc=0x80000136 #initiate the re-connect & re-send - local mdccli=$($LCTL dl | awk '/-MDT0000-mdc-/ {print $4;}') + local mdtname="MDT0000" + local mdccli=$($LCTL dl | grep "${mdtname}-mdc" | awk '{print $4;}') local conn_uuid=$($LCTL get_param -n mdc.${mdccli}.conn_uuid) $LCTL set_param "mdc.${mdccli}.import=connection=${conn_uuid}" sleep 2 @@ -1965,8 +1968,8 @@ cleanup_106() { } test_106() { # LU-1789 - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.50) ]] || - { skip "Need MDS version at least 2.3.50"; return 0; } + [[ "$MDS1_VERSION" -ge $(version_code 2.3.50) ]] || + skip "Need MDS version at least 2.3.50" #define OBD_FAIL_MDC_LIGHTWEIGHT 0x805 $LCTL set_param fail_loc=0x805 @@ -1992,9 +1995,8 @@ test_106() { # LU-1789 # lightweight goes through LUSTRE_IMP_RECOVER during failover touch -c $DIR2/$tfile || true $LCTL dk $TMP/lustre-log-$TESTNAME.log - recovered=`awk '/MDT0000-mdc-[0-9a-f]*: lwp recover/ { - print; - }' $TMP/lustre-log-$TESTNAME.log` + recovered=$(awk '/MDT0000-mdc-[0-9a-f]*. lwp recover/ { print }' \ + $TMP/lustre-log-$TESTNAME.log) [ -z "$recovered" ] && error "lightweight client was not recovered" # and all operations performed by lightweight client should be @@ -2148,10 +2150,10 @@ test_110f () { run_test 110f "remove remote directory: drop slave rep" test_110g () { - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.0) ]] || - { skip "Need MDS version at least 2.11.0"; return 0; } + [[ "$MDS1_VERSION" -ge $(version_code 2.11.0) ]] || + skip "Need MDS version at least 2.11.0" - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" mkdir -p $DIR/$tdir touch $DIR/$tdir/$tfile @@ -2175,10 +2177,9 @@ test_110g () { run_test 110g "drop reply during migration" test_110h () { - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - local server_version=$(lustre_version_code mds1) - [[ $server_version -ge $(version_code 2.7.56) ]] || - { skip "Need MDS version at least 2.7.56"; return 0; } + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] || + skip "Need MDS version at least 2.7.56" local src_dir=$DIR/$tdir/source_dir local tgt_dir=$DIR/$tdir/target_dir @@ -2203,10 +2204,9 @@ test_110h () { run_test 110h "drop update reply during cross-MDT file rename" test_110i () { - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - local server_version=$(lustre_version_code mds1) - [[ $server_version -ge $(version_code 2.7.56) ]] || - { skip "Need MDS version at least 2.7.56"; return 0; } + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] || + skip "Need MDS version at least 2.7.56" local src_dir=$DIR/$tdir/source_dir local tgt_dir=$DIR/$tdir/target_dir @@ -2234,10 +2234,9 @@ test_110i () { run_test 110i "drop update reply during cross-MDT dir rename" test_110j () { - [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - local server_version=$(lustre_version_code mds1) - [[ $server_version -ge $(version_code 2.7.56) ]] || - { skip "Need MDS version at least 2.7.56"; return 0; } + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + [[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] || + skip "Need MDS version at least 2.7.56" local remote_dir=$DIR/$tdir/remote_dir local local_dir=$DIR/$tdir/local_dir @@ -2257,11 +2256,33 @@ test_110j () { } run_test 110j "drop update reply during cross-MDT ln" +test_110k() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTS" + [[ "$MDS1_VERSION" -ge $(version_code 2.12.55) ]] || + skip "Need MDS version at least 2.12.55" + + stop mds2 || error "stop mds2 failed" + umount $MOUNT + +#define OBD_FAIL_FLD_QUERY_REQ 0x1103 + do_facet mds2 lctl set_param fail_loc=0x1103 + start mds2 $(mdsdevname 2) -o abort_recovery || + error "start MDS with abort_recovery should succeed" + do_facet mds2 lctl set_param fail_loc=0 + + # cleanup + stop mds2 || error "cleanup: stop mds2 failed" + start mds2 $(mdsdevname 2) || error "cleanup: start mds2 failed" + zconf_mount $(hostname) $MOUNT || error "cleanup: mount failed" + client_up || error "post-failover df failed" +} +run_test 110k "FID_QUERY failed during recovery" + # LU-2844 mdt prepare fail should not cause umount oops test_111 () { - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.62) ]] || - { skip "Need MDS version at least 2.3.62"; return 0; } + [[ "$MDS1_VERSION" -ge $(version_code 2.3.62) ]] || + skip "Need MDS version at least 2.3.62" #define OBD_FAIL_MDS_CHANGELOG_INIT 0x151 do_facet $SINGLEMDS lctl set_param fail_loc=0x151 @@ -2340,8 +2361,8 @@ test_115_write() { } test_115a() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a @@ -2350,8 +2371,8 @@ test_115a() { run_test 115a "read: late REQ MDunlink and no bulk" test_115b() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b #define OBD_FAIL_OST_ENOSPC 0x215 @@ -2363,8 +2384,8 @@ test_115b() { run_test 115b "write: late REQ MDunlink and no bulk" test_115c() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a @@ -2373,8 +2394,8 @@ test_115c() { run_test 115c "read: late Reply MDunlink and no bulk" test_115d() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f #define OBD_FAIL_OST_ENOSPC 0x215 @@ -2383,8 +2404,8 @@ test_115d() { run_test 115d "write: late Reply MDunlink and no bulk" test_115e() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510 #define OBD_FAIL_OST_ALL_REPLY_NET 0x211 @@ -2393,8 +2414,8 @@ test_115e() { run_test 115e "read: late Bulk MDunlink and no reply" test_115f() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b #define OBD_FAIL_OST_ALL_REPLY_NET 0x211 @@ -2403,8 +2424,8 @@ test_115f() { run_test 115f "read: late REQ MDunlink and no reply" test_115g() { - [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] && - skip "need at least 2.8.50 on OST" && return 0 + [ "$OST1_VERSION" -lt $(version_code 2.8.50) ] && + skip "need at least 2.8.50 on OST" #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c test_115_read 0x8000051c 0 @@ -2577,10 +2598,9 @@ test_130_base() { } test_130a() { - remote_mds_nodsh && skip "remote MDS with nodsh" && return - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -ge $(version_code 2.7.2) ]] || - { skip "Need server version newer than 2.7.1"; return 0; } + remote_mds_nodsh && skip "remote MDS with nodsh" + [[ "$MDS1_VERSION" -ge $(version_code 2.7.2) ]] || + skip "Need server version newer than 2.7.1" test_130_base @@ -2590,10 +2610,9 @@ test_130a() { run_test 130a "enqueue resend on not existing file" test_130b() { - remote_mds_nodsh && skip "remote MDS with nodsh" && return - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -ge $(version_code 2.7.2) ]] || - { skip "Need server version newer than 2.7.1"; return 0; } + remote_mds_nodsh && skip "remote MDS with nodsh" + [[ "$MDS1_VERSION" -ge $(version_code 2.7.2) ]] || + skip "Need server version newer than 2.7.1" test_130_base # let the reply to be dropped @@ -2760,7 +2779,7 @@ test_134() { run_test 134 "race between failover and search for reply data free slot" test_135() { - [ $MDS1_VERSION -lt $(version_code 2.12.51) ] && + [ "$MDS1_VERSION" -lt $(version_code 2.12.51) ] && skip "Need MDS version at least 2.12.51" mkdir -p $DIR/$tdir @@ -2775,8 +2794,8 @@ test_135() { run_test 135 "DOM: open/create resend to return size" test_136() { - remote_mds_nodsh && skip "remote MDS with nodsh" && return - [[ $MDS1_VERSION -ge $(version_code 2.12.52) ]] || + remote_mds_nodsh && skip "remote MDS with nodsh" + [[ "$MDS1_VERSION" -ge $(version_code 2.12.52) ]] || skip "Need MDS version at least 2.12.52" local mdts=$(comma_list $(mdts_nodes)) @@ -2829,6 +2848,34 @@ test_136() { } run_test 136 "changelog_deregister leaving pending records" +test_137() { + df $DIR + mkdir -p $DIR/d1 + mkdir -p $DIR/d2 + dd if=/dev/zero of=$DIR/d1/$tfile bs=4096 count=1 + dd if=/dev/zero of=$DIR/d2/$tfile bs=4096 count=1 + cancel_lru_locks osc + + #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000525" + + # RPC1: any reply is to be delayed to disable last_xid logic + ln $DIR/d1/$tfile $DIR/d1/f2 & + sleep 1 + + # RPC2: setattr1 reply is delayed & resent + # original reply comes to client; the resend get asleep + chmod 666 $DIR/d2/$tfile + + # RPC3: setattr2 on the same file; run ahead of RPC2 resend + chmod 777 $DIR/d2/$tfile + + # RPC2 resend wakes up + sleep 5 + [ $(stat -c "%a" $DIR/d2/$tfile) == 777 ] || error "resend got applied" +} +run_test 137 "late resend must be skipped if already applied" + complete $SECONDS check_and_cleanup_lustre exit_status