X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Frecovery-small.sh;h=023c14f72f582fbaf636a84e4a483ae84334a1ba;hp=dc9088dc8e1bcbd29cca9dca69d9a069b1e6b9f6;hb=721e945a96186dba1ddd5ef4b2cab8cd8c4e06c2;hpb=1829e654506c5b798eae5ef4a91948f1411513b4 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index dc9088d..023c14f 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -10,6 +10,7 @@ LUSTRE=${LUSTRE:-`dirname $0`/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then CONFIG_EXCEPTIONS="52" @@ -18,7 +19,7 @@ if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS" fi -remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 +require_dsh_mds || exit 0 # also long tests: 19, 21a, 21e, 21f, 23, 27 # 1 2.5 2.5 4 4 (min)" @@ -218,7 +219,7 @@ test_17() { remote_ost_nodsh && skip "remote OST with nodsh" && return 0 # With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max - if at_is_valid && at_is_enabled; then + if at_is_enabled; then at_max_saved=$(at_max_get ost1) at_max_set $TIMEOUT ost1 fi @@ -239,13 +240,13 @@ test_17() { # expect cmp to succeed, client resent bulk do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3 do_facet client "rm $DIR/$tfile" || return 4 - [ $at_max_saved -ne 0 ] && $(at_max_set $at_max_saved ost1) + [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved ost1 return 0 } run_test 17 "timeout bulk get, don't evict client (2732)" test_18a() { - [ -z ${ost2_svc} ] && skip "needs 2 osts" && return 0 + [ -z ${ost2_svc} ] && skip_env "needs 2 osts" && return 0 do_facet client mkdir -p $DIR/$tdir f=$DIR/$tdir/$tfile @@ -658,6 +659,11 @@ test_26a() { # was test_26 bug 5921 - evict dead exports by pinger remote_ost_nodsh && skip "remote OST with nodsh" && return 0 remote_mds || { skip "local MDS" && return 0; } + if [ $(facet_host mgs) = $(facet_host ost1) ]; then + skip "msg and ost1 are at the same node" + return 0 + fi + check_timeout || return 1 local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2) @@ -680,9 +686,15 @@ run_test 26a "evict dead exports" test_26b() { # bug 10140 - evict dead exports by pinger remote_ost_nodsh && skip "remote OST with nodsh" && return 0 + if [ $(facet_host mgs) = $(facet_host ost1) ]; then + skip "msg and ost1 are at the same node" + return 0 + fi + check_timeout || return 1 - client_df - zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2" + clients_up + zconf_mount `hostname` $MOUNT2 || + { error "Failed to mount $MOUNT2"; return 2; } sleep 1 # wait connections being established local MDS_NEXP=$(do_facet $SINGLEMDS lctl get_param -n mdt.${mds1_svc}.num_exports | cut -d' ' -f2) @@ -692,16 +704,19 @@ test_26b() { # bug 10140 - evict dead exports by pinger zconf_umount `hostname` $MOUNT2 -f + # PING_INTERVAL max(obd_timeout / 4, 1U) + # PING_EVICT_TIMEOUT (PING_INTERVAL * 6) + # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict. # But if there's a race to start the evictor from various obds, # the loser might have to wait for the next ping. - # PING_INTERVAL max(obd_timeout / 4, 1U) - # sleep (2*PING_INTERVAL) - - local rc=0 - wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \ + # = 9 * PING_INTERVAL + PING_INTERVAL + # = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout + # let's wait $((TIMEOUT * 3)) # bug 19887 + local rc=0 + wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) || \ error "Client was not evicted by ost" rc=1 - wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \ + wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) || \ error "Client was not evicted by mds" } run_test 26b "evict dead exports" @@ -738,12 +753,32 @@ test_28() { # bug 6086 - error adding new clients #define OBD_FAIL_MDS_CLIENT_ADD 0x12f do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f" # fail once (evicted), reconnect fail (fail_loc), ok - df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed" + client_up || (sleep 10; client_up) || (sleep 10; client_up) || error "reconnect failed" rm -f $DIR/$tfile fail $SINGLEMDS # verify MDS last_rcvd can be loaded } run_test 28 "handle error adding new clients (bug 6086)" +test_29a() { # bug 22273 - error adding new clients + #define OBD_FAIL_TGT_CLIENT_ADD 0x711 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000711" + # fail abort so client will be new again + fail_abort $SINGLEMDS + client_up || error "reconnect failed" + return 0 +} +run_test 29a "error adding new clients doesn't cause LBUG (bug 22273)" + +test_29b() { # bug 22273 - error adding new clients + #define OBD_FAIL_TGT_CLIENT_ADD 0x711 + do_facet ost1 "lctl set_param fail_loc=0x80000711" + # fail abort so client will be new again + fail_abort ost1 + client_up || error "reconnect failed" + return 0 +} +run_test 29b "error adding new clients doesn't cause LBUG (bug 22273)" + test_50() { mkdir -p $DIR/$tdir # put a load of file creates/writes/deletes