X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Frecovery-small.sh;h=023c14f72f582fbaf636a84e4a483ae84334a1ba;hp=dc9088dc8e1bcbd29cca9dca69d9a069b1e6b9f6;hb=721e945a96186dba1ddd5ef4b2cab8cd8c4e06c2;hpb=1829e654506c5b798eae5ef4a91948f1411513b4

diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index dc9088d..023c14f 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -10,6 +10,7 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
 
 if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
     CONFIG_EXCEPTIONS="52"
@@ -18,7 +19,7 @@ if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
     ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
 fi
 
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+require_dsh_mds || exit 0
 
 # also long tests: 19, 21a, 21e, 21f, 23, 27
 #                                   1  2.5  2.5    4    4          (min)"
@@ -218,7 +219,7 @@ test_17() {
     remote_ost_nodsh && skip "remote OST with nodsh" && return 0
 
     # With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max
-    if at_is_valid && at_is_enabled; then
+    if at_is_enabled; then
         at_max_saved=$(at_max_get ost1)
         at_max_set $TIMEOUT ost1
     fi
@@ -239,13 +240,13 @@ test_17() {
     # expect cmp to succeed, client resent bulk
     do_facet client "cmp /etc/termcap $DIR/$tfile" || return 3
     do_facet client "rm $DIR/$tfile" || return 4
-    [ $at_max_saved -ne 0 ] && $(at_max_set $at_max_saved ost1)
+    [ $at_max_saved -ne 0 ] && at_max_set $at_max_saved ost1
     return 0
 }
 run_test 17 "timeout bulk get, don't evict client (2732)"
 
 test_18a() {
-    [ -z ${ost2_svc} ] && skip "needs 2 osts" && return 0
+    [ -z ${ost2_svc} ] && skip_env "needs 2 osts" && return 0
 
     do_facet client mkdir -p $DIR/$tdir
     f=$DIR/$tdir/$tfile
@@ -658,6 +659,11 @@ test_26a() {      # was test_26 bug 5921 - evict dead exports by pinger
 	remote_ost_nodsh && skip "remote OST with nodsh" && return 0
 	remote_mds || { skip "local MDS" && return 0; }
 
+        if [ $(facet_host mgs) = $(facet_host ost1) ]; then
+                skip "msg and ost1 are at the same node"
+                return 0
+        fi
+
 	check_timeout || return 1
 
 	local OST_NEXP=$(do_facet ost1 lctl get_param -n obdfilter.${ost1_svc}.num_exports | cut -d' ' -f2)
@@ -680,9 +686,15 @@ run_test 26a "evict dead exports"
 test_26b() {      # bug 10140 - evict dead exports by pinger
 	remote_ost_nodsh && skip "remote OST with nodsh" && return 0
 
+        if [ $(facet_host mgs) = $(facet_host ost1) ]; then
+                skip "msg and ost1 are at the same node"
+                return 0
+        fi
+
 	check_timeout || return 1
-	client_df
-	zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
+	clients_up
+	zconf_mount `hostname` $MOUNT2 ||
+                { error "Failed to mount $MOUNT2"; return 2; }
 	sleep 1 # wait connections being established
 
 	local MDS_NEXP=$(do_facet $SINGLEMDS lctl get_param -n mdt.${mds1_svc}.num_exports | cut -d' ' -f2)
@@ -692,16 +704,19 @@ test_26b() {      # bug 10140 - evict dead exports by pinger
 
 	zconf_umount `hostname` $MOUNT2 -f
 
+	# PING_INTERVAL max(obd_timeout / 4, 1U)
+	# PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
+
 	# evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.  
 	# But if there's a race to start the evictor from various obds, 
 	# the loser might have to wait for the next ping.
-	# PING_INTERVAL max(obd_timeout / 4, 1U)
-	# sleep (2*PING_INTERVAL) 
-
-        local rc=0
-        wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+	# worst case = 9 * PING_INTERVAL + 1 * PING_INTERVAL (extra ping for the loser)
+	#            = 10 * PING_INTERVAL = 10 * obd_timeout / 4 = 2.5 * obd_timeout
+	# so wait $((TIMEOUT * 3)), slightly above that bound (bug 19887)
+	local rc=0
+	wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) || \
 		error "Client was not evicted by ost" rc=1
-	wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) || \
+	wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) || \
 		error "Client was not evicted by mds"
 }
 run_test 26b "evict dead exports"
@@ -738,12 +753,32 @@ test_28() {      # bug 6086 - error adding new clients
 	#define OBD_FAIL_MDS_CLIENT_ADD 0x12f
 	do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f"
 	# fail once (evicted), reconnect fail (fail_loc), ok
-	df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed"
+	client_up || (sleep 10; client_up) || (sleep 10; client_up) || error "reconnect failed"
 	rm -f $DIR/$tfile
 	fail $SINGLEMDS		# verify MDS last_rcvd can be loaded
 }
 run_test 28 "handle error adding new clients (bug 6086)"
 
+test_29a() { # bug 22273 - client_up must succeed after a forced client-add error on $SINGLEMDS
+	#define OBD_FAIL_TGT_CLIENT_ADD 0x711
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000711" # 0x711 | 0x80000000 (presumably one-shot; TODO confirm)
+	# fail_abort $SINGLEMDS so the reconnecting client is treated as new again
+	fail_abort $SINGLEMDS
+	client_up || error "reconnect failed" # the test fails only if the client cannot come back up
+	return 0
+}
+run_test 29a "error adding new clients doesn't cause LBUG (bug 22273)"
+
+test_29b() { # bug 22273 - client_up must succeed after a forced client-add error on ost1
+	#define OBD_FAIL_TGT_CLIENT_ADD 0x711
+	do_facet ost1 "lctl set_param fail_loc=0x80000711" # 0x711 | 0x80000000 (presumably one-shot; TODO confirm)
+	# fail_abort ost1 so the reconnecting client is treated as new again
+	fail_abort ost1
+	client_up || error "reconnect failed" # the test fails only if the client cannot come back up
+	return 0
+}
+run_test 29b "error adding new clients doesn't cause LBUG (bug 22273)"
+
 test_50() {
 	mkdir -p $DIR/$tdir
 	# put a load of file creates/writes/deletes