From d94e53604e470025d3593dbb08f0346701abd9f8 Mon Sep 17 00:00:00 2001
From: Johann Lombardi
Date: Tue, 6 Apr 2010 11:01:33 +0200
Subject: [PATCH] b=22423 add regression test for reconnect flooding issue

i=dmitry
---
 lustre/tests/conf-sanity.sh | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index 69b1e05..40b4551 100644
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -1313,8 +1313,11 @@ test_35b() { # bug 18674
 	# Setting OBD_FAIL_MDS_RESEND=0x136
 	do_facet mds "$LCTL set_param fail_loc=0x80000136" || return 2
 
-	log "Creating a test file"
+	$LCTL set_param mdc.${FSNAME}*.stats=clear
+
+	log "Creating a test file and stat it"
 	touch $MOUNT/$tdir/$tfile
+	stat $MOUNT/$tdir/$tfile
 
 	log "Stop injecting EBUSY on MDS"
 	do_facet mds "$LCTL set_param fail_loc=0" || return 3
@@ -1326,6 +1329,8 @@ test_35b() { # bug 18674
 
 	$LCTL dk $TMP/lustre-log-$TESTNAME.log
 
+	CONNCNT=`$LCTL get_param mdc.${FSNAME}*.stats | awk '/mds_connect/{print $2}'`
+
 	# retrieve from the log if the client has ever tried to
 	# contact the fake server after the loss of connection
 	FAILCONN=`awk "BEGIN {ret = 0;}
@@ -1345,6 +1350,12 @@ test_35b() { # bug 18674
 		log "ERROR: The client tried to reconnect to the failover server while the primary was busy" && \
 		return 5
 
+	# When OBD_FAIL_MDS_RESEND is hit, we sleep for 2 * obd_timeout
+	# Reconnects are supposed to be rate limited to one every 5s
+	[ $CONNCNT -gt $((2 * $TIMEOUT / 5 + 1)) ] && \
+		log "ERROR: Too many reconnects $CONNCNT" && \
+		return 6
+
 	cleanup
 }
 run_test 35b "Continue reconnection retries, if the active server is busy"
-- 
1.8.3.1