b=23595 fix conf-sanity 57 for remote ost

[fs/lustre-release.git] / lustre / tests / conf-sanity.sh
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index 092ffbd..c4565e7 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -1366,19 +1366,21 @@ test_35b() { # bug 18674
                 at_max_set 0 mds client
         fi
  
-       mkdir -p $MOUNT/testdir
-       touch $MOUNT/testdir/test
+       mkdir -p $MOUNT/$tdir
  
         log "Injecting EBUSY on MDS"
         # Setting OBD_FAIL_MDS_RESEND=0x136
         do_facet mds "$LCTL set_param fail_loc=0x80000136" || return 2
  
-       log "Stat on a test file"
-       stat $MOUNT/testdir/test
+       $LCTL set_param mdc.${FSNAME}*.stats=clear
+
+       log "Creating a test file and stat it"
+       touch $MOUNT/$tdir/$tfile
+       stat $MOUNT/$tdir/$tfile
  
         log "Stop injecting EBUSY on MDS"
         do_facet mds "$LCTL set_param fail_loc=0" || return 3
-       rm -f $MOUNT/testdir/test
+       rm -f $MOUNT/$tdir/$tfile
  
         log "done"
         # restore adaptive timeout
@@ -1386,6 +1388,8 @@ test_35b() { # bug 18674
  
         $LCTL dk $TMP/lustre-log-$TESTNAME.log
  
+       CONNCNT=`$LCTL get_param mdc.${FSNAME}*.stats | awk '/mds_connect/{print $2}'`
+
         # retrieve from the log if the client has ever tried to
         # contact the fake server after the loss of connection
         FAILCONN=`awk "BEGIN {ret = 0;}
@@ -1405,6 +1409,12 @@ test_35b() { # bug 18674
                 log "ERROR: The client tried to reconnect to the failover server while the primary was busy" && \
                 return 5
  
+       # When OBD_FAIL_MDS_RESEND is hit, we sleep for 2 * obd_timeout
+        # Reconnects are supposed to be rate limited to one every 5s
+       [ $CONNCNT -gt $((2 * $TIMEOUT / 5 + 1)) ] && \
+               log "ERROR: Too many reconnects $CONNCNT" && \
+               return 6
+
          cleanup
         # remove nid settings
         writeconf
@@ -2481,13 +2491,10 @@ test_56() {
  run_test 56 "check big indexes"
  
  test_57() { # bug 22656
-       local NID=$($LCTL list_nids | head -1)
+       local NID=$(do_facet ost1 "$LCTL get_param nis" | tail -1 | awk '{print $1}')
         writeconf
         do_facet ost1 "$TUNEFS --failnode=$NID `ostdevname 1`" || error "tunefs failed"
-       if ! combined_mgs_mds ; then
-               start_mgs
-       fi
-       start_mds
+       start_mgsmds
         start_ost && error "OST registration from failnode should fail"
         stop_mds
         reformat