b=18150

[fs/lustre-release.git] / lustre / tests / conf-sanity.sh
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index 2287f88..95a4c85 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -592,6 +592,8 @@ test_22() {
         stop_ost
         mount_client $MOUNT
         # check_mount will block trying to contact ost
+       mcreate $DIR/$tfile || return 40
+       rm -f $DIR/$tfile || return 42
         umount_client $MOUNT
         pass
  
@@ -1197,7 +1199,7 @@ test_34c() {
  }
  run_test 34c "force umount with failed ost should be normal"
  
-test_35() { # bug 12459
+test_35a() { # bug 12459
         setup
  
         DBG_SAVE="`lctl get_param -n debug`"
@@ -1211,7 +1213,7 @@ test_35() { # bug 12459
         log "Wait for RECONNECT_INTERVAL seconds (10s)"
         sleep 10
  
-       MSG="conf-sanity.sh test_35 `date +%F%kh%Mm%Ss`"
+       MSG="conf-sanity.sh test_35a `date +%F%kh%Mm%Ss`"
         $LCTL clear
         log "$MSG"
         log "Stopping the MDT:"
@@ -1242,7 +1244,74 @@ test_35() { # bug 12459
         [ "$NEXTCONN" != "0" ] && log "The client didn't try to reconnect to the last active server (tried ${NEXTCONN} instead)" && return 7
         cleanup
  }
-run_test 35 "Reconnect to the last active server first"
+run_test 35a "Reconnect to the last active server first"
+
+# test_35b (bug 18674): while the MDS is failing requests through the
+# injected OBD_FAIL_MDS_RESEND fail_loc, the client is expected to keep
+# reconnecting to that MDS rather than to the fake failover NID that this
+# test registers.
+#
+# Skipped when the MDS is local.  Return codes:
+#   1 - the fake failover node could not be configured
+#   2 - fail_loc could not be set on the MDS
+#   3 - fail_loc could not be cleared on the MDS
+#   4 - the debug log shows no connection selection for the MDC import
+#   5 - the debug log shows the client picked the fake failover NID
+test_35b() { # bug 18674
+       remote_mds || { skip "local MDS" && return 0; }
+       setup
+
+       # Save the current debug mask, then restrict it to "ha" and start
+       # from an empty debug buffer (presumably so that the
+       # import_select_connection messages parsed below are captured).
+       debugsave
+       $LCTL set_param debug="ha"
+       $LCTL clear
+       MSG="conf-sanity.sh test_35b `date +%F%kh%Mm%Ss`"
+       log "$MSG"
+
+       log "Set up a fake failnode for the MDS"
+       FAKENID="127.0.0.2"
+       # Name of the first MDT device listed by the MDS
+       local device=$(do_facet mds "$LCTL get_param -n devices" | \
+                       awk '($3 ~ "mdt" && $4 ~ "MDT") { print $4 }' | head -1)
+       do_facet mds "$LCTL conf_param ${device}.failover.node=$FAKENID" || \
+               return 1
+
+       local at_max_saved=0
+       # adaptive timeouts may prevent seeing the issue
+       if at_is_enabled; then
+               at_max_saved=$(at_max_get mds)
+               at_max_set 0 mds client
+       fi
+
+       mkdir -p $MOUNT/testdir
+       touch $MOUNT/testdir/test
+
+       log "Injecting EBUSY on MDS"
+       # Setting OBD_FAIL_MDS_RESEND=0x136
+       do_facet mds "$LCTL set_param fail_loc=0x80000136" || return 2
+
+       log "Stat on a test file"
+       stat $MOUNT/testdir/test
+
+       log "Stop injecting EBUSY on MDS"
+       do_facet mds "$LCTL set_param fail_loc=0" || return 3
+       rm -f $MOUNT/testdir/test
+
+       log "done"
+       # restore adaptive timeout; the value is quoted and defaulted so an
+       # empty at_max_get result cannot produce a malformed test expression
+       if [ "${at_max_saved:-0}" -ne 0 ]; then
+               at_max_set "$at_max_saved" mds client
+       fi
+
+       $LCTL dk $TMP/lustre-log-$TESTNAME.log
+
+       # The log has been dumped, so undo the debugsave/"ha" override here:
+       # doing it before the verdict keeps the reduced debug mask from
+       # leaking into later tests on both the success and the error paths.
+       debugrestore
+
+       # retrieve from the log if the client has ever tried to
+       # contact the fake server after the loss of connection
+       # (0: no connection selected, 1: selected but never the fake NID,
+       #  2: the fake NID was selected)
+       FAILCONN=`awk "BEGIN {ret = 0;}
+                      /import_select_connection.*${FSNAME}-MDT0000-mdc.* using connection/ {
+                               ret = 1;
+                               if (\\\$NF ~ /$FAKENID/) {
+                                       ret = 2;
+                                       exit;
+                               }
+                      }
+                      END {print ret}" $TMP/lustre-log-$TESTNAME.log`
+
+       [ "$FAILCONN" == "0" ] && \
+               log "ERROR: The client reconnection has not been triggered" && \
+               return 4
+       [ "$FAILCONN" == "2" ] && \
+               log "ERROR: The client tried to reconnect to the failover server while the primary was busy" && \
+               return 5
+
+       cleanup
+}
+run_test 35b "Continue reconnection retries, if the active server is busy"
  
  test_36() { # 12743
          local rc
@@ -1960,6 +2029,31 @@ test_50f() {
  }
  run_test 50f "normal statfs one server in down =========================="
  
+# test_50g: with OST0001 deactivated through conf_param, a freshly
+# remounted client must be able to run df without panicking.
+# Skipped (via skip_env) when fewer than two OSTs are configured.
+test_50g() {
+       if [ "$OSTCOUNT" -lt "2" ]; then
+               skip_env "$OSTCOUNT < 2, skipping" && return
+       fi
+       setup
+       start_ost2 ||
+               error "Unable to start OST2"
+
+       # conf_param key that switches the second OST's OSC on and off
+       local osc_active="${FSNAME}-OST0001.osc.active"
+
+       # Create a file with stripe count -1, then deactivate the OST
+       $LFS setstripe -c -1 $DIR/$tfile ||
+               error "Unable to lfs setstripe"
+       do_facet mgs $LCTL conf_param $osc_active=0 ||
+               error "Unable to deactivate OST"
+
+       # Remount the client while the OST is still deactivated
+       umount_client $MOUNT ||
+               error "Unable to unmount client"
+       mount_client $MOUNT ||
+               error "Unable to mount client"
+       # This df should not cause a panic
+       df -k $MOUNT
+
+       # Reactivate the OST, remove the file and shut everything down
+       do_facet mgs $LCTL conf_param $osc_active=1 ||
+               error "Unable to activate OST"
+       rm -f $DIR/$tfile
+       umount_client $MOUNT ||
+               error "Unable to unmount client"
+       stop_ost2 ||
+               error "Unable to stop OST2"
+       stop_ost ||
+               error "Unable to stop OST1"
+       stop_mds ||
+               error "Unable to stop MDS"
+       writeconf
+}
+run_test 50g "deactivated OST should not cause panic====================="
+
  test_51() {
         local LOCAL_TIMEOUT=20