Whamcloud - gitweb
LU-7558 ptlrpc: connect vs import invalidate race 18/33718/4
author Andriy Skulysh <c17819@cray.com>
Wed, 22 Aug 2018 19:22:49 +0000 (22:22 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 16 Jan 2019 07:06:36 +0000 (07:06 +0000)
A connect request can't be sent while an import invalidate
is in progress, so the attempt leaves the import in an
uninitialized state.

Don't allow reconnect in evicted state.

Change-Id: I79a1a1eb05fede30e100ba09b6f3f98636a46213
Cray-bug-id: LUS-6322
Signed-off-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-on: https://review.whamcloud.com/33718
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/ptlrpc/import.c
lustre/ptlrpc/recover.c
lustre/tests/recovery-small.sh

index 1aed5f9..a5361e1 100644 (file)
@@ -437,6 +437,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3  0x520
 #define OBD_FAIL_PTLRPC_BULK_ATTACH      0x521
+#define OBD_FAIL_PTLRPC_CONNECT_RACE    0x531
 
 #define OBD_FAIL_OBD_PING_NET            0x600
 /*     OBD_FAIL_OBD_LOG_CANCEL_NET      0x601 obsolete since 1.5 */
index 6cf814b..6bb1666 100644 (file)
@@ -37,6 +37,7 @@
 #define DEBUG_SUBSYSTEM S_RPC
 
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include <obd_support.h>
 #include <lustre_ha.h>
 #include <lustre_net.h>
@@ -291,6 +292,10 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
        if (!imp->imp_invalid || imp->imp_obd->obd_no_recov)
                ptlrpc_deactivate_import(imp);
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CONNECT_RACE)) {
+               OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
+               msleep(10 * MSEC_PER_SEC);
+       }
        CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_CONNECT_NET, 3 * cfs_fail_val / 2);
        LASSERT(imp->imp_invalid);
 
@@ -668,6 +673,7 @@ int ptlrpc_connect_import(struct obd_import *imp)
                CERROR("already connected\n");
                RETURN(0);
        } else if (imp->imp_state == LUSTRE_IMP_CONNECTING ||
+                  imp->imp_state == LUSTRE_IMP_EVICTED ||
                   imp->imp_connected) {
                spin_unlock(&imp->imp_lock);
                CERROR("already connecting\n");
index 873adb1..ab93c45 100644 (file)
@@ -346,6 +346,8 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
         if (rc)
                 GOTO(out, rc);
 
+       OBD_RACE(OBD_FAIL_PTLRPC_CONNECT_RACE);
+
         rc = ptlrpc_connect_import(imp);
         if (rc)
                 GOTO(out, rc);
index 84e1a44..12ae698 100755 (executable)
@@ -1593,6 +1593,25 @@ test_65() {
 }
 run_test 65 "lock enqueue for destroyed export"
 
+test_67()
+{ # Regression test: race an import reconnect against a client eviction.
+#define OBD_FAIL_PTLRPC_CONNECT_RACE    0x531
+       $LCTL set_param fail_loc=0x80000531 # low bits 0x531 = the fail point named above
+       # NOTE(review): the 0x80000000 bit presumably means "fire once" -- confirm.
+       local mdtname="MDT0000"
+       local mdccli=$($LCTL dl | grep "${mdtname}-mdc" | awk '{print $4;}') # 4th field of the matching "lctl dl" line
+       local conn_uuid=$($LCTL get_param -n mdc.${mdccli}.mds_conn_uuid)
+       $LCTL set_param "mdc.${mdccli}.import=connection=${conn_uuid}" & # re-sets the UUID just read; backgrounded, reaped by "wait" below
+       sleep 2
+       # NOTE(review): the background set_param is presumably still parked at the fail point here -- confirm.
+       mds_evict_client
+       sleep 1
+       # Reconnect, then wait for the background set_param to exit.
+       client_reconnect
+       wait
+}
+run_test 67 "connect vs import invalidate race"
+
 check_cli_ir_state()
 {
         local NODE=${1:-$HOSTNAME}