Whamcloud - gitweb
ORNL-28: Solve reconnecting race between IR and SR
author: Jinshan Xiong <jay@whamcloud.com>
Wed, 24 Aug 2011 23:03:53 +0000 (16:03 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Fri, 21 Oct 2011 17:10:41 +0000 (13:10 -0400)
If a connect request is already in flight when the client import is
notified by IR, set the corresponding connection UUID to a higher
priority and set imp_force_verify, so that the import reconnects
immediately if the RPC times out.

Change-Id: I77e799e1f12b49f3c0271585c1ce812d16dc1ef6
Signed-off-by: Jinshan Xiong <jay@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/1291
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c
lustre/ptlrpc/recover.c

index 3bd7caf..82fe858 100644 (file)
@@ -1518,7 +1518,7 @@ struct ptlrpc_svc_data {
  * Import API
  * @{
  */
-int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid);
+int ptlrpc_connect_import(struct obd_import *imp);
 int ptlrpc_init_import(struct obd_import *imp);
 int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
index 4047407..ab65cb0 100644 (file)
@@ -471,7 +471,7 @@ int client_connect_import(const struct lu_env *env,
                 imp->imp_connect_flags_orig = data->ocd_connect_flags;
         }
 
-        rc = ptlrpc_connect_import(imp, NULL);
+        rc = ptlrpc_connect_import(imp);
         if (rc != 0) {
                 LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
                 GOTO(out_ldlm, rc);
index cf1ba95..d244a75 100644 (file)
@@ -2060,7 +2060,7 @@ static int replay_lock_interpret(const struct lu_env *env,
         LDLM_LOCK_PUT(lock);
 out:
         if (rc != ELDLM_OK)
-                ptlrpc_connect_import(req->rq_import, NULL);
+                ptlrpc_connect_import(req->rq_import);
 
         RETURN(rc);
 }
index eb871fd..687cb6e 100644 (file)
@@ -2590,7 +2590,7 @@ static int ptlrpc_replay_interpret(const struct lu_env *env,
 
         if (rc != 0)
                 /* this replay failed, so restart recovery */
-                ptlrpc_connect_import(imp, NULL);
+                ptlrpc_connect_import(imp);
 
         RETURN(rc);
 }
index 0715ec6..83e4937 100644 (file)
@@ -583,7 +583,7 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
  * actual sending.
  * Returns 0 on success or error code.
  */
-int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
+int ptlrpc_connect_import(struct obd_import *imp)
 {
         struct obd_device *obd = imp->imp_obd;
         int initial_connect = 0;
@@ -628,15 +628,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                                            &imp->imp_connect_data.ocd_transno);
         cfs_spin_unlock(&imp->imp_lock);
 
-        if (new_uuid) {
-                struct obd_uuid uuid;
-
-                obd_str2uuid(&uuid, new_uuid);
-                rc = import_set_conn_priority(imp, &uuid);
-                if (rc)
-                        GOTO(out, rc);
-        }
-
         rc = import_select_connection(imp);
         if (rc)
                 GOTO(out, rc);
@@ -728,8 +719,14 @@ EXPORT_SYMBOL(ptlrpc_connect_import);
 static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
 {
 #ifdef __KERNEL__
-        /* the pinger takes care of issuing the next reconnect request */
-        return;
+        int force_verify;
+
+        cfs_spin_lock(&imp->imp_lock);
+        force_verify = imp->imp_force_verify != 0;
+        cfs_spin_unlock(&imp->imp_lock);
+
+        if (force_verify)
+                ptlrpc_pinger_wake_up();
 #else
         /* liblustre has no pinger thread, so we wakeup pinger anyway */
         ptlrpc_pinger_wake_up();
@@ -780,6 +777,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
         /* All imports are pingable */
         imp->imp_pingable = 1;
         imp->imp_force_reconnect = 0;
+        imp->imp_force_verify = 0;
 
         if (aa->pcaa_initial_connect) {
                 if (msg_flags & MSG_CONNECT_REPLAYABLE) {
@@ -920,7 +918,7 @@ finish:
                                "invalidating and reconnecting\n",
                                obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid);
-                        ptlrpc_connect_import(imp, NULL);
+                        ptlrpc_connect_import(imp);
                         RETURN(0);
                 }
         } else {
@@ -1157,7 +1155,7 @@ static int completed_replay_interpret(const struct lu_env *env,
                                req->rq_import->imp_obd->obd_name,
                                req->rq_status);
                 }
-                ptlrpc_connect_import(req->rq_import, NULL);
+                ptlrpc_connect_import(req->rq_import);
         }
 
         RETURN(0);
index fade6b0..f56d16a 100644 (file)
@@ -67,7 +67,7 @@ void ptlrpc_initiate_recovery(struct obd_import *imp)
         ENTRY;
 
         CDEBUG(D_HA, "%s: starting recovery\n", obd2cli_tgt(imp->imp_obd));
-        ptlrpc_connect_import(imp, NULL);
+        ptlrpc_connect_import(imp);
 
         EXIT;
 }
@@ -223,7 +223,7 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
                 }
                 /* to control recovery via lctl {disable|enable}_recovery */
                 if (imp->imp_deactive == 0)
-                        ptlrpc_connect_import(imp, NULL);
+                        ptlrpc_connect_import(imp);
         }
 
         /* Wait for recovery to complete and resend. If evicted, then
@@ -301,15 +301,27 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid, int async)
         /* force import to be disconnected. */
         ptlrpc_set_import_discon(imp, 0);
 
+        if (new_uuid) {
+                struct obd_uuid uuid;
+
+                /* instruct import to use new uuid */
+                obd_str2uuid(&uuid, new_uuid);
+                rc = import_set_conn_priority(imp, &uuid);
+                if (rc)
+                        GOTO(out, rc);
+        }
+
         /* Check if reconnect is already in progress */
         cfs_spin_lock(&imp->imp_lock);
-        if (imp->imp_state != LUSTRE_IMP_DISCON)
+        if (imp->imp_state != LUSTRE_IMP_DISCON) {
+                imp->imp_force_verify = 1;
                 rc = -EALREADY;
+        }
         cfs_spin_unlock(&imp->imp_lock);
         if (rc)
                 GOTO(out, rc);
 
-        rc = ptlrpc_connect_import(imp, new_uuid);
+        rc = ptlrpc_connect_import(imp);
         if (rc)
                 GOTO(out, rc);