Whamcloud - gitweb
ORNL-28: Solve reconnecting race between IR and SR
[fs/lustre-release.git] / lustre / ptlrpc / import.c
index ef403ce..83e4937 100644 (file)
@@ -447,10 +447,9 @@ int ptlrpc_reconnect_import(struct obd_import *imp)
         /* Remove 'invalid' flag */
         ptlrpc_activate_import(imp);
         /* Attempt a new connect */
-        ptlrpc_recover_import(imp, NULL);
+        ptlrpc_recover_import(imp, NULL, 0);
         return 0;
 }
-
 EXPORT_SYMBOL(ptlrpc_reconnect_import);
 
 /**
@@ -479,12 +478,6 @@ static int import_select_connection(struct obd_import *imp)
                        imp->imp_obd->obd_name,
                        libcfs_nid2str(conn->oic_conn->c_peer.nid),
                        conn->oic_last_attempt);
-                /* Don't thrash connections */
-                if (cfs_time_before_64(cfs_time_current_64(),
-                                     conn->oic_last_attempt +
-                                     cfs_time_seconds(CONNECTION_SWITCH_MIN))) {
-                        continue;
-                }
 
                 /* If we have not tried this connection since
                    the last successful attempt, go with this one */
@@ -590,7 +583,7 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
  * actual sending.
  * Returns 0 on success or error code.
  */
-int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
+int ptlrpc_connect_import(struct obd_import *imp)
 {
         struct obd_device *obd = imp->imp_obd;
         int initial_connect = 0;
@@ -635,15 +628,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                                            &imp->imp_connect_data.ocd_transno);
         cfs_spin_unlock(&imp->imp_lock);
 
-        if (new_uuid) {
-                struct obd_uuid uuid;
-
-                obd_str2uuid(&uuid, new_uuid);
-                rc = import_set_conn_priority(imp, &uuid);
-                if (rc)
-                        GOTO(out, rc);
-        }
-
         rc = import_select_connection(imp);
         if (rc)
                 GOTO(out, rc);
@@ -735,42 +719,18 @@ EXPORT_SYMBOL(ptlrpc_connect_import);
 static void ptlrpc_maybe_ping_import_soon(struct obd_import *imp)
 {
 #ifdef __KERNEL__
-        struct obd_import_conn *imp_conn;
-#endif
-        int wake_pinger = 0;
-
-        ENTRY;
+        int force_verify;
 
         cfs_spin_lock(&imp->imp_lock);
-        if (cfs_list_empty(&imp->imp_conn_list))
-                GOTO(unlock, 0);
+        force_verify = imp->imp_force_verify != 0;
+        cfs_spin_unlock(&imp->imp_lock);
 
-#ifdef __KERNEL__
-        imp_conn = cfs_list_entry(imp->imp_conn_list.prev,
-                                  struct obd_import_conn,
-                                  oic_item);
-
-        /* XXX: When the failover node is the primary node, it is possible
-         * to have two identical connections in imp_conn_list. We must
-         * compare not conn's pointers but NIDs, otherwise we can defeat
-         * connection throttling. (See bug 14774.) */
-        if (imp->imp_conn_current->oic_conn->c_peer.nid !=
-                                imp_conn->oic_conn->c_peer.nid) {
-                ptlrpc_ping_import_soon(imp);
-                wake_pinger = 1;
-        }
+        if (force_verify)
+                ptlrpc_pinger_wake_up();
 #else
         /* liblustre has no pinger thread, so we wakeup pinger anyway */
-        wake_pinger = 1;
+        ptlrpc_pinger_wake_up();
 #endif
-
- unlock:
-        cfs_spin_unlock(&imp->imp_lock);
-
-        if (wake_pinger)
-                ptlrpc_pinger_wake_up();
-
-        EXIT;
 }
 
 static int ptlrpc_busy_reconnect(int rc)
@@ -817,6 +777,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
         /* All imports are pingable */
         imp->imp_pingable = 1;
         imp->imp_force_reconnect = 0;
+        imp->imp_force_verify = 0;
 
         if (aa->pcaa_initial_connect) {
                 if (msg_flags & MSG_CONNECT_REPLAYABLE) {
@@ -957,7 +918,7 @@ finish:
                                "invalidating and reconnecting\n",
                                obd2cli_tgt(imp->imp_obd),
                                imp->imp_connection->c_remote_uuid.uuid);
-                        ptlrpc_connect_import(imp, NULL);
+                        ptlrpc_connect_import(imp);
                         RETURN(0);
                 }
         } else {
@@ -985,6 +946,8 @@ finish:
                 }
 
                 imp->imp_connect_data = *ocd;
+                CDEBUG(D_HA, "obd %s to target with inst %u\n",
+                       imp->imp_obd->obd_name, ocd->ocd_instance);
 
                 exp = class_conn2export(&imp->imp_dlm_handle);
                 cfs_spin_unlock(&imp->imp_lock);
@@ -1023,13 +986,14 @@ finish:
                         /* Sigh, some compilers do not like #ifdef in the middle
                            of macro arguments */
 #ifdef __KERNEL__
-                        const char *older =
-                                "older. Consider upgrading this client";
+                        const char *older = "older. Consider upgrading server "
+                                            "or downgrading client";
 #else
-                        const char *older =
-                                "older. Consider recompiling this application";
+                        const char *older = "older. Consider recompiling this "
+                                            "application";
 #endif
-                        const char *newer = "newer than client version";
+                        const char *newer = "newer than client version. "
+                                            "Consider upgrading client";
 
                         LCONSOLE_WARN("Server %s version (%d.%d.%d.%d) "
                                       "is much %s (%s)\n",
@@ -1046,33 +1010,25 @@ finish:
                         /* We sent to the server ocd_cksum_types with bits set
                          * for algorithms we understand. The server masked off
                          * the checksum types it doesn't support */
-                        if ((ocd->ocd_cksum_types & OBD_CKSUM_ALL) == 0) {
+                        if ((ocd->ocd_cksum_types & cksum_types_supported()) == 0) {
                                 LCONSOLE_WARN("The negotiation of the checksum "
                                               "alogrithm to use with server %s "
                                               "failed (%x/%x), disabling "
                                               "checksums\n",
                                               obd2cli_tgt(imp->imp_obd),
                                               ocd->ocd_cksum_types,
-                                              OBD_CKSUM_ALL);
+                                              cksum_types_supported());
                                 cli->cl_checksum = 0;
                                 cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
-                                cli->cl_cksum_type = OBD_CKSUM_CRC32;
                         } else {
                                 cli->cl_supp_cksum_types = ocd->ocd_cksum_types;
-
-                                if (ocd->ocd_cksum_types & OSC_DEFAULT_CKSUM)
-                                        cli->cl_cksum_type = OSC_DEFAULT_CKSUM;
-                                else if (ocd->ocd_cksum_types & OBD_CKSUM_ADLER)
-                                        cli->cl_cksum_type = OBD_CKSUM_ADLER;
-                                else
-                                        cli->cl_cksum_type = OBD_CKSUM_CRC32;
                         }
                 } else {
                         /* The server does not support OBD_CONNECT_CKSUM.
                          * Enforce CRC32 for backward compatibility*/
                         cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
-                        cli->cl_cksum_type = OBD_CKSUM_CRC32;
                 }
+                cli->cl_cksum_type =cksum_type_select(cli->cl_supp_cksum_types);
 
                 if (ocd->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
                         cli->cl_max_pages_per_rpc =
@@ -1199,7 +1155,7 @@ static int completed_replay_interpret(const struct lu_env *env,
                                req->rq_import->imp_obd->obd_name,
                                req->rq_status);
                 }
-                ptlrpc_connect_import(req->rq_import, NULL);
+                ptlrpc_connect_import(req->rq_import);
         }
 
         RETURN(0);