Whamcloud - gitweb
Branch b1_5
author scjody <scjody>
Fri, 23 Jun 2006 01:28:12 +0000 (01:28 +0000)
committer scjody <scjody>
Fri, 23 Jun 2006 01:28:12 +0000 (01:28 +0000)
b=9387
r=adilger

Select next connection according to last successful connection time.

lustre/include/lustre_import.h
lustre/ldlm/ldlm_lib.c
lustre/obdclass/genops.c
lustre/ptlrpc/import.c

index 505507c..e011e5b 100644 (file)
@@ -45,7 +45,7 @@ struct obd_import_conn {
         struct list_head          oic_item;
         struct ptlrpc_connection *oic_conn;
         struct obd_uuid           oic_uuid;
-        cfs_time_t                oic_last_attempt; /* in cfs_time_t */
+        __u64                     oic_last_attempt; /* jiffies (64-bit) */
 };
 
 struct obd_import {
@@ -77,6 +77,7 @@ struct obd_import {
         __u64                     imp_last_transno_checked;
         struct lustre_handle      imp_remote_handle;
         cfs_time_t                imp_next_ping;   /* jiffies */
+        __u64                     imp_last_success_conn;   /* jiffies */
 
         /* all available obd_import_conn linked here */
         struct list_head          imp_conn_list;
index 2c0b292..bd07b57 100644 (file)
@@ -73,6 +73,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
                         if (priority) {
                                 list_del(&item->oic_item);
                                 list_add(&item->oic_item, &imp->imp_conn_list);
+                                item->oic_last_attempt = 0;
                         }
                         CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n",
                                imp, imp->imp_obd->obd_name, uuid->uuid,
@@ -85,6 +86,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid,
         if (create) {
                 imp_conn->oic_conn = ptlrpc_conn;
                 imp_conn->oic_uuid = *uuid;
+                item->oic_last_attempt = 0;
                 if (priority)
                         list_add(&imp_conn->oic_item, &imp->imp_conn_list);
                 else
@@ -122,7 +124,6 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
 int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
 {
         struct obd_import_conn *imp_conn;
-        struct obd_import_conn *cur_conn;
         struct obd_export *dlmexp;
         int rc = -ENOENT;
         ENTRY;
@@ -138,12 +139,8 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
                         continue;
                 LASSERT(imp_conn->oic_conn);
 
-                cur_conn = list_entry(imp->imp_conn_list.next,
-                                      struct obd_import_conn,
-                                      oic_item);
-
                 /* is current conn? */
-                if (imp_conn == cur_conn) {
+                if (imp_conn == imp->imp_conn_current) {
                         LASSERT(imp_conn->oic_conn == imp->imp_connection);
 
                         if (imp->imp_state != LUSTRE_IMP_CLOSED &&
index e750395..f58f817 100644 (file)
@@ -745,6 +745,7 @@ struct obd_import *class_new_import(struct obd_device *obd)
         CFS_INIT_LIST_HEAD(&imp->imp_sending_list);
         CFS_INIT_LIST_HEAD(&imp->imp_delayed_list);
         spin_lock_init(&imp->imp_lock);
+        imp->imp_last_success_conn = 0;
         imp->imp_state = LUSTRE_IMP_NEW;
         imp->imp_obd = class_incref(obd);
         cfs_waitq_init(&imp->imp_recovery_waitq);
index 9072e83..00e23ea 100644 (file)
@@ -256,7 +256,7 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 
 static int import_select_connection(struct obd_import *imp)
 {
-        struct obd_import_conn *imp_conn;
+        struct obd_import_conn *imp_conn = NULL, *conn;
         struct obd_export *dlmexp;
         ENTRY;
 
@@ -269,15 +269,44 @@ static int import_select_connection(struct obd_import *imp)
                 RETURN(-EINVAL);
         }
 
-        if (imp->imp_conn_current &&
-            imp->imp_conn_current->oic_item.next != &imp->imp_conn_list) {
-                imp_conn = list_entry(imp->imp_conn_current->oic_item.next,
-                                      struct obd_import_conn, oic_item);
-        } else {
-                imp_conn = list_entry(imp->imp_conn_list.next,
-                                      struct obd_import_conn, oic_item);
+        list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+                CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n",
+                       imp->imp_obd->obd_name,
+                       libcfs_nid2str(conn->oic_conn->c_peer.nid),
+                       conn->oic_last_attempt);
+
+                /* Throttle the reconnect rate to once per RECONNECT_INTERVAL */
+                if (jiffies > conn->oic_last_attempt + RECONNECT_INTERVAL * HZ) {
+
+                        /* If we have never tried this connection since the
+                           last successful attempt, go with this one */
+                        if (conn->oic_last_attempt <=
+                                imp->imp_last_success_conn) {
+                                imp_conn = conn;
+                                break;
+                        }
+
+                        /* Both of these connections have already been tried
+                           since the last successful connection, just choose the
+                           least recently used */
+                        if (!imp_conn)
+                                imp_conn = conn;
+                        else
+                                if (conn->oic_last_attempt <
+                                                imp_conn->oic_last_attempt)
+                                        imp_conn = conn;
+        }
         }
 
+        /* if not found, simply choose the current one */
+        if (!imp_conn) {
+                LASSERT(imp->imp_conn_current);
+                imp_conn = imp->imp_conn_current;
+        }
+        LASSERT(imp_conn->oic_conn);
+
+        imp_conn->oic_last_attempt = get_jiffies_64();
+
         /* switch connection, don't mind if it's same as the current one */
         if (imp->imp_connection)
                 ptlrpc_put_connection(imp->imp_connection);
@@ -290,19 +319,23 @@ static int import_select_connection(struct obd_import *imp)
         dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
         class_export_put(dlmexp);
 
-        if (imp->imp_conn_current && (imp->imp_conn_current != imp_conn)) {
-                LCONSOLE_WARN("Changing connection for %s to %s\n",
-                              imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid);
-        }
+        if (imp->imp_conn_current != imp_conn) {
+                LCONSOLE_INFO("Changing connection for %s to %s/%s\n",
+                              imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid,
+                              libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
         imp->imp_conn_current = imp_conn;
-        CDEBUG(D_HA, "%s: import %p using connection %s\n",
-               imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid);
+        }
+
+        CDEBUG(D_HA, "%s: import %p using connection %s/%s\n",
+               imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid,
+               libcfs_nid2str(imp_conn->oic_conn->c_peer.nid));
+
         spin_unlock(&imp->imp_lock);
 
         RETURN(0);
 }
 
-int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
+int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
 {
         struct obd_device *obd = imp->imp_obd;
         int initial_connect = 0;
@@ -606,29 +639,33 @@ finish:
                 struct obd_connect_data *ocd;
                 struct obd_export *exp;
 
-                ocd = lustre_swab_repbuf(request, REPLY_REC_OFF,
-                                         sizeof *ocd, lustre_swab_connect);
+                ocd = lustre_swab_repbuf(request, REPLY_REC_OFF, sizeof(*ocd),
+                                         lustre_swab_connect);
+
+                spin_lock_irqsave(&imp->imp_lock, flags);
+                list_del(&imp->imp_conn_current->oic_item);
+                list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list);
+                imp->imp_last_success_conn =
+                        imp->imp_conn_current->oic_last_attempt;
+
                 if (ocd == NULL) {
+                        spin_unlock_irqrestore(&imp->imp_lock, flags);
                         CERROR("Wrong connect data from server\n");
                         rc = -EPROTO;
                         GOTO(out, rc);
                 }
-                spin_lock_irqsave(&imp->imp_lock, flags);
-                
-                /*
-                 * check that server granted subset of flags we asked for.
-                 */
-                LASSERT((ocd->ocd_connect_flags &
-                         imp->imp_connect_data.ocd_connect_flags) ==
-                        ocd->ocd_connect_flags);
 
                 imp->imp_connect_data = *ocd;
-                if (!ocd->ocd_ibits_known &&
-                    ocd->ocd_connect_flags & OBD_CONNECT_IBITS)
-                        CERROR("Inodebits aware server returned zero compatible"
-                               " bits?\n");
 
                 exp = class_conn2export(&imp->imp_dlm_handle);
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+                /* check that server granted subset of flags we asked for. */
+                LASSERTF((ocd->ocd_connect_flags &
+                          imp->imp_connect_flags_orig) ==
+                         ocd->ocd_connect_flags, LPX64" != "LPX64,
+                         imp->imp_connect_flags_orig, ocd->ocd_connect_flags);
+
                 if (!exp) {
                         /* This could happen if export is cleaned during the 
                            connect attempt */
@@ -642,6 +679,11 @@ finish:
 
                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD);
 
+                if (!ocd->ocd_ibits_known &&
+                    ocd->ocd_connect_flags & OBD_CONNECT_IBITS)
+                        CERROR("Inodebits aware server returned zero compatible"
+                               " bits?\n");
+
                 if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) &&
                     (ocd->ocd_version > LUSTRE_VERSION_CODE +
                     LUSTRE_VERSION_OFFSET_WARN)) {
@@ -662,21 +704,6 @@ finish:
                               OBD_OCD_VERSION_FIX(ocd->ocd_version),
                               action, LUSTRE_VERSION_STRING);
                 }
-
-                if (imp->imp_conn_current != NULL) {
-                        list_del(&imp->imp_conn_current->oic_item);
-                        list_add(&imp->imp_conn_current->oic_item,
-                                 &imp->imp_conn_list);
-                        imp->imp_conn_current = NULL;
-                        spin_unlock_irqrestore(&imp->imp_lock, flags);
-                } else {
-                        static int bug7269_dump = 0;
-                        spin_unlock_irqrestore(&imp->imp_lock, flags);
-                        CERROR("this is bug 7269 - please attach log there\n");
-                        if (bug7269_dump == 0)
-                                libcfs_debug_dumplog();
-                        bug7269_dump = 1;
-                }
         }
 
  out: