From: scjody Date: Fri, 23 Jun 2006 01:28:12 +0000 (+0000) Subject: Branch b1_5 X-Git-Tag: v1_7_100~1^90~8^2~71 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=3c79e1951b639b4d37e4baa624fc9d55d9511a92;p=fs%2Flustre-release.git Branch b1_5 b=9387 r=adilger Select next connection according to last successful connection time. --- diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 505507c..e011e5b 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -45,7 +45,7 @@ struct obd_import_conn { struct list_head oic_item; struct ptlrpc_connection *oic_conn; struct obd_uuid oic_uuid; - cfs_time_t oic_last_attempt; /* in cfs_time_t */ + __u64 oic_last_attempt; /* in cfs_time_t */ }; struct obd_import { @@ -77,6 +77,7 @@ struct obd_import { __u64 imp_last_transno_checked; struct lustre_handle imp_remote_handle; cfs_time_t imp_next_ping; /* jiffies */ + __u64 imp_last_success_conn; /* jiffies */ /* all available obd_import_conn linked here */ struct list_head imp_conn_list; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 2c0b292..bd07b57 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -73,6 +73,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, if (priority) { list_del(&item->oic_item); list_add(&item->oic_item, &imp->imp_conn_list); + item->oic_last_attempt = 0; } CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n", imp, imp->imp_obd->obd_name, uuid->uuid, @@ -85,6 +86,7 @@ static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, if (create) { imp_conn->oic_conn = ptlrpc_conn; imp_conn->oic_uuid = *uuid; + item->oic_last_attempt = 0; if (priority) list_add(&imp_conn->oic_item, &imp->imp_conn_list); else @@ -122,7 +124,6 @@ int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) { struct obd_import_conn *imp_conn; - struct 
obd_import_conn *cur_conn; struct obd_export *dlmexp; int rc = -ENOENT; ENTRY; @@ -138,12 +139,8 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) continue; LASSERT(imp_conn->oic_conn); - cur_conn = list_entry(imp->imp_conn_list.next, - struct obd_import_conn, - oic_item); - /* is current conn? */ - if (imp_conn == cur_conn) { + if (imp_conn == imp->imp_conn_current) { LASSERT(imp_conn->oic_conn == imp->imp_connection); if (imp->imp_state != LUSTRE_IMP_CLOSED && diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index e750395..f58f817 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -745,6 +745,7 @@ struct obd_import *class_new_import(struct obd_device *obd) CFS_INIT_LIST_HEAD(&imp->imp_sending_list); CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); spin_lock_init(&imp->imp_lock); + imp->imp_last_success_conn = 0; imp->imp_state = LUSTRE_IMP_NEW; imp->imp_obd = class_incref(obd); cfs_waitq_init(&imp->imp_recovery_waitq); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 9072e83..00e23ea 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -256,7 +256,7 @@ void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt) static int import_select_connection(struct obd_import *imp) { - struct obd_import_conn *imp_conn; + struct obd_import_conn *imp_conn = NULL, *conn; struct obd_export *dlmexp; ENTRY; @@ -269,15 +269,44 @@ static int import_select_connection(struct obd_import *imp) RETURN(-EINVAL); } - if (imp->imp_conn_current && - imp->imp_conn_current->oic_item.next != &imp->imp_conn_list) { - imp_conn = list_entry(imp->imp_conn_current->oic_item.next, - struct obd_import_conn, oic_item); - } else { - imp_conn = list_entry(imp->imp_conn_list.next, - struct obd_import_conn, oic_item); + list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { + CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n", + imp->imp_obd->obd_name, + libcfs_nid2str(conn->oic_conn->c_peer.nid), + 
conn->oic_last_attempt); + + /* Throttle the reconnect rate to once per RECONNECT_INTERVAL */ + if (jiffies > conn->oic_last_attempt + RECONNECT_INTERVAL * HZ) { + + /* If we have never tried this connection since the + last successful attempt, go with this one */ + if (conn->oic_last_attempt <= + imp->imp_last_success_conn) { + imp_conn = conn; + break; + } + + /* Both of these connections have already been tried + since the last successful connection, just choose the + least recently used */ + if (!imp_conn) + imp_conn = conn; + else + if (conn->oic_last_attempt < + imp_conn->oic_last_attempt) + imp_conn = conn; + } } + /* if not found, simply choose the current one */ + if (!imp_conn) { + LASSERT(imp->imp_conn_current); + imp_conn = imp->imp_conn_current; + } + LASSERT(imp_conn->oic_conn); + + imp_conn->oic_last_attempt = get_jiffies_64(); + /* switch connection, don't mind if it's same as the current one */ if (imp->imp_connection) ptlrpc_put_connection(imp->imp_connection); @@ -290,19 +319,23 @@ static int import_select_connection(struct obd_import *imp) dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); class_export_put(dlmexp); - if (imp->imp_conn_current && (imp->imp_conn_current != imp_conn)) { - LCONSOLE_WARN("Changing connection for %s to %s\n", - imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid); - } + if (imp->imp_conn_current != imp_conn) { + LCONSOLE_INFO("Changing connection for %s to %s/%s\n", + imp->imp_obd->obd_name, imp_conn->oic_uuid.uuid, + libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); imp->imp_conn_current = imp_conn; - CDEBUG(D_HA, "%s: import %p using connection %s\n", - imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid); + } + + CDEBUG(D_HA, "%s: import %p using connection %s/%s\n", + imp->imp_obd->obd_name, imp, imp_conn->oic_uuid.uuid, + libcfs_nid2str(imp_conn->oic_conn->c_peer.nid)); + spin_unlock(&imp->imp_lock); RETURN(0); } -int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) +int 
ptlrpc_connect_import(struct obd_import *imp, char *new_uuid) { struct obd_device *obd = imp->imp_obd; int initial_connect = 0; @@ -606,29 +639,33 @@ finish: struct obd_connect_data *ocd; struct obd_export *exp; - ocd = lustre_swab_repbuf(request, REPLY_REC_OFF, - sizeof *ocd, lustre_swab_connect); + ocd = lustre_swab_repbuf(request, REPLY_REC_OFF, sizeof(*ocd), + lustre_swab_connect); + + spin_lock_irqsave(&imp->imp_lock, flags); + list_del(&imp->imp_conn_current->oic_item); + list_add(&imp->imp_conn_current->oic_item, &imp->imp_conn_list); + imp->imp_last_success_conn = + imp->imp_conn_current->oic_last_attempt; + if (ocd == NULL) { + spin_unlock_irqrestore(&imp->imp_lock, flags); CERROR("Wrong connect data from server\n"); rc = -EPROTO; GOTO(out, rc); } - spin_lock_irqsave(&imp->imp_lock, flags); - - /* - * check that server granted subset of flags we asked for. - */ - LASSERT((ocd->ocd_connect_flags & - imp->imp_connect_data.ocd_connect_flags) == - ocd->ocd_connect_flags); imp->imp_connect_data = *ocd; - if (!ocd->ocd_ibits_known && - ocd->ocd_connect_flags & OBD_CONNECT_IBITS) - CERROR("Inodebits aware server returned zero compatible" - " bits?\n"); exp = class_conn2export(&imp->imp_dlm_handle); + spin_unlock_irqrestore(&imp->imp_lock, flags); + + /* check that server granted subset of flags we asked for. 
*/ + LASSERTF((ocd->ocd_connect_flags & + imp->imp_connect_flags_orig) == + ocd->ocd_connect_flags, LPX64" != "LPX64, + imp->imp_connect_flags_orig, ocd->ocd_connect_flags); + if (!exp) { /* This could happen if export is cleaned during the connect attempt */ @@ -642,6 +679,11 @@ finish: obd_import_event(imp->imp_obd, imp, IMP_EVENT_OCD); + if (!ocd->ocd_ibits_known && + ocd->ocd_connect_flags & OBD_CONNECT_IBITS) + CERROR("Inodebits aware server returned zero compatible" + " bits?\n"); + if ((ocd->ocd_connect_flags & OBD_CONNECT_VERSION) && (ocd->ocd_version > LUSTRE_VERSION_CODE + LUSTRE_VERSION_OFFSET_WARN)) { @@ -662,21 +704,6 @@ finish: OBD_OCD_VERSION_FIX(ocd->ocd_version), action, LUSTRE_VERSION_STRING); } - - if (imp->imp_conn_current != NULL) { - list_del(&imp->imp_conn_current->oic_item); - list_add(&imp->imp_conn_current->oic_item, - &imp->imp_conn_list); - imp->imp_conn_current = NULL; - spin_unlock_irqrestore(&imp->imp_lock, flags); - } else { - static int bug7269_dump = 0; - spin_unlock_irqrestore(&imp->imp_lock, flags); - CERROR("this is bug 7269 - please attach log there\n"); - if (bug7269_dump == 0) - libcfs_debug_dumplog(); - bug7269_dump = 1; - } } out: