From cbb7f099dadd506b3dcb6b31b1ff70875f882e29 Mon Sep 17 00:00:00 2001 From: shadow Date: Thu, 5 Feb 2009 11:05:48 +0000 Subject: [PATCH] handle MDS connect in failover as real connect. Branch b1_6 b=18304 i=johann i=green --- lustre/ldlm/ldlm_lib.c | 39 ++++++++++++++++++++------------------- lustre/tests/test-framework.sh | 1 + 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 00c2ded..ed984aa 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -523,11 +523,12 @@ int client_disconnect_export(struct obd_export *exp) * from old lib/target.c * -------------------------------------------------------------------------- */ -int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid, int mds_conn) +static int target_handle_reconnect(struct lustre_handle *conn, + struct obd_export *exp, + struct obd_uuid *cluuid) { ENTRY; - if (exp->exp_connection && exp->exp_imp_reverse && !mds_conn) { + if (exp->exp_connection && exp->exp_imp_reverse) { struct lustre_handle *hdl; hdl = &exp->exp_imp_reverse->imp_remote_handle; /* Might be a re-connect after a partition. */ @@ -719,18 +720,27 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (obd_uuid_equals(&cluuid, &target->obd_uuid)) goto dont_check_exports; - spin_lock(&target->obd_dev_lock); export = lustre_hash_lookup(target->obd_uuid_hash, &cluuid); - if (export != NULL && export->exp_connecting) { /* bug 9635, et. al. */ + if (export != NULL && mds_conn) { + /* mds reconnected after failover */ + class_fail_export(export); + CWARN("%s: received MDS connection from NID %s," + " removing former export from NID %s\n", + target->obd_name, libcfs_nid2str(req->rq_peer.nid), + libcfs_nid2str(export->exp_connection->c_peer.nid)); + class_export_put(export); + export = NULL; + rc = 0; + } else if (export != NULL && export->exp_connecting) { + /* bug 9635, et. al. */ CWARN("%s: exp %p already connecting\n", export->exp_obd->obd_name, export); class_export_put(export); export = NULL; rc = -EALREADY; } else if (export != NULL && export->exp_connection != NULL && - req->rq_peer.nid != export->exp_connection->c_peer.nid && - !mds_conn) { + req->rq_peer.nid != export->exp_connection->c_peer.nid) { CWARN("%s: cookie %s seen on new NID %s when " "existing NID %s is already connected\n", target->obd_name, cluuid.uuid, @@ -750,15 +760,13 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) export->exp_connecting = 1; spin_unlock(&export->exp_lock); class_export_put(export); - spin_unlock(&target->obd_dev_lock); LASSERT(export->exp_obd == target); - rc = target_handle_reconnect(&conn, export, &cluuid, mds_conn); + rc = target_handle_reconnect(&conn, export, &cluuid); } /* If we found an export, we already unlocked. */ if (!export) { - spin_unlock(&target->obd_dev_lock); OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 2 * obd_timeout); } else if (req->rq_export == NULL && atomic_read(&export->exp_rpc_count) > 0) { @@ -774,18 +782,13 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) libcfs_nid2str(req->rq_peer.nid), export, atomic_read(&export->exp_rpc_count)); GOTO(out, rc = -EBUSY); - } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 && - !mds_conn) { + } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) { CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; " "cookies not random?\n", target->obd_name, libcfs_nid2str(req->rq_peer.nid), cluuid.uuid); GOTO(out, rc = -EALREADY); } else { OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout); - if (req->rq_export == NULL && mds_conn) - export->exp_last_request_time = - max(export->exp_last_request_time, - (time_t)cfs_time_current_sec()); } if (rc < 0) @@ -871,9 +874,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) req->rq_export = export; spin_lock(&export->exp_lock); - if (mds_conn) { - lustre_msg_set_conn_cnt(req->rq_reqmsg, export->exp_conn_cnt + 1); - } else if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) { + if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) { CERROR("%s: %s already connected at higher conn_cnt: %d > %d\n", cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), export->exp_conn_cnt, diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index cd93bb1..b5a2ec7 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -204,6 +204,7 @@ load_modules() { load_module mgs/mgs load_module mds/mds grep -q crc16 /proc/kallsyms || { modprobe crc16 2>/dev/null || true; } + grep -q jbd /proc/kallsyms || { modprobe jbd 2>/dev/null || true; } [ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs load_module lvfs/fsfilt_$FSTYPE load_module ost/ost -- 1.8.3.1