From fb7ad45f3800a429a833888f4e1e30a78e37d433 Mon Sep 17 00:00:00 2001 From: nathan Date: Wed, 1 Mar 2006 20:49:35 +0000 Subject: [PATCH] Branch b1_4_mountconf b=8007 allow old clients to reconnect to a newly-restarted MGS (plus some minor cleanups) --- lustre/include/linux/lustre_import.h | 3 --- lustre/lov/lov_obd.c | 2 +- lustre/mds/handler.c | 4 ++-- lustre/mgc/mgc_request.c | 2 +- lustre/mgs/mgs_handler.c | 8 ++++++++ lustre/obdclass/genops.c | 11 ++++++----- lustre/obdclass/obd_mount.c | 14 ++------------ 7 files changed, 20 insertions(+), 24 deletions(-) diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index 59cf6ad..68834bf 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -112,9 +112,6 @@ void class_unobserve_import(struct obd_import *imp, obd_import_callback cb, void class_notify_import_observers(struct obd_import *imp, int event, void *event_arg); -#define IMP_EVENT_ACTIVE 1 -#define IMP_EVENT_INACTIVE 2 - /* genops.c */ struct obd_export; extern struct obd_import *class_exp2cliimp(struct obd_export *); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 082aa2c..d9947d0 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -2206,7 +2206,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, ENTRY; if (KEY_IS(KEY_NEXT_ID)) { - if (vallen != lov->desc.ld_tgt_count) + if (vallen > lov->desc.ld_tgt_count) RETURN(-EINVAL); vallen = sizeof(obd_id); } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index f42bb2f..59b2334 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -2100,8 +2100,8 @@ int mds_postrecov(struct obd_device *obd) /* set nextid first, so we are sure it happens */ rc = mds_lov_set_nextid(obd); if (rc) { - CERROR("%s: mds_lov_set_nextid failed\n", - obd->obd_name); + CERROR("%s: mds_lov_set_nextid failed %d\n", + obd->obd_name, rc); GOTO(out, rc); } diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 83f0ee5..772d4b8 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -719,7 +719,7 @@ static int mgc_import_event(struct obd_device *obd, int rc = 0; LASSERT(imp->imp_obd == obd); - CDEBUG(D_MGC, "import event %d\n", (int)event); + CDEBUG(D_MGC, "import event %#x\n", event); switch (event) { case IMP_EVENT_INVALIDATE: { diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 4633e89..a89944a 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -168,6 +168,9 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf) if (rc) GOTO(err_fs, rc); + /* Allow reconnect attempts */ + obd->obd_replayable = 1; + /* Internal mgs setup */ mgs_init_fsdb_list(obd); sema_init(&mgs->mgs_log_sem, 1); @@ -442,6 +445,11 @@ int mgs_handle(struct ptlrpc_request *req) DEBUG_REQ(D_MGS, req, "connect"); OBD_FAIL_RETURN(OBD_FAIL_MGS_CONNECT_NET, 0); rc = target_handle_connect(req, mgs_handle); + if (!rc && (req->rq_reqmsg->conn_cnt > 1)) + /* Make clients trying to reconnect after a MGS restart + happy; also requires obd_replayable */ + lustre_msg_add_op_flags(req->rq_repmsg, + MSG_CONNECT_RECONNECT); break; case MGS_DISCONNECT: DEBUG_REQ(D_MGS, req, "disconnect"); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 0491b46..52c1f018 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1092,16 +1092,17 @@ static int ping_evictor_main(void *arg) if (expire_time > exp->exp_last_request_time) { class_export_get(exp); spin_unlock(&obd->obd_dev_lock); - LCONSOLE_WARN("%s: haven't heard from %s in %ld" - " seconds. Last request was at %ld. " - "I think it's dead, and I am evicting " - "it.\n", obd->obd_name, + LCONSOLE_WARN("%s: haven't heard from %s (%s) " + "in %ld seconds. " + "Last request was at %ld. " + "I think it's dead, and I am " + "evicting it.\n", obd->obd_name, + obd_uuid2str(&exp->exp_client_uuid), obd_export_nid2str(exp), (long)(CURRENT_SECONDS - exp->exp_last_request_time), exp->exp_last_request_time); - class_fail_export(exp); class_export_put(exp); diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 0696c52..2f5ca77 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -700,18 +700,8 @@ static int lustre_stop_mgc(struct super_block *sb) obd->obd_force = 1; /* client_disconnect_export uses the no_recov flag to decide whether it should disconnect or just invalidate. (The MGC has no - recoverable data in any case.) - Without no_recov, we wait for locks to be dropped, so if the - MGS is down, we might wait for an obd timeout. With no-recov, - if the MGS is up, we don't tell it we're disconnecting, so - we must wait until the MGS evicts the dead client before the - client can reconnect. So it's either slow disconnect, or a - slow reconnect. This could probably be fixed on the server side - by ignoring handle mismatches in target_handle_reconnect. */ - if (lsi->lsi_flags & LSI_UMOUNT_FORCE) { - /* FIXME maybe always set this? */ - obd->obd_no_recov = 1; - } + recoverable data in any case.) */ + obd->obd_no_recov = 1; if (obd->u.cli.cl_mgc_mgsexp) obd_disconnect(obd->u.cli.cl_mgc_mgsexp); -- 1.8.3.1