Whamcloud - gitweb
Branch b1_4_mountconf
author nathan <nathan>
Wed, 1 Mar 2006 20:49:35 +0000 (20:49 +0000)
committer nathan <nathan>
Wed, 1 Mar 2006 20:49:35 +0000 (20:49 +0000)
b=8007
allow old clients to reconnect to a newly-restarted MGS
(plus some minor cleanups)

lustre/include/linux/lustre_import.h
lustre/lov/lov_obd.c
lustre/mds/handler.c
lustre/mgc/mgc_request.c
lustre/mgs/mgs_handler.c
lustre/obdclass/genops.c
lustre/obdclass/obd_mount.c

index 59cf6ad..68834bf 100644 (file)
@@ -112,9 +112,6 @@ void class_unobserve_import(struct obd_import *imp, obd_import_callback cb,
 void class_notify_import_observers(struct obd_import *imp, int event,
                                    void *event_arg);
 
-#define IMP_EVENT_ACTIVE   1
-#define IMP_EVENT_INACTIVE 2
-
 /* genops.c */
 struct obd_export;
 extern struct obd_import *class_exp2cliimp(struct obd_export *);
index 082aa2c..d9947d0 100644 (file)
@@ -2206,7 +2206,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
         ENTRY;
 
         if (KEY_IS(KEY_NEXT_ID)) {
-                if (vallen != lov->desc.ld_tgt_count)
+                if (vallen > lov->desc.ld_tgt_count)
                         RETURN(-EINVAL);
                 vallen = sizeof(obd_id);
         }
index f42bb2f..59b2334 100644 (file)
@@ -2100,8 +2100,8 @@ int mds_postrecov(struct obd_device *obd)
         /* set nextid first, so we are sure it happens */
         rc = mds_lov_set_nextid(obd);
         if (rc) {
-                CERROR("%s: mds_lov_set_nextid failed\n",
-                       obd->obd_name);
+                CERROR("%s: mds_lov_set_nextid failed %d\n",
+                       obd->obd_name, rc);
                 GOTO(out, rc);
         }
         
index 83f0ee5..772d4b8 100644 (file)
@@ -719,7 +719,7 @@ static int mgc_import_event(struct obd_device *obd,
         int rc = 0;
 
         LASSERT(imp->imp_obd == obd);
-        CDEBUG(D_MGC, "import event %d\n", (int)event);
+        CDEBUG(D_MGC, "import event %#x\n", event);
 
         switch (event) {
         case IMP_EVENT_INVALIDATE: {
index 4633e89..a89944a 100644 (file)
@@ -168,6 +168,9 @@ static int mgs_setup(struct obd_device *obd, obd_count len, void *buf)
         if (rc)
                 GOTO(err_fs, rc);
 
+        /* Allow reconnect attempts */
+        obd->obd_replayable = 1;
+
         /* Internal mgs setup */
         mgs_init_fsdb_list(obd);
         sema_init(&mgs->mgs_log_sem, 1);
@@ -442,6 +445,11 @@ int mgs_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_MGS, req, "connect");
                 OBD_FAIL_RETURN(OBD_FAIL_MGS_CONNECT_NET, 0);
                 rc = target_handle_connect(req, mgs_handle);
+                if (!rc && (req->rq_reqmsg->conn_cnt > 1))
+                        /* Make clients trying to reconnect after a MGS restart
+                           happy; also requires obd_replayable */
+                        lustre_msg_add_op_flags(req->rq_repmsg,
+                                                MSG_CONNECT_RECONNECT);
                 break;
         case MGS_DISCONNECT:
                 DEBUG_REQ(D_MGS, req, "disconnect");
index 0491b46..52c1f01 100644 (file)
@@ -1092,16 +1092,17 @@ static int ping_evictor_main(void *arg)
                         if (expire_time > exp->exp_last_request_time) {
                                 class_export_get(exp);
                                 spin_unlock(&obd->obd_dev_lock);
-                                LCONSOLE_WARN("%s: haven't heard from %s in %ld"
-                                              " seconds. Last request was at %ld. "
-                                              "I think it's dead, and I am evicting "
-                                              "it.\n", obd->obd_name,
+                                LCONSOLE_WARN("%s: haven't heard from %s (%s) "
+                                              "in %ld seconds. "
+                                              "Last request was at %ld. "
+                                              "I think it's dead, and I am "
+                                              "evicting it.\n", obd->obd_name,
+                                              obd_uuid2str(&exp->exp_client_uuid),
                                               obd_export_nid2str(exp),
                                               (long)(CURRENT_SECONDS -
                                                      exp->exp_last_request_time),
                                               exp->exp_last_request_time);
 
-
                                 class_fail_export(exp);
                                 class_export_put(exp);
 
index 0696c52..2f5ca77 100644 (file)
@@ -700,18 +700,8 @@ static int lustre_stop_mgc(struct super_block *sb)
         obd->obd_force = 1;
         /* client_disconnect_export uses the no_recov flag to decide whether it
            should disconnect or just invalidate.  (The MGC has no
-           recoverable data in any case.) 
-           Without no_recov, we wait for locks to be dropped, so if the
-           MGS is down, we might wait for an obd timeout.  With no-recov,
-           if the MGS is up, we don't tell it we're disconnecting, so 
-           we must wait until the MGS evicts the dead client before the 
-           client can reconnect. So it's either slow disconnect, or a 
-           slow reconnect. This could probably be fixed on the server side 
-           by ignoring handle mismatches in target_handle_reconnect. */
-        if (lsi->lsi_flags & LSI_UMOUNT_FORCE) {
-                /* FIXME maybe always set this? */
-                obd->obd_no_recov = 1;
-        }
+           recoverable data in any case.) */
+        obd->obd_no_recov = 1;
 
         if (obd->u.cli.cl_mgc_mgsexp)
                 obd_disconnect(obd->u.cli.cl_mgc_mgsexp);