Whamcloud - gitweb
b=13147
author nathan <nathan>
Thu, 9 Aug 2007 15:36:59 +0000 (15:36 +0000)
committer nathan <nathan>
Thu, 9 Aug 2007 15:36:59 +0000 (15:36 +0000)
i=tappro
i=fanyong
block reactivating mgc import until all deactivates complete
Only an issue when failing back MDT/MGS to itself (testing)

lustre/ChangeLog
lustre/include/lustre_import.h
lustre/mgc/mgc_request.c
lustre/obdclass/genops.c
lustre/ptlrpc/import.c
lustre/tests/test-framework.sh

index edc8a2f..1a9d540 100644 (file)
@@ -79,6 +79,11 @@ Details    : Port older jbd statistics patch for sles10
         should be installed.  It is versioned separately from Lustre and
         may be released separately in future.
 
+Severity   : minor
+Bugzilla   : 13147
+Description: block reactivating mgc import until all deactivates complete
+Details    : Fix race when failing back MDT/MGS to itself (testing)
+
 Severity   : enhancement
 Bugzilla   : 12194
 Description: add optional extra BUILD_VERSION info
index b9dcf85..542d073 100644 (file)
@@ -70,6 +70,7 @@ struct obd_import {
 
         atomic_t                  imp_inflight;
         atomic_t                  imp_replay_inflight;
+        atomic_t                  imp_inval_count;
         enum lustre_imp_state     imp_state;
         int                       imp_generation;
         __u32                     imp_conn_cnt;
index e251dde..5de0f95 100644 (file)
@@ -730,6 +730,33 @@ static int mgc_target_register(struct obd_export *exp,
         RETURN(rc);
 }
 
+int mgc_reconnect_import(struct obd_import *imp)
+{
+        /* Resurrect the MGC import @imp: invalidate it, disconnect, wait
+         * until no invalidate is still running, then reactivate it and
+         * start recovery.  Always returns 0.
+         *
+         * Force a new connect attempt */
+        ptlrpc_invalidate_import(imp);
+        /* Do a fresh connect next time by zeroing the handle */
+        ptlrpc_disconnect_import(imp, 1);
+        /* Wait for all invalidate calls to finish.
+         * ptlrpc_invalidate_import() increments imp_inval_count on entry,
+         * and on exit decrements it and signals imp_recovery_waitq, so a
+         * count of zero means no invalidate is in progress.  A non-zero
+         * return from the wait is only logged; the reactivation below
+         * still runs. */
+        if (atomic_read(&imp->imp_inval_count) > 0) {
+                int rc;
+                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+                rc = l_wait_event(imp->imp_recovery_waitq,
+                                  (atomic_read(&imp->imp_inval_count) == 0),
+                                  &lwi);
+                if (rc)
+                        CERROR("Interrupted, inval=%d\n",
+                               atomic_read(&imp->imp_inval_count));
+        }
+
+        /* Allow reconnect attempts */
+        imp->imp_obd->obd_no_recov = 0;
+        /* Remove 'invalid' flag */
+        ptlrpc_activate_import(imp);
+        /* Attempt a new connect */
+        ptlrpc_recover_import(imp, NULL);
+        return 0;
+}
+
 int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        void *key, obd_count vallen, void *val,
                        struct ptlrpc_request_set *set)
@@ -767,20 +794,8 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        imp->imp_replayable, imp->imp_obd->obd_replayable,
                        ptlrpc_import_state_name(imp->imp_state));
                 /* Resurrect if we previously died */
-                if (imp->imp_invalid || value > 1) {
-                        /* Force a new connect attempt */
-                        /* (can't put these in obdclass, module loop) */
-                        ptlrpc_invalidate_import(imp);
-                        /* Do a fresh connect next time by zeroing the handle */
-                        ptlrpc_disconnect_import(imp, 1);
-                        /* See client_disconnect_export */
-                        /* Allow reconnect attempts */
-                        imp->imp_obd->obd_no_recov = 0;
-                        /* Remove 'invalid' flag */
-                        ptlrpc_activate_import(imp);
-                        /* Attempt a new connect */
-                        ptlrpc_recover_import(imp, NULL);
-                }
+                if (imp->imp_invalid || value > 1)
+                        mgc_reconnect_import(imp);
                 RETURN(0);
         }
         /* FIXME move this to mgc_process_config */
index e3b4446..9538ee3 100644 (file)
@@ -842,6 +842,7 @@ struct obd_import *class_new_import(struct obd_device *obd)
         atomic_set(&imp->imp_refcount, 2);
         atomic_set(&imp->imp_inflight, 0);
         atomic_set(&imp->imp_replay_inflight, 0);
+        atomic_set(&imp->imp_inval_count, 0);
         CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
         CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
index cb0209d..0778530 100644 (file)
@@ -200,6 +200,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
         struct l_wait_info lwi;
         int rc;
 
+        atomic_inc(&imp->imp_inval_count);
+
         ptlrpc_deactivate_import(imp);
 
         LASSERT(imp->imp_invalid);
@@ -217,6 +219,9 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 
         obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
         sptlrpc_import_flush_all_ctx(imp);
+
+        atomic_dec(&imp->imp_inval_count);
+        cfs_waitq_signal(&imp->imp_recovery_waitq);
 }
 
 /* unset imp_invalid */
index a0f0320..37864d5 100644 (file)
@@ -498,7 +498,7 @@ facet_failover() {
     wait_for $facet
     local dev=${facet}_dev
     local opt=${facet}_opt
-    start $facet ${!dev} ${!opt}
+    start $facet ${!dev} ${!opt} || error "Restart of $facet failed"
 }
 
 obd_name() {