From 77d406a0699307e8e633ef41f8984f45c09db9b8 Mon Sep 17 00:00:00 2001
From: wang di
Date: Sun, 3 May 2015 05:11:22 -0700
Subject: [PATCH] LU-5420 mgc: MGC should retry for invalid import

After http://review.whamcloud.com/#/c/9967/ landed, the MGC no longer
waits for the import to be connected (state = FULL) before enqueueing
the lock and retrieving the config log, which can cause the mount
process to fail, especially when the MGC is shared by multiple targets.

So if the MGC enqueue fails because the import is invalid, give the
import another chance to recover: if it comes back in time, retry the
enqueue; otherwise fall back to the local config log.

Signed-off-by: Wang Di
Signed-off-by: Andreas Dilger
Change-Id: I8dbcafbda362ebbd3370e7527a3c14c594500c1e
Reviewed-on: http://review.whamcloud.com/11258
Tested-by: Jenkins
---
 lustre/include/lustre_ha.h         |   2 +-
 lustre/mgc/mgc_request.c           | 109 ++++++++++++++++++++++++++++++-------
 lustre/obdclass/obd_mount_server.c |   4 +-
 lustre/ptlrpc/import.c             |   3 +-
 4 files changed, 94 insertions(+), 24 deletions(-)

diff --git a/lustre/include/lustre_ha.h b/lustre/include/lustre_ha.h
index 671b757..dec5d5f 100644
--- a/lustre/include/lustre_ha.h
+++ b/lustre/include/lustre_ha.h
@@ -58,7 +58,7 @@ void ptlrpc_activate_import(struct obd_import *imp);
 void ptlrpc_deactivate_import(struct obd_import *imp);
 void ptlrpc_invalidate_import(struct obd_import *imp);
 void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt);
-
+void ptlrpc_pinger_force(struct obd_import *imp);
 /** @} ha */
 
 #endif
diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c
index ef5d711..ed7a16a 100644
--- a/lustre/mgc/mgc_request.c
+++ b/lustre/mgc/mgc_request.c
@@ -1852,23 +1852,62 @@ out_free:
 	return rc;
 }
 
-/** Get a config log from the MGS and process it.
- * This func is called for both clients and servers.
- * Copy the log locally before parsing it if appropriate (non-MGS server)
+static bool mgc_import_in_recovery(struct obd_import *imp)
+{
+	bool in_recovery = true;
+
+	spin_lock(&imp->imp_lock);
+	if (imp->imp_state == LUSTRE_IMP_FULL ||
+	    imp->imp_state == LUSTRE_IMP_CLOSED)
+		in_recovery = false;
+	spin_unlock(&imp->imp_lock);
+
+	return in_recovery;
+}
+
+/**
+ * Get a configuration log from the MGS and process it.
+ *
+ * This function is called for both clients and servers to process the
+ * configuration log from the MGS.  The MGC enqueues a DLM lock on the
+ * log from the MGS, and if the lock gets revoked the MGC will be notified
+ * by the lock cancellation callback that the config log has changed,
+ * and will enqueue another MGS lock on it, and then continue processing
+ * the new additions to the end of the log.
+ *
+ * Since the MGC import is not replayable, if the import is being evicted
+ * (rcl == -ESHUTDOWN, \see ptlrpc_import_delay_req()), retry to process
+ * the log until recovery is finished or the import is closed.
+ *
+ * Make a local copy of the log before parsing it if appropriate (non-MGS
+ * server) so that the server can start even when the MGS is down.
+ *
+ * There shouldn't be multiple processes running process_log at once --
+ * sounds like badness.  It actually might be fine, as long as they're not
+ * trying to update from the same log simultaneously, in which case we
+ * should use a per-log semaphore instead of cld_lock.
+ *
+ * \param[in] mgc	MGC device by which to fetch the configuration log
+ * \param[in] cld	log processing state (stored in lock callback data)
+ *
+ * \retval		0 on success
+ * \retval		negative errno on failure
  */
 int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
 {
 	struct lustre_handle lockh = { 0 };
 	__u64 flags = LDLM_FL_NO_LRU;
 	int rc = 0, rcl;
+	bool retry = false;
 	ENTRY;
 
-	LASSERT(cld);
+	LASSERT(cld != NULL);
 
 	/* I don't want multiple processes running process_log at once --
 	   sounds like badness.  It actually might be fine, as long as
 	   we're not trying to update from the same log
 	   simultaneously (in which case we should use a per-log sem.) */
+restart:
 	mutex_lock(&cld->cld_lock);
 	if (cld->cld_stopping) {
 		mutex_unlock(&cld->cld_lock);
@@ -1877,27 +1916,57 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
 
 	OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20);
 
-	CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname,
-	       cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
+	CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname,
+	       cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1);
 
 	/* Get the cfg lock on the llog */
 	rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL,
 			  LCK_CR, &flags, NULL, NULL, NULL, cld, 0, NULL,
 			  &lockh);
-	if (rcl == 0) {
-		/* Get the cld, it will be released in mgc_blocking_ast. */
-		config_log_get(cld);
-		rc = ldlm_lock_set_data(&lockh, (void *)cld);
-		LASSERT(rc == 0);
-	} else {
-		CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl);
-
-		/* mark cld_lostlock so that it will requeue
-		 * after MGC becomes available. */
-		cld->cld_lostlock = 1;
-		/* Get extra reference, it will be put in requeue thread */
-		config_log_get(cld);
-	}
+	if (rcl == 0) {
+		/* Get the cld, it will be released in mgc_blocking_ast. */
+		config_log_get(cld);
+		rc = ldlm_lock_set_data(&lockh, (void *)cld);
+		LASSERT(rc == 0);
+	} else {
+		CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl);
+
+		if (rcl == -ESHUTDOWN &&
+		    atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) {
+			struct obd_import *imp;
+			struct l_wait_info lwi;
+			int secs = cfs_time_seconds(obd_timeout);
+
+			mutex_unlock(&cld->cld_lock);
+			imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp);
+
+			/* Force the pinger and wait for the import to be
+			 * connected.  Note: since the MGC import is
+			 * non-replayable, a disconnected import state does
+			 * not mean that "recovery" has stopped, so keep
+			 * waiting until the timeout expires or the import
+			 * state becomes FULL or closed. */
+			ptlrpc_pinger_force(imp);
+
+			lwi = LWI_TIMEOUT(secs, NULL, NULL);
+			l_wait_event(imp->imp_recovery_waitq,
+				     !mgc_import_in_recovery(imp), &lwi);
+
+			if (imp->imp_state == LUSTRE_IMP_FULL) {
+				retry = true;
+				goto restart;
+			} else {
+				mutex_lock(&cld->cld_lock);
+				cld->cld_lostlock = 1;
+			}
+		} else {
+			/* mark cld_lostlock so that it will requeue
+			 * after MGC becomes available. */
+			cld->cld_lostlock = 1;
+		}
+		/* Get extra reference, it will be put in requeue thread */
+		config_log_get(cld);
+	}
 
 	if (cld_is_recover(cld)) {
diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c
index d89be15..6fb292f 100644
--- a/lustre/obdclass/obd_mount_server.c
+++ b/lustre/obdclass/obd_mount_server.c
@@ -1169,8 +1169,8 @@ static int server_register_target(struct lustre_sb_info *lsi)
 			       "rc = %d. Is the MGS running?\n",
 			       lsi->lsi_svname, rc);
 		} else {
-			CERROR("%s: error registering with the MGS: rc = %d "
-			       "(not fatal)\n", lsi->lsi_svname, rc);
+			CDEBUG(D_HA, "%s: error registering with the MGS: "
+			       "rc = %d (not fatal)\n", lsi->lsi_svname, rc);
 			/* reset the error code for non-fatal error. */
 			rc = 0;
 		}
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index e96396a..e4d1b9a 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -406,7 +406,7 @@ void ptlrpc_activate_import(struct obd_import *imp)
 }
 EXPORT_SYMBOL(ptlrpc_activate_import);
 
-static void ptlrpc_pinger_force(struct obd_import *imp)
+void ptlrpc_pinger_force(struct obd_import *imp)
 {
 	CDEBUG(D_HA, "%s: waking up pinger s:%s\n", obd2cli_tgt(imp->imp_obd),
 	       ptlrpc_import_state_name(imp->imp_state));
@@ -418,6 +418,7 @@ static void ptlrpc_pinger_force(struct obd_import *imp)
 	if (imp->imp_state != LUSTRE_IMP_CONNECTING)
 		ptlrpc_pinger_wake_up();
 }
+EXPORT_SYMBOL(ptlrpc_pinger_force);
 
 void ptlrpc_fail_import(struct obd_import *imp, __u32 conn_cnt)
 {
-- 
1.8.3.1
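
To summarize the new error path in mgc_process_log(), the stand-alone C
sketch below models the control flow this patch introduces. It is only an
illustration: enqueue_config_lock(), wait_for_import(), import_in_recovery()
and the import_state enum are hypothetical stand-ins invented for the example,
not Lustre APIs; the real code uses mgc_enqueue(), ptlrpc_pinger_force(),
l_wait_event() and the LUSTRE_IMP_* states exactly as shown in the diff above.

/*
 * Toy user-space model of the retry logic added to mgc_process_log().
 * All names here are hypothetical stand-ins for illustration only;
 * see the diff above for the real Lustre implementation.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum import_state { IMP_DISCON, IMP_CONNECTING, IMP_FULL, IMP_CLOSED };

/* Stand-in for mgc_enqueue(): fail once with -ESHUTDOWN, then succeed. */
static int enqueue_config_lock(int attempt)
{
	return attempt == 0 ? -ESHUTDOWN : 0;
}

/* Stand-in for "force the pinger and wait on imp_recovery_waitq with an
 * obd_timeout bound": pretend the import reconnects before the timeout. */
static enum import_state wait_for_import(void)
{
	return IMP_FULL;
}

/* Mirrors mgc_import_in_recovery(): recovery is over only when the
 * import is fully connected or has been closed for good. */
static bool import_in_recovery(enum import_state state)
{
	return state != IMP_FULL && state != IMP_CLOSED;
}

static int process_log(void)
{
	bool retry = false;
	bool lostlock = false;
	int rc;

restart:
	rc = enqueue_config_lock(retry ? 1 : 0);
	if (rc == 0) {
		printf("config lock acquired, process log from MGS\n");
		return 0;
	}

	if (rc == -ESHUTDOWN && !retry) {
		/* Import is being evicted: give it one chance to recover. */
		enum import_state state = wait_for_import();

		/* Only a fully connected import is worth a retry; a closed
		 * import (or a timeout) means fall back to the local log. */
		if (!import_in_recovery(state) && state == IMP_FULL) {
			retry = true;
			goto restart;
		}
	}

	/* Could not get the lock: fall back to the local copy of the log
	 * and let the requeue thread retry later (cld_lostlock in Lustre). */
	lostlock = true;
	printf("using local config log, lostlock=%d rc=%d\n", lostlock, rc);
	return rc;
}

int main(void)
{
	return process_log() == 0 ? 0 : 1;
}

The design point is the single retry: because the MGC import is not
replayable, an enqueue that fails while the import is being evicted is
retried exactly once after the import reports FULL; in every other case the
MGC marks cld_lostlock and falls back to the locally saved config log so the
requeue thread can try again once the MGS becomes available.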