Whamcloud - gitweb
LU-5020 llite: don't reconnect MGC if connecting 29/10229/16
author: Hongchao Zhang <hongchao.zhang@intel.com>
Fri, 13 Oct 2017 22:57:28 +0000 (06:57 +0800)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sun, 17 Dec 2017 06:19:11 +0000 (06:19 +0000)
1. In mgc_set_info_async(KEY=KEY_INIT_RECOV_BACKUP "init_recov_bk"),
   the MGC import should be reconnected only if its state is
   LUSTRE_IMP_DISCON

2. In mgc_target_register, if the target will regenerate the config,
   we should use a longer delay limit to wait for the MGC to connect
   to the MGS, because the target (server) will fail and exit if the
   request expires due to the delay limit.

3. In case of parallel mounts, the async cleanup of the OSS will affect
   the following mount, because the OSS can't be set up again; there
   should therefore be a barrier to sync with the OSS cleanup.

Change-Id: I805b84cf12100ec2cc68f95bb65a9c396e0fbc1b
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Reviewed-on: https://review.whamcloud.com/10229
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/mgc/mgc_request.c
lustre/obdclass/genops.c

index 7cf0891..c08ed96 100644 (file)
@@ -1067,6 +1067,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 #define  MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 0 : at_min) \
                                + PING_INTERVAL)
 #define  MGC_TARGET_REG_LIMIT 10
+#define  MGC_TARGET_REG_LIMIT_MAX RECONNECT_DELAY_MAX
 #define  MGC_SEND_PARAM_LIMIT 10
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
@@ -1200,11 +1201,18 @@ static int mgc_target_register(struct obd_export *exp,
                 RETURN(-ENOMEM);
         }
 
-        memcpy(req_mti, mti, sizeof(*req_mti));
-        ptlrpc_request_set_replen(req);
-        CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
-        /* Limit how long we will wait for the enqueue to complete */
-        req->rq_delay_limit = MGC_TARGET_REG_LIMIT;
+       memcpy(req_mti, mti, sizeof(*req_mti));
+       ptlrpc_request_set_replen(req);
+       CDEBUG(D_MGC, "register %s\n", mti->mti_svname);
+       /* Limit how long we will wait for the enqueue to complete */
+       req->rq_delay_limit = MGC_TARGET_REG_LIMIT;
+
+       /* if the target needs to regenerate the config log in MGS, it's better
+        * to use some longer limit to let MGC have time to change connection to
+        * another MGS (or try again with the same MGS) for the target (server)
+        * will fail and exit if the request expired due to delay limit. */
+       if (mti->mti_flags & (LDD_F_UPDATE | LDD_F_NEED_INDEX))
+               req->rq_delay_limit = MGC_TARGET_REG_LIMIT_MAX;
 
         rc = ptlrpc_queue_wait(req);
         if (!rc) {
@@ -1227,24 +1235,28 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp,
         int rc = -EINVAL;
         ENTRY;
 
-        /* Turn off initial_recov after we try all backup servers once */
-        if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
-                struct obd_import *imp = class_exp2cliimp(exp);
-                int value;
-                if (vallen != sizeof(int))
-                        RETURN(-EINVAL);
-                value = *(int *)val;
-                CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n",
-                       imp->imp_obd->obd_name, value,
-                       imp->imp_deactive, imp->imp_invalid,
-                       imp->imp_replayable, imp->imp_obd->obd_replayable,
-                       ptlrpc_import_state_name(imp->imp_state));
-                /* Resurrect if we previously died */
-                if ((imp->imp_state != LUSTRE_IMP_FULL &&
-                     imp->imp_state != LUSTRE_IMP_NEW) || value > 1)
-                        ptlrpc_reconnect_import(imp);
-                RETURN(0);
-        }
+       /* Turn off initial_recov after we try all backup servers once */
+       if (KEY_IS(KEY_INIT_RECOV_BACKUP)) {
+               struct obd_import *imp = class_exp2cliimp(exp);
+               int value;
+               if (vallen != sizeof(int))
+                       RETURN(-EINVAL);
+               value = *(int *)val;
+               CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n",
+                      imp->imp_obd->obd_name, value,
+                      imp->imp_deactive, imp->imp_invalid,
+                      imp->imp_replayable, imp->imp_obd->obd_replayable,
+                      ptlrpc_import_state_name(imp->imp_state));
+               /* Resurrect the import immediately if
+                * 1. we previously got disconnected,
+                * 2. value > 1 (at the same node with MGS)
+                * */
+               if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1)
+                       ptlrpc_reconnect_import(imp);
+
+               RETURN(0);
+       }
+
         /* FIXME move this to mgc_process_config */
         if (KEY_IS(KEY_REGISTER_TARGET)) {
                 struct mgs_target_info *mti;
index 8a88c6d..90b4b46 100644 (file)
@@ -478,13 +478,27 @@ int class_register_device(struct obd_device *new_obd)
        int i;
        int new_obd_minor = 0;
        bool minor_assign = false;
+       bool retried = false;
 
+again:
        write_lock(&obd_dev_lock);
        for (i = 0; i < class_devno_max(); i++) {
                struct obd_device *obd = class_num2obd(i);
 
                if (obd != NULL &&
                    (strcmp(new_obd->obd_name, obd->obd_name) == 0)) {
+
+                       if (!retried) {
+                               write_unlock(&obd_dev_lock);
+
+                               /* the obd_device could be waited to be
+                                * destroyed by the "obd_zombie_impexp_thread".
+                                */
+                               obd_zombie_barrier();
+                               retried = true;
+                               goto again;
+                       }
+
                        CERROR("%s: already exists, won't add\n",
                               obd->obd_name);
                        /* in case we found a free slot before duplicate */