From: Hongchao Zhang Date: Fri, 13 Oct 2017 22:57:28 +0000 (+0800) Subject: LU-5020 llite: don't reconnect MGC if connecting X-Git-Tag: 2.10.57~107 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=1a7ff02c1fbb8e85ac2e8fa458ba3fb810a76ea4;p=fs%2Flustre-release.git LU-5020 llite: don't reconnect MGC if connecting 1. In mgc_set_info_async(KEY=KEY_INIT_RECOV_BACKUP "init_recov_bk"), the MGC import should be reconnected only if its state is LUSTRE_IMP_DISCON. 2. In mgc_target_register, if the target will regenerate the config, we should use a longer delay limit to give the MGC time to connect to the MGS, because the target (server) will fail and exit if the request expires due to the delay limit. 3. In case of parallel mounts, the async cleanup of the OSS affects the following mount because the OSS can't be set up again, so there should be a barrier to sync with the OSS cleanup. Change-Id: I805b84cf12100ec2cc68f95bb65a9c396e0fbc1b Signed-off-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/10229 Reviewed-by: Fan Yong Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 7cf0891..c08ed96 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -1067,6 +1067,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, #define MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 
0 : at_min) \ + PING_INTERVAL) #define MGC_TARGET_REG_LIMIT 10 +#define MGC_TARGET_REG_LIMIT_MAX RECONNECT_DELAY_MAX #define MGC_SEND_PARAM_LIMIT 10 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) @@ -1200,11 +1201,18 @@ static int mgc_target_register(struct obd_export *exp, RETURN(-ENOMEM); } - memcpy(req_mti, mti, sizeof(*req_mti)); - ptlrpc_request_set_replen(req); - CDEBUG(D_MGC, "register %s\n", mti->mti_svname); - /* Limit how long we will wait for the enqueue to complete */ - req->rq_delay_limit = MGC_TARGET_REG_LIMIT; + memcpy(req_mti, mti, sizeof(*req_mti)); + ptlrpc_request_set_replen(req); + CDEBUG(D_MGC, "register %s\n", mti->mti_svname); + /* Limit how long we will wait for the enqueue to complete */ + req->rq_delay_limit = MGC_TARGET_REG_LIMIT; + + /* if the target needs to regenerate the config log in MGS, it's better + * to use some longer limit to let MGC have time to change connection to + * another MGS (or try again with the same MGS) for the target (server) + * will fail and exit if the request expired due to delay limit. 
*/ + if (mti->mti_flags & (LDD_F_UPDATE | LDD_F_NEED_INDEX)) + req->rq_delay_limit = MGC_TARGET_REG_LIMIT_MAX; rc = ptlrpc_queue_wait(req); if (!rc) { @@ -1227,24 +1235,28 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, int rc = -EINVAL; ENTRY; - /* Turn off initial_recov after we try all backup servers once */ - if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { - struct obd_import *imp = class_exp2cliimp(exp); - int value; - if (vallen != sizeof(int)) - RETURN(-EINVAL); - value = *(int *)val; - CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", - imp->imp_obd->obd_name, value, - imp->imp_deactive, imp->imp_invalid, - imp->imp_replayable, imp->imp_obd->obd_replayable, - ptlrpc_import_state_name(imp->imp_state)); - /* Resurrect if we previously died */ - if ((imp->imp_state != LUSTRE_IMP_FULL && - imp->imp_state != LUSTRE_IMP_NEW) || value > 1) - ptlrpc_reconnect_import(imp); - RETURN(0); - } + /* Turn off initial_recov after we try all backup servers once */ + if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { + struct obd_import *imp = class_exp2cliimp(exp); + int value; + if (vallen != sizeof(int)) + RETURN(-EINVAL); + value = *(int *)val; + CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", + imp->imp_obd->obd_name, value, + imp->imp_deactive, imp->imp_invalid, + imp->imp_replayable, imp->imp_obd->obd_replayable, + ptlrpc_import_state_name(imp->imp_state)); + /* Resurrect the import immediately if + * 1. we previously got disconnected, + * 2. 
value > 1 (at the same node with MGS) + * */ + if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1) + ptlrpc_reconnect_import(imp); + + RETURN(0); + } + /* FIXME move this to mgc_process_config */ if (KEY_IS(KEY_REGISTER_TARGET)) { struct mgs_target_info *mti; diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 8a88c6d..90b4b46 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -478,13 +478,27 @@ int class_register_device(struct obd_device *new_obd) int i; int new_obd_minor = 0; bool minor_assign = false; + bool retried = false; +again: write_lock(&obd_dev_lock); for (i = 0; i < class_devno_max(); i++) { struct obd_device *obd = class_num2obd(i); if (obd != NULL && (strcmp(new_obd->obd_name, obd->obd_name) == 0)) { + + if (!retried) { + write_unlock(&obd_dev_lock); + + /* the obd_device could be waited to be + * destroyed by the "obd_zombie_impexp_thread". + */ + obd_zombie_barrier(); + retried = true; + goto again; + } + CERROR("%s: already exists, won't add\n", obd->obd_name); /* in case we found a free slot before duplicate */