X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmgc%2Fmgc_request.c;h=456891a657ab9d7af9af8d6922de46b52ebe71ea;hb=546993d587c5fc380e9745eae98f863e02e68575;hp=7cf08910e772be4e01419b7a2780da7d85398341;hpb=e8bdd1a48dd8800d1de0f0daf1e2e38b123de091;p=fs%2Flustre-release.git diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 7cf0891..456891a 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -117,7 +118,7 @@ int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type) EXPORT_SYMBOL(mgc_logname2resid); /********************** config llog list **********************/ -static struct list_head config_llog_list = LIST_HEAD_INIT(config_llog_list); +static LIST_HEAD(config_llog_list); static DEFINE_SPINLOCK(config_list_lock); /* protects config_llog_list */ /* Take a reference to a config log */ @@ -170,18 +171,18 @@ static struct config_llog_data *config_log_find(char *logname, struct config_llog_instance *cfg) { - struct config_llog_data *cld; - struct config_llog_data *found = NULL; - void * instance; - ENTRY; + struct config_llog_data *cld; + struct config_llog_data *found = NULL; + unsigned long cfg_instance; - LASSERT(logname != NULL); + ENTRY; + LASSERT(logname != NULL); - instance = cfg ? cfg->cfg_instance : NULL; + cfg_instance = cfg ? cfg->cfg_instance : 0; spin_lock(&config_list_lock); list_for_each_entry(cld, &config_llog_list, cld_list_chain) { - /* check if instance equals */ - if (instance != cld->cld_cfg.cfg_instance) + /* check if cfg_instance is the one we want */ + if (cfg_instance != cld->cld_cfg.cfg_instance) continue; /* instance may be NULL, should check name */ @@ -207,8 +208,8 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, ENTRY; - CDEBUG(D_MGC, "do adding config log %s:%p\n", logname, - cfg ? cfg->cfg_instance : NULL); + CDEBUG(D_MGC, "do adding config log %s-%016lx\n", logname, + cfg ? cfg->cfg_instance : 0); OBD_ALLOC(cld, sizeof(*cld) + strlen(logname) + 1); if (!cld) @@ -235,10 +236,8 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, /* Keep the mgc around until we are done */ cld->cld_mgcexp = class_export_get(obd->obd_self_export); - if (cld_is_sptlrpc(cld)) { + if (cld_is_sptlrpc(cld)) sptlrpc_conf_log_start(logname); - cld->cld_cfg.cfg_obdname = obd->obd_name; - } spin_lock(&config_list_lock); list_add(&cld->cld_list_chain, &config_llog_list); @@ -255,50 +254,49 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, } static struct config_llog_data *config_recover_log_add(struct obd_device *obd, - char *fsname, - struct config_llog_instance *cfg, - struct super_block *sb) + char *fsname, + struct config_llog_instance *cfg, + struct super_block *sb) { - struct config_llog_instance lcfg = *cfg; - struct lustre_sb_info *lsi = s2lsi(sb); - struct config_llog_data *cld; - char logname[32]; + struct config_llog_instance lcfg = *cfg; + struct lustre_sb_info *lsi = s2lsi(sb); + struct config_llog_data *cld; + char logname[32]; if (IS_OST(lsi)) - return NULL; + return NULL; /* for osp-on-ost, see lustre_start_osp() */ if (IS_MDT(lsi) && lcfg.cfg_instance) return NULL; - /* we have to use different llog for clients and mdts for cmd - * where only clients are notified if one of cmd server restarts */ - LASSERT(strlen(fsname) < sizeof(logname) / 2); - strcpy(logname, fsname); + /* We have to use different llog for clients and MDTs for DNE, + * where only clients are notified if one of DNE server restarts. + */ + LASSERT(strlen(fsname) < sizeof(logname) / 2); + strncpy(logname, fsname, sizeof(logname)); if (IS_SERVER(lsi)) { /* mdt */ - LASSERT(lcfg.cfg_instance == NULL); - lcfg.cfg_instance = sb; - strcat(logname, "-mdtir"); - } else { - LASSERT(lcfg.cfg_instance != NULL); - strcat(logname, "-cliir"); - } + LASSERT(lcfg.cfg_instance == 0); + lcfg.cfg_instance = ll_get_cfg_instance(sb); + strncat(logname, "-mdtir", sizeof(logname)); + } else { + LASSERT(lcfg.cfg_instance != 0); + strncat(logname, "-cliir", sizeof(logname)); + } - cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb); - return cld; + cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb); + return cld; } static struct config_llog_data *config_log_find_or_add(struct obd_device *obd, char *logname, struct super_block *sb, int type, struct config_llog_instance *cfg) { - struct config_llog_instance lcfg = *cfg; - struct config_llog_data *cld; - - lcfg.cfg_instance = sb != NULL ? (void *)sb : (void *)obd; + struct config_llog_instance lcfg = *cfg; + struct config_llog_data *cld; - if (type == CONFIG_T_SPTLRPC) - lcfg.cfg_instance = NULL; + /* Note class_config_llog_handler() depends on getting "obd" back */ + lcfg.cfg_instance = sb ? ll_get_cfg_instance(sb) : (unsigned long)obd; cld = config_log_find(logname, &lcfg); if (unlikely(cld != NULL)) @@ -328,7 +326,8 @@ config_log_add(struct obd_device *obd, char *logname, bool locked = false; ENTRY; - CDEBUG(D_MGC, "adding config log %s:%p\n", logname, cfg->cfg_instance); + CDEBUG(D_MGC, "add config log %s-%016lx\n", logname, + cfg->cfg_instance); /* * for each regular log, the depended sptlrpc log name is @@ -448,8 +447,6 @@ out_sptlrpc: return ERR_PTR(rc); } -DEFINE_MUTEX(llog_process_lock); - static inline void config_mark_cld_stop(struct config_llog_data *cld) { if (cld) { @@ -538,16 +535,15 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) RETURN(rc); } -#ifdef CONFIG_PROC_FS int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data) { struct obd_device *obd = data; struct obd_import *imp; struct obd_connect_data *ocd; struct config_llog_data *cld; - ENTRY; - LASSERT(obd != NULL); + ENTRY; + LASSERT(obd); LPROCFS_CLIMP_CHECK(obd); imp = obd->u.cli.cl_import; ocd = &imp->imp_connect_data; @@ -569,7 +565,6 @@ int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data) LPROCFS_CLIMP_EXIT(obd); RETURN(0); } -#endif /* reenqueue any lost locks */ #define RQ_RUNNING 0x1 @@ -615,7 +610,7 @@ static void do_requeue(struct config_llog_data *cld) * in order to not flood the MGS. */ #define MGC_TIMEOUT_MIN_SECONDS 5 -#define MGC_TIMEOUT_RAND_CENTISEC 0x1ff /* ~500 */ +#define MGC_TIMEOUT_RAND_CENTISEC 500 static int mgc_requeue_thread(void *data) { @@ -631,7 +626,7 @@ static int mgc_requeue_thread(void *data) while (!(rq_state & RQ_STOP)) { struct l_wait_info lwi; struct config_llog_data *cld, *cld_prev; - int rand = cfs_rand() & MGC_TIMEOUT_RAND_CENTISEC; + int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC); int to; /* Any new or requeued lostlocks will change the state */ @@ -646,22 +641,21 @@ static int mgc_requeue_thread(void *data) /* Always wait a few seconds to allow the server who caused the lock revocation to finish its setup, plus some random so everyone doesn't try to reconnect at once. */ - to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC); + to = cfs_time_seconds(MGC_TIMEOUT_MIN_SECONDS * 100 + rand); /* rand is centi-seconds */ - to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100); - lwi = LWI_TIMEOUT(to, NULL, NULL); + lwi = LWI_TIMEOUT(to / 100, NULL, NULL); l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP), &lwi); - /* - * iterate & processing through the list. for each cld, process - * its depending sptlrpc cld firstly (if any) and then itself. - * - * it's guaranteed any item in the list must have - * reference > 0; and if cld_lostlock is set, at - * least one reference is taken by the previous enqueue. - */ - cld_prev = NULL; + /* + * iterate & processing through the list. for each cld, process + * its depending sptlrpc cld firstly (if any) and then itself. + * + * it's guaranteed any item in the list must have + * reference > 0; and if cld_lostlock is set, at + * least one reference is taken by the previous enqueue. + */ + cld_prev = NULL; spin_lock(&config_list_lock); rq_state &= ~RQ_PRECLEANUP; @@ -691,9 +685,7 @@ static int mgc_requeue_thread(void *data) config_log_put(cld_prev); /* Wait a bit to see if anyone else needs a requeue */ - lwi = (struct l_wait_info) { 0 }; - l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP), - &lwi); + wait_event_idle(rq_waitq, rq_state & (RQ_NOW | RQ_STOP)); spin_lock(&config_list_lock); } @@ -967,11 +959,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) GOTO(err_cleanup, rc); } -#ifdef CONFIG_PROC_FS - obd->obd_vars = lprocfs_mgc_obd_vars; - lprocfs_obd_setup(obd, true); -#endif - sptlrpc_lprocfs_cliobd_attach(obd); + rc = mgc_tunables_init(obd); + if (rc) + GOTO(err_sysfs, rc); if (atomic_inc_return(&mgc_count) == 1) { rq_state = 0; @@ -984,7 +974,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) CERROR("%s: cannot start requeue thread: rc = %d; " "no more log updates\n", obd->obd_name, rc); - GOTO(err_cleanup, rc); + GOTO(err_sysfs, rc); } /* rc is the task_struct pointer of mgc_requeue_thread. */ rc = 0; @@ -993,6 +983,8 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(rc); +err_sysfs: + lprocfs_obd_cleanup(obd); err_cleanup: client_obd_cleanup(obd); err_decref: @@ -1067,6 +1059,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, #define MGC_ENQUEUE_LIMIT (INITIAL_CONNECT_TIMEOUT + (AT_OFF ? 0 : at_min) \ + PING_INTERVAL) #define MGC_TARGET_REG_LIMIT 10 +#define MGC_TARGET_REG_LIMIT_MAX RECONNECT_DELAY_MAX #define MGC_SEND_PARAM_LIMIT 10 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) @@ -1200,11 +1193,18 @@ static int mgc_target_register(struct obd_export *exp, RETURN(-ENOMEM); } - memcpy(req_mti, mti, sizeof(*req_mti)); - ptlrpc_request_set_replen(req); - CDEBUG(D_MGC, "register %s\n", mti->mti_svname); - /* Limit how long we will wait for the enqueue to complete */ - req->rq_delay_limit = MGC_TARGET_REG_LIMIT; + memcpy(req_mti, mti, sizeof(*req_mti)); + ptlrpc_request_set_replen(req); + CDEBUG(D_MGC, "register %s\n", mti->mti_svname); + /* Limit how long we will wait for the enqueue to complete */ + req->rq_delay_limit = MGC_TARGET_REG_LIMIT; + + /* if the target needs to regenerate the config log in MGS, it's better + * to use some longer limit to let MGC have time to change connection to + * another MGS (or try again with the same MGS) for the target (server) + * will fail and exit if the request expired due to delay limit. */ + if (mti->mti_flags & (LDD_F_UPDATE | LDD_F_NEED_INDEX)) + req->rq_delay_limit = MGC_TARGET_REG_LIMIT_MAX; rc = ptlrpc_queue_wait(req); if (!rc) { @@ -1227,24 +1227,28 @@ static int mgc_set_info_async(const struct lu_env *env, struct obd_export *exp, int rc = -EINVAL; ENTRY; - /* Turn off initial_recov after we try all backup servers once */ - if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { - struct obd_import *imp = class_exp2cliimp(exp); - int value; - if (vallen != sizeof(int)) - RETURN(-EINVAL); - value = *(int *)val; - CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", - imp->imp_obd->obd_name, value, - imp->imp_deactive, imp->imp_invalid, - imp->imp_replayable, imp->imp_obd->obd_replayable, - ptlrpc_import_state_name(imp->imp_state)); - /* Resurrect if we previously died */ - if ((imp->imp_state != LUSTRE_IMP_FULL && - imp->imp_state != LUSTRE_IMP_NEW) || value > 1) - ptlrpc_reconnect_import(imp); - RETURN(0); - } + /* Turn off initial_recov after we try all backup servers once */ + if (KEY_IS(KEY_INIT_RECOV_BACKUP)) { + struct obd_import *imp = class_exp2cliimp(exp); + int value; + if (vallen != sizeof(int)) + RETURN(-EINVAL); + value = *(int *)val; + CDEBUG(D_MGC, "InitRecov %s %d/d%d:i%d:r%d:or%d:%s\n", + imp->imp_obd->obd_name, value, + imp->imp_deactive, imp->imp_invalid, + imp->imp_replayable, imp->imp_obd->obd_replayable, + ptlrpc_import_state_name(imp->imp_state)); + /* Resurrect the import immediately if + * 1. we previously got disconnected, + * 2. value > 1 (at the same node with MGS) + * */ + if (imp->imp_state == LUSTRE_IMP_DISCON || value > 1) + ptlrpc_reconnect_import(imp); + + RETURN(0); + } + /* FIXME move this to mgc_process_config */ if (KEY_IS(KEY_REGISTER_TARGET)) { struct mgs_target_info *mti; @@ -1397,34 +1401,35 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, __u64 max_version, void *data, int datalen, bool mne_swab) { - struct config_llog_instance *cfg = &cld->cld_cfg; - struct lustre_sb_info *lsi = s2lsi(cfg->cfg_sb); - struct mgs_nidtbl_entry *entry; - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs bufs; - u64 prev_version = 0; - char *inst; - char *buf; - int bufsz; - int pos; - int rc = 0; - int off = 0; - ENTRY; + struct config_llog_instance *cfg = &cld->cld_cfg; + struct lustre_sb_info *lsi = s2lsi(cfg->cfg_sb); + struct mgs_nidtbl_entry *entry; + struct lustre_cfg *lcfg; + struct lustre_cfg_bufs bufs; + u64 prev_version = 0; + char *inst; + char *buf; + int bufsz; + int pos = 0; + int rc = 0; + int off = 0; - LASSERT(cfg->cfg_instance != NULL); - LASSERT(cfg->cfg_sb == cfg->cfg_instance); + ENTRY; + LASSERT(cfg->cfg_instance != 0); + LASSERT(ll_get_cfg_instance(cfg->cfg_sb) == cfg->cfg_instance); OBD_ALLOC(inst, PAGE_SIZE); if (inst == NULL) RETURN(-ENOMEM); if (!IS_SERVER(lsi)) { - pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance); + pos = snprintf(inst, PAGE_SIZE, "%016lx", cfg->cfg_instance); if (pos >= PAGE_SIZE) { OBD_FREE(inst, PAGE_SIZE); return -E2BIG; } - } else { +#ifdef HAVE_SERVER_SUPPORT + } else { LASSERT(IS_MDT(lsi)); rc = server_name2svname(lsi->lsi_svname, inst, NULL, PAGE_SIZE); @@ -1433,7 +1438,8 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, RETURN(-EINVAL); } pos = strlen(inst); - } +#endif /* HAVE_SERVER_SUPPORT */ + } ++pos; buf = inst + pos; @@ -1741,15 +1747,8 @@ again: #ifdef HAVE_SERVER_SUPPORT /* config changed since first read RPC */ if (cld_is_nodemap(cld) && config_read_offset == 0) { - recent_nodemap = NULL; - nodemap_config_dealloc(new_config); - new_config = NULL; - CDEBUG(D_INFO, "nodemap config changed in transit, retrying\n"); - - /* setting eof to false, we request config again */ - eof = false; - GOTO(out, rc = 0); + GOTO(out, rc = -EAGAIN); } #endif if (!eof) @@ -1757,13 +1756,7 @@ again: GOTO(out, rc); } - mne_swab = !!ptlrpc_rep_need_swab(req); -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) - /* This import flag means the server did an extra swab of IR MNE - * records (fixed in LU-1252), reverse it here if needed. LU-1644 */ - if (unlikely(req->rq_import->imp_need_mne_swab)) - mne_swab = !mne_swab; -#endif + mne_swab = ptlrpc_rep_need_swab(req); /* When a nodemap config is received, we build a new nodemap config, * with new nodemap structs. We keep track of the most recently added @@ -1974,9 +1967,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, &cld->cld_cfg); /* - * update settings on existing OBDs. doing it inside - * of llog_process_lock so no device is attaching/detaching - * in parallel. + * update settings on existing OBDs. * the logname must be -sptlrpc */ if (rc == 0 && cld_is_sptlrpc(cld)) @@ -2055,12 +2046,12 @@ restart: mutex_lock(&cld->cld_lock); if (cld->cld_stopping) { mutex_unlock(&cld->cld_lock); - RETURN(0); - } + RETURN(0); + } - OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); + OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); - CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname, + CDEBUG(D_MGC, "Process log %s-%016lx from %d\n", cld->cld_logname, cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); /* Get the cfg lock on the llog */ @@ -2080,7 +2071,7 @@ restart: atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) { struct obd_import *imp; struct l_wait_info lwi; - int secs = cfs_time_seconds(obd_timeout); + long timeout = cfs_time_seconds(obd_timeout); mutex_unlock(&cld->cld_lock); imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp); @@ -2093,7 +2084,7 @@ restart: * FULL or closed */ ptlrpc_pinger_force(imp); - lwi = LWI_TIMEOUT(secs, NULL, NULL); + lwi = LWI_TIMEOUT(timeout, NULL, NULL); l_wait_event(imp->imp_recovery_waitq, !mgc_import_in_recovery(imp), &lwi); @@ -2102,6 +2093,11 @@ restart: goto restart; } else { mutex_lock(&cld->cld_lock); + /* unlock/lock mutex, so check stopping again */ + if (cld->cld_stopping) { + mutex_unlock(&cld->cld_lock); + RETURN(0); + } spin_lock(&config_list_lock); cld->cld_lostlock = 1; spin_unlock(&config_list_lock); @@ -2147,6 +2143,12 @@ restart: CERROR("Can't drop cfg lock: %d\n", rcl); } + /* requeue nodemap lock immediately if transfer was interrupted */ + if (cld_is_nodemap(cld) && rc == -EAGAIN) { + mgc_requeue_add(cld); + rc = 0; + } + RETURN(rc); } @@ -2205,11 +2207,6 @@ static int mgc_process_config(struct obd_device *obd, size_t len, void *buf) break; } - /* COMPAT_146 */ - /* FIXME only set this for old logs! Right now this forces - us to always skip the "inside markers" check */ - cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146; - rc = mgc_process_log(obd, cld); if (rc == 0 && cld->cld_recover != NULL) { if (OCD_HAS_FLAG(&obd->u.cli.cl_import-> @@ -2280,7 +2277,7 @@ static struct obd_ops mgc_obd_ops = { static int __init mgc_init(void) { - return class_register_type(&mgc_obd_ops, NULL, true, NULL, + return class_register_type(&mgc_obd_ops, NULL, false, NULL, LUSTRE_MGC_NAME, NULL); }