X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmgc%2Fmgc_request.c;h=c30de4ae2027129a7dc8bb3b848c3a5fe5f3d62e;hb=0ad54d59777366fba8ee61eaaa27b3060c91782f;hp=c08ed96556bca5b9ffff3b7a2531b18577c6de87;hpb=1a7ff02c1fbb8e85ac2e8fa458ba3fb810a76ea4;p=fs%2Flustre-release.git diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index c08ed96..c30de4a 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -117,7 +118,7 @@ int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type) EXPORT_SYMBOL(mgc_logname2resid); /********************** config llog list **********************/ -static struct list_head config_llog_list = LIST_HEAD_INIT(config_llog_list); +static LIST_HEAD(config_llog_list); static DEFINE_SPINLOCK(config_list_lock); /* protects config_llog_list */ /* Take a reference to a config log */ @@ -125,7 +126,7 @@ static int config_log_get(struct config_llog_data *cld) { ENTRY; atomic_inc(&cld->cld_refcount); - CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, + CDEBUG(D_INFO, "log %s (%p) refs %d\n", cld->cld_logname, cld, atomic_read(&cld->cld_refcount)); RETURN(0); } @@ -139,7 +140,7 @@ static void config_log_put(struct config_llog_data *cld) if (unlikely(!cld)) RETURN_EXIT; - CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, + CDEBUG(D_INFO, "log %s(%p) refs %d\n", cld->cld_logname, cld, atomic_read(&cld->cld_refcount)); LASSERT(atomic_read(&cld->cld_refcount) > 0); @@ -170,18 +171,18 @@ static struct config_llog_data *config_log_find(char *logname, struct config_llog_instance *cfg) { - struct config_llog_data *cld; - struct config_llog_data *found = NULL; - void * instance; - ENTRY; + struct config_llog_data *cld; + struct config_llog_data *found = NULL; + unsigned long cfg_instance; - LASSERT(logname != NULL); + ENTRY; + LASSERT(logname != NULL); - instance = cfg ? cfg->cfg_instance : NULL; + cfg_instance = cfg ? cfg->cfg_instance : 0; spin_lock(&config_list_lock); list_for_each_entry(cld, &config_llog_list, cld_list_chain) { - /* check if instance equals */ - if (instance != cld->cld_cfg.cfg_instance) + /* check if cfg_instance is the one we want */ + if (cfg_instance != cld->cld_cfg.cfg_instance) continue; /* instance may be NULL, should check name */ @@ -207,8 +208,8 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, ENTRY; - CDEBUG(D_MGC, "do adding config log %s:%p\n", logname, - cfg ? cfg->cfg_instance : NULL); + CDEBUG(D_MGC, "do adding config log %s-%016lx\n", logname, + cfg ? cfg->cfg_instance : 0); OBD_ALLOC(cld, sizeof(*cld) + strlen(logname) + 1); if (!cld) @@ -235,10 +236,8 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, /* Keep the mgc around until we are done */ cld->cld_mgcexp = class_export_get(obd->obd_self_export); - if (cld_is_sptlrpc(cld)) { + if (cld_is_sptlrpc(cld)) sptlrpc_conf_log_start(logname); - cld->cld_cfg.cfg_obdname = obd->obd_name; - } spin_lock(&config_list_lock); list_add(&cld->cld_list_chain, &config_llog_list); @@ -255,50 +254,49 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd, } static struct config_llog_data *config_recover_log_add(struct obd_device *obd, - char *fsname, - struct config_llog_instance *cfg, - struct super_block *sb) + char *fsname, + struct config_llog_instance *cfg, + struct super_block *sb) { - struct config_llog_instance lcfg = *cfg; - struct lustre_sb_info *lsi = s2lsi(sb); - struct config_llog_data *cld; - char logname[32]; + struct config_llog_instance lcfg = *cfg; + struct lustre_sb_info *lsi = s2lsi(sb); + struct config_llog_data *cld; + char logname[32]; if (IS_OST(lsi)) - return NULL; + return NULL; /* for osp-on-ost, see lustre_start_osp() */ if (IS_MDT(lsi) && lcfg.cfg_instance) return NULL; - /* we have to use different llog for clients and mdts for cmd - * where only clients are notified if one of cmd server restarts */ - LASSERT(strlen(fsname) < sizeof(logname) / 2); - strcpy(logname, fsname); + /* We have to use different llog for clients and MDTs for DNE, + * where only clients are notified if one of DNE server restarts. + */ + LASSERT(strlen(fsname) < sizeof(logname) / 2); + strncpy(logname, fsname, sizeof(logname)); if (IS_SERVER(lsi)) { /* mdt */ - LASSERT(lcfg.cfg_instance == NULL); - lcfg.cfg_instance = sb; - strcat(logname, "-mdtir"); - } else { - LASSERT(lcfg.cfg_instance != NULL); - strcat(logname, "-cliir"); - } + LASSERT(lcfg.cfg_instance == 0); + lcfg.cfg_instance = ll_get_cfg_instance(sb); + strncat(logname, "-mdtir", sizeof(logname)); + } else { + LASSERT(lcfg.cfg_instance != 0); + strncat(logname, "-cliir", sizeof(logname)); + } - cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb); - return cld; + cld = do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb); + return cld; } static struct config_llog_data *config_log_find_or_add(struct obd_device *obd, char *logname, struct super_block *sb, int type, struct config_llog_instance *cfg) { - struct config_llog_instance lcfg = *cfg; - struct config_llog_data *cld; - - lcfg.cfg_instance = sb != NULL ? (void *)sb : (void *)obd; + struct config_llog_instance lcfg = *cfg; + struct config_llog_data *cld; - if (type == CONFIG_T_SPTLRPC) - lcfg.cfg_instance = NULL; + /* Note class_config_llog_handler() depends on getting "obd" back */ + lcfg.cfg_instance = sb ? ll_get_cfg_instance(sb) : (unsigned long)obd; cld = config_log_find(logname, &lcfg); if (unlikely(cld != NULL)) @@ -328,7 +326,8 @@ config_log_add(struct obd_device *obd, char *logname, bool locked = false; ENTRY; - CDEBUG(D_MGC, "adding config log %s:%p\n", logname, cfg->cfg_instance); + CDEBUG(D_MGC, "add config log %s-%016lx\n", logname, + cfg->cfg_instance); /* * for each regular log, the depended sptlrpc log name is @@ -450,13 +449,24 @@ out_sptlrpc: DEFINE_MUTEX(llog_process_lock); +static inline void config_mark_cld_stop_nolock(struct config_llog_data *cld) +{ + ENTRY; + + spin_lock(&config_list_lock); + cld->cld_stopping = 1; + spin_unlock(&config_list_lock); + + CDEBUG(D_INFO, "lockh %#llx\n", cld->cld_lockh.cookie); + if (!ldlm_lock_addref_try(&cld->cld_lockh, LCK_CR)) + ldlm_lock_decref_and_cancel(&cld->cld_lockh, LCK_CR); +} + static inline void config_mark_cld_stop(struct config_llog_data *cld) { if (cld) { mutex_lock(&cld->cld_lock); - spin_lock(&config_list_lock); - cld->cld_stopping = 1; - spin_unlock(&config_list_lock); + config_mark_cld_stop_nolock(cld); mutex_unlock(&cld->cld_lock); } } @@ -494,10 +504,6 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) RETURN(rc); } - spin_lock(&config_list_lock); - cld->cld_stopping = 1; - spin_unlock(&config_list_lock); - cld_recover = cld->cld_recover; cld->cld_recover = NULL; cld_params = cld->cld_params; @@ -508,24 +514,20 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) cld->cld_barrier = NULL; cld_sptlrpc = cld->cld_sptlrpc; cld->cld_sptlrpc = NULL; + + config_mark_cld_stop_nolock(cld); mutex_unlock(&cld->cld_lock); config_mark_cld_stop(cld_recover); - config_log_put(cld_recover); - config_mark_cld_stop(cld_params); - config_log_put(cld_params); + config_mark_cld_stop(cld_barrier); + config_mark_cld_stop(cld_sptlrpc); + config_log_put(cld_params); + config_log_put(cld_recover); /* don't set cld_stopping on nm lock as other targets may be active */ config_log_put(cld_nodemap); - - if (cld_barrier) { - mutex_lock(&cld_barrier->cld_lock); - cld_barrier->cld_stopping = 1; - mutex_unlock(&cld_barrier->cld_lock); - config_log_put(cld_barrier); - } - + config_log_put(cld_barrier); config_log_put(cld_sptlrpc); /* drop the ref from the find */ @@ -538,16 +540,15 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) RETURN(rc); } -#ifdef CONFIG_PROC_FS int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data) { struct obd_device *obd = data; struct obd_import *imp; struct obd_connect_data *ocd; struct config_llog_data *cld; - ENTRY; - LASSERT(obd != NULL); + ENTRY; + LASSERT(obd); LPROCFS_CLIMP_CHECK(obd); imp = obd->u.cli.cl_import; ocd = &imp->imp_connect_data; @@ -569,7 +570,6 @@ int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data) LPROCFS_CLIMP_EXIT(obd); RETURN(0); } -#endif /* reenqueue any lost locks */ #define RQ_RUNNING 0x1 @@ -615,7 +615,7 @@ static void do_requeue(struct config_llog_data *cld) * in order to not flood the MGS. */ #define MGC_TIMEOUT_MIN_SECONDS 5 -#define MGC_TIMEOUT_RAND_CENTISEC 0x1ff /* ~500 */ +#define MGC_TIMEOUT_RAND_CENTISEC 500 static int mgc_requeue_thread(void *data) { @@ -631,7 +631,7 @@ static int mgc_requeue_thread(void *data) while (!(rq_state & RQ_STOP)) { struct l_wait_info lwi; struct config_llog_data *cld, *cld_prev; - int rand = cfs_rand() & MGC_TIMEOUT_RAND_CENTISEC; + int rand = prandom_u32_max(MGC_TIMEOUT_RAND_CENTISEC); int to; /* Any new or requeued lostlocks will change the state */ @@ -646,22 +646,21 @@ static int mgc_requeue_thread(void *data) /* Always wait a few seconds to allow the server who caused the lock revocation to finish its setup, plus some random so everyone doesn't try to reconnect at once. */ - to = msecs_to_jiffies(MGC_TIMEOUT_MIN_SECONDS * MSEC_PER_SEC); + to = cfs_time_seconds(MGC_TIMEOUT_MIN_SECONDS * 100 + rand); /* rand is centi-seconds */ - to += msecs_to_jiffies(rand * MSEC_PER_SEC / 100); - lwi = LWI_TIMEOUT(to, NULL, NULL); + lwi = LWI_TIMEOUT(to / 100, NULL, NULL); l_wait_event(rq_waitq, rq_state & (RQ_STOP | RQ_PRECLEANUP), &lwi); - /* - * iterate & processing through the list. for each cld, process - * its depending sptlrpc cld firstly (if any) and then itself. - * - * it's guaranteed any item in the list must have - * reference > 0; and if cld_lostlock is set, at - * least one reference is taken by the previous enqueue. - */ - cld_prev = NULL; + /* + * iterate & processing through the list. for each cld, process + * its depending sptlrpc cld firstly (if any) and then itself. + * + * it's guaranteed any item in the list must have + * reference > 0; and if cld_lostlock is set, at + * least one reference is taken by the previous enqueue. + */ + cld_prev = NULL; spin_lock(&config_list_lock); rq_state &= ~RQ_PRECLEANUP; @@ -691,9 +690,7 @@ static int mgc_requeue_thread(void *data) config_log_put(cld_prev); /* Wait a bit to see if anyone else needs a requeue */ - lwi = (struct l_wait_info) { 0 }; - l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP), - &lwi); + wait_event_idle(rq_waitq, rq_state & (RQ_NOW | RQ_STOP)); spin_lock(&config_list_lock); } @@ -719,9 +716,14 @@ static void mgc_requeue_add(struct config_llog_data *cld) cld->cld_stopping, rq_state); LASSERT(atomic_read(&cld->cld_refcount) > 0); + /* lets cancel an existent lock to mark cld as "lostlock" */ + CDEBUG(D_INFO, "lockh %#llx\n", cld->cld_lockh.cookie); + if (!ldlm_lock_addref_try(&cld->cld_lockh, LCK_CR)) + ldlm_lock_decref_and_cancel(&cld->cld_lockh, LCK_CR); + mutex_lock(&cld->cld_lock); spin_lock(&config_list_lock); - if (!(rq_state & RQ_STOP) && !cld->cld_stopping && !cld->cld_lostlock) { + if (!(rq_state & RQ_STOP) && !cld->cld_stopping) { cld->cld_lostlock = 1; rq_state |= RQ_NOW; wakeup = true; @@ -936,7 +938,7 @@ static int mgc_cleanup(struct obd_device *obd) /* COMPAT_146 - old config logs may have added profiles we don't know about */ - if (obd->obd_type->typ_refcnt <= 1) + if (atomic_read(&obd->obd_type->typ_refcnt) <= 1) /* Only for the last mgc */ class_del_profiles(); @@ -967,11 +969,9 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) GOTO(err_cleanup, rc); } -#ifdef CONFIG_PROC_FS - obd->obd_vars = lprocfs_mgc_obd_vars; - lprocfs_obd_setup(obd, true); -#endif - sptlrpc_lprocfs_cliobd_attach(obd); + rc = mgc_tunables_init(obd); + if (rc) + GOTO(err_sysfs, rc); if (atomic_inc_return(&mgc_count) == 1) { rq_state = 0; @@ -984,7 +984,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) CERROR("%s: cannot start requeue thread: rc = %d; " "no more log updates\n", obd->obd_name, rc); - GOTO(err_cleanup, rc); + GOTO(err_sysfs, rc); } /* rc is the task_struct pointer of mgc_requeue_thread. */ rc = 0; @@ -993,6 +993,8 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(rc); +err_sysfs: + lprocfs_obd_cleanup(obd); err_cleanup: client_obd_cleanup(obd); err_decref: @@ -1033,6 +1035,7 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, LASSERT(atomic_read(&cld->cld_refcount) > 0); lock->l_ast_data = NULL; + cld->cld_lockh.cookie = 0; /* Are we done with this log? */ if (cld->cld_stopping) { CDEBUG(D_MGC, "log %s: stopping, won't requeue\n", @@ -1409,34 +1412,35 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, __u64 max_version, void *data, int datalen, bool mne_swab) { - struct config_llog_instance *cfg = &cld->cld_cfg; - struct lustre_sb_info *lsi = s2lsi(cfg->cfg_sb); - struct mgs_nidtbl_entry *entry; - struct lustre_cfg *lcfg; - struct lustre_cfg_bufs bufs; - u64 prev_version = 0; - char *inst; - char *buf; - int bufsz; - int pos; - int rc = 0; - int off = 0; - ENTRY; + struct config_llog_instance *cfg = &cld->cld_cfg; + struct lustre_sb_info *lsi = s2lsi(cfg->cfg_sb); + struct mgs_nidtbl_entry *entry; + struct lustre_cfg *lcfg; + struct lustre_cfg_bufs bufs; + u64 prev_version = 0; + char *inst; + char *buf; + int bufsz; + int pos = 0; + int rc = 0; + int off = 0; - LASSERT(cfg->cfg_instance != NULL); - LASSERT(cfg->cfg_sb == cfg->cfg_instance); + ENTRY; + LASSERT(cfg->cfg_instance != 0); + LASSERT(ll_get_cfg_instance(cfg->cfg_sb) == cfg->cfg_instance); OBD_ALLOC(inst, PAGE_SIZE); if (inst == NULL) RETURN(-ENOMEM); if (!IS_SERVER(lsi)) { - pos = snprintf(inst, PAGE_SIZE, "%p", cfg->cfg_instance); + pos = snprintf(inst, PAGE_SIZE, "%016lx", cfg->cfg_instance); if (pos >= PAGE_SIZE) { OBD_FREE(inst, PAGE_SIZE); return -E2BIG; } - } else { +#ifdef HAVE_SERVER_SUPPORT + } else { LASSERT(IS_MDT(lsi)); rc = server_name2svname(lsi->lsi_svname, inst, NULL, PAGE_SIZE); @@ -1445,7 +1449,8 @@ static int mgc_apply_recover_logs(struct obd_device *mgc, RETURN(-EINVAL); } pos = strlen(inst); - } +#endif /* HAVE_SERVER_SUPPORT */ + } ++pos; buf = inst + pos; @@ -1753,15 +1758,8 @@ again: #ifdef HAVE_SERVER_SUPPORT /* config changed since first read RPC */ if (cld_is_nodemap(cld) && config_read_offset == 0) { - recent_nodemap = NULL; - nodemap_config_dealloc(new_config); - new_config = NULL; - CDEBUG(D_INFO, "nodemap config changed in transit, retrying\n"); - - /* setting eof to false, we request config again */ - eof = false; - GOTO(out, rc = 0); + GOTO(out, rc = -EAGAIN); } #endif if (!eof) @@ -1769,13 +1767,7 @@ again: GOTO(out, rc); } - mne_swab = !!ptlrpc_rep_need_swab(req); -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0) - /* This import flag means the server did an extra swab of IR MNE - * records (fixed in LU-1252), reverse it here if needed. LU-1644 */ - if (unlikely(req->rq_import->imp_need_mne_swab)) - mne_swab = !mne_swab; -#endif + mne_swab = ptlrpc_rep_need_swab(req); /* When a nodemap config is received, we build a new nodemap config, * with new nodemap structs. We keep track of the most recently added @@ -1986,9 +1978,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, &cld->cld_cfg); /* - * update settings on existing OBDs. doing it inside - * of llog_process_lock so no device is attaching/detaching - * in parallel. + * update settings on existing OBDs. * the logname must be -sptlrpc */ if (rc == 0 && cld_is_sptlrpc(cld)) @@ -2067,12 +2057,12 @@ restart: mutex_lock(&cld->cld_lock); if (cld->cld_stopping) { mutex_unlock(&cld->cld_lock); - RETURN(0); - } + RETURN(0); + } - OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); + OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); - CDEBUG(D_MGC, "Process log %s:%p from %d\n", cld->cld_logname, + CDEBUG(D_MGC, "Process log %s-%016lx from %d\n", cld->cld_logname, cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); /* Get the cfg lock on the llog */ @@ -2084,15 +2074,18 @@ restart: /* Get the cld, it will be released in mgc_blocking_ast. */ config_log_get(cld); rc = ldlm_lock_set_data(&lockh, (void *)cld); + LASSERT(!lustre_handle_is_used(&cld->cld_lockh)); LASSERT(rc == 0); + cld->cld_lockh = lockh; } else { CDEBUG(D_MGC, "Can't get cfg lock: %d\n", rcl); + cld->cld_lockh.cookie = 0; if (rcl == -ESHUTDOWN && atomic_read(&mgc->u.cli.cl_mgc_refcount) > 0 && !retry) { struct obd_import *imp; struct l_wait_info lwi; - int secs = cfs_time_seconds(obd_timeout); + long timeout = cfs_time_seconds(obd_timeout); mutex_unlock(&cld->cld_lock); imp = class_exp2cliimp(mgc->u.cli.cl_mgc_mgsexp); @@ -2105,7 +2098,7 @@ restart: * FULL or closed */ ptlrpc_pinger_force(imp); - lwi = LWI_TIMEOUT(secs, NULL, NULL); + lwi = LWI_TIMEOUT(timeout, NULL, NULL); l_wait_event(imp->imp_recovery_waitq, !mgc_import_in_recovery(imp), &lwi); @@ -2114,6 +2107,11 @@ restart: goto restart; } else { mutex_lock(&cld->cld_lock); + /* unlock/lock mutex, so check stopping again */ + if (cld->cld_stopping) { + mutex_unlock(&cld->cld_lock); + RETURN(0); + } spin_lock(&config_list_lock); cld->cld_lostlock = 1; spin_unlock(&config_list_lock); @@ -2133,16 +2131,6 @@ restart: else if (cld_is_nodemap(cld)) rc = rcl; - if (cld_is_recover(cld) && rc) { - if (!rcl) { - CERROR("%s: recover log %s failed, not fatal: rc = %d\n", - mgc->obd_name, cld->cld_logname, rc); - spin_lock(&config_list_lock); - cld->cld_lostlock = 1; - spin_unlock(&config_list_lock); - } - rc = 0; /* this is not a fatal error for recover log */ - } } else if (!cld_is_barrier(cld)) { rc = mgc_process_cfg_log(mgc, cld, rcl != 0); } @@ -2150,14 +2138,23 @@ restart: CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n", mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc); - mutex_unlock(&cld->cld_lock); - /* Now drop the lock so MGS can revoke it */ if (!rcl) { rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, LCK_CR, &lockh); if (rcl) CERROR("Can't drop cfg lock: %d\n", rcl); } + mutex_unlock(&cld->cld_lock); + + /* requeue nodemap lock immediately if transfer was interrupted */ + if ((cld_is_nodemap(cld) && rc == -EAGAIN) || + (cld_is_recover(cld) && rc)) { + if (cld_is_recover(cld)) + CWARN("%s: IR log %s failed, not fatal: rc = %d\n", + mgc->obd_name, cld->cld_logname, rc); + mgc_requeue_add(cld); + rc = 0; + } RETURN(rc); } @@ -2217,11 +2214,6 @@ static int mgc_process_config(struct obd_device *obd, size_t len, void *buf) break; } - /* COMPAT_146 */ - /* FIXME only set this for old logs! Right now this forces - us to always skip the "inside markers" check */ - cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146; - rc = mgc_process_log(obd, cld); if (rc == 0 && cld->cld_recover != NULL) { if (OCD_HAS_FLAG(&obd->u.cli.cl_import-> @@ -2292,7 +2284,7 @@ static struct obd_ops mgc_obd_ops = { static int __init mgc_init(void) { - return class_register_type(&mgc_obd_ops, NULL, true, NULL, + return class_register_type(&mgc_obd_ops, NULL, false, NULL, LUSTRE_MGC_NAME, NULL); }