From: Mikhail Pershin Date: Tue, 8 Oct 2024 13:10:37 +0000 (+0300) Subject: LU-18352 mgc: explicitly create sptlrpc local copy X-Git-Tag: 2.16.51~189 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F09%2F56609%2F10;p=fs%2Flustre-release.git LU-18352 mgc: explicitly create sptlrpc local copy The sptlrpc config has a single instance per MGC and is shared by targets, hence it is processed only once, and a local copy of it is created only during that first processing, for the first target that triggers it. All other targets just find the config in memory. Therefore local copy creation for the other targets needs to be done explicitly when an already processed config is found. The patch introduces mgc_get_local_copy(), which just copies the llog from the MGS, if possible, for a target that finds an already processed sptlrpc config. Test-Parameters: testlist=sanity-sec env=ONLY=70,SHARED_KEY=true Signed-off-by: Mikhail Pershin Change-Id: I922f92a950b9a07172f36f42b94da854c7702a80 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56609 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Reviewed-by: Aurelien Degremont Reviewed-by: Oleg Drokin --- diff --git a/lustre/mgc/mgc_internal.h b/lustre/mgc/mgc_internal.h index d1c021a..723d49d 100644 --- a/lustre/mgc/mgc_internal.h +++ b/lustre/mgc/mgc_internal.h @@ -27,6 +27,11 @@ int mgc_tunables_init(struct obd_device *obd); int lprocfs_mgc_rd_ir_state(struct seq_file *m, void *data); int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld); +int mgc_enqueue(struct obd_export *exp, enum ldlm_type type, + union ldlm_policy_data *policy, enum ldlm_mode mode, + __u64 *flags, ldlm_glimpse_callback glimpse_callback, + void *data, __u32 lvb_len, void *lvb_swabber, + struct lustre_handle *lockh); /* this timeout represents how many seconds MGC should wait before * requeue config and recover lock to the MGS. 
We need to randomize this @@ -76,9 +81,11 @@ int mgc_process_server_cfg_log(struct lu_env *env, struct llog_ctxt **ctxt, struct lustre_sb_info *lsi, struct obd_device *mgc, struct config_llog_data *cld, - int local_only); + int local_only, bool copy_only); int mgc_process_config_server(struct obd_device *obd, size_t len, void *buf); int mgc_barrier_glimpse_ast(struct ldlm_lock *lock, void *data); +int mgc_get_local_copy(struct obd_device *mgc, struct super_block *sb, + struct config_llog_data *cld); #else /* HAVE_SERVER_SUPPORT */ #define mgc_barrier_glimpse_ast NULL #endif /* HAVE_SERVER_SUPPORT */ diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 8d9d03d..e3f0182 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -278,9 +278,8 @@ config_log_find_or_add(struct obd_device *obd, char *logname, struct config_llog_instance lcfg = *cfg; struct config_llog_data *cld; - /* Note class_config_llog_handler() depends on getting "obd" back */ - /* for sptlrpc, sb is only provided to be able to make a local copy, - * not for the instance + /* Sptlrpc config is common for all targets, so it uses MGC OBD + * as instance, but have also 'sb' provided to create local copy */ if (sb && type != MGS_CFG_T_SPTLRPC) lcfg.cfg_instance = ll_get_cfg_instance(sb); @@ -288,8 +287,16 @@ config_log_find_or_add(struct obd_device *obd, char *logname, lcfg.cfg_instance = (unsigned long)obd; cld = config_log_find(logname, &lcfg); - if (unlikely(cld != NULL)) + if (unlikely(cld)) { +#ifdef HAVE_SERVER_SUPPORT + /* If a target finds existing sptlrpc config + * then create its local copy explicitly + */ + if (sb && cld_is_sptlrpc(cld)) + mgc_get_local_copy(obd, sb, cld); +#endif return cld; + } return do_config_log_add(obd, logname, type, &lcfg, sb); } @@ -675,7 +682,6 @@ static int mgc_requeue_thread(void *data) config_log_get(cld); cld->cld_lostlock = 0; spin_unlock(&config_list_lock); - config_log_put(cld_prev); cld_prev = cld; @@ -944,11 +950,11 @@ static 
int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } /* Take a config lock so we can get cancel notifications */ -static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type, - union ldlm_policy_data *policy, enum ldlm_mode mode, - __u64 *flags, ldlm_glimpse_callback glimpse_callback, - void *data, __u32 lvb_len, void *lvb_swabber, - struct lustre_handle *lockh) +int mgc_enqueue(struct obd_export *exp, enum ldlm_type type, + union ldlm_policy_data *policy, enum ldlm_mode mode, + __u64 *flags, ldlm_glimpse_callback glimpse_callback, + void *data, __u32 lvb_len, void *lvb_swabber, + struct lustre_handle *lockh) { struct config_llog_data *cld = (struct config_llog_data *)data; struct ldlm_enqueue_info einfo = { @@ -982,9 +988,8 @@ static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type, req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, 0); ptlrpc_request_set_replen(req); - /* check if this is server or client */ - if (cld->cld_cfg.cfg_sb && - IS_SERVER(s2lsi(cld->cld_cfg.cfg_sb))) + /* check for server by local storage set via server_mgc_set_fs() */ + if (exp->exp_obd->u.cli.cl_mgc_los) short_limit = 1; /* Limit how long we will wait for the enqueue to complete */ @@ -1634,9 +1639,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, #endif if (cld->cld_cfg.cfg_sb) lsi = s2lsi(cld->cld_cfg.cfg_sb); - /* sptlrpc llog must not keep ref to sb, - * it was just needed to get lsi - */ + /* sptlrpc is shared config and shouldn't leave sb in cld */ if (cld_is_sptlrpc(cld)) cld->cld_cfg.cfg_sb = NULL; @@ -1658,7 +1661,7 @@ static int mgc_process_cfg_log(struct obd_device *mgc, if (lsi && mgc->u.cli.cl_mgc_los) { if (!IS_MGS(lsi)) rc = mgc_process_server_cfg_log(env, &ctxt, lsi, mgc, - cld, !local_only); + cld, !local_only, 0); } else if (local_only) { rc = -EIO; } @@ -1843,11 +1846,8 @@ restart: } /* Now drop the lock so MGS can revoke it */ - if (!rcl) { - rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, LCK_CR, &lockh); - 
if (rcl)
-			CERROR("Can't drop cfg lock: %d\n", rcl);
-	}
+	if (!rcl)
+		mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, LCK_CR, &lockh);
 	mutex_unlock(&cld->cld_lock);
 
 	/* requeue nodemap lock immediately if transfer was interrupted */
diff --git a/lustre/mgc/mgc_request_server.c b/lustre/mgc/mgc_request_server.c
index 7104cd5..dbe3657 100644
--- a/lustre/mgc/mgc_request_server.c
+++ b/lustre/mgc/mgc_request_server.c
@@ -567,7 +567,8 @@ out_free:
 int mgc_process_server_cfg_log(struct lu_env *env, struct llog_ctxt **ctxt,
 			       struct lustre_sb_info *lsi,
 			       struct obd_device *mgc,
-			       struct config_llog_data *cld, int mgslock)
+			       struct config_llog_data *cld, int mgslock,
+			       bool copy_only)
 {
 	struct llog_ctxt *lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT);
 	struct client_obd *cli = &mgc->u.cli;
@@ -592,6 +593,8 @@ int mgc_process_server_cfg_log(struct lu_env *env, struct llog_ctxt **ctxt,
 		if (!rc)
 			lsi->lsi_flags &= ~LDD_F_NO_LOCAL_LOGS;
 	}
+	if (copy_only)
+		GOTO(out_pop, rc);
 
 	if (!mgslock) {
 		if (unlikely(lsi->lsi_flags & LDD_F_NO_LOCAL_LOGS)) {
@@ -629,3 +632,87 @@ out_pop:
 	__llog_ctxt_put(env, lctxt);
 	return rc;
 }
+
+/**
+ * Create a local copy of an already processed config llog for a target.
+ *
+ * The copy is made under cld_lock while a CR reference on the config lock
+ * is held: a reference on cld_lockh is taken when possible, otherwise a new
+ * lock is enqueued and then cancelled once the copy is done.  The lock
+ * reference is dropped on every exit path taken after it was obtained.
+ *
+ * \param[in] mgc	MGC device
+ * \param[in] sb	super block of the target that needs the copy
+ * \param[in] cld	config log data, must not be NULL
+ *
+ * \retval 0		copy was made, or is not applicable (the MGC has no
+ *			local storage set or the target is the MGS itself)
+ * \retval -ENODATA	config log has not been processed yet
+ * \retval -ENODEV	config log is being stopped
+ * \retval -ENOMEM	environment allocation failed
+ * \retval negative	other error from lock enqueue, env setup or llog copy
+ */
+int mgc_get_local_copy(struct obd_device *mgc, struct super_block *sb,
+		       struct config_llog_data *cld)
+{
+	struct llog_ctxt *ctxt;
+	struct lustre_sb_info *lsi = s2lsi(sb);
+	struct lu_env *env;
+	struct lustre_handle lockh = { .cookie = 0, };
+	__u64 flags = 0;
+	int rc;
+
+	ENTRY;
+
+	LASSERT(cld);
+	if (!mgc->u.cli.cl_mgc_los || IS_MGS(lsi))
+		RETURN(0);
+
+	mutex_lock(&cld->cld_lock);
+	if (!cld->cld_processed)
+		GOTO(out_mutex, rc = -ENODATA);
+
+	if (cld->cld_stopping)
+		GOTO(out_mutex, rc = -ENODEV);
+
+	CDEBUG(D_MGC, "Get log %s-%016lx local copy\n", cld->cld_logname,
+	       cld->cld_cfg.cfg_instance);
+
+	/* reuse the lock in cld_lockh if possible, else enqueue a new one */
+	if (ldlm_lock_addref_try(&cld->cld_lockh, LCK_CR)) {
+		rc = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL,
+				 LCK_CR, &flags, NULL, cld, 0, NULL, &lockh);
+		if (rc)
+			GOTO(out_mutex, rc);
+	}
+
+	OBD_ALLOC_PTR(env);
+	if (!env)
+		GOTO(out_lock, rc = -ENOMEM);
+
+	rc = lu_env_init(env, LCT_MG_THREAD);
+	if (rc)
+		GOTO(out_free, rc);
+
+	ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT);
+	LASSERT(ctxt);
+
+	rc = mgc_process_server_cfg_log(env, &ctxt, lsi, mgc, cld, 1, true);
+	if (rc)
+		CDEBUG(D_MGC, "%s: can't save local copy of '%s': rc = %d.\n",
+		       mgc->obd_name, cld->cld_logname, rc);
+
+	__llog_ctxt_put(env, ctxt);
+	lu_env_fini(env);
+out_free:
+	OBD_FREE_PTR(env);
+out_lock:
+	/* the lock reference must be dropped on the error paths as well */
+	if (lustre_handle_is_used(&lockh))
+		ldlm_lock_decref_and_cancel(&lockh, LCK_CR);
+	else
+		ldlm_lock_decref(&cld->cld_lockh, LCK_CR);
+out_mutex:
+	mutex_unlock(&cld->cld_lock);
+	RETURN(rc);
+}
diff --git a/lustre/tests/sanity-sec.sh b/lustre/tests/sanity-sec.sh
index c8a6fbc..5005cc4 100755
--- a/lustre/tests/sanity-sec.sh
+++ b/lustre/tests/sanity-sec.sh
@@ -6459,9 +6459,20 @@ test_70() {
 		$param_copy' $(ostdevname 1)"
 	do_facet ost1 "llog_reader $param_copy" | grep -vE "SKIP|marker" |
 		grep "^#" > $param_copy
-	cat $param_copy
+	cat -A $param_copy
 	cmp -bl $param_mgs $param_copy ||
-		error "sptlrpc llog differ in oss"
+		error "sptlrpc llog differ at ost1"
+	rm -f $param_copy
+
+	do_facet ost2 "sync ; sync"
+	do_facet ost2 "$DEBUGFS -c -R 'ls CONFIGS/' $(ostdevname 2)"
+	do_facet ost2 "$DEBUGFS -c -R 'dump CONFIGS/$FSNAME-sptlrpc \
+		$param_copy' $(ostdevname 2)"
+	do_facet ost2 "llog_reader $param_copy" | grep -vE "SKIP|marker" |
+		grep "^#" > $param_copy
+	cat -A $param_copy
+	cmp -bl $param_mgs $param_copy ||
+		error "sptlrpc llog differ at ost2"
 }
 run_test 70 "targets have local copy of sptlrpc llog"
 