From 6a6e4ee20fe5aaad4beab5477e1c7d05e4e702e2 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Sat, 14 Oct 2023 00:28:58 +0300 Subject: [PATCH] LU-17184 mgc: remove damaged local configs If the local config llog is damaged it can't be removed and prevents the target from mounting. This happens because mgc_llog_local_copy() uses llog_erase() to remove llogs, which can't do the job if the llog header is damaged. Patch changes are: - llog_erase() no longer initializes the header but just destroys the llog file - mgc_llog_local_copy() no longer exits when the backup to the temp file fails but continues with remote llog copying anyway - conf-sanity test_151 is added to check that the target can mount with a damaged local config Signed-off-by: Mikhail Pershin Change-Id: I637749c38fd5ed03bdac5ca1cd60196f724ab0d1 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52697 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- lustre/mgc/mgc_request_server.c | 24 +++++++++++++++--------- lustre/obdclass/llog.c | 12 ++++-------- lustre/tests/conf-sanity.sh | 28 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/lustre/mgc/mgc_request_server.c b/lustre/mgc/mgc_request_server.c index 15b5fa6..6eadd9d 100644 --- a/lustre/mgc/mgc_request_server.c +++ b/lustre/mgc/mgc_request_server.c @@ -528,32 +528,38 @@ static int mgc_llog_local_copy(const struct lu_env *env, ENTRY; /* - * - copy it to backup using llog_backup() + * NB: mgc_process_server_cfg_log() always needs valid local copy + * and works only on it, so that defines the process: + * - copy current local copy to temp_log using llog_backup() * - copy remote llog to logname using llog_backup() - * - if failed then move bakup to logname again + * - if failed then restore logname from backup */ + OBD_ALLOC(temp_log, strlen(logname) + 2); if (!temp_log) RETURN(-ENOMEM); sprintf(temp_log, "%sT", logname); - /* make a copy of local llog at first */ + /* copy 
current local llog to temp_log */ rc = llog_backup(env, obd, lctxt, lctxt, logname, temp_log); if (rc < 0 && rc != -ENOENT) - GOTO(out, rc); - /* copy remote llog to the local copy */ + CWARN("%s: failed to backup local config %s: rc = %d\n", + obd->obd_name, logname, rc); + + /* build new local llog */ rc = llog_backup(env, obd, rctxt, lctxt, logname, logname); if (rc == -ENOENT) { - /* no remote llog, delete local one too */ + CWARN("%s: no remote llog for %s, check MGS config\n", + obd->obd_name, logname); llog_erase(env, lctxt, NULL, logname); } else if (rc < 0) { /* error during backup, get local one back from the copy */ - llog_backup(env, obd, lctxt, lctxt, temp_log, logname); -out: - CERROR("%s: failed to copy remote log %s: rc = %d\n", + CWARN("%s: failed to copy new config %s: rc = %d\n", obd->obd_name, logname, rc); + llog_backup(env, obd, lctxt, lctxt, temp_log, logname); } llog_erase(env, lctxt, NULL, temp_log); + OBD_FREE(temp_log, strlen(logname) + 2); return rc; } diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index f8197e5..61e836e 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -1288,8 +1288,8 @@ EXPORT_SYMBOL(llog_open_create); int llog_erase(const struct lu_env *env, struct llog_ctxt *ctxt, struct llog_logid *logid, char *name) { - struct llog_handle *handle; - int rc = 0, rc2; + struct llog_handle *handle; + int rc; ENTRY; @@ -1301,13 +1301,9 @@ int llog_erase(const struct lu_env *env, struct llog_ctxt *ctxt, if (rc < 0) RETURN(rc); - rc = llog_init_handle(env, handle, LLOG_F_IS_PLAIN, NULL); - if (rc == 0) - rc = llog_destroy(env, handle); + rc = llog_destroy(env, handle); + llog_close(env, handle); - rc2 = llog_close(env, handle); - if (rc == 0) - rc = rc2; RETURN(rc); } EXPORT_SYMBOL(llog_erase); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 0c228d6..b90be4b 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -10780,6 +10780,34 @@ test_150() { } 
run_test 150 "test setting max_cached_mb to a %" +test_151() { + (( MDS1_VERSION >= $(version_code 2.15.58) )) || + skip "need MDS version at least 2.15.58" + [[ "$ost1_FSTYPE" == ldiskfs ]] || skip "ldiskfs only test" + + cleanup + if ! combined_mgs_mds ; then + stop mgs + fi + + echo "Damage ost1 local config log" + do_facet ost1 "$DEBUGFS -w -R 'punch CONFIGS/$FSNAME-OST0000 0 1' \ + $(ostdevname 1) || return \$?" || + error "do_facet ost1 failed with $?" + + # expect OST to fail mount with no MGS and bad local config + start_ost && error "OST start should fail" + + if ! combined_mgs_mds ; then + start_mgs + fi + start_mds || error "MDS start failed" + # now it should start with MGS config + start_ost || error "OST start failed" + reformat_and_config +} +run_test 151 "damaged local config doesn't prevent mounting" + # # (This was sanity/802a) # -- 1.8.3.1