From cde3df1cfe121eba8796dddc37d6501b0bcd89aa Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Thu, 22 May 2025 21:32:26 -0600 Subject: [PATCH] LU-19046 mgc: mgc_fs_setup() should wait interruptibly When a target mounts, it fetches a copy of its config log from the MGS to store in the local filesystem. However, the MGC can currently only fetch the config log for one target filesystem at a time. This should be improved in a separate patch. If the MGS is inaccessible, or there is a problem during setup, the server will wait for it while holding cl_mgc_mutex. Other targets on the same server will be unable to mount, and block on cl_mgc_mutex, possibly dumping a stack trace like: INFO: task mount.lustre:93138 blocked for more than 90 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" to disable this task:mount.lustre state:D stack:0 pid:93138 ppid:93135 Call Trace: __schedule+0x2d1/0x870 schedule+0x55/0xf0 schedule_preempt_disabled+0xa/0x10 __mutex_lock.isra.11+0x349/0x420 mgc_fs_setup.isra.12+0x65/0x7a0 [mgc] mgc_set_info_async+0x99f/0xb30 [mgc] server_start_targets+0x452/0x2c30 [obdclass] server_fill_super+0x94e/0x10a0 [obdclass] lustre_fill_super+0x388/0x3d0 [lustre] mount_nodev+0x49/0xa0 legacy_get_tree+0x27/0x50 vfs_get_tree+0x25/0xc0 do_mount+0x2e9/0x950 ksys_mount+0xbe/0xe0 Use mutex_lock_interruptible() in mgc_fs_setup() and server_start_targets() so the server's mount thread can be interrupted and killed. This does not fix the reason for the server to be blocked, but it does allow it to be killed. Rename mgc_fs_cleanup() to mgc_fs_clear() so it is not confused with actually cleaning up the MGC. Avoid printing an error if the sptlrpc log is not available. This is common for most filesystems, and is not an error. 
Signed-off-by: Andreas Dilger Change-Id: I0bafa5dae0eadecb112efaf61f8bcf7ea8c4c296 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/59396 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Mikhail Pershin Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- lustre/mgc/mgc_request_server.c | 30 ++++++++++++++++++++++-------- lustre/target/tgt_mount.c | 7 +++++-- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/lustre/mgc/mgc_request_server.c b/lustre/mgc/mgc_request_server.c index 456b54a..ef6f592 100644 --- a/lustre/mgc/mgc_request_server.c +++ b/lustre/mgc/mgc_request_server.c @@ -65,6 +65,9 @@ static int mgc_local_llog_fini(const struct lu_env *env, RETURN(0); } +/* Configure the MGC to fetch config logs from the MGS to a local + * filesystem device during mount. + */ static int mgc_fs_setup(const struct lu_env *env, struct obd_device *obd, struct super_block *sb) { @@ -78,8 +81,14 @@ static int mgc_fs_setup(const struct lu_env *env, struct obd_device *obd, LASSERT(lsi); LASSERT(lsi->lsi_dt_dev); - /* The mgc fs exclusion mutex. Only one fs can be setup at a time. */ - mutex_lock(&cli->cl_mgc_mutex); + /* MGC can currently only fetch config logs for one fs at a time. + * Allow this mount to be killed if it is hung for some reason. + */ + rc = mutex_lock_interruptible(&cli->cl_mgc_mutex); + CDEBUG(D_MGC, "%s: cl_mgc_mutex %s for %s: rc = %d\n", obd->obd_name, + lsi->lsi_osd_obdname, rc ? "interrupted" : "locked", rc); + if (rc) + RETURN(rc); /* Setup the configs dir */ fid.f_seq = FID_SEQ_LOCAL_NAME; @@ -114,11 +123,11 @@ static int mgc_fs_setup(const struct lu_env *env, struct obd_device *obd, GOTO(out_llog, rc); /* We take an obd ref to insure that we can't get to mgc_cleanup - * without calling mgc_fs_cleanup first. + * without calling mgc_fs_clear() first. 
*/ class_incref(obd, "mgc_fs", obd); - /* We keep the cl_mgc_sem until mgc_fs_cleanup */ + /* We hold the cl_mgc_mutex until mgc_fs_clear() is called */ EXIT; out_llog: if (rc) { @@ -130,12 +139,15 @@ out_los: local_oid_storage_fini(env, cli->cl_mgc_los); out_mutex: cli->cl_mgc_los = NULL; + CDEBUG(D_MGC, "%s: cl_mgc_mutex unlock for %s: rc = %d\n", + obd->obd_name, lsi->lsi_osd_obdname, rc); mutex_unlock(&cli->cl_mgc_mutex); } return rc; } -static int mgc_fs_cleanup(const struct lu_env *env, struct obd_device *obd) +/* Unconfigure the MGC from fetching config logs to the local device */ +static int mgc_fs_clear(const struct lu_env *env, struct obd_device *obd) { struct client_obd *cli = &obd->u.cli; @@ -151,6 +163,7 @@ static int mgc_fs_cleanup(const struct lu_env *env, struct obd_device *obd) cli->cl_mgc_los = NULL; class_decref(obd, "mgc_fs", obd); + CDEBUG(D_MGC, "%s: cl_mgc_mutex unlock\n", obd->obd_name); mutex_unlock(&cli->cl_mgc_mutex); RETURN(0); @@ -266,7 +279,7 @@ int mgc_set_info_async_server(const struct lu_env *env, if (KEY_IS(KEY_CLEAR_FS)) { if (vallen != 0) RETURN(-EINVAL); - rc = mgc_fs_cleanup(env, exp->exp_obd); + rc = mgc_fs_clear(env, exp->exp_obd); RETURN(rc); } @@ -546,8 +559,9 @@ static int mgc_llog_local_copy(const struct lu_env *env, /* build new local llog */ rc = llog_backup(env, obd, rctxt, lctxt, logname, logname); if (rc == -ENOENT) { - CWARN("%s: no remote llog for %s, check MGS config\n", - obd->obd_name, logname); + CDEBUG_LIMIT(strstr(logname, "sptlrpc") ? 
D_MGC : D_WARNING, + "%s: no remote llog for %s, check MGS config\n", + obd->obd_name, logname); llog_erase(env, lctxt, NULL, logname); } else if (rc < 0) { /* error during backup, get local one back from the copy */ diff --git a/lustre/target/tgt_mount.c b/lustre/target/tgt_mount.c index 8861b6b..b1d8338 100644 --- a/lustre/target/tgt_mount.c +++ b/lustre/target/tgt_mount.c @@ -1456,8 +1456,11 @@ static int server_start_targets(struct super_block *sb) name_service = LUSTRE_OSS_NAME; } - /* make sure MDS/OSS is started */ - mutex_lock(&server_start_lock); + /* make sure MDS/OSS is started, but allow mount to be killed */ + rc = mutex_lock_interruptible(&server_start_lock); + if (rc) + RETURN(rc); + obd = class_name2obd(obd_name_service); if (!obd) { rc = lustre_start_simple(obd_name_service, name_service, -- 1.8.3.1