From e26d7cc3992252e5fce5a51aee716f933b04c13a Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Tue, 9 Feb 2021 01:26:46 +0300 Subject: [PATCH] LU-14399 hsm: process hsm_actions in coordinator Wait for mdd setup in a separate thread so that mount is not blocked. The patch adds conf-sanity_132 to verify the fix. Change-Id: Ifd0e8969d7ed4f8944ab61ab0e0ebe2655bad003 Fixes: a558006b ("LU-13920 hsm: process hsm_actions only after mdd setup") HPE-bug-id: LUS-9750 Signed-off-by: Sergey Cheremencev Reviewed-on: https://review.whamcloud.com/41445 Tested-by: jenkins Reviewed-by: Alexander Boyko Tested-by: Maloo Reviewed-by: Artem Blagodarenko Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_coordinator.c | 43 ++++++++++++++++++++++++++----------------- lustre/tests/conf-sanity.sh | 22 ++++++++++++++++++++++ 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index 87296cb..767af80 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -549,7 +549,31 @@ static int set_cdt_state(struct coordinator *cdt, enum cdt_states new_state) return rc; } +static int mdt_hsm_pending_restore(struct mdt_thread_info *mti); +static void cdt_start_pending_restore(struct mdt_device *mdt, + struct coordinator *cdt) +{ + struct mdt_thread_info *cdt_mti; + unsigned int i = 0; + int rc; + + /* wait until MDD initialize hsm actions llog */ + while (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state) && i < obd_timeout) { + schedule_timeout_interruptible(cfs_time_seconds(1)); + i++; + } + if (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state)) + CWARN("%s: trying to init HSM before MDD\n", mdt_obd_name(mdt)); + + /* set up list of started restore requests */ + cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key); + rc = mdt_hsm_pending_restore(cdt_mti); + if (rc) + CERROR("%s: cannot take the layout locks needed for registered restore: %d\n", + mdt_obd_name(mdt), rc); + +} /** * coordinator thread @@ -579,6 +603,7 @@ 
static int mdt_coordinator(void *data) /* Inform mdt_hsm_cdt_start(). */ wake_up(&cdt->cdt_waitq); + cdt_start_pending_restore(mdt, cdt); while (1) { int i; @@ -1103,7 +1128,6 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) { struct coordinator *cdt = &mdt->mdt_coordinator; struct mdt_thread_info *cdt_mti; - unsigned int i = 0; int rc; void *ptr; struct task_struct *task; @@ -1135,28 +1159,13 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE); cdt->cdt_other_request_mask = (1UL << HSMA_RESTORE); - /* wait until MDD initialize hsm actions llog */ - while (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state) && i < obd_timeout) { - schedule_timeout_interruptible(cfs_time_seconds(1)); - i++; - } - if (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state)) - CWARN("%s: trying to init HSM before MDD\n", mdt_obd_name(mdt)); - /* to avoid deadlock when start is made through sysfs * sysfs entries are created by the coordinator thread */ - /* set up list of started restore requests */ - cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key); - rc = mdt_hsm_pending_restore(cdt_mti); - if (rc) - CERROR("%s: cannot take the layout locks needed" - " for registered restore: %d\n", - mdt_obd_name(mdt), rc); - if (mdt->mdt_bottom->dd_rdonly) RETURN(0); + cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key); task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr"); if (IS_ERR(task)) { rc = PTR_ERR(task); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index e3a56c7..7a9a105 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -9397,6 +9397,28 @@ test_131() { } run_test 131 "MDT backup restore with project ID" +test_132() { + local err_cnt + local err_cnt2 + + reformat + combined_mgs_mds || start_mgs || error "unable to start MGS" + start_mdt 1 || error "unable to start mdt1" + + err_cnt=$(do_facet mds1 dmesg | grep -c "cannot take the layout locks") + stop_mdt 1 || 
error "stop mdt1 failed" + + [ "$mds1_FSTYPE" == zfs ] && import_zpool mds1 + do_facet mds1 $TUNEFS --param mdt.hsm_control=enabled $(mdsdevname 1) || + error "tunefs failed" + start_mdt 1 || error "cannot start mdt1" + + err_cnt2=$(do_facet mds1 dmesg | grep -c "cannot take the layout locks") + [ $err_cnt -eq $err_cnt2 ] || error "Can not take the layout lock" + stop_mdt 1 || error "stop mdt1 failed" +} +run_test 132 "hsm_actions processed after failover" + if ! combined_mgs_mds ; then stop mgs fi -- 1.8.3.1