From: Patrick Farrell Date: Mon, 26 May 2025 00:30:24 +0000 (-0400) Subject: LU-19008 hsm: add locking for coordinator thread stop X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=678e2bb63174cf9b4db0d47e1de671ad44b36643;p=fs%2Flustre-release.git LU-19008 hsm: add locking for coordinator thread stop There is no locking around thread stop, which can race between mdt_coordinator() and mdt_hsm_cdt_stop() and with use-after-free during unmount. Add locking to avoid this. Fixes: 4512347d6c ("LU-16356 hsm: add running ref to the coordinator") Signed-off-by: Patrick Farrell Change-Id: I996a79fcbca3b1c6f6a0f5ee5d9f052f31eda61f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/59425 Reviewed-by: Andreas Dilger Reviewed-by: Etienne AUJAMES Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index ec2411c..52e12a7 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -822,7 +822,11 @@ fail_to_start: " no error\n", mdt_obd_name(mdt), current->pid); - set_cdt_state(cdt, CDT_STOPPED); + /* Clear cdt_task under lock to avoid race with mdt_hsm_cdt_stop() */ + mutex_lock(&cdt->cdt_state_lock); + cdt->cdt_task = NULL; + set_cdt_state_locked(cdt, CDT_STOPPED); + mutex_unlock(&cdt->cdt_state_lock); /* Inform mdt_hsm_cdt_stop(). */ wake_up(&cdt->cdt_waitq); @@ -1218,9 +1222,11 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) CERROR("%s: error starting coordinator thread: %d\n", mdt_obd_name(mdt), rc); } else { + /* Set task under lock to avoid race with mdt_hsm_cdt_stop() */ + mutex_lock(&cdt->cdt_state_lock); cdt->cdt_task = task; - wait_event(cdt->cdt_waitq, - cdt->cdt_state != CDT_INIT); + mutex_unlock(&cdt->cdt_state_lock); + wait_event(cdt->cdt_waitq, cdt->cdt_state != CDT_INIT); CDEBUG(D_HSM, "%s: coordinator thread started\n", mdt_obd_name(mdt)); rc = 0; @@ -1236,6 +1242,7 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) int mdt_hsm_cdt_stop(struct mdt_device *mdt) { struct coordinator *cdt = &mdt->mdt_coordinator; + struct task_struct *task; int rc; ENTRY; @@ -1245,13 +1252,22 @@ int mdt_hsm_cdt_stop(struct mdt_device *mdt) if (rc) RETURN(rc); - kthread_stop(cdt->cdt_task); + /* Get task pointer under lock to avoid race with thread exit */ + mutex_lock(&cdt->cdt_state_lock); + task = cdt->cdt_task; + if (task) + cdt->cdt_task = NULL; + mutex_unlock(&cdt->cdt_state_lock); + + /* Only call kthread_stop if we have a valid task */ + if (task) + kthread_stop(task); + rc = wait_event_interruptible(cdt->cdt_waitq, cdt->cdt_state == CDT_STOPPED); if (rc) RETURN(-EINTR); - cdt->cdt_task = NULL; RETURN(0); }