From f22a0ab6c37db2d983451ec01e869ed8d3226cb2 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 22 Nov 2017 10:38:43 +0800 Subject: [PATCH] LU-10134 lfsck: not add requests if engine out of work There is a race condition between the LFSCK assistant engine and the LFSCK request generators: before the LFSCK assistant engine exits, it marks itself as 'stopping', then cleans up the in-queue requests, and then marks itself as 'stopped'. It is expected that the 'stopping' status will prevent generators from adding more LFSCK requests. But the current implementation only checks whether the engine is 'stopped'. So if the LFSCK engine thread exits before the whole system has been scanned, which may happen because of some failure or on demand, more LFSCK requests may be added to the queue after the cleanup. The patch fixes the wrong logic by checking whether the engine is 'running', and stops adding more LFSCK requests if it is not 'running'. Signed-off-by: Fan Yong Change-Id: Ic2b5ca3f5e80b5be5a5c60aa24f0b54682b717d9 Reviewed-on: https://review.whamcloud.com/30165 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/lfsck/lfsck_engine.c | 3 --- lustre/lfsck/lfsck_namespace.c | 14 +++++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index ab0e6ff..a202a92 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -1753,9 +1753,6 @@ cleanup: } spin_unlock(&lad->lad_lock); - LASSERTF(lad->lad_prefetched == 0, "unmatched prefeteched objs %d\n", - lad->lad_prefetched); - memset(lr, 0, sizeof(*lr)); if (rc > 0) { lr->lr_event = LE_PHASE2_DONE; diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index be898f7..26c3172 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -3991,7 +3991,9 @@ static void lfsck_namespace_close_dir(const struct lu_env *env, lnr->lnr_size = size; spin_lock(&lad->lad_lock); - if
(lad->lad_assistant_status < 0) { + if (lad->lad_assistant_status < 0 || + unlikely(!thread_is_running(&lfsck->li_thread) || + !thread_is_running(&lad->lad_thread))) { spin_unlock(&lad->lad_lock); lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar); ns->ln_striped_dirs_skipped++; @@ -4285,11 +4287,11 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, l_wait_event(mthread->t_ctl_waitq, lad->lad_prefetched < bk->lb_async_windows || !thread_is_running(mthread) || - thread_is_stopped(athread), + !thread_is_running(athread), &lwi); - if (unlikely(!thread_is_running(mthread)) || - thread_is_stopped(athread)) + if (unlikely(!thread_is_running(mthread) || + !thread_is_running(athread))) return 0; if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir))) @@ -4304,7 +4306,9 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env, } spin_lock(&lad->lad_lock); - if (lad->lad_assistant_status < 0) { + if (lad->lad_assistant_status < 0 || + unlikely(!thread_is_running(mthread) || + !thread_is_running(athread))) { spin_unlock(&lad->lad_lock); lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar); return lad->lad_assistant_status; -- 1.8.3.1