From 7bd1ef795506aa9e71a845a78456d418f7022707 Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Tue, 27 Aug 2024 14:02:29 +0300 Subject: [PATCH] LU-18172: lfsck: umount has to wait lfsck_stop When called from umount, lfsck_stop() should wait if lfsck is already stopping. Otherwise, the continuation of mdt_fini() or ofd_fini() leads to various failures. Seen so far: 1. osd_scrub_cleanup LASSERT(dev->od_otable_it == NULL); because lfsck_master_engine() has not yet reached oit_iops->fini(env, oit_di) osd_otable_it_fini dev->od_otable_it = NULL; 2. lfsck_find_mdt_idx_by_fid rc = fld_server_lookup(env, ss->ss_server_fld... BUG: unable to handle kernel NULL pointer dereference because ss->ss_server_fld is NULL, as set in mdt_fini mdt_fld_fini ss->ss_server_fld = NULL; A test for umount while lfsck is stopping is added. Test-Parameters: trivial testlist=sanity-lfsck env=ONLY=44 HPE-bug-id: LUS-12421 Signed-off-by: Vladimir Saveliev Change-Id: I527c071d316ba3405f2199125fa7d018c98c403b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56165 Tested-by: Maloo Tested-by: jenkins Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/lfsck/lfsck_lib.c | 4 ++-- lustre/tests/sanity-lfsck.sh | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 2c84a68..8f8c554 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -3386,8 +3386,8 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, /* no error if LFSCK stopped already, or not started */ GOTO(unlock, rc = 0); - if (thread_is_stopping(thread)) - /* Someone is stopping LFSCK. */ + if (thread_is_stopping(thread) && stop->ls_status != LS_PAUSED) + /* Someone is stopping LFSCK and it is not umount. 
*/ GOTO(unlock, rc = -EINPROGRESS); if (stop) { diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 0a77b0f..18eba49 100755 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -6306,6 +6306,28 @@ test_42() { } run_test 42 "LFSCK can repair inconsistent MDT-object/OST-object encryption flags" +test_44() { + lfsck_prep 3 3 + + #define OBD_FAIL_LFSCK_DELAY1 0x1600 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600 + $START_NAMESPACE -r || error "(31) Fail to start LFSCK for namespace!" + $STOP_LFSCK & + sleep 1 + $STOP_LFSCK && error "(32) LFSCK_STOP had to fail" + stop $SINGLEMDS + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0 + start_facet $SINGLEMDS + wait + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 32 || { + $SHOW_NAMESPACE + error "(33) unexpected status" + } +} +run_test 44 "umount while lfsck is stopping" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} -- 1.8.3.1