Whamcloud - gitweb
LU-18172: lfsck: umount has to wait lfsck_stop 65/56165/3
author Vladimir Saveliev <vladimir.saveliev@hpe.com>
Tue, 27 Aug 2024 11:02:29 +0000 (14:02 +0300)
committer Oleg Drokin <green@whamcloud.com>
Thu, 2 Jan 2025 20:47:54 +0000 (20:47 +0000)
When called from umount, lfsck_stop() has to wait if LFSCK is already
being stopped. Otherwise mdt_fini() or ofd_fini() continues before the
LFSCK engine has finished, which leads to various failures. Seen so far:
1.
 osd_scrub_cleanup
   LASSERT(dev->od_otable_it == NULL);
 because lfsck_master_engine() has not yet reached
   oit_iops->fini(env, oit_di)
     osd_otable_it_fini
       dev->od_otable_it = NULL;
2.
 lfsck_find_mdt_idx_by_fid
   rc = fld_server_lookup(env, ss->ss_server_fld...
     BUG: unable to handle kernel NULL pointer dereference
 because ss->ss_server_fld has already been set to NULL in
 mdt_fini
   mdt_fld_fini
     ss->ss_server_fld = NULL;

A test for umount while LFSCK is stopping is added.

Test-Parameters: trivial testlist=sanity-lfsck env=ONLY=44
HPE-bug-id: LUS-12421
Signed-off-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Change-Id: I527c071d316ba3405f2199125fa7d018c98c403b
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56165
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/lfsck/lfsck_lib.c
lustre/tests/sanity-lfsck.sh

index 2c84a68..8f8c554 100644 (file)
@@ -3386,8 +3386,8 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
                /* no error if LFSCK stopped already, or not started */
                GOTO(unlock, rc = 0);
 
-       if (thread_is_stopping(thread))
-               /* Someone is stopping LFSCK. */
+       if (thread_is_stopping(thread) && stop->ls_status != LS_PAUSED)
+               /* Someone is stopping LFSCK and it is not umount. */
                GOTO(unlock, rc = -EINPROGRESS);
 
        if (stop) {
index 0a77b0f..18eba49 100755 (executable)
@@ -6306,6 +6306,28 @@ test_42() {
 }
 run_test 42 "LFSCK can repair inconsistent MDT-object/OST-object encryption flags"
 
+test_44() {
+       lfsck_prep 3 3
+
+       #define OBD_FAIL_LFSCK_DELAY1           0x1600
+       do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
+       $START_NAMESPACE -r || error "(31) Fail to start LFSCK for namespace!"
+       $STOP_LFSCK &
+       sleep 1
+       $STOP_LFSCK && error "(32) LFSCK_STOP had to fail"
+       stop $SINGLEMDS
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
+       start_facet $SINGLEMDS
+       wait
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "(33) unexpected status"
+       }
+}
+run_test 44 "umount while lfsck is stopping"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}