Whamcloud - gitweb
LU-16655 scrub: upgrade scrub_file from 2.12 format 55/50455/6
author Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Tue, 28 Mar 2023 16:00:09 +0000 (19:00 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 5 Apr 2023 04:36:49 +0000 (04:36 +0000)
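scrub_file->sf_oi_count has different offsets in Lustre-2.10,
Lustre-2.12, and Lustre-2.15 due to unintended format changes.
Lustre-2.15 reads sf_oi_count from the offset of sf_success_count,
so it may initialize an incorrect number of OI files and then be
unable to do FID lookups for existing filesystem objects.

To fix this, new and reset scrub files are written with
SCRUB_MAGIC_V2, loading a file that still carries SCRUB_MAGIC_V1
resets sf_oi_count to 0, and both OSDs now also treat an OI count
above OSD_OI_FID_NR_MAX as invalid.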

Fixes: a114f6b8c5 ("LU-13344 servers: change request timeouts to s32")
Fixes: 4c2f028a95 ("LU-9019 osd-ldiskfs: migrate to 64 bit time")
Signed-off-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Change-Id: Id7c8bd555229405d604456c48447f01fd121aca9
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50455
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/lustre_scrub.h
lustre/obdclass/scrub.c
lustre/osd-ldiskfs/osd_oi.c
lustre/osd-zfs/osd_scrub.c

index f7fb5bd..2861256 100644 (file)
@@ -44,6 +44,7 @@
 #define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
 
 #define SCRUB_MAGIC_V1                 0x4C5FD252
+#define SCRUB_MAGIC_V2                 0x4C5FE253
 #define SCRUB_CHECKPOINT_INTERVAL      60
 #define SCRUB_WINDOW_SIZE              1024
 
index 0a7b953..6b5fac9 100644 (file)
@@ -119,7 +119,7 @@ void scrub_file_init(struct lustre_scrub *scrub, uuid_t uuid)
 
        memset(sf, 0, sizeof(*sf));
        uuid_copy(&sf->sf_uuid, &uuid);
-       sf->sf_magic = SCRUB_MAGIC_V1;
+       sf->sf_magic = SCRUB_MAGIC_V2;
        sf->sf_status = SS_INIT;
 }
 EXPORT_SYMBOL(scrub_file_init);
@@ -133,6 +133,7 @@ void scrub_file_reset(struct lustre_scrub *scrub, uuid_t uuid, u64 flags)
               scrub->os_name, sf->sf_flags, flags);
 
        uuid_copy(&sf->sf_uuid, &uuid);
+       sf->sf_magic = SCRUB_MAGIC_V2;
        sf->sf_status = SS_INIT;
        sf->sf_flags |= flags;
        sf->sf_flags &= ~SF_AUTO;
@@ -183,9 +184,13 @@ int scrub_file_load(const struct lu_env *env, struct lustre_scrub *scrub)
        }
 
        scrub_file_to_cpu(sf, &scrub->os_file_disk);
-       if (sf->sf_magic != SCRUB_MAGIC_V1) {
-               CDEBUG(D_LFSCK, "%s: invalid scrub magic 0x%x != 0x%x\n",
-                      scrub->os_name, sf->sf_magic, SCRUB_MAGIC_V1);
+       if (sf->sf_magic == SCRUB_MAGIC_V1) {
+               CWARN("%s: reset scrub OI count for format change (LU-16655)\n",
+                     scrub->os_name);
+               sf->sf_oi_count = 0;
+       } else if (sf->sf_magic != SCRUB_MAGIC_V2) {
+               CDEBUG(D_LFSCK, "%s: invalid scrub magic %#x, should be %#x\n",
+                      scrub->os_name, sf->sf_magic, SCRUB_MAGIC_V2);
                return -EFAULT;
        }
 
index 30fcae2..fe58e63 100644 (file)
@@ -397,8 +397,9 @@ int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd,
        int rc;
        ENTRY;
 
-       if (unlikely(sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0) {
-               LCONSOLE_WARN("%s: Invalid OI count in scrub file %d\n",
+       if (unlikely((sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0 ||
+                    sf->sf_oi_count > OSD_OI_FID_NR_MAX)) {
+               LCONSOLE_WARN("%s: invalid OI count %u in scrub file, reset it\n",
                              osd_dev2name(osd), sf->sf_oi_count);
                sf->sf_oi_count = 0;
        }
index 979e377..d4dbd61 100644 (file)
@@ -1380,8 +1380,9 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
                        dirty = true;
                }
 
-               if ((sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0) {
-                       LCONSOLE_WARN("%s: invalid oi count %d, set it to %d\n",
+               if (unlikely((sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0 ||
+                            sf->sf_oi_count > OSD_OI_FID_NR_MAX)) {
+                       LCONSOLE_WARN("%s: invalid OI count %u, reset to %u\n",
                                      osd_name(dev), sf->sf_oi_count,
                                      osd_oi_count);
                        sf->sf_oi_count = osd_oi_count;