Whamcloud - gitweb
LU-16837 llite: handle unknown layout component 60/51060/14
authorBobi Jam <bobijam@whamcloud.com>
Fri, 19 May 2023 09:40:31 +0000 (17:40 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 23 Sep 2023 06:01:17 +0000 (06:01 +0000)
If a Lustre client encounters an unknown layout component magic or
pattern in a mirrored file, this patch makes the client mark that
mirror as invalid and skip it.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: Ie5f44212ab96bdc706cc5a9e11f330234fc01069
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51060
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Vitaliy Kuznetsov <vkuznetsov@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/llite/file.c
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_io.c
lustre/lov/lov_object.c
lustre/tests/sanity-flr.sh

index ebefb06..c444e80 100644 (file)
@@ -628,6 +628,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_READPAGE_PAUSE              0x1422
 #define OBD_FAIL_LLITE_PANIC_ON_ESTALE             0x1423
 #define OBD_FAIL_LLITE_READPAGE_PAUSE2             0x1424
+#define OBD_FAIL_LOV_MIRROR_INIT                   0x1425
+#define OBD_FAIL_LOV_COMP_MAGIC                            0x1426
+#define OBD_FAIL_LOV_COMP_PATTERN                  0x1427
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index dde7494..887ba49 100644 (file)
@@ -1143,6 +1143,7 @@ enum obdo_flags {
 #define LOV_MAGIC_MAGIC 0x0BD0
 #define LOV_MAGIC_MASK  0xFFFF
 
+#define LOV_MAGIC_BAD          (0x0BAD0000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC_V1           (0x0BD10000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC_JOIN_V1      (0x0BD20000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC_V3           (0x0BD30000 | LOV_MAGIC_MAGIC)
index 30ded1f..c782aca 100644 (file)
@@ -721,6 +721,10 @@ struct fsxattr {
 #define LOV_PATTERN_FOREIGN            0x400
 #define LOV_PATTERN_COMPRESS           0x800
 
+/* combine exclusive patterns as a bad pattern */
+#define LOV_PATTERN_BAD                (LOV_PATTERN_RAID1 | LOV_PATTERN_MDT | \
+                                LOV_PATTERN_FOREIGN)
+
 #define LOV_PATTERN_F_MASK     0xffff0000
 #define LOV_PATTERN_F_HOLE     0x40000000 /* there is hole in LOV EA */
 #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
index 93048ac..c4c7b56 100644 (file)
@@ -2599,6 +2599,14 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
                        CDEBUG(D_INFO,
                               "comp[%d]: stripe_count=%u, stripe_size=%u\n",
                               i, v1->lmm_stripe_count, v1->lmm_stripe_size);
+
+                       if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_MAGIC) &&
+                                    (cfs_fail_val == i + 1)))
+                               v1->lmm_magic = LOV_MAGIC_BAD;
+
+                       if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_PATTERN) &&
+                                    (cfs_fail_val == i + 1)))
+                               v1->lmm_pattern = LOV_PATTERN_BAD;
                }
 
                if (v1 == NULL)
index c024d6d..acf6c55 100644 (file)
@@ -99,9 +99,21 @@ static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
        }
 
        if (!lov_pattern_supported(lov_pattern(pattern))) {
-               rc = -EINVAL;
-               CERROR("lov: unrecognized striping pattern: rc = %d\n", rc);
-               lov_dump_lmm_common(D_WARNING, lmm);
+               static int nr;
+               static ktime_t time2_clear_nr;
+               ktime_t now = ktime_get();
+
+               /* limit this message 20 times within 24h */
+               if (ktime_after(now, time2_clear_nr)) {
+                       nr = 0;
+                       time2_clear_nr = ktime_add_ms(now,
+                                                     24 * 3600 * MSEC_PER_SEC);
+               }
+               if (nr++ < 20) {
+                       CWARN("lov: unrecognized striping pattern: rc = %d\n",
+                             rc);
+                       lov_dump_lmm_common(D_WARNING, lmm);
+               }
                goto out;
        }
 
@@ -138,7 +150,9 @@ static void lsme_free(struct lov_stripe_md_entry *lsme)
 
        stripe_count = lsme->lsme_stripe_count;
        if (!lsme_inited(lsme) ||
-           lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)
+           lsme->lsme_pattern & LOV_PATTERN_F_RELEASED ||
+           !lov_supported_comp_magic(lsme->lsme_magic) ||
+           !lov_pattern_supported(lov_pattern(lsme->lsme_pattern)))
                stripe_count = 0;
        for (i = 0; i < stripe_count; i++)
                OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab);
@@ -191,7 +205,8 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
                RETURN(ERR_PTR(-EINVAL));
 
        pattern = le32_to_cpu(lmm->lmm_pattern);
-       if (pattern & LOV_PATTERN_F_RELEASED || !inited)
+       if (pattern & LOV_PATTERN_F_RELEASED || !inited ||
+           !lov_pattern_supported(lov_pattern(pattern)))
                stripe_count = 0;
        else
                stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
@@ -451,9 +466,16 @@ lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
        unsigned int magic;
 
        magic = le32_to_cpu(lmm->lmm_magic);
-       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3 &&
-           magic != LOV_MAGIC_FOREIGN)
-               RETURN(ERR_PTR(-EINVAL));
+       if (!lov_supported_comp_magic(magic)) {
+               struct lov_stripe_md_entry *lsme;
+
+               /*
+                * Unknown component magic: allocate a stripe-less lsme holder
+                * (header only, no lsme_oinfo[] slots) that records just the
+                * magic and pattern of the unrecognized lmm.
+                */
+               OBD_ALLOC_LARGE(lsme, offsetof(typeof(*lsme), lsme_oinfo[0]));
+               /* allocation may fail; do not dereference a NULL holder */
+               if (!lsme)
+                       return ERR_PTR(-ENOMEM);
+
+               lsme->lsme_magic = magic;
+               lsme->lsme_pattern = le32_to_cpu(lmm->lmm_pattern);
+
+               return lsme;
+       }
 
        if (magic != LOV_MAGIC_FOREIGN &&
            le16_to_cpu(lmm->lmm_stripe_count) == 0 &&
@@ -517,6 +539,16 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
                blob_size = le32_to_cpu(lcme->lcme_size);
                blob = (char *)lcm + blob_offset;
 
+               /*
+                * Fault injection: corrupt the magic/pattern of the component
+                * selected by cfs_fail_val (1-based index) directly in the
+                * buffer.  The blob is read back with le32_to_cpu() by the
+                * unpack code, so store the bad values as little-endian too.
+                */
+               if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_MAGIC) &&
+                            (cfs_fail_val == i + 1)))
+                       ((struct lov_mds_md *)blob)->lmm_magic =
+                                               cpu_to_le32(LOV_MAGIC_BAD);
+
+               if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_PATTERN) &&
+                            (cfs_fail_val == i + 1))) {
+                       ((struct lov_mds_md *)blob)->lmm_pattern =
+                                               cpu_to_le32(LOV_PATTERN_BAD);
+               }
+
+
                lsme = lsme_unpack_comp(lov, blob, blob_size,
                                        le32_to_cpu(lcme->lcme_flags) &
                                        LCME_FL_INIT,
@@ -525,6 +557,10 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
                if (IS_ERR(lsme))
                        GOTO(out_lsm, rc = PTR_ERR(lsme));
 
+               /**
+                * presume that a component with an unrecognized magic still
+                * has valid lsme_id/lsme_flags/lsme_extent
+                */
                if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
                        lsm->lsm_is_released = false;
 
@@ -654,7 +690,9 @@ void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
                       lse->lsme_stripe_count, lse->lsme_stripe_size,
                       lse->lsme_pool_name);
                if (!lsme_inited(lse) ||
-                   lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
+                   lse->lsme_pattern & LOV_PATTERN_F_RELEASED ||
+                   !lov_supported_comp_magic(lse->lsme_magic) ||
+                   !lov_pattern_supported(lov_pattern(lse->lsme_pattern)))
                        continue;
                for (j = 0; j < lse->lsme_stripe_count; j++) {
                        CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
index 9f12353..4a45578 100644 (file)
@@ -184,6 +184,12 @@ struct lsm_operations {
 const struct lsm_operations *lsm_op_find(int magic);
 void lsm_free(struct lov_stripe_md *lsm);
 
+/* Return true only when @magic is LOV_MAGIC_V1, _V3 or _FOREIGN. */
+static inline bool lov_supported_comp_magic(unsigned int magic)
+{
+       switch (magic) {
+       case LOV_MAGIC_V1:
+       case LOV_MAGIC_V3:
+       case LOV_MAGIC_FOREIGN:
+               return true;
+       default:
+               return false;
+       }
+}
+
 /* lov_do_div64(a, b) returns a % b, and a = a / b.
  * The 32-bit code is LOV-specific due to knowing about stripe limits in
  * order to reduce the divisor to a 32-bit number.  If the divisor is
index c6cf1a2..8dc3b8e 100644 (file)
@@ -215,6 +215,7 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio,
 static int lov_io_mirror_write_intent(struct lov_io *lio,
        struct lov_object *obj, struct cl_io *io)
 {
+       struct lu_object *lobj = lov2lu(obj);
        struct lov_layout_composite *comp = &obj->u.composite;
        struct lu_extent *ext = &io->ci_write_intent;
        struct lov_mirror_entry *lre;
@@ -254,7 +255,19 @@ static int lov_io_mirror_write_intent(struct lov_io *lio,
         * multiple components covering the writing component
         */
        primary = &comp->lo_mirrors[comp->lo_preferred_mirror];
-       LASSERT(!primary->lre_stale);
+       if (primary->lre_stale || !primary->lre_valid) {
+               /**
+                * new server could pick a primary mirror which old client
+                * does not recognize, and old client would mark it as
+                * invalid.
+                */
+               CERROR(DFID ": cannot find known valid non-stale mirror, "
+                      "could be new server picked a mirror which this client "
+                      "does not recognize.\n",
+                      PFID(lu_object_fid(lobj)));
+               RETURN(-EIO);
+       }
+
        lov_foreach_mirror_layout_entry(obj, lle, primary) {
                LASSERT(lle->lle_valid);
                if (!lu_extent_is_overlapped(ext, lle->lle_extent))
@@ -267,7 +280,7 @@ static int lov_io_mirror_write_intent(struct lov_io *lio,
        if (count == 0) {
                CERROR(DFID ": cannot find any valid components covering "
                       "file extent "DEXT", mirror: %d\n",
-                      PFID(lu_object_fid(lov2lu(obj))), PEXT(ext),
+                      PFID(lu_object_fid(lobj)), PEXT(ext),
                       primary->lre_mirror_id);
                RETURN(-EIO);
        }
@@ -290,7 +303,7 @@ static int lov_io_mirror_write_intent(struct lov_io *lio,
 
        CDEBUG(D_VFSTRACE, DFID "there are %zd components to be staled to "
               "modify file extent "DEXT", iot: %d\n",
-              PFID(lu_object_fid(lov2lu(obj))), count, PEXT(ext), io->ci_type);
+              PFID(lu_object_fid(lobj)), count, PEXT(ext), io->ci_type);
 
        io->ci_need_write_intent = count > 0;
 
index 1aeb6a8..0cbbd1b 100644 (file)
@@ -675,16 +675,40 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                case LOV_PATTERN_FOREIGN:
                        lle->lle_comp_ops = NULL;
                        break;
-               default:
-                       CERROR("%s: unknown composite layout entry type %i\n",
-                              lov2obd(dev->ld_lov)->obd_name,
-                              lsm->lsm_entries[i]->lsme_pattern);
-                       dump_lsm(D_ERROR, lsm);
-                       RETURN(-EIO);
+               default: {
+                       static int nr;
+                       static ktime_t time2_clear_nr;
+                       ktime_t now = ktime_get();
+
+                       lle->lle_comp_ops = NULL;
+
+                       /* limit this message 20 times within 24h */
+                       if (ktime_after(now, time2_clear_nr)) {
+                               nr = 0;
+                               time2_clear_nr = ktime_add_ms(now,
+                                                     24 * 3600 * MSEC_PER_SEC);
+                       }
+                       if (nr++ < 20) {
+                               CWARN("%s: unknown layout entry %d pattern %#x"
+                                     " could be an unrecognizable component"
+                                     " set by other clients, skip to"
+                                     " initialize the next component.\n",
+                                       lov2obd(dev->ld_lov)->obd_name,
+                                       i,
+                                       lsm->lsm_entries[i]->lsme_pattern);
+                               dump_lsm(D_ERROR, lsm);
+                       }
+               }
                }
 
                lle->lle_extent = &lle->lle_lsme->lsme_extent;
-               lle->lle_valid = !(lle->lle_lsme->lsme_flags & LCME_FL_STALE);
+               if (!lov_pattern_supported(
+                               lov_pattern(lle->lle_lsme->lsme_pattern)) ||
+                   !lov_supported_comp_magic(lle->lle_lsme->lsme_magic))
+                       lle->lle_valid = 0;
+               else
+                       lle->lle_valid =
+                               !(lle->lle_lsme->lsme_flags & LCME_FL_STALE);
 
                if (flr_state != LCM_FL_NONE)
                        mirror_id = mirror_id_of(lle->lle_lsme->lsme_id);
@@ -744,6 +768,11 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                if (lsme_is_foreign(lle->lle_lsme))
                        continue;
 
+               if (!lov_pattern_supported(
+                               lov_pattern(lle->lle_lsme->lsme_pattern)) ||
+                   !lov_supported_comp_magic(lle->lle_lsme->lsme_magic))
+                       continue;
+
                result = lle->lle_comp_ops->lco_init(env, dev, lov, index,
                                                     conf, lle);
                if (result < 0)
@@ -771,6 +800,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                if (lre->lre_foreign)
                        continue;
 
+               if (!lre->lre_valid)
+                       continue;
+
                mirror_count++; /* valid mirror */
 
                /* aggregated preference of all involved OSTs */
@@ -860,6 +892,10 @@ static int lov_delete_composite(const struct lu_env *env,
        lov_foreach_layout_entry(lov, entry) {
                if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme))
                        continue;
+               /*
+                * Skip components with an unsupported pattern or magic.  The
+                * foreign check above treats lle_lsme as possibly NULL, so
+                * guard the dereference here as well.
+                */
+               if (entry->lle_lsme &&
+                   (!lov_pattern_supported(
+                               lov_pattern(entry->lle_lsme->lsme_pattern)) ||
+                    !lov_supported_comp_magic(entry->lle_lsme->lsme_magic)))
+                       continue;
 
                rc = lov_delete_raid0(env, lov, entry);
                if (rc)
@@ -2369,7 +2405,10 @@ int lov_read_and_clear_async_rc(struct cl_object *clob)
                                                lsm->lsm_entries[i];
                                int j;
 
-                               if (!lsme_inited(lse))
+                               if (!lsme_inited(lse) ||
+                                   !lov_pattern_supported(
+                                           lov_pattern(lse->lsme_pattern)) ||
+                                   !lov_supported_comp_magic(lse->lsme_magic))
                                        break;
 
                                for (j = 0; j < lse->lsme_stripe_count; j++) {
index 14c9bdd..cb5d889 100644 (file)
@@ -3226,6 +3226,39 @@ test_61c() { # LU-14508
 }
 run_test 61c "mirror resync preserves timestamps"
 
+test_62() {
+       local file=$DIR/$tdir/$tfile
+
+       mkdir -p $DIR/$tdir
+
+       echo "create mirror file with unknown magic"
+       #define OBD_FAIL_LOV_COMP_MAGIC 0x1426
+       # mirror 2 in-memory magic is bad
+       $LCTL set_param fail_loc=0x1426 fail_val=2
+       $LFS setstripe -N --flags=prefer -N2 $file ||
+               error "failed to create mirror file $file"
+       magic=$($LFS getstripe -v -I131074 $file | awk '/lmm_magic/{print $2}')
+       [[ $magic == 0x0BAD0BD0 ]] ||
+               error "mirror 2 magic $magic is not bad as expected"
+       cat /etc/passwd > $file || error "cannot write to $file"
+       diff /etc/passwd $file || error "read $file error"
+
+       rm -f $file
+
+       echo "create mirror file with unknown pattern"
+       #define OBD_FAIL_LOV_COMP_PATTERN 0x1427
+       # mirror 1 in-memory pattern is bad
+       $LCTL set_param fail_loc=0x1427 fail_val=1
+       $LFS setstripe -N -N --flags=prefer $file ||
+               error "failed to create mirror file $file"
+       pattern=$($LFS getstripe -I65537 $file | awk '/lmm_pattern/{print $2}')
+       [[ $pattern == 502 ]] ||
+               error "mirror 1 pattern $pattern is not bad as expected"
+       cat /etc/passwd > $file || error "cannot write to $file"
+       diff /etc/passwd $file || error "read $file error"
+}
+run_test 62 "read/write with unknown type of mirror"
+
 test_70() {
        local tf=$DIR/$tdir/$tfile