From 14ed4a6f8f231fe94392906f991a32f07e7d7883 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Fri, 19 May 2023 17:40:31 +0800 Subject: [PATCH] LU-16837 llite: handle unknown layout component If a Lustre client encounters an unknown layout component pattern in a mirrored file, this patch makes the client mark that mirror as invalid and skip it. Signed-off-by: Bobi Jam Change-Id: Ie5f44212ab96bdc706cc5a9e11f330234fc01069 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51060 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Vitaliy Kuznetsov Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 3 ++ lustre/include/uapi/linux/lustre/lustre_idl.h | 1 + lustre/include/uapi/linux/lustre/lustre_user.h | 4 ++ lustre/llite/file.c | 8 ++++ lustre/lov/lov_ea.c | 56 +++++++++++++++++++++----- lustre/lov/lov_internal.h | 6 +++ lustre/lov/lov_io.c | 19 +++++++-- lustre/lov/lov_object.c | 55 +++++++++++++++++++++---- lustre/tests/sanity-flr.sh | 33 +++++++++++++++ 9 files changed, 165 insertions(+), 20 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ebefb06..c444e80 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -628,6 +628,9 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422 #define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423 #define OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424 +#define OBD_FAIL_LOV_MIRROR_INIT 0x1425 +#define OBD_FAIL_LOV_COMP_MAGIC 0x1426 +#define OBD_FAIL_LOV_COMP_PATTERN 0x1427 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index dde7494..887ba49 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -1143,6 +1143,7 @@ enum obdo_flags { #define LOV_MAGIC_MAGIC 0x0BD0 #define LOV_MAGIC_MASK 0xFFFF +#define LOV_MAGIC_BAD (0x0BAD0000 | LOV_MAGIC_MAGIC) 
#define LOV_MAGIC_V1 (0x0BD10000 | LOV_MAGIC_MAGIC) #define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC) #define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC) diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 30ded1f..c782aca 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -721,6 +721,10 @@ struct fsxattr { #define LOV_PATTERN_FOREIGN 0x400 #define LOV_PATTERN_COMPRESS 0x800 +/* combine exclusive patterns as a bad pattern */ +#define LOV_PATTERN_BAD (LOV_PATTERN_RAID1 | LOV_PATTERN_MDT | \ + LOV_PATTERN_FOREIGN) + #define LOV_PATTERN_F_MASK 0xffff0000 #define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */ #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 93048ac..c4c7b566 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2599,6 +2599,14 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, CDEBUG(D_INFO, "comp[%d]: stripe_count=%u, stripe_size=%u\n", i, v1->lmm_stripe_count, v1->lmm_stripe_size); + + if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_MAGIC) && + (cfs_fail_val == i + 1))) + v1->lmm_magic = LOV_MAGIC_BAD; + + if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_PATTERN) && + (cfs_fail_val == i + 1))) + v1->lmm_pattern = LOV_PATTERN_BAD; } if (v1 == NULL) diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index c024d6d..acf6c55 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -99,9 +99,21 @@ static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size, } if (!lov_pattern_supported(lov_pattern(pattern))) { - rc = -EINVAL; - CERROR("lov: unrecognized striping pattern: rc = %d\n", rc); - lov_dump_lmm_common(D_WARNING, lmm); + static int nr; + static ktime_t time2_clear_nr; + ktime_t now = ktime_get(); + + /* limit this message 20 times within 24h */ + if (ktime_after(now, 
time2_clear_nr)) { + nr = 0; + time2_clear_nr = ktime_add_ms(now, + 24 * 3600 * MSEC_PER_SEC); + } + if (nr++ < 20) { + CWARN("lov: unrecognized striping pattern: rc = %d\n", + rc); + lov_dump_lmm_common(D_WARNING, lmm); + } goto out; } @@ -138,7 +150,9 @@ static void lsme_free(struct lov_stripe_md_entry *lsme) stripe_count = lsme->lsme_stripe_count; if (!lsme_inited(lsme) || - lsme->lsme_pattern & LOV_PATTERN_F_RELEASED) + lsme->lsme_pattern & LOV_PATTERN_F_RELEASED || + !lov_supported_comp_magic(lsme->lsme_magic) || + !lov_pattern_supported(lov_pattern(lsme->lsme_pattern))) stripe_count = 0; for (i = 0; i < stripe_count; i++) OBD_SLAB_FREE_PTR(lsme->lsme_oinfo[i], lov_oinfo_slab); @@ -191,7 +205,8 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, RETURN(ERR_PTR(-EINVAL)); pattern = le32_to_cpu(lmm->lmm_pattern); - if (pattern & LOV_PATTERN_F_RELEASED || !inited) + if (pattern & LOV_PATTERN_F_RELEASED || !inited || + !lov_pattern_supported(lov_pattern(pattern))) stripe_count = 0; else stripe_count = le16_to_cpu(lmm->lmm_stripe_count); @@ -451,9 +466,16 @@ lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm, unsigned int magic; magic = le32_to_cpu(lmm->lmm_magic); - if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3 && - magic != LOV_MAGIC_FOREIGN) - RETURN(ERR_PTR(-EINVAL)); + if (!lov_supported_comp_magic(magic)) { + struct lov_stripe_md_entry *lsme; + + /* allocate a lsme holder for invalid magic lmm */ + OBD_ALLOC_LARGE(lsme, offsetof(typeof(*lsme), lsme_oinfo[0])); + lsme->lsme_magic = magic; + lsme->lsme_pattern = le32_to_cpu(lmm->lmm_pattern); + + return lsme; + } if (magic != LOV_MAGIC_FOREIGN && le16_to_cpu(lmm->lmm_stripe_count) == 0 && @@ -517,6 +539,16 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size) blob_size = le32_to_cpu(lcme->lcme_size); blob = (char *)lcm + blob_offset; + if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_MAGIC) && + (cfs_fail_val == i + 1))) + ((struct lov_mds_md 
*)blob)->lmm_magic = LOV_MAGIC_BAD; + + if (unlikely(CFS_FAIL_CHECK(OBD_FAIL_LOV_COMP_PATTERN) && + (cfs_fail_val == i + 1))) { + ((struct lov_mds_md *)blob)->lmm_pattern = + LOV_PATTERN_BAD; + } + lsme = lsme_unpack_comp(lov, blob, blob_size, le32_to_cpu(lcme->lcme_flags) & LCME_FL_INIT, @@ -525,6 +557,10 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size) if (IS_ERR(lsme)) GOTO(out_lsm, rc = PTR_ERR(lsme)); + /** + * presume that unrecognized magic component also has valid + * lsme_id/lsme_flags/lsme_extent + */ if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)) lsm->lsm_is_released = false; @@ -654,7 +690,9 @@ void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm) lse->lsme_stripe_count, lse->lsme_stripe_size, lse->lsme_pool_name); if (!lsme_inited(lse) || - lse->lsme_pattern & LOV_PATTERN_F_RELEASED) + lse->lsme_pattern & LOV_PATTERN_F_RELEASED || + !lov_supported_comp_magic(lse->lsme_magic) || + !lov_pattern_supported(lov_pattern(lse->lsme_pattern))) continue; for (j = 0; j < lse->lsme_stripe_count; j++) { CDEBUG(level, " oinfo:%p: ostid: "DOSTID diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 9f12353..4a45578 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -184,6 +184,12 @@ struct lsm_operations { const struct lsm_operations *lsm_op_find(int magic); void lsm_free(struct lov_stripe_md *lsm); +static inline bool lov_supported_comp_magic(unsigned int magic) +{ + return magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3 || + magic == LOV_MAGIC_FOREIGN; +} + /* lov_do_div64(a, b) returns a % b, and a = a / b. * The 32-bit code is LOV-specific due to knowing about stripe limits in * order to reduce the divisor to a 32-bit number. 
If the divisor is diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index c6cf1a2..8dc3b8e 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -215,6 +215,7 @@ static int lov_io_subio_init(const struct lu_env *env, struct lov_io *lio, static int lov_io_mirror_write_intent(struct lov_io *lio, struct lov_object *obj, struct cl_io *io) { + struct lu_object *lobj = lov2lu(obj); struct lov_layout_composite *comp = &obj->u.composite; struct lu_extent *ext = &io->ci_write_intent; struct lov_mirror_entry *lre; @@ -254,7 +255,19 @@ static int lov_io_mirror_write_intent(struct lov_io *lio, * multiple components covering the writing component */ primary = &comp->lo_mirrors[comp->lo_preferred_mirror]; - LASSERT(!primary->lre_stale); + if (primary->lre_stale || !primary->lre_valid) { + /** + * new server could pick a primary mirror which old client + * does not recognize, and old client would mark it as + * invalid. + */ + CERROR(DFID ": cannot find known valid non-stale mirror, " + "could be new server picked a mirror which this client " + "does not recognize.\n", + PFID(lu_object_fid(lobj))); + RETURN(-EIO); + } + lov_foreach_mirror_layout_entry(obj, lle, primary) { LASSERT(lle->lle_valid); if (!lu_extent_is_overlapped(ext, lle->lle_extent)) @@ -267,7 +280,7 @@ static int lov_io_mirror_write_intent(struct lov_io *lio, if (count == 0) { CERROR(DFID ": cannot find any valid components covering " "file extent "DEXT", mirror: %d\n", - PFID(lu_object_fid(lov2lu(obj))), PEXT(ext), + PFID(lu_object_fid(lobj)), PEXT(ext), primary->lre_mirror_id); RETURN(-EIO); } @@ -290,7 +303,7 @@ static int lov_io_mirror_write_intent(struct lov_io *lio, CDEBUG(D_VFSTRACE, DFID "there are %zd components to be staled to " "modify file extent "DEXT", iot: %d\n", - PFID(lu_object_fid(lov2lu(obj))), count, PEXT(ext), io->ci_type); + PFID(lu_object_fid(lobj)), count, PEXT(ext), io->ci_type); io->ci_need_write_intent = count > 0; diff --git a/lustre/lov/lov_object.c 
b/lustre/lov/lov_object.c index 1aeb6a8..0cbbd1b 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -675,16 +675,40 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, case LOV_PATTERN_FOREIGN: lle->lle_comp_ops = NULL; break; - default: - CERROR("%s: unknown composite layout entry type %i\n", - lov2obd(dev->ld_lov)->obd_name, - lsm->lsm_entries[i]->lsme_pattern); - dump_lsm(D_ERROR, lsm); - RETURN(-EIO); + default: { + static int nr; + static ktime_t time2_clear_nr; + ktime_t now = ktime_get(); + + lle->lle_comp_ops = NULL; + + /* limit this message 20 times within 24h */ + if (ktime_after(now, time2_clear_nr)) { + nr = 0; + time2_clear_nr = ktime_add_ms(now, + 24 * 3600 * MSEC_PER_SEC); + } + if (nr++ < 20) { + CWARN("%s: unknown layout entry %d pattern %#x" + " could be an unrecognizable component" + " set by other clients, skip to" + " initialize the next component.\n", + lov2obd(dev->ld_lov)->obd_name, + i, + lsm->lsm_entries[i]->lsme_pattern); + dump_lsm(D_ERROR, lsm); + } + } } lle->lle_extent = &lle->lle_lsme->lsme_extent; - lle->lle_valid = !(lle->lle_lsme->lsme_flags & LCME_FL_STALE); + if (!lov_pattern_supported( + lov_pattern(lle->lle_lsme->lsme_pattern)) || + !lov_supported_comp_magic(lle->lle_lsme->lsme_magic)) + lle->lle_valid = 0; + else + lle->lle_valid = + !(lle->lle_lsme->lsme_flags & LCME_FL_STALE); if (flr_state != LCM_FL_NONE) mirror_id = mirror_id_of(lle->lle_lsme->lsme_id); @@ -744,6 +768,11 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, if (lsme_is_foreign(lle->lle_lsme)) continue; + if (!lov_pattern_supported( + lov_pattern(lle->lle_lsme->lsme_pattern)) || + !lov_supported_comp_magic(lle->lle_lsme->lsme_magic)) + continue; + result = lle->lle_comp_ops->lco_init(env, dev, lov, index, conf, lle); if (result < 0) @@ -771,6 +800,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, if (lre->lre_foreign) continue; + if 
(!lre->lre_valid) + continue; + mirror_count++; /* valid mirror */ /* aggregated preference of all involved OSTs */ @@ -860,6 +892,10 @@ static int lov_delete_composite(const struct lu_env *env, lov_foreach_layout_entry(lov, entry) { if (entry->lle_lsme && lsme_is_foreign(entry->lle_lsme)) continue; + if (!lov_pattern_supported( + lov_pattern(entry->lle_lsme->lsme_pattern)) || + !lov_supported_comp_magic(entry->lle_lsme->lsme_magic)) + continue; rc = lov_delete_raid0(env, lov, entry); if (rc) @@ -2369,7 +2405,10 @@ int lov_read_and_clear_async_rc(struct cl_object *clob) lsm->lsm_entries[i]; int j; - if (!lsme_inited(lse)) + if (!lsme_inited(lse) || + !lov_pattern_supported( + lov_pattern(lse->lsme_pattern)) || + !lov_supported_comp_magic(lse->lsme_magic)) break; for (j = 0; j < lse->lsme_stripe_count; j++) { diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 14c9bdd..cb5d889 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -3226,6 +3226,39 @@ test_61c() { # LU-14508 } run_test 61c "mirror resync preserves timestamps" +test_62() { + local file=$DIR/$tdir/$tfile + + mkdir -p $DIR/$tdir + + echo "create mirror file with unknown magic" + #define OBD_FAIL_LOV_COMP_MAGIC 0x1426 + # mirror 2 in-memory magic is bad + $LCTL set_param fail_loc=0x1426 fail_val=2 + $LFS setstripe -N --flags=prefer -N2 $file || + error "failed to create mirror file $file" + magic=$($LFS getstripe -v -I131074 $file | awk '/lmm_magic/{print $2}') + [[ $magic == 0x0BAD0BD0 ]] || + error "mirror 2 magic $magic is not bad as expected" + cat /etc/passwd > $file || error "cannot write to $file" + diff /etc/passwd $file || error "read $file error" + + rm -f $file + + echo "create mirror file with unknown pattern" + #define OBD_FAIL_LOV_COMP_PATTERN 0x1427 + # mirror 1 in-memory pattern is bad + $LCTL set_param fail_loc=0x1427 fail_val=1 + $LFS setstripe -N -N --flags=prefer $file || + error "failed to create mirror file $file" + pattern=$($LFS 
getstripe -I65537 $file | awk '/lmm_pattern/{print $2}') + [[ $pattern == 502 ]] || + error "mirror 1 pattern $pattern is not bad as expected" + cat /etc/passwd > $file || error "cannot write to $file" + diff /etc/passwd $file || error "read $file error" +} +run_test 62 "read/write with unknown type of mirror" + test_70() { local tf=$DIR/$tdir/$tfile -- 1.8.3.1