From: Alex Zhuravlev Date: Sun, 5 Nov 2023 13:51:29 +0000 (+0300) Subject: LU-17261 lov: ignore broken components X-Git-Tag: 2.15.60~37 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=902fe290e51dccdee89380fb725ae6e3c1802e2b;p=fs%2Flustre-release.git LU-17261 lov: ignore broken components If some component of a mirrored file is broken, it makes sense to try another (possibly valid) replica rather than give up immediately. Signed-off-by: Alex Zhuravlev Change-Id: I32ea0efa90109f5159bf8b6c4e0efe1d543580c3 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52996 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Zhenyu Xu Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 0a42aa7..465382d 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -633,6 +633,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_LOV_MIRROR_INIT 0x1425 #define OBD_FAIL_LOV_COMP_MAGIC 0x1426 #define OBD_FAIL_LOV_COMP_PATTERN 0x1427 +#define OBD_FAIL_LOV_INVALID_OSTIDX 0x1428 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index 4256191..d7000b6 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -846,6 +846,13 @@ static int lod_gen_component_ea(const struct lu_env *env, * component, its l_ost_idx does not matter. */ objs[i].l_ost_idx = cpu_to_le32(ost_idx); + + /* simulation of broken LOVEA */ + if (CFS_FAIL_CHECK(OBD_FAIL_LOV_INVALID_OSTIDX) && + comp_idx == 0 && i == 0 && lo->ldo_mirror_count > 1) { + objs[i].l_ost_idx = cpu_to_le32(0xffffffff); + } + } done: if (lmm_size != NULL) diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index c268197..62656c3 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -554,8 +554,15 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size) LCME_FL_INIT, (i == entry_count - 1) ? 
&maxbytes : NULL); - if (IS_ERR(lsme)) - GOTO(out_lsm, rc = PTR_ERR(lsme)); + if (IS_ERR(lsme)) { + OBD_ALLOC_LARGE(lsme, sizeof(*lsme)); + if (!lsme) + GOTO(out_lsm, rc = -ENOMEM); + + lsme->lsme_magic = LOV_MAGIC_FOREIGN; + lsme->lsme_pattern = LOV_PATTERN_FOREIGN; + lsme->lsme_flags = LCME_FL_OFFLINE; + } /** * pressume that unrecognized magic component also has valid diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 01f2b00..6fe457d 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -4435,6 +4435,19 @@ test_209b() { } run_test 209b "pagecache can be used after LL cancellation" +test_210a() { + local tf=$DIR/$tfile + + stack_trap "rm -f $tf" + dd if=/dev/zero of=$tf bs=1M count=1 || error "can't dd" +#define OBD_FAIL_LOV_INVALID_OSTIDX 0x1428 + do_facet mds1 "$LCTL set_param fail_loc=0x1428" + $LFS mirror extend -N $tf || error "can't mirror" + $LFS getstripe -v $tf + stat $tf || error "can't stat" +} +run_test 210a "handle broken mirrored lovea" + complete_test $SECONDS check_and_cleanup_lustre exit_status