Whamcloud - gitweb
LU-17261 lov: ignore broken components 96/52996/5
author Alex Zhuravlev <bzzz@whamcloud.com>
Sun, 5 Nov 2023 13:51:29 +0000 (16:51 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 20 Dec 2023 01:56:36 +0000 (01:56 +0000)
If some component of a mirrored file is broken, it makes sense
to try another (possibly valid) replica rather than give up
immediately.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I32ea0efa90109f5159bf8b6c4e0efe1d543580c3
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52996
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Zhenyu Xu <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/lod/lod_lov.c
lustre/lov/lov_ea.c
lustre/tests/sanity-flr.sh

index 0a42aa7..465382d 100644 (file)
@@ -633,6 +633,7 @@ extern bool obd_enable_health_write;
 #define OBD_FAIL_LOV_MIRROR_INIT                   0x1425
 #define OBD_FAIL_LOV_COMP_MAGIC                            0x1426
 #define OBD_FAIL_LOV_COMP_PATTERN                  0x1427
+#define OBD_FAIL_LOV_INVALID_OSTIDX                0x1428
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 4256191..d7000b6 100644 (file)
@@ -846,6 +846,13 @@ static int lod_gen_component_ea(const struct lu_env *env,
                 * component, its l_ost_idx does not matter.
                 */
                objs[i].l_ost_idx = cpu_to_le32(ost_idx);
+
+               /* simulation of broken LOVEA */
+               if (CFS_FAIL_CHECK(OBD_FAIL_LOV_INVALID_OSTIDX) &&
+                   comp_idx == 0 && i == 0 && lo->ldo_mirror_count > 1) {
+                       objs[i].l_ost_idx = cpu_to_le32(0xffffffff);
+               }
+
        }
 done:
        if (lmm_size != NULL)
index c268197..62656c3 100644 (file)
@@ -554,8 +554,15 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
                                        LCME_FL_INIT,
                                        (i == entry_count - 1) ? &maxbytes :
                                                                 NULL);
-               if (IS_ERR(lsme))
-                       GOTO(out_lsm, rc = PTR_ERR(lsme));
+               if (IS_ERR(lsme)) {
+                       OBD_ALLOC_LARGE(lsme, sizeof(*lsme));
+                       if (!lsme)
+                               GOTO(out_lsm, rc = -ENOMEM);
+
+                       lsme->lsme_magic = LOV_MAGIC_FOREIGN;
+                       lsme->lsme_pattern = LOV_PATTERN_FOREIGN;
+                       lsme->lsme_flags = LCME_FL_OFFLINE;
+               }
 
                /**
                 * presume that unrecognized magic component also has valid
index 01f2b00..6fe457d 100644 (file)
@@ -4435,6 +4435,19 @@ test_209b() {
 }
 run_test 209b "pagecache can be used after LL cancellation"
 
+test_210a() {
+       local tf=$DIR/$tfile
+
+       stack_trap "rm -f $tf"
+       dd if=/dev/zero of=$tf bs=1M count=1 || error "can't dd"
+#define OBD_FAIL_LOV_INVALID_OSTIDX                0x1428
+       do_facet mds1 "$LCTL set_param fail_loc=0x1428"
+       $LFS mirror extend -N $tf || error "can't mirror"
+       $LFS getstripe -v $tf
+       stat $tf || error "can't stat"
+}
+run_test 210a "handle broken mirrored lovea"
+
 complete_test $SECONDS
 check_and_cleanup_lustre
 exit_status