Whamcloud - gitweb
EX-6511 csdc: prefer uncompressed mirror for read
author: Bobi Jam <bobijam@whamcloud.com>
Thu, 27 Apr 2023 09:38:24 +0000 (17:38 +0800)
committer: Andreas Dilger <adilger@whamcloud.com>
Mon, 12 Jun 2023 23:36:49 +0000 (23:36 +0000)
When accessing a mirrored file with both compressed and uncompressed
components, choose uncompressed components for read.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: I043b27bd891c039901075a08c76630f8f0f9f182
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50791
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd_support.h
lustre/lov/lov_cl_internal.h
lustre/lov/lov_io.c
lustre/lov/lov_object.c
lustre/tests/sanity-flr.sh

index 2065b14..6cef841 100644 (file)
@@ -621,6 +621,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_READPAGE_PAUSE              0x1422
 #define OBD_FAIL_LLITE_PANIC_ON_ESTALE             0x1423
 #define OBD_FAIL_LLITE_READPAGE_PAUSE2             0x1424
+#define OBD_FAIL_LOV_MIRROR_INIT                   0x1425
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 1691137..5201db4 100644 (file)
@@ -221,7 +221,8 @@ struct lov_layout_dom {
 
 struct lov_layout_entry {
        __u32                           lle_type;
-       unsigned int                    lle_valid:1;
+       unsigned int                    lle_valid:1,
+                                       lle_has_compr:1;
        unsigned int                    lle_preference;
        struct lu_extent                *lle_extent;
        struct lov_stripe_md_entry      *lle_lsme;
@@ -237,7 +238,9 @@ struct lov_mirror_entry {
        unsigned short  lre_stale:1,    /* set if any components is stale */
                        lre_valid:1,    /* set if at least one of components
                                         * in this mirror is valid */
-                       lre_foreign:1;  /* set if it is a foreign component */
+                       lre_foreign:1,  /* set if it is a foreign component */
+                       lre_has_compr:1; /* set if it contains compressed
+                                         * component */
        int             lre_preference; /* overall preference of this mirror */
 
        unsigned short  lre_start;      /* index to lo_entries, start index of
index d40b06c..1723b04 100644 (file)
@@ -301,7 +301,8 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
                               struct cl_io *io)
 {
        struct lov_layout_composite *comp = &obj->u.composite;
-       int index;
+       int index;              /* the mirror being checked */
+       int candidate = -1;     /* candidate mirror for read */
        int i;
        int result;
        ENTRY;
@@ -402,7 +403,9 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
                                         .e_end   = lio->lis_pos + 1 };
                struct lov_mirror_entry *lre;
                struct lov_layout_entry *lle;
+               int preference = 0;
                bool found = false;
+               bool has_compr_inrange = false;
 
                lre = lov_mirror_entry(obj, (index + i) % comp->lo_mirror_count);
                if (!lre->lre_valid)
@@ -417,23 +420,45 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
 
                        if (lu_extent_is_overlapped(&ext, lle->lle_extent)) {
                                found = true;
-                               break;
+                               if (io->ci_type != CIT_READ)
+                                       break;
+
+                               if (candidate == -1 ||
+                                   lre->lre_preference > preference) {
+                                       preference = lre->lre_preference;
+                                       candidate = index + i;
+                               }
+                               /**
+                                * read prefers mirror with no compressed
+                                * components in the read range.
+                                */
+                               if (lle->lle_has_compr) {
+                                       /* put this mirror on the back burner */
+                                       has_compr_inrange = true;
+                                       break;
+                               }
                        }
                } /* each component of the mirror */
                if (found) {
-                       index = (index + i) % comp->lo_mirror_count;
-                       break;
+                       if (io->ci_type != CIT_READ || !has_compr_inrange) {
+                               index = (index + i) % comp->lo_mirror_count;
+                               break;
+                       }
                }
        } /* each mirror */
 
        if (i == comp->lo_mirror_count) {
-               CERROR(DFID": failed to find a component covering "
-                      "I/O region at %llu\n",
-                      PFID(lu_object_fid(lov2lu(obj))), lio->lis_pos);
+               if (candidate != -1) {
+                       index = candidate % comp->lo_mirror_count;
+               } else {
+                       CERROR(DFID": failed to find a component covering "
+                              "I/O region at %llu\n",
+                              PFID(lu_object_fid(lov2lu(obj))), lio->lis_pos);
 
-               dump_lsm(D_ERROR, obj->lo_lsm);
+                       dump_lsm(D_ERROR, obj->lo_lsm);
 
-               RETURN(-EIO);
+                       RETURN(-EIO);
+               }
        }
 
        CDEBUG(D_VFSTRACE, DFID ": flr state: %d, move mirror from %d to %d, "
@@ -444,6 +469,9 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
 
        lio->lis_mirror_index = index;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_LOV_MIRROR_INIT))
+               cfs_fail_val = index;
+
        /*
         * FLR: if all mirrors have been tried once, most likely the network
         * of this client has been partitioned. We should relinquish CPU for
index 3efe22b..5c4e62f 100644 (file)
@@ -685,6 +685,8 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
 
                lle->lle_extent = &lle->lle_lsme->lsme_extent;
                lle->lle_valid = !(lle->lle_lsme->lsme_flags & LCME_FL_STALE);
+               lle->lle_has_compr = !!(lle->lle_lsme->lsme_pattern &
+                                       LOV_PATTERN_COMPRESS);
 
                if (flr_state != LCM_FL_NONE)
                        mirror_id = mirror_id_of(lle->lle_lsme->lsme_id);
@@ -696,6 +698,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                                lre->lre_stale |= !lle->lle_valid;
                                lre->lre_foreign |=
                                        lsme_is_foreign(lle->lle_lsme);
+                               lre->lre_has_compr |= lle->lle_has_compr;
                                lre->lre_end = i;
                                continue;
                        }
@@ -718,6 +721,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                lre->lre_valid = lle->lle_valid;
                lre->lre_stale = !lle->lle_valid;
                lre->lre_foreign = lsme_is_foreign(lle->lle_lsme);
+               lre->lre_has_compr = lle->lle_has_compr;
        }
 
        /* sanity check for FLR */
@@ -778,6 +782,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                        lre->lre_preference +=
                                comp->lo_entries[j].lle_preference;
                }
+               /* uncompressed mirror adds extra preference value */
+               if (!lre->lre_has_compr)
+                       lre->lre_preference += (j - lre->lre_start);
 
                if (lre->lre_preference > preference) {
                        preference = lre->lre_preference;
index 15d9d01..917d879 100644 (file)
@@ -2021,6 +2021,30 @@ test_43b() {
 }
 run_test 43b "allow writing to multiple preferred mirror file"
 
+test_43c() {
+       (( $MDS1_VERSION >= $(version_code 2.14.0.88) )) ||
+               skip "Need MDS >= 2.14.0.88 for compression support"
+
+       local tf=$DIR/$tdir/$tfile
+       local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
+
+       save_lustre_params client "llite.*.enable_compression" > $p
+       stack_trap "rm -rf $DIR/$tdir; restore_lustre_params < $p" EXIT
+       $LCTL set_param llite.*.enable_compression=1
+
+       test_mkdir $DIR/$tdir
+       $LFS setstripe -N -Eeof -Z gzip -N -Eeof $tf ||
+               error "setstripe $tf failed"
+
+       #define OBD_FAIL_LOV_MIRROR_INIT 0x1425
+       $LCTL set_param fail_loc=0x1425 fail_val=-1
+       cat $tf || error "read $tf failed"
+
+       local index=$($LCTL get_param fail_val -n)
+       ((index == 1)) || error "should read 2nd mirror instead of $((index+1))"
+}
+run_test 43c "read prefer uncompressed mirror"
+
 test_44a() {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
        rm -rf $DIR/$tdir