From: Bobi Jam Date: Thu, 27 Apr 2023 09:38:24 +0000 (+0800) Subject: EX-6511 csdc: prefer uncompressed mirror for read X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=511d44bf73e416e264eb8b8ec48816931e6bb424;p=fs%2Flustre-release.git EX-6511 csdc: prefer uncompressed mirror for read When accessing a mirrored file with both compressed and uncompressed components, choose uncompressed components for read. Signed-off-by: Bobi Jam Change-Id: I043b27bd891c039901075a08c76630f8f0f9f182 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50791 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Reviewed-by: Andreas Dilger --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 2065b14..6cef841 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -621,6 +621,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_READPAGE_PAUSE 0x1422 #define OBD_FAIL_LLITE_PANIC_ON_ESTALE 0x1423 #define OBD_FAIL_LLITE_READPAGE_PAUSE2 0x1424 +#define OBD_FAIL_LOV_MIRROR_INIT 0x1425 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index 1691137..5201db4 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -221,7 +221,8 @@ struct lov_layout_dom { struct lov_layout_entry { __u32 lle_type; - unsigned int lle_valid:1; + unsigned int lle_valid:1, + lle_has_compr:1; unsigned int lle_preference; struct lu_extent *lle_extent; struct lov_stripe_md_entry *lle_lsme; @@ -237,7 +238,9 @@ struct lov_mirror_entry { unsigned short lre_stale:1, /* set if any components is stale */ lre_valid:1, /* set if at least one of components * in this mirror is valid */ - lre_foreign:1; /* set if it is a foreign component */ + lre_foreign:1, /* set if it is a foreign component */ + lre_has_compr:1; /* set if it contains compressed + * component */ int lre_preference; /* overall preference of this mirror */ unsigned short lre_start; /* index to lo_entries, start index of diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index d40b06c..1723b04 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -301,7 +301,8 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, struct cl_io *io) { struct lov_layout_composite *comp = &obj->u.composite; - int index; + int index; /* the mirror being checked */ + int candidate = -1; /* candidate mirror for read */ int i; int result; ENTRY; @@ -402,7 +403,9 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, .e_end = lio->lis_pos + 1 }; struct lov_mirror_entry *lre; struct lov_layout_entry *lle; + int preference = 0; bool found = false; + bool has_compr_inrange = false; lre = lov_mirror_entry(obj, (index + i) % comp->lo_mirror_count); if (!lre->lre_valid) @@ -417,23 +420,45 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, if (lu_extent_is_overlapped(&ext, lle->lle_extent)) { found = true; - break; + if (io->ci_type != CIT_READ) + break; + + if (candidate == -1 || + lre->lre_preference > preference) { + preference = lre->lre_preference; + candidate = index + i; + } + /** + * read prefers mirror with no compressed + * components in the read range. + */ + if (lle->lle_has_compr) { + /* put this mirror on the back burner */ + has_compr_inrange = true; + break; + } } } /* each component of the mirror */ if (found) { - index = (index + i) % comp->lo_mirror_count; - break; + if (io->ci_type != CIT_READ || !has_compr_inrange) { + index = (index + i) % comp->lo_mirror_count; + break; + } } } /* each mirror */ if (i == comp->lo_mirror_count) { - CERROR(DFID": failed to find a component covering " - "I/O region at %llu\n", - PFID(lu_object_fid(lov2lu(obj))), lio->lis_pos); + if (candidate != -1) { + index = candidate % comp->lo_mirror_count; + } else { + CERROR(DFID": failed to find a component covering " + "I/O region at %llu\n", + PFID(lu_object_fid(lov2lu(obj))), lio->lis_pos); - dump_lsm(D_ERROR, obj->lo_lsm); + dump_lsm(D_ERROR, obj->lo_lsm); - RETURN(-EIO); + RETURN(-EIO); + } } CDEBUG(D_VFSTRACE, DFID ": flr state: %d, move mirror from %d to %d, " @@ -444,6 +469,9 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj, lio->lis_mirror_index = index; + if (OBD_FAIL_CHECK(OBD_FAIL_LOV_MIRROR_INIT)) + cfs_fail_val = index; + /* * FLR: if all mirrors have been tried once, most likely the network * of this client has been partitioned. We should relinquish CPU for diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 3efe22b..5c4e62f 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -685,6 +685,8 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, lle->lle_extent = &lle->lle_lsme->lsme_extent; lle->lle_valid = !(lle->lle_lsme->lsme_flags & LCME_FL_STALE); + lle->lle_has_compr = !!(lle->lle_lsme->lsme_pattern & + LOV_PATTERN_COMPRESS); if (flr_state != LCM_FL_NONE) mirror_id = mirror_id_of(lle->lle_lsme->lsme_id); @@ -696,6 +698,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, lre->lre_stale |= !lle->lle_valid; lre->lre_foreign |= lsme_is_foreign(lle->lle_lsme); + lre->lre_has_compr |= lle->lle_has_compr; lre->lre_end = i; continue; } @@ -718,6 +721,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, lre->lre_valid = lle->lle_valid; lre->lre_stale = !lle->lle_valid; lre->lre_foreign = lsme_is_foreign(lle->lle_lsme); + lre->lre_has_compr = lle->lle_has_compr; } /* sanity check for FLR */ @@ -778,6 +782,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev, lre->lre_preference += comp->lo_entries[j].lle_preference; } + /* uncompressed mirror adds extra preference value */ + if (!lre->lre_has_compr) + lre->lre_preference += (j - lre->lre_start); if (lre->lre_preference > preference) { preference = lre->lre_preference; diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 15d9d01..917d879 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -2021,6 +2021,30 @@ test_43b() { } run_test 43b "allow writing to multiple preferred mirror file" +test_43c() { + (( $MDS1_VERSION >= $(version_code 2.14.0.88) )) || + skip "Need MDS >= 2.14.0.88 for compression support" + + local tf=$DIR/$tdir/$tfile + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + + save_lustre_params client "llite.*.enable_compression" > $p + stack_trap "rm -rf $DIR/$tdir; restore_lustre_params < $p" EXIT + $LCTL set_param llite.*.enable_compression=1 + + test_mkdir $DIR/$tdir + $LFS setstripe -N -Eeof -Z gzip -N -Eeof $tf || + error "setstripe $tf failed" + + #define OBD_FAIL_LOV_MIRROR_INIT 0x1425 + $LCTL set_param fail_loc=0x1425 fail_val=-1 + cat $tf || error "read $tf failed" + + local index=$($LCTL get_param fail_val -n) + ((index == 1)) || error "should read 2nd mirror instead of $((index+1))" +} +run_test 43c "read prefer uncompressed mirror" + test_44a() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return rm -rf $DIR/$tdir