From 70539f31f95f6116335ed4a4c5a278f436e37d80 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 25 Apr 2024 04:15:49 -0400 Subject: [PATCH] LU-17756 lod: add tunable lod.*.max_stripes_per_mdt Add a tunable lod.*.max_stripes_per_mdt for directory overstriping. The default value is LMV_MAX_STRIPES_PER_MDT(5). Add sanity 300uh 300ui. Signed-off-by: Lai Siyao Change-Id: Id8199f01f5e2d62ead6bf43d239eee8ec1e4cbb5 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54945 Reviewed-by: Andreas Dilger Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/lod/lod_dev.c | 1 + lustre/lod/lod_internal.h | 2 ++ lustre/lod/lod_object.c | 73 ++++++++++++++++++++++++---------------- lustre/lod/lproc_lod.c | 33 ++++++++++++++++++ lustre/tests/sanity.sh | 85 ++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 149 insertions(+), 45 deletions(-) diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 5354cfa..aa8f144 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -2112,6 +2112,7 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, lod->lod_osd_max_easize = ddp.ddp_max_ea_size; lod->lod_dom_stripesize_max_kb = (1ULL << 10); /* 1Mb is default */ lod->lod_max_stripecount = 0; + lod->lod_max_stripes_per_mdt = LMV_MAX_STRIPES_PER_MDT; /* initialize local statfs cached values */ rc = lod_lsfs_init(env, lod); diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 81ba892..09e3358 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -166,6 +166,8 @@ struct lod_device { /* max stripe count if stripe count is set to -1. 
0 means unlimited */ unsigned int lod_max_stripecount; unsigned int lod_max_mdt_stripecount; + /* valid range: [1, LMV_MAX_STRIPES_PER_MDT] */ + unsigned int lod_max_stripes_per_mdt; }; #define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index c0a7959..03e9a92 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2185,17 +2185,25 @@ static int lod_prep_md_striped_create(const struct lu_env *env, le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC); stripe_count = lo->ldo_dir_stripe_count; - if (!(lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) && - stripe_count > mdt_count) - RETURN(-E2BIG); - - if ((lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) && - (stripe_count > mdt_count * LMV_MAX_STRIPES_PER_MDT || - /* a single MDT doesn't initialize the infrastructure for striped - * directories, so we just don't support overstriping in that case - */ - mdt_count == 1)) + /* silently clear OVERSTRIPED flag on single MDT system */ + if (mdt_count == 1) + lo->ldo_dir_hash_type &= ~LMV_HASH_FLAG_OVERSTRIPED; + if (lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) { + /* silently clamp stripe count if MDTs are not specific */ + if (stripe_count > mdt_count * lod->lod_max_stripes_per_mdt) { + if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) + stripe_count = mdt_count * + lod->lod_max_stripes_per_mdt; + else + RETURN(-E2BIG); + } + /* clear OVERSTRIPED if not overstriped */ + if (stripe_count <= mdt_count && + le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) + lo->ldo_dir_hash_type &= ~LMV_HASH_FLAG_OVERSTRIPED; + } else if (stripe_count > mdt_count) { RETURN(-E2BIG); + } OBD_ALLOC_PTR_ARRAY(stripes, stripe_count); if (!stripes) @@ -2228,6 +2236,7 @@ static int lod_prep_md_striped_create(const struct lu_env *env, if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) { int stripes_per_mdt; int mdt; + bool overstriped = false; is_specific = true; @@ -2235,13 +2244,24 @@ static int 
lod_prep_md_striped_create(const struct lu_env *env, for (mdt = 0; mdt < mdt_count + 1; mdt++) { stripes_per_mdt = 0; for (i = 0; i < stripe_count; i++) { - if (mdt == le32_to_cpu( - lum->lum_objects[i].lum_mds)) + if (mdt == + le32_to_cpu(lum->lum_objects[i].lum_mds)) stripes_per_mdt++; } - if (stripes_per_mdt > LMV_MAX_STRIPES_PER_MDT) + if (stripes_per_mdt > + lod->lod_max_stripes_per_mdt) GOTO(out_free, rc = -EINVAL); + if (stripes_per_mdt > 1) + overstriped = true; } + if (!overstriped && + (lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED)) + lo->ldo_dir_hash_type &= + ~LMV_HASH_FLAG_OVERSTRIPED; + else if (overstriped && + !(lo->ldo_dir_hash_type & + LMV_HASH_FLAG_OVERSTRIPED)) + GOTO(out_free, rc = -EINVAL); for (i = 0; i < stripe_count; i++) idx_array[i] = @@ -5939,8 +5959,7 @@ static void lod_ah_init(const struct lu_env *env, max_stripe_count = d->lod_remote_mdt_count + 1; if (lc->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) - max_stripe_count = - max_stripe_count * LMV_MAX_STRIPES_PER_MDT; + max_stripe_count *= d->lod_max_stripes_per_mdt; /* shrink the stripe_count to max stripe count */ if (lc->ldo_dir_stripe_count > max_stripe_count && @@ -9031,8 +9050,6 @@ static int lod_dir_declare_layout_split(const struct lu_env *env, saved_count = lo->ldo_dir_stripes_allocated; stripe_count = le32_to_cpu(lum->lum_stripe_count); - if (stripe_count <= saved_count) - RETURN(-EINVAL); /* if the split target is overstriped, we need to put that flag in the * current layout so it can allocate the larger number of stripes @@ -9042,21 +9059,19 @@ static int lod_dir_declare_layout_split(const struct lu_env *env, * rather than after when we finalize directory setup (at the end of * this function). 
*/ - if (le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_OVERSTRIPED) - lo->ldo_dir_hash_type |= LMV_HASH_FLAG_OVERSTRIPED; - - if (!(lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) && - stripe_count > mdt_count) { - RETURN(-E2BIG); - } else if ((lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) && - (stripe_count > mdt_count * LMV_MAX_STRIPES_PER_MDT || - /* a single MDT doesn't initialize the infrastructure for striped - * directories, so we just don't support overstriping in that case - */ - mdt_count == 1)) { + if (le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_OVERSTRIPED) { + /* silently clamp stripe count if it exceeds limit */ + if (stripe_count > mdt_count * lod->lod_max_stripes_per_mdt) + stripe_count = mdt_count * lod->lod_max_stripes_per_mdt; + if (stripe_count > mdt_count) + lo->ldo_dir_hash_type |= LMV_HASH_FLAG_OVERSTRIPED; + } else if (stripe_count > mdt_count) { RETURN(-E2BIG); } + if (stripe_count <= saved_count) + RETURN(-EINVAL); + dof->dof_type = DFT_DIR; OBD_ALLOC(stripes, sizeof(*stripes) * stripe_count); diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c index f4a6a9e..d0dfcaa 100644 --- a/lustre/lod/lproc_lod.c +++ b/lustre/lod/lproc_lod.c @@ -345,6 +345,38 @@ static ssize_t max_mdt_stripecount_store(struct kobject *kobj, LUSTRE_RW_ATTR(max_mdt_stripecount); +static ssize_t max_stripes_per_mdt_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj); + struct lod_device *lod = dt2lod_dev(dt); + + return scnprintf(buf, PAGE_SIZE, "%u\n", lod->lod_max_stripes_per_mdt); +} + +static ssize_t max_stripes_per_mdt_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj); + struct lod_device *lod = dt2lod_dev(dt); + long val; + int rc; + + rc = kstrtol(buffer, 0, &val); + if (rc) + return rc; + + if (val < 1 || val > LMV_MAX_STRIPES_PER_MDT) /* out of
range */ + return -ERANGE; + + lod->lod_max_stripes_per_mdt = val; + + return count; +} + +LUSTRE_RW_ATTR(max_stripes_per_mdt); + /** * Show default striping pattern (LOV_PATTERN_*). @@ -1386,6 +1418,7 @@ static struct attribute *lod_attrs[] = { &lustre_attr_stripecount.attr, &lustre_attr_max_stripecount.attr, &lustre_attr_max_mdt_stripecount.attr, + &lustre_attr_max_stripes_per_mdt.attr, &lustre_attr_stripetype.attr, &lustre_attr_activeobd.attr, &lustre_attr_desc_uuid.attr, diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 5cf8fe3..5e6345d 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -26923,7 +26923,12 @@ test_300t() { } run_test 300t "test max_mdt_stripecount" -MDT_OVSTRP_VER="2.15.60" +mdts=$(comma_list $(mdts_nodes)) +max_stripes_per_mdt=$(do_facet mds1 $LCTL get_param -n \ + lod.$FSNAME-MDT0000-mdtlov.max_stripes_per_mdt || echo 0) +((max_stripes_per_mdt == 0)) || + do_nodes $mdts $LCTL set_param -n \ + lod.$FSNAME-MDT*.max_stripes_per_mdt=$LMV_MAX_STRIPES_PER_MDT # 300u family tests MDT overstriping test_300ua() { (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" @@ -26939,7 +26944,7 @@ test_300ua() { # This does a basic interop test - if the MDS does not support mdt # overstriping, we should get stripes == number of MDTs - if (( $MDS1_VERSION < $(version_code $MDT_OVSTRP_VER) )); then + if (( max_stripes_per_mdt == 0 )); then expected_count=$MDSCOUNT else expected_count=$setcount @@ -26951,8 +26956,7 @@ test_300ua() { error "(2) unable to rm overstriped dir" # Tests after this require overstriping support - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - { echo "skipped for MDS < $MDT_OVSTRP_VER"; return 0; } + (( max_stripes_per_mdt > 0 )) || return 0 test_striped_dir 0 $setcount true || error "(3)failed on overstriped dir" @@ -26967,8 +26971,8 @@ test_300ua() { run_test 300ua "basic overstriped dir sanity test" test_300ub() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for
MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" mkdir $DIR/$tdir @@ -27036,8 +27040,8 @@ test_300ub() { run_test 300ub "test MDT overstriping interface & limits" test_300uc() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" mkdir $DIR/$tdir @@ -27064,8 +27068,8 @@ test_300uc() { run_test 300uc "test MDT overstriping as default & inheritance" test_300ud() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" local mdts=$(comma_list $(mdts_nodes)) @@ -27113,8 +27117,8 @@ test_300ud() { run_test 300ud "dir split" test_300ue() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" local mdts=$(comma_list $(mdts_nodes)) @@ -27161,8 +27165,8 @@ test_300ue() { run_test 300ue "dir merge" test_300uf() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" # maximum amount of local locks: @@ -27207,8 +27211,8 @@ test_300uf() { run_test 300uf "migrate with too many local locks" test_300ug() { - (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) || - skip "skipped for MDS < $MDT_OVSTRP_VER" + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata 
overstripe" (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" mkdir -p $DIR/$tdir @@ -27239,6 +27243,55 @@ test_300ug() { } run_test 300ug "migrate overstriped dirs" +test_300uh() { + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" + (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + + local mdts=$(comma_list $(mdts_nodes)) + local val=$(do_facet mds1 $LCTL get_param -n \ + lod.$FSNAME-MDT0000-mdtlov.max_stripes_per_mdt) + + stack_trap "do_nodes $mdts $LCTL set_param -n \ + lod.$FSNAME-MDT*.max_stripes_per_mdt=$val" + + # ask for 5 stripes per MDT; silently clamped to the limit of 2 + do_nodes $mdts $LCTL set_param -n \ + lod.$FSNAME-MDT*.max_stripes_per_mdt=2 + $LFS setdirstripe -C $((MDSCOUNT * 5)) $DIR/$tdir || + error "mkdir $tdir failed" + $LFS getdirstripe -H $DIR/$tdir | grep overstriped || + error "overstriped flag not found" + + local count=$($LFS getdirstripe -c $DIR/$tdir) + + ((count == 2 * MDSCOUNT)) || error "count $count != $((2 * MDSCOUNT))" + + # creating 3 stripes on MDT0 (limit is now 2) must fail + $LFS setdirstripe -i 0,0,0 $DIR/$tdir/sub && error "mkdir sub succeeded" + + # -C with only MDSCOUNT stripes: OVERSTRIPED flag must be cleared + $LFS setdirstripe -C $MDSCOUNT $DIR/$tdir/sub || + error "mkdir sub failed" + $LFS getdirstripe -H $DIR/$tdir/sub | grep -v overstriped || + error "overstriped flag found" +} +run_test 300uh "overstripe tunable max_stripes_per_mdt" + +test_300ui() { + (( max_stripes_per_mdt > 0 )) || + skip "skipped for MDS that doesn't support metadata overstripe" + (( MDSCOUNT == 1 )) || skip "1 MDT only" + + $LFS setdirstripe -C 2 $DIR/$tdir && error "mkdir $tdir succeeded" || + true +} +run_test 300ui "overstripe is not supported on one MDT system" + +(( max_stripes_per_mdt == 0 )) || + do_nodes $mdts $LCTL set_param -n \ + lod.$FSNAME-MDT*.max_stripes_per_mdt=$max_stripes_per_mdt + prepare_remote_file() { mkdir $DIR/$tdir/src_dir || error "create remote source failed" -- 1.8.3.1