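Add a tunable lod.*.max_stripes_per_mdt for directory overstriping. It
limits how many stripes of one directory may be placed on a single MDT.
The default value is LMV_MAX_STRIPES_PER_MDT (5), which is also the
largest value accepted; the smallest is 1.
Add sanity tests 300uh and 300ui.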
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Id8199f01f5e2d62ead6bf43d239eee8ec1e4cbb5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54945
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
lod->lod_dom_stripesize_max_kb = (1ULL << 10); /* 1Mb is default */
lod->lod_max_stripecount = 0;
+ lod->lod_max_stripes_per_mdt = LMV_MAX_STRIPES_PER_MDT;
/* initialize local statfs cached values */
rc = lod_lsfs_init(env, lod);
/* max stripe count if stripe count is set to -1. 0 means unlimited */
unsigned int lod_max_stripecount;
unsigned int lod_max_mdt_stripecount;
+ /* valid range: [1, LMV_MAX_STRIPES_PER_MDT] */
+ unsigned int lod_max_stripes_per_mdt;
};
#define lod_ost_bitmap lod_ost_descs.ltd_tgt_bitmap
le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC);
stripe_count = lo->ldo_dir_stripe_count;
- if (!(lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) &&
- stripe_count > mdt_count)
- RETURN(-E2BIG);
-
- if ((lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) &&
- (stripe_count > mdt_count * LMV_MAX_STRIPES_PER_MDT ||
- /* a single MDT doesn't initialize the infrastructure for striped
- * directories, so we just don't support overstriping in that case
- */
- mdt_count == 1))
+ /* silently clear OVERSTRIPED flag on single MDT system */
+ if (mdt_count == 1)
+ lo->ldo_dir_hash_type &= ~LMV_HASH_FLAG_OVERSTRIPED;
+ if (lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) {
+ /* silently clamp stripe count if MDTs are not specific */
+ if (stripe_count > mdt_count * lod->lod_max_stripes_per_mdt) {
+ if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC)
+ stripe_count = mdt_count *
+ lod->lod_max_stripes_per_mdt;
+ else
+ RETURN(-E2BIG);
+ }
+ /* clear OVERSTRIPED if not overstriped */
+ if (stripe_count <= mdt_count &&
+ le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC)
+ lo->ldo_dir_hash_type &= ~LMV_HASH_FLAG_OVERSTRIPED;
+ } else if (stripe_count > mdt_count) {
RETURN(-E2BIG);
+ }
OBD_ALLOC_PTR_ARRAY(stripes, stripe_count);
if (!stripes)
if (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) {
int stripes_per_mdt;
int mdt;
+ bool overstriped = false;
is_specific = true;
for (mdt = 0; mdt < mdt_count + 1; mdt++) {
stripes_per_mdt = 0;
for (i = 0; i < stripe_count; i++) {
- if (mdt == le32_to_cpu(
- lum->lum_objects[i].lum_mds))
+ if (mdt ==
+ le32_to_cpu(lum->lum_objects[i].lum_mds))
stripes_per_mdt++;
}
- if (stripes_per_mdt > LMV_MAX_STRIPES_PER_MDT)
+ if (stripes_per_mdt >
+ lod->lod_max_stripes_per_mdt)
GOTO(out_free, rc = -EINVAL);
+ if (stripes_per_mdt > 1)
+ overstriped = true;
}
+ if (!overstriped &&
+ (lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED))
+ lo->ldo_dir_hash_type &=
+ ~LMV_HASH_FLAG_OVERSTRIPED;
+ else if (overstriped &&
+ !(lo->ldo_dir_hash_type &
+ LMV_HASH_FLAG_OVERSTRIPED))
+ GOTO(out_free, rc = -EINVAL);
for (i = 0; i < stripe_count; i++)
idx_array[i] =
max_stripe_count = d->lod_remote_mdt_count + 1;
if (lc->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED)
- max_stripe_count =
- max_stripe_count * LMV_MAX_STRIPES_PER_MDT;
+ max_stripe_count *= d->lod_max_stripes_per_mdt;
/* shrink the stripe_count to max stripe count */
if (lc->ldo_dir_stripe_count > max_stripe_count &&
saved_count = lo->ldo_dir_stripes_allocated;
stripe_count = le32_to_cpu(lum->lum_stripe_count);
- if (stripe_count <= saved_count)
- RETURN(-EINVAL);
/* if the split target is overstriped, we need to put that flag in the
* current layout so it can allocate the larger number of stripes
* rather than after when we finalize directory setup (at the end of
* this function).
*/
- if (le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_OVERSTRIPED)
- lo->ldo_dir_hash_type |= LMV_HASH_FLAG_OVERSTRIPED;
-
- if (!(lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) &&
- stripe_count > mdt_count) {
- RETURN(-E2BIG);
- } else if ((lo->ldo_dir_hash_type & LMV_HASH_FLAG_OVERSTRIPED) &&
- (stripe_count > mdt_count * LMV_MAX_STRIPES_PER_MDT ||
- /* a single MDT doesn't initialize the infrastructure for striped
- * directories, so we just don't support overstriping in that case
- */
- mdt_count == 1)) {
+ if (le32_to_cpu(lum->lum_hash_type) & LMV_HASH_FLAG_OVERSTRIPED) {
+ /* silently clamp stripe count if it exceeds limit */
+ if (stripe_count > mdt_count * lod->lod_max_stripes_per_mdt)
+ stripe_count = mdt_count * lod->lod_max_stripes_per_mdt;
+ if (stripe_count > mdt_count)
+ lo->ldo_dir_hash_type |= LMV_HASH_FLAG_OVERSTRIPED;
+ } else if (stripe_count > mdt_count) {
RETURN(-E2BIG);
}
+ if (stripe_count <= saved_count)
+ RETURN(-EINVAL);
+
dof->dof_type = DFT_DIR;
OBD_ALLOC(stripes, sizeof(*stripes) * stripe_count);
LUSTRE_RW_ATTR(max_mdt_stripecount);
+/**
+ * Show the per-MDT stripe limit used for overstriped directories.
+ *
+ * Resolves the lod_device from the dt_device that embeds \a kobj and
+ * prints its lod_max_stripes_per_mdt as an unsigned decimal followed by
+ * a newline into \a buf (bounded by PAGE_SIZE).
+ *
+ * Returns the value returned by scnprintf().
+ */
+static ssize_t max_stripes_per_mdt_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", lod->lod_max_stripes_per_mdt);
+}
+
+/**
+ * Set the per-MDT stripe limit used for overstriped directories.
+ *
+ * Parses \a buffer with kstrtol() (base 0) and stores the result in
+ * lod_max_stripes_per_mdt of the lod_device that owns \a kobj.
+ *
+ * Returns \a count on success, the kstrtol() error if the input cannot
+ * be parsed, or -ERANGE if the value is outside
+ * [1, LMV_MAX_STRIPES_PER_MDT].
+ */
+static ssize_t max_stripes_per_mdt_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct dt_device *dt = container_of(kobj, struct dt_device, dd_kobj);
+ struct lod_device *lod = dt2lod_dev(dt);
+ long val;
+ int rc;
+
+ rc = kstrtol(buffer, 0, &val);
+ if (rc)
+ return rc;
+
+ /* only [1, LMV_MAX_STRIPES_PER_MDT] is accepted */
+ if (val < 1 || val > LMV_MAX_STRIPES_PER_MDT)
+ return -ERANGE;
+
+ lod->lod_max_stripes_per_mdt = val;
+
+ return count;
+}
+
+LUSTRE_RW_ATTR(max_stripes_per_mdt);
+
/**
* Show default striping pattern (LOV_PATTERN_*).
&lustre_attr_stripecount.attr,
&lustre_attr_max_stripecount.attr,
&lustre_attr_max_mdt_stripecount.attr,
+ &lustre_attr_max_stripes_per_mdt.attr,
&lustre_attr_stripetype.attr,
&lustre_attr_activeobd.attr,
&lustre_attr_desc_uuid.attr,
}
run_test 300t "test max_mdt_stripecount"
-MDT_OVSTRP_VER="2.15.60"
+# Probe mds1 for the max_stripes_per_mdt tunable and remember its value.
+# If get_param fails, 0 is recorded and the 300u tests treat the MDS as
+# not supporting MDT overstriping.
+mdts=$(comma_list $(mdts_nodes))
+max_stripes_per_mdt=$(do_facet mds1 $LCTL get_param -n \
+ lod.$FSNAME-MDT0000-mdtlov.max_stripes_per_mdt || echo 0)
+# When the tunable exists, set it to $LMV_MAX_STRIPES_PER_MDT on all MDS
+# nodes for the 300u tests (variable presumably provided by the test
+# framework - confirm).
+((max_stripes_per_mdt == 0)) ||
+ do_nodes $mdts $LCTL set_param -n \
+ lod.$FSNAME-MDT*.max_stripes_per_mdt=$LMV_MAX_STRIPES_PER_MDT
# 300u family tests MDT overstriping
test_300ua() {
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
# This does a basic interop test - if the MDS does not support mdt
# overstriping, we should get stripes == number of MDTs
- if (( $MDS1_VERSION < $(version_code $MDT_OVSTRP_VER) )); then
+ if (( max_stripes_per_mdt == 0 )); then
expected_count=$MDSCOUNT
else
expected_count=$setcount
error "(2) unable to rm overstriped dir"
# Tests after this require overstriping support
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- { echo "skipped for MDS < $MDT_OVSTRP_VER"; return 0; }
+ (( max_stripes_per_mdt > 0 )) || return 0
test_striped_dir 0 $setcount true ||
error "(3)failed on overstriped dir"
run_test 300ua "basic overstriped dir sanity test"
test_300ub() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
mkdir $DIR/$tdir
run_test 300ub "test MDT overstriping interface & limits"
test_300uc() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
mkdir $DIR/$tdir
run_test 300uc "test MDT overstriping as default & inheritance"
test_300ud() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
local mdts=$(comma_list $(mdts_nodes))
run_test 300ud "dir split"
test_300ue() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
local mdts=$(comma_list $(mdts_nodes))
run_test 300ue "dir merge"
test_300uf() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
# maximum amount of local locks:
run_test 300uf "migrate with too many local locks"
test_300ug() {
- (( MDS1_VERSION >= $(version_code $MDT_OVSTRP_VER) )) ||
- skip "skipped for MDS < $MDT_OVSTRP_VER"
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
mkdir -p $DIR/$tdir
}
run_test 300ug "migrate overstriped dirs"
+# Exercise the lod max_stripes_per_mdt tunable with the limit set to 2:
+# an oversized "-C" request is clamped, three stripes on one explicitly
+# chosen MDT are rejected, and a "-C" request with one stripe per MDT
+# does not keep the overstriped flag. The value read at the start is
+# re-applied through stack_trap.
+test_300uh() {
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
+ (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+
+ local mdts=$(comma_list $(mdts_nodes))
+ local val=$(do_facet mds1 $LCTL get_param -n \
+ lod.$FSNAME-MDT0000-mdtlov.max_stripes_per_mdt)
+
+ stack_trap "do_nodes $mdts $LCTL set_param -n \
+ lod.$FSNAME-MDT*.max_stripes_per_mdt=$val"
+
+ # asking for 5 stripes per MDT is silently clamped to the limit of 2
+ do_nodes $mdts $LCTL set_param -n \
+ lod.$FSNAME-MDT*.max_stripes_per_mdt=2
+ $LFS setdirstripe -C $((MDSCOUNT * 5)) $DIR/$tdir ||
+ error "mkdir $tdir failed"
+ $LFS getdirstripe -H $DIR/$tdir | grep overstriped ||
+ error "overstriped flag not found"
+
+ local count=$($LFS getdirstripe -c $DIR/$tdir)
+
+ ((count == 2 * MDSCOUNT)) || error "count $count != $((2 * MDSCOUNT))"
+
+ # 3 stripes on MDT0 exceed the limit of 2, so this must fail
+ $LFS setdirstripe -i 0,0,0 $DIR/$tdir/sub && error "mkdir sub succeeded"
+
+ # one stripe per MDT is not really overstriped: flag must be cleared
+ $LFS setdirstripe -C $MDSCOUNT $DIR/$tdir/sub ||
+ error "mkdir sub failed"
+ $LFS getdirstripe -H $DIR/$tdir/sub | grep -v overstriped ||
+ error "overstriped flag found"
+}
+run_test 300uh "overstripe tunable max_stripes_per_mdt"
+
+# On a filesystem with exactly one MDT, creating an overstriped
+# directory ("-C 2") is expected to fail.
+test_300ui() {
+ (( max_stripes_per_mdt > 0 )) ||
+ skip "skipped for MDS that doesn't support metadata overstripe"
+ (( MDSCOUNT == 1 )) || skip "1 MDT only"
+
+ # "|| true" keeps the exit status 0 when setdirstripe fails as expected
+ $LFS setdirstripe -C 2 $DIR/$tdir && error "mkdir $tdir succeeded" ||
+ true
+}
+run_test 300ui "overstripe is not supported on one MDT system"
+
+# 300u tests are done: put back the max_stripes_per_mdt value that was
+# read from mds1 before they ran (nothing to do if the tunable is absent)
+(( max_stripes_per_mdt == 0 )) ||
+ do_nodes $mdts $LCTL set_param -n \
+ lod.$FSNAME-MDT*.max_stripes_per_mdt=$max_stripes_per_mdt
+
prepare_remote_file() {
mkdir $DIR/$tdir/src_dir ||
error "create remote source failed"