From 4c2514f4832801374092f3a48c755248af345566 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 29 Apr 2021 05:30:00 +0800 Subject: [PATCH] LU-14459 mdt: support fixed directory layout User may not want directories split automatically in some cases: * directory migrated. * directory restriped. To support this, an LMV flag LMV_HASH_FLAG_FIXED is added, and it will be set on migrated/restriped directories. NB, if directory is migrated or restriped to a one-stripe directory, it won't be transformed into a plain directory, because this flag needs to be kept. Update sanity 230q. Signed-off-by: Lai Siyao Change-Id: Icd12b2aa34d391e32c3323a8b9c24449ea3e3d0e Reviewed-on: https://review.whamcloud.com/43291 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin --- lustre/include/lustre_lmv.h | 5 ++++ lustre/include/uapi/linux/lustre/lustre_user.h | 5 ++++ lustre/lod/lod_object.c | 24 +++++---------- lustre/mdd/mdd_dir.c | 13 ++++---- lustre/mdt/mdt_handler.c | 5 +++- lustre/mdt/mdt_lib.c | 8 +++-- lustre/mdt/mdt_reint.c | 4 +++ lustre/mdt/mdt_restripe.c | 5 +++- lustre/mdt/mdt_xattr.c | 2 +- lustre/ptlrpc/wiretest.c | 2 ++ lustre/tests/sanity.sh | 41 +++++++++++++++++--------- lustre/utils/liblustreapi.c | 5 ++++ lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 1 + 14 files changed, 80 insertions(+), 41 deletions(-) diff --git a/lustre/include/lustre_lmv.h b/lustre/include/lustre_lmv.h index 6f302b3..abb89a3 100644 --- a/lustre/include/lustre_lmv.h +++ b/lustre/include/lustre_lmv.h @@ -504,4 +504,9 @@ static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv) lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type)); } +static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv) +{ + return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED; +} + #endif diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 495335b..7fa3564 
100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1020,6 +1020,9 @@ static inline bool lmv_is_known_hash_type(__u32 type) (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_CRUSH; } +/* fixed layout, such directories won't split automatically */ +/* NB, update LMV_HASH_FLAG_KNOWN when adding new flag */ +#define LMV_HASH_FLAG_FIXED 0x02000000 #define LMV_HASH_FLAG_MERGE 0x04000000 #define LMV_HASH_FLAG_SPLIT 0x08000000 @@ -1034,6 +1037,8 @@ static inline bool lmv_is_known_hash_type(__u32 type) #define LMV_HASH_FLAG_LAYOUT_CHANGE \ (LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE) +#define LMV_HASH_FLAG_KNOWN 0xfe000000 + /* both SPLIT and MIGRATION are set for directory split */ static inline bool lmv_hash_is_splitting(__u32 hash) { diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index c1f4c32..e2dd864 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2413,9 +2413,11 @@ static int lod_dir_layout_set(const struct lu_env *env, RETURN(-EINVAL); /* adjust hash for dir merge, which may not be set in user command */ - if (lmv_is_merging(lmv) && !lmv->lmv_migrate_hash) - lmv->lmv_merge_hash = - lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern; + if (lmv_is_merging(lmv) && + !(lmv->lmv_migrate_hash & LMV_HASH_TYPE_MASK)) + lmv->lmv_merge_hash |= + lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern & + LMV_HASH_TYPE_MASK; LMV_DEBUG(D_INFO, lmv, "set"); @@ -8266,9 +8268,9 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env, struct lod_object *lo = lod_dt_obj(dt); struct dt_object *next = dt_object_child(dt); struct lmv_user_md *lmu = mlc->mlc_buf.lb_buf; - __u32 final_stripe_count; char *stripe_name = info->lti_key; struct lu_buf *lmv_buf = &info->lti_buf; + __u32 final_stripe_count; struct dt_object *dto; int i; int rc; @@ -8294,9 +8296,6 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env, continue; if (i < final_stripe_count) { - if 
(final_stripe_count == 1) - continue; - rc = lod_sub_declare_xattr_set(env, dto, lmv_buf, XATTR_NAME_LMV, LU_XATTR_REPLACE, th); @@ -8520,7 +8519,8 @@ static int lod_dir_layout_shrink(const struct lu_env *env, lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE); lmv->lmv_stripe_count = cpu_to_le32(final_stripe_count); lmv->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type) & - cpu_to_le32(LMV_HASH_TYPE_MASK); + cpu_to_le32(LMV_HASH_TYPE_MASK | + LMV_HASH_FLAG_FIXED); lmv->lmv_layout_version = cpu_to_le32(lo->ldo_dir_layout_version + 1); lmv->lmv_migrate_offset = 0; @@ -8532,14 +8532,6 @@ static int lod_dir_layout_shrink(const struct lu_env *env, continue; if (i < final_stripe_count) { - /* if only one stripe left, no need to update - * LMV because this stripe will replace master - * object and act as a plain directory. - */ - if (final_stripe_count == 1) - continue; - - rc = lod_fld_lookup(env, lod, lu_object_fid(&dto->do_lu), &mdtidx, &type); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 451fd1d..5a0d54f 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -4100,7 +4100,10 @@ static int mdd_migrate_cmd_check(struct mdd_device *mdd, const struct lu_name *lname) { __u32 lum_stripe_count = lum->lum_stripe_count; - __u32 lmv_hash_type = lmv->lmv_hash_type; + __u32 lum_hash_type = lum->lum_hash_type & + cpu_to_le32(LMV_HASH_TYPE_MASK); + __u32 lmv_hash_type = lmv->lmv_hash_type & + cpu_to_le32(LMV_HASH_TYPE_MASK); if (!lmv_is_sane(lmv)) return -EBADF; @@ -4109,12 +4112,10 @@ static int mdd_migrate_cmd_check(struct mdd_device *mdd, if (!lum_stripe_count) lum_stripe_count = cpu_to_le32(1); - lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION); - /* TODO: check specific MDTs */ if (lum_stripe_count != lmv->lmv_migrate_offset || lum->lum_stripe_offset != lmv->lmv_master_mdt_index || - (lum->lum_hash_type && lum->lum_hash_type != lmv_hash_type)) { + (lum_hash_type && lum_hash_type != lmv_hash_type)) { CERROR("%s: '"DNAME"' migration was 
interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration.\n", mdd2obd_dev(mdd)->obd_name, PNAME(lname), le32_to_cpu(lmv->lmv_master_mdt_index), @@ -4647,7 +4648,7 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (rc) GOTO(stop_trans, rc); - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + if (le32_to_cpu(lmu->lum_stripe_count) == 1 && !lmv_is_fixed(lmv)) { rc = mdd_declare_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname, handle); if (rc) @@ -4670,7 +4671,7 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (rc) GOTO(stop_trans, rc); - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + if (le32_to_cpu(lmu->lum_stripe_count) == 1 && !lmv_is_fixed(lmv)) { rc = mdd_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname, handle); if (rc) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 2f5d16b..6455708 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1419,6 +1419,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, if (mdt_body_has_lov(la, reqbody)) { u32 stripe_count = 1; + bool fixed_layout = false; if (ma->ma_valid & MA_LOV) { LASSERT(ma->ma_lmm_size); @@ -1443,6 +1444,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, repbody->mbo_valid |= (OBD_MD_FLDIREA|OBD_MD_MEA); stripe_count = le32_to_cpu(lmv->lmv_stripe_count); + fixed_layout = lmv_is_fixed(lmv); if (magic == LMV_MAGIC_STRIPE && lmv_is_restriping(lmv)) mdt_restripe_migrate_add(info, o); else if (magic == LMV_MAGIC_V1 && @@ -1474,7 +1476,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, !fid_is_root(mdt_object_fid(o)) && mdt->mdt_enable_dir_auto_split && !o->mot_restriping && - stripe_count < atomic_read(&mdt->mdt_mds_mds_conns) + 1) + stripe_count < atomic_read(&mdt->mdt_mds_mds_conns) + 1 && + !fixed_layout) mdt_auto_split_add(info, o); } else if (S_ISLNK(la->la_mode) && reqbody->mbo_valid & OBD_MD_LINKNAME) { diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 
ca86fcb..f88bce3 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1559,8 +1559,12 @@ static int mdt_migrate_unpack(struct mdt_thread_info *info) RCL_CLIENT); if (rr->rr_eadatalen > 0) { - rr->rr_eadata = req_capsule_client_get(pill, - &RMF_EADATA); + struct lmv_user_md_v1 *lmu; + + lmu = req_capsule_client_get(pill, &RMF_EADATA); + lmu->lum_hash_type |= + cpu_to_le32(LMV_HASH_FLAG_FIXED); + rr->rr_eadata = lmu; spec->u.sp_ea.eadatalen = rr->rr_eadatalen; spec->u.sp_ea.eadata = rr->rr_eadata; spec->sp_cr_flags |= MDS_OPEN_HAS_EA; diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 78fcdbd..6a90950 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -352,6 +352,7 @@ static int mdt_restripe(struct mdt_thread_info *info, struct mdt_device *mdt = info->mti_mdt; struct lu_fid *fid = &info->mti_tmp_fid2; struct ldlm_enqueue_info *einfo = &info->mti_einfo[0]; + struct lmv_user_md *lum = spec->u.sp_ea.eadata; struct lmv_mds_md_v1 *lmv; struct mdt_object *child; struct mdt_lock_handle *lhp; @@ -363,6 +364,9 @@ static int mdt_restripe(struct mdt_thread_info *info, if (!mdt->mdt_enable_dir_restripe) RETURN(-EPERM); + LASSERT(lum); + lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_FIXED); + rc = mdt_version_get_check_save(info, parent, 0); if (rc) RETURN(rc); diff --git a/lustre/mdt/mdt_restripe.c b/lustre/mdt/mdt_restripe.c index 794ca4e..e62f070 100644 --- a/lustre/mdt/mdt_restripe.c +++ b/lustre/mdt/mdt_restripe.c @@ -244,6 +244,8 @@ int mdt_restripe_internal(struct mdt_thread_info *info, lmv->lmv_hash_type |= cpu_to_le32(LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION); + lmv->lmv_hash_type |= lum->lum_hash_type & + cpu_to_le32(LMV_HASH_FLAG_FIXED); lmv->lmv_merge_offset = lum->lum_stripe_count; lmv->lmv_merge_hash = lum->lum_hash_type; lmv->lmv_layout_version = cpu_to_le32(++version); @@ -605,7 +607,8 @@ static int mdt_restripe_migrate(struct mdt_thread_info *info) if ((lmv_is_splitting(lmv) && idx >= 
le32_to_cpu(lmv->lmv_split_offset)) || (lmv_is_merging(lmv) && - le32_to_cpu(lmv->lmv_hash_type) == LMV_HASH_TYPE_CRUSH && + (le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_TYPE_MASK) == + LMV_HASH_TYPE_CRUSH && idx < le32_to_cpu(lmv->lmv_merge_offset))) { /* new stripes doesn't need to migrate sub files in dir * split, neither for target stripes in dir merge if hash type diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index f8d9911..077363d 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -479,7 +479,7 @@ int mdt_dir_layout_update(struct mdt_thread_info *info) } if (lmu->lum_hash_type && - lmu->lum_hash_type != + (lmu->lum_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) != (lmv->lmv_merge_hash & cpu_to_le32(LMV_HASH_TYPE_MASK))) { CERROR("%s: "DFID" merge hash mismatch %u != %u\n", mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index ef54f00..61edecc 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1914,11 +1914,13 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0); BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0); BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff); + BUILD_BUG_ON(LMV_HASH_FLAG_FIXED != 0x02000000); BUILD_BUG_ON(LMV_HASH_FLAG_MERGE != 0x04000000); BUILD_BUG_ON(LMV_HASH_FLAG_SPLIT != 0x08000000); BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000); BUILD_BUG_ON(LMV_HASH_FLAG_BAD_TYPE != 0x20000000); BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000); + BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000); BUILD_BUG_ON(LMV_CRUSH_PG_COUNT != 4096); /* Checks for struct obd_statfs */ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 60416c6..3e9f157 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -19613,15 +19613,15 @@ test_230o() { run_test 230o "dir split" test_230p() { - [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" - [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + (( 
MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.13.52) )) || skip "Need MDS version at least 2.13.52" local mdts=$(comma_list $(mdts_nodes)) local timeout=100 local restripe_status local delta - local i + local c [[ $mds1_FSTYPE == zfs ]] && timeout=300 @@ -19635,33 +19635,37 @@ test_230p() { test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir createmany -m $DIR/$tdir/f 100 || - error "create files under remote dir failed $i" + error "create files under remote dir failed" createmany -d $DIR/$tdir/d 100 || - error "create dirs under remote dir failed $i" + error "create dirs under remote dir failed" - for i in $(seq $((MDSCOUNT - 1)) -1 1); do + for c in $(seq $((MDSCOUNT - 1)) -1 1); do local mdt_hash="crush" do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear >/dev/null" - $LFS setdirstripe -c $i $DIR/$tdir || - error "split -c $i $tdir failed" - [ $i -eq 1 ] && mdt_hash="none" + $LFS setdirstripe -c $c $DIR/$tdir || + error "split -c $c $tdir failed" + if (( MDS1_VERSION >= $(version_code 2.14.51) )); then + mdt_hash="$mdt_hash,fixed" + elif [ $c -eq 1 ]; then + mdt_hash="none" + fi wait_update $HOSTNAME \ "$LFS getdirstripe -H $DIR/$tdir" $mdt_hash $timeout || error "dir merge not finished" delta=$(do_nodes $mdts "lctl get_param -n mdt.*MDT*.md_stats" | awk '/migrate/ {sum += $2} END { print sum }') - echo "$delta migrated when dir merge $((i + 1)) to $i stripes" + echo "$delta migrated when dir merge $((c + 1)) to $c stripes" # delta is around total_files/stripe_count - (( $delta < 200 / i + 4 )) || - error "$delta files migrated >= $((200 / i + 4))" + (( delta < 200 / c + 4 )) || + error "$delta files migrated >= $((200 / c + 4))" done } run_test 230p "dir merge" test_230q() { - [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" - [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + (( MDSCOUNT > 1)) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.13.52) )) || skip "Need MDS version at least 2.13.52" local mdts=$(comma_list 
$(mdts_nodes)) @@ -19721,6 +19725,15 @@ test_230q() { [ $nr_files -eq $total ] || error "total sub files $nr_files != $total" done + + (( MDS1_VERSION >= $(version_code 2.14.51) )) || return 0 + + echo "fixed layout directory won't auto split" + $LFS migrate -m 0 $DIR/$tdir || error "migrate $tdir failed" + wait_update $HOSTNAME "$LFS getdirstripe -H $DIR/$tdir" "crush,fixed" \ + 10 || error "stripe hash $($LFS getdirstripe -H $DIR/$tdir)" + wait_update $HOSTNAME "$LFS getdirstripe -c $DIR/$tdir" 1 10 || + error "stripe count $($LFS getdirstripe -c $DIR/$tdir)" } run_test 230q "dir auto split" diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 42db6d6..1371a12 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -3319,6 +3319,11 @@ void lmv_dump_user_lmm(struct lmv_user_md *lum, char *pool_name, llapi_printf(LLAPI_MSG_NORMAL, ",bad_type"); if (flags & LMV_HASH_FLAG_LOST_LMV) llapi_printf(LLAPI_MSG_NORMAL, ",lost_lmv"); + if (flags & LMV_HASH_FLAG_FIXED) + llapi_printf(LLAPI_MSG_NORMAL, ",fixed"); + if (flags & ~LMV_HASH_FLAG_KNOWN) + llapi_printf(LLAPI_MSG_NORMAL, ",unknown_%04x", + flags & ~LMV_HASH_FLAG_KNOWN); if (verbose & VERBOSE_HASH_TYPE && !yaml) separator = " "; diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 6f77d1d..983eb54 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -883,6 +883,7 @@ check_lmv_mds_md_v1(void) CHECK_CDEFINE(LMV_MAGIC_V1); CHECK_CDEFINE(LMV_MAGIC_STRIPE); CHECK_CDEFINE(LMV_HASH_TYPE_MASK); + CHECK_CDEFINE(LMV_HASH_FLAG_FIXED); CHECK_CDEFINE(LMV_HASH_FLAG_MERGE); CHECK_CDEFINE(LMV_HASH_FLAG_SPLIT); CHECK_CDEFINE(LMV_HASH_FLAG_LOST_LMV); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index c38b8b43..b67db59 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1939,6 +1939,7 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0); BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0); 
BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff); + BUILD_BUG_ON(LMV_HASH_FLAG_FIXED != 0x02000000); BUILD_BUG_ON(LMV_HASH_FLAG_MERGE != 0x04000000); BUILD_BUG_ON(LMV_HASH_FLAG_SPLIT != 0x08000000); BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000); -- 1.8.3.1