From 3ae36f4c0a5235c52f0a8003acf7986086528ce2 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 29 Apr 2021 05:30:00 +0800 Subject: [PATCH] LU-14459 mdt: support fixed directory layout User may not want directories split automatically in some cases: * directory migrated. * directory restriped. To support this, an LMV flag LMV_HASH_FLAG_FIXED is added, and it will be set on migrated/restriped directories. NB, if directory is migrated or restriped to a one-stripe directory, it won't be transformed into a plain directory, because this flag needs to be kept. Update sanity 230p and 230q. Lustre-change: https://review.whamcloud.com/43291 Lustre-commit: 4c2514f4832801374092f3a48c755248af345566 Signed-off-by: Lai Siyao Change-Id: Icd12b2aa34d391e32c3323a8b9c24449ea3e3d0e Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/44459 Tested-by: jenkins Tested-by: Maloo --- lustre/include/lustre_lmv.h | 5 +++ lustre/include/uapi/linux/lustre/lustre_user.h | 5 +++ lustre/lod/lod_object.c | 24 +++++--------- lustre/mdd/mdd_dir.c | 13 ++++---- lustre/mdt/mdt_handler.c | 5 ++- lustre/mdt/mdt_lib.c | 8 +++-- lustre/mdt/mdt_reint.c | 4 +++ lustre/mdt/mdt_restripe.c | 5 ++- lustre/mdt/mdt_xattr.c | 2 +- lustre/ptlrpc/wiretest.c | 2 ++ lustre/tests/sanity.sh | 44 ++++++++++++++++---------- lustre/utils/liblustreapi.c | 5 +++ lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 1 + 14 files changed, 81 insertions(+), 43 deletions(-) diff --git a/lustre/include/lustre_lmv.h b/lustre/include/lustre_lmv.h index 811da36..2637a6d 100644 --- a/lustre/include/lustre_lmv.h +++ b/lustre/include/lustre_lmv.h @@ -489,4 +489,9 @@ static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv) lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type)); } +static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv) +{ + return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED; +} + #endif diff --git 
a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 6a9eeb2..c9e6058 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1029,6 +1029,9 @@ static inline bool lmv_is_known_hash_type(__u32 type) (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_CRUSH; } +/* fixed layout, such directories won't split automatically */ +/* NB, update LMV_HASH_FLAG_KNOWN when adding new flag */ +#define LMV_HASH_FLAG_FIXED 0x02000000 #define LMV_HASH_FLAG_MERGE 0x04000000 #define LMV_HASH_FLAG_SPLIT 0x08000000 @@ -1043,6 +1046,8 @@ static inline bool lmv_is_known_hash_type(__u32 type) #define LMV_HASH_FLAG_LAYOUT_CHANGE \ (LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE) +#define LMV_HASH_FLAG_KNOWN 0xfe000000 + /* both SPLIT and MIGRATION are set for directory split */ static inline bool lmv_hash_is_splitting(__u32 hash) { diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index d0e0aca..16ea21e 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2411,9 +2411,11 @@ static int lod_dir_layout_set(const struct lu_env *env, RETURN(-EINVAL); /* adjust hash for dir merge, which may not be set in user command */ - if (lmv_is_merging(lmv) && !lmv->lmv_migrate_hash) - lmv->lmv_merge_hash = - lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern; + if (lmv_is_merging(lmv) && + !(lmv->lmv_migrate_hash & LMV_HASH_TYPE_MASK)) + lmv->lmv_merge_hash |= + lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern & + LMV_HASH_TYPE_MASK; LMV_DEBUG(D_INFO, lmv, "set"); @@ -8231,9 +8233,9 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env, struct lod_object *lo = lod_dt_obj(dt); struct dt_object *next = dt_object_child(dt); struct lmv_user_md *lmu = mlc->mlc_buf.lb_buf; - __u32 final_stripe_count; char *stripe_name = info->lti_key; struct lu_buf *lmv_buf = &info->lti_buf; + __u32 final_stripe_count; struct dt_object *dto; int i; int rc; @@ -8259,9 
+8261,6 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env, continue; if (i < final_stripe_count) { - if (final_stripe_count == 1) - continue; - rc = lod_sub_declare_xattr_set(env, dto, lmv_buf, XATTR_NAME_LMV, LU_XATTR_REPLACE, th); @@ -8485,7 +8484,8 @@ static int lod_dir_layout_shrink(const struct lu_env *env, lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE); lmv->lmv_stripe_count = cpu_to_le32(final_stripe_count); lmv->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type) & - cpu_to_le32(LMV_HASH_TYPE_MASK); + cpu_to_le32(LMV_HASH_TYPE_MASK | + LMV_HASH_FLAG_FIXED); lmv->lmv_layout_version = cpu_to_le32(lo->ldo_dir_layout_version + 1); lmv->lmv_migrate_offset = 0; @@ -8497,14 +8497,6 @@ static int lod_dir_layout_shrink(const struct lu_env *env, continue; if (i < final_stripe_count) { - /* if only one stripe left, no need to update - * LMV because this stripe will replace master - * object and act as a plain directory. - */ - if (final_stripe_count == 1) - continue; - - rc = lod_fld_lookup(env, lod, lu_object_fid(&dto->do_lu), &mdtidx, &type); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 1dcff8a..d1c15d0 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -4091,7 +4091,10 @@ static int mdd_migrate_cmd_check(struct mdd_device *mdd, const struct lu_name *lname) { __u32 lum_stripe_count = lum->lum_stripe_count; - __u32 lmv_hash_type = lmv->lmv_hash_type; + __u32 lum_hash_type = lum->lum_hash_type & + cpu_to_le32(LMV_HASH_TYPE_MASK); + __u32 lmv_hash_type = lmv->lmv_hash_type & + cpu_to_le32(LMV_HASH_TYPE_MASK); char *mdt_hash_name[] = { "none", LMV_HASH_NAME_ALL_CHARS, LMV_HASH_NAME_FNV_1A_64, @@ -4105,12 +4108,10 @@ static int mdd_migrate_cmd_check(struct mdd_device *mdd, if (!lum_stripe_count) lum_stripe_count = cpu_to_le32(1); - lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION); - /* TODO: check specific MDTs */ if (lum_stripe_count != lmv->lmv_migrate_offset || lum->lum_stripe_offset != lmv->lmv_master_mdt_index 
|| - (lum->lum_hash_type && lum->lum_hash_type != lmv_hash_type)) { + (lum_hash_type && lum_hash_type != lmv_hash_type)) { CERROR("%s: '"DNAME"' migration was interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration.\n", mdd2obd_dev(mdd)->obd_name, PNAME(lname), le32_to_cpu(lmv->lmv_master_mdt_index), @@ -4643,7 +4644,7 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (rc) GOTO(stop_trans, rc); - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + if (le32_to_cpu(lmu->lum_stripe_count) == 1 && !lmv_is_fixed(lmv)) { rc = mdd_declare_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname, handle); if (rc) @@ -4666,7 +4667,7 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (rc) GOTO(stop_trans, rc); - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + if (le32_to_cpu(lmu->lum_stripe_count) == 1 && !lmv_is_fixed(lmv)) { rc = mdd_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname, handle); if (rc) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index cb1c385..757d7cf 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1460,6 +1460,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, if (mdt_body_has_lov(la, reqbody)) { u32 stripe_count = 1; + bool fixed_layout = false; if (ma->ma_valid & MA_LOV) { LASSERT(ma->ma_lmm_size); @@ -1484,6 +1485,7 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, repbody->mbo_valid |= (OBD_MD_FLDIREA|OBD_MD_MEA); stripe_count = le32_to_cpu(lmv->lmv_stripe_count); + fixed_layout = lmv_is_fixed(lmv); if (magic == LMV_MAGIC_STRIPE && lmv_is_restriping(lmv)) mdt_restripe_migrate_add(info, o); else if (magic == LMV_MAGIC_V1 && @@ -1515,7 +1517,8 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, !fid_is_root(mdt_object_fid(o)) && mdt->mdt_enable_dir_auto_split && !o->mot_restriping && - stripe_count < atomic_read(&mdt->mdt_mds_mds_conns) + 1) + stripe_count < atomic_read(&mdt->mdt_mds_mds_conns) + 1 && + !fixed_layout) 
mdt_auto_split_add(info, o); } else if (S_ISLNK(la->la_mode) && reqbody->mbo_valid & OBD_MD_LINKNAME) { diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index da823336..2374d4f 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1549,8 +1549,12 @@ static int mdt_migrate_unpack(struct mdt_thread_info *info) RCL_CLIENT); if (rr->rr_eadatalen > 0) { - rr->rr_eadata = req_capsule_client_get(pill, - &RMF_EADATA); + struct lmv_user_md_v1 *lmu; + + lmu = req_capsule_client_get(pill, &RMF_EADATA); + lmu->lum_hash_type |= + cpu_to_le32(LMV_HASH_FLAG_FIXED); + rr->rr_eadata = lmu; spec->u.sp_ea.eadatalen = rr->rr_eadatalen; spec->u.sp_ea.eadata = rr->rr_eadata; spec->sp_cr_flags |= MDS_OPEN_HAS_EA; diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 03d0ff0..ed5ef78 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -353,6 +353,7 @@ static int mdt_restripe(struct mdt_thread_info *info, struct mdt_device *mdt = info->mti_mdt; struct lu_fid *fid = &info->mti_tmp_fid2; struct ldlm_enqueue_info *einfo = &info->mti_einfo[0]; + struct lmv_user_md *lum = spec->u.sp_ea.eadata; struct lmv_mds_md_v1 *lmv; struct mdt_object *child; struct mdt_lock_handle *lhp; @@ -364,6 +365,9 @@ static int mdt_restripe(struct mdt_thread_info *info, if (!mdt->mdt_enable_dir_restripe) RETURN(-EPERM); + LASSERT(lum); + lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_FIXED); + rc = mdt_version_get_check_save(info, parent, 0); if (rc) RETURN(rc); diff --git a/lustre/mdt/mdt_restripe.c b/lustre/mdt/mdt_restripe.c index 70b0e91..209a5b9 100644 --- a/lustre/mdt/mdt_restripe.c +++ b/lustre/mdt/mdt_restripe.c @@ -244,6 +244,8 @@ int mdt_restripe_internal(struct mdt_thread_info *info, lmv->lmv_hash_type |= cpu_to_le32(LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION); + lmv->lmv_hash_type |= lum->lum_hash_type & + cpu_to_le32(LMV_HASH_FLAG_FIXED); lmv->lmv_merge_offset = lum->lum_stripe_count; lmv->lmv_merge_hash = lum->lum_hash_type; lmv->lmv_layout_version = 
cpu_to_le32(++version); @@ -605,7 +607,8 @@ static int mdt_restripe_migrate(struct mdt_thread_info *info) if ((lmv_is_splitting(lmv) && idx >= le32_to_cpu(lmv->lmv_split_offset)) || (lmv_is_merging(lmv) && - le32_to_cpu(lmv->lmv_hash_type) == LMV_HASH_TYPE_CRUSH && + (le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_TYPE_MASK) == + LMV_HASH_TYPE_CRUSH && idx < le32_to_cpu(lmv->lmv_merge_offset))) { /* new stripes doesn't need to migrate sub files in dir * split, neither for target stripes in dir merge if hash type diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index f828cae..cfd04a2 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -480,7 +480,7 @@ int mdt_dir_layout_update(struct mdt_thread_info *info) } if (lmu->lum_hash_type && - lmu->lum_hash_type != + (lmu->lum_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) != (lmv->lmv_merge_hash & cpu_to_le32(LMV_HASH_TYPE_MASK))) { CERROR("%s: "DFID" merge hash mismatch %u != %u\n", mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 3b042f9..5927929 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1917,11 +1917,13 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0); BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0); BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff); + BUILD_BUG_ON(LMV_HASH_FLAG_FIXED != 0x02000000); BUILD_BUG_ON(LMV_HASH_FLAG_MERGE != 0x04000000); BUILD_BUG_ON(LMV_HASH_FLAG_SPLIT != 0x08000000); BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000); BUILD_BUG_ON(LMV_HASH_FLAG_BAD_TYPE != 0x20000000); BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000); + BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000); BUILD_BUG_ON(LMV_CRUSH_PG_COUNT != 4096); /* Checks for struct obd_statfs */ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index e618c8c..ff2cbd1 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -18872,8 +18872,8 @@ test_230o() { 
run_test 230o "dir split" test_230p() { - [ $MDSCOUNT -ge 2 ] || skip "needs >= 2 MDTs" - [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.13.52) )) || skip "Need MDS version at least 2.13.52" local mdts=$(comma_list $(mdts_nodes)) @@ -18881,8 +18881,7 @@ test_230p() { local restripe_status local delta - local i - local j + local c [[ $(facet_fstype mds1) == zfs ]] && timeout=300 @@ -18896,33 +18895,37 @@ test_230p() { test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir createmany -m $DIR/$tdir/f 100 || - error "create files under remote dir failed $i" + error "create files under remote dir failed" createmany -d $DIR/$tdir/d 100 || - error "create dirs under remote dir failed $i" + error "create dirs under remote dir failed" - for i in $(seq $((MDSCOUNT - 1)) -1 1); do + for c in $(seq $((MDSCOUNT - 1)) -1 1); do local mdt_hash="crush" - do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear > /dev/null" - $LFS setdirstripe -c $i $DIR/$tdir || - error "split -c $i $tdir failed" - [ $i -eq 1 ] && mdt_hash="none" + do_nodes $mdts "$LCTL set_param mdt.*.md_stats=clear >/dev/null" + $LFS setdirstripe -c $c $DIR/$tdir || + error "split -c $c $tdir failed" + if (( MDS1_VERSION >= $(version_code 2.14.0.6) )); then + mdt_hash="$mdt_hash,fixed" + elif [ $c -eq 1 ]; then + mdt_hash="none" + fi wait_update $HOSTNAME \ "$LFS getdirstripe -H $DIR/$tdir" $mdt_hash $timeout || error "dir merge not finished" delta=$(do_nodes $mdts "lctl get_param -n mdt.*MDT*.md_stats" | awk '/migrate/ {sum += $2} END { print sum }') - echo "$delta files migrated when dir merge from $((i + 1)) to $i stripes" + echo "$delta migrated when dir merge $((c + 1)) to $c stripes" # delta is around total_files/stripe_count - [ $delta -lt $((200 / i)) ] || - error "$delta files migrated" + (( delta < 200 / c + 4 )) || + error "$delta files migrated >= $((200 / c + 4))" done } run_test 230p "dir merge" test_230q() { - [ $MDSCOUNT -ge 
2 ] || skip "needs >= 2 MDTs" - [ $MDS1_VERSION -ge $(version_code 2.13.52) ] || + (( MDSCOUNT > 1)) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.13.52) )) || skip "Need MDS version at least 2.13.52" local mdts=$(comma_list $(mdts_nodes)) @@ -18980,6 +18983,15 @@ test_230q() { [ $nr_files -eq $total ] || error "total sub files $nr_files != $total" done + + (( MDS1_VERSION >= $(version_code 2.14.0.6) )) || return 0 + + echo "fixed layout directory won't auto split" + $LFS migrate -m 0 $DIR/$tdir || error "migrate $tdir failed" + wait_update $HOSTNAME "$LFS getdirstripe -H $DIR/$tdir" "crush,fixed" \ + 10 || error "stripe hash $($LFS getdirstripe -H $DIR/$tdir)" + wait_update $HOSTNAME "$LFS getdirstripe -c $DIR/$tdir" 1 10 || + error "stripe count $($LFS getdirstripe -c $DIR/$tdir)" } run_test 230q "dir auto split" diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index dd6bae1..ddcf22b 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -3230,6 +3230,11 @@ void lmv_dump_user_lmm(struct lmv_user_md *lum, char *pool_name, llapi_printf(LLAPI_MSG_NORMAL, ",bad_type"); if (flags & LMV_HASH_FLAG_LOST_LMV) llapi_printf(LLAPI_MSG_NORMAL, ",lost_lmv"); + if (flags & LMV_HASH_FLAG_FIXED) + llapi_printf(LLAPI_MSG_NORMAL, ",fixed"); + if (flags & ~LMV_HASH_FLAG_KNOWN) + llapi_printf(LLAPI_MSG_NORMAL, ",unknown_%04x", + flags & ~LMV_HASH_FLAG_KNOWN); if (verbose & VERBOSE_HASH_TYPE && !yaml) separator = " "; diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 85016ff..519f648 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -931,6 +931,7 @@ check_lmv_mds_md_v1(void) CHECK_CDEFINE(LMV_MAGIC_V1); CHECK_CDEFINE(LMV_MAGIC_STRIPE); CHECK_CDEFINE(LMV_HASH_TYPE_MASK); + CHECK_CDEFINE(LMV_HASH_FLAG_FIXED); CHECK_CDEFINE(LMV_HASH_FLAG_MERGE); CHECK_CDEFINE(LMV_HASH_FLAG_SPLIT); CHECK_CDEFINE(LMV_HASH_FLAG_LOST_LMV); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c 
index 56e8a2c..e104642 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1953,6 +1953,7 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0); BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0); BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff); + BUILD_BUG_ON(LMV_HASH_FLAG_FIXED != 0x02000000); BUILD_BUG_ON(LMV_HASH_FLAG_MERGE != 0x04000000); BUILD_BUG_ON(LMV_HASH_FLAG_SPLIT != 0x08000000); BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000); -- 1.8.3.1