From 15888818fd038d83ce9a9fe3e3c7ef3e499c9346 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Fri, 28 Apr 2023 05:22:03 -0400 Subject: [PATCH] LU-16717 mdt: resume dir migration with bad_type LFSCK may set the hash type to "none,bad_type" upon migration failure; set it back to "fnv_1a_64,migrating,bad_type,fixed" to allow migration resumption. fnv_1a_64 is used because it's the default hash type, and since the hash type given in the original migration command is no longer known, just try with it. LFSCK now only adds the "bad_type" flag on such a directory, so that such a migration can always be resumed in the future. Add sanity 230z. Lustre-change: https://review.whamcloud.com/50797 Lustre-commit: 151650e468ab423e831c30d635ea380e0434a122 Signed-off-by: Lai Siyao Change-Id: I19606aefcb9115e6724843785aea89a1c380e23f Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51230 Tested-by: jenkins Tested-by: Andreas Dilger --- lustre/lfsck/lfsck_striped_dir.c | 6 +++++- lustre/mdt/mdt_reint.c | 32 ++++++++++++++++++++++++++++++++ lustre/tests/sanity.sh | 30 ++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/lustre/lfsck/lfsck_striped_dir.c b/lustre/lfsck/lfsck_striped_dir.c index 90fb15e..df01fdc 100644 --- a/lustre/lfsck/lfsck_striped_dir.c +++ b/lustre/lfsck/lfsck_striped_dir.c @@ -1567,7 +1567,11 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env, GOTO(log, rc = 1); *lmv2 = llmv->ll_lmv; - lmv2->lmv_hash_type = LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE; + /* only set BAD_TYPE here, do not clear hash type or MIGRATION flag, + * so that user can resume dir migration if this is caused by dir + * migration failure. 
+ */ + lmv2->lmv_hash_type |= LMV_HASH_FLAG_BAD_TYPE; rc = lfsck_namespace_set_lmv_master(env, com, parent, lmv2, lfsck_dto2fid(shard), llmv->ll_lmv.lmv_master_mdt_index, diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index c8af826..86d5333 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2230,6 +2230,17 @@ close: return rc ?: rc2; } +/* LFSCK used to clear hash type and MIGRATION flag upon migration failure */ +static inline bool lmv_is_failed_migration(const struct lmv_mds_md_v1 *lmv) +{ + return le32_to_cpu(lmv->lmv_hash_type) == + (LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE) && + lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_migrate_hash)) && + le32_to_cpu(lmv->lmv_migrate_offset) > 0 && + le32_to_cpu(lmv->lmv_migrate_offset) < + le32_to_cpu(lmv->lmv_stripe_count); +} + /* * migrate file in below steps: * 1. lock parent and its stripes @@ -2411,6 +2422,27 @@ lock_parent: if ((ma->ma_valid & MA_LMV) && lmv_is_restriping(&ma->ma_lmv->lmv_md_v1)) GOTO(unlock_links, rc = -EBUSY); + else if (lmv_is_failed_migration(&ma->ma_lmv->lmv_md_v1)) { + struct lu_buf *buf = &info->mti_buf; + struct lmv_mds_md_v1 *lmv = &ma->ma_lmv->lmv_md_v1; + __u32 version = le32_to_cpu(lmv->lmv_layout_version); + + /* migration failed before, and LFSCK cleared hash type + * and flags, fake it to resume migration. 
+ */ + lmv->lmv_hash_type = + cpu_to_le32(LMV_HASH_TYPE_FNV_1A_64 | + LMV_HASH_FLAG_MIGRATION | + LMV_HASH_FLAG_BAD_TYPE | + LMV_HASH_FLAG_FIXED); + lmv->lmv_layout_version = cpu_to_le32(version + 1); + buf->lb_buf = lmv; + buf->lb_len = sizeof(*lmv); + rc = mo_xattr_set(env, mdt_object_child(sobj), buf, + XATTR_NAME_LMV, LU_XATTR_REPLACE); + mo_invalidate(env, mdt_object_child(sobj)); + GOTO(unlock_links, rc = -EALREADY); + } } /* if migration HSM is allowed */ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b9b2aa1..32d9924 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -20921,6 +20921,36 @@ test_230y() { } run_test 230y "unlink dir with bad hash type" +test_230z() { + (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs" + (( MDS1_VERSION >= $(version_code 2.14.0.88) )) || + skip "Need MDS version at least 2.14.0.88" + + local pid + + test_mkdir -c -1 $DIR/$tdir || error "mkdir $tdir failed" + $LFS getdirstripe $DIR/$tdir + createmany -d $DIR/$tdir/d 100 || error "createmany failed" + $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir & + pid=$! + sleep 1 + + #OBD_FAIL_MIGRATE_BAD_HASH 0x1802 + do_facet mds2 lctl set_param fail_loc=0x1802 + + wait $pid + do_facet mds2 lctl set_param fail_loc=0 + $LFS getdirstripe $DIR/$tdir + + # resume migration + $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir || + error "resume migration failed" + $LFS getdirstripe $DIR/$tdir + [ $($LFS getdirstripe -H $DIR/$tdir) == "fnv_1a_64,fixed" ] || + error "migration is not finished" +} +run_test 230z "resume dir migration with bad hash type" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc -- 1.8.3.1