Whamcloud - gitweb
LU-16717 mdt: resume dir migration with bad_type 97/50797/4
author Lai Siyao <lai.siyao@whamcloud.com>
Fri, 28 Apr 2023 09:22:03 +0000 (05:22 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 31 May 2023 19:01:53 +0000 (19:01 +0000)
LFSCK may set the hash type to "none,bad_type" upon migration failure;
set it back to "fnv_1a_64,migrating,bad_type,fixed" to allow
migration resumption. fnv_1a_64 is used because it's the default hash
type: the hash type given in the original migration command is no
longer known, so just try with the default.

LFSCK now only adds the "bad_type" flag to such a directory, so that
such a migration can always be resumed in the future.

Add sanity 230z.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I19606aefcb9115e6724843785aea89a1c380e23f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50797
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lfsck/lfsck_striped_dir.c
lustre/mdt/mdt_reint.c
lustre/tests/sanity.sh

index 493b42b..b0c77dc 100644 (file)
@@ -1567,7 +1567,11 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env,
                GOTO(log, rc = 1);
 
        *lmv2 = llmv->ll_lmv;
-       lmv2->lmv_hash_type = LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE;
+       /* only set BAD_TYPE here, do not clear hash type or MIGRATION flag,
+        * so that user can resume dir migration if this is caused by dir
+        * migration failure.
+        */
+       lmv2->lmv_hash_type |= LMV_HASH_FLAG_BAD_TYPE;
        rc = lfsck_namespace_set_lmv_master(env, com, parent, lmv2,
                                            lfsck_dto2fid(shard),
                                            llmv->ll_lmv.lmv_master_mdt_index,
index 67bf2f4..e0123de 100644 (file)
@@ -2114,6 +2114,17 @@ close:
        return rc ?: rc2;
 }
 
+/*
+ * LFSCK used to clear hash type and MIGRATION flag upon migration failure.
+ *
+ * Check whether \a lmv looks like it was left in that state, i.e. all of
+ * the following hold:
+ *  - lmv_hash_type is exactly LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE
+ *    (no other type or flag bits set);
+ *  - lmv_migrate_hash passes lmv_is_known_hash_type();
+ *  - lmv_migrate_offset is strictly between 0 and lmv_stripe_count.
+ *
+ * \param[in] lmv	LMV to check; every field is read via le32_to_cpu()
+ *
+ * \retval		true if all of the above conditions are met
+ * \retval		false otherwise
+ */
+static inline bool lmv_is_failed_migration(const struct lmv_mds_md_v1 *lmv)
+{
+       return le32_to_cpu(lmv->lmv_hash_type) ==
+               (LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE) &&
+              lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_migrate_hash)) &&
+              le32_to_cpu(lmv->lmv_migrate_offset) > 0 &&
+              le32_to_cpu(lmv->lmv_migrate_offset) <
+               le32_to_cpu(lmv->lmv_stripe_count);
+}
+
 /*
  * migrate file in below steps:
  *  1. lock source and target stripes
@@ -2335,6 +2346,27 @@ lock_parent:
                else if (lmv_is_restriping(&ma->ma_lmv->lmv_md_v1))
                        /* race with restripe/auto-split */
                        GOTO(unlock_source, rc = -EBUSY);
+               else if (lmv_is_failed_migration(&ma->ma_lmv->lmv_md_v1)) {
+                       struct lu_buf *buf = &info->mti_buf;
+                       struct lmv_mds_md_v1 *lmv = &ma->ma_lmv->lmv_md_v1;
+                       __u32 version = le32_to_cpu(lmv->lmv_layout_version);
+
+                       /* migration failed before, and LFSCK cleared hash type
+                        * and flags, fake it to resume migration.
+                        */
+                       lmv->lmv_hash_type =
+                               cpu_to_le32(LMV_HASH_TYPE_FNV_1A_64 |
+                                           LMV_HASH_FLAG_MIGRATION |
+                                           LMV_HASH_FLAG_BAD_TYPE |
+                                           LMV_HASH_FLAG_FIXED);
+                       lmv->lmv_layout_version = cpu_to_le32(version + 1);
+                       buf->lb_buf = lmv;
+                       buf->lb_len = sizeof(*lmv);
+                       rc = mo_xattr_set(env, mdt_object_child(sobj), buf,
+                                         XATTR_NAME_LMV, LU_XATTR_REPLACE);
+                       mo_invalidate(env, mdt_object_child(sobj));
+                       GOTO(unlock_source, rc = -EALREADY);
+               }
        }
 
        /* if migration HSM is allowed */
index 0e48b7e..a54ff02 100755 (executable)
@@ -21896,6 +21896,36 @@ test_230y() {
 }
 run_test 230y "unlink dir with bad hash type"
 
+test_230z() {
+       (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.15.55.45) )) ||
+               skip "Need MDS version at least 2.15.55.45"
+
+       local pid
+
+       test_mkdir -c -1 $DIR/$tdir || error "mkdir $tdir failed"
+       $LFS getdirstripe $DIR/$tdir
+       createmany -d $DIR/$tdir/d 100 || error "createmany failed"
+       $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir &
+       pid=$!
+       sleep 1
+
+       #OBD_FAIL_MIGRATE_BAD_HASH      0x1802
+       do_facet mds2 lctl set_param fail_loc=0x1802
+
+       wait $pid
+       do_facet mds2 lctl set_param fail_loc=0
+       $LFS getdirstripe $DIR/$tdir
+
+       # resume migration: rerun the same command, which must now succeed
+       $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir ||
+               error "resume migration failed"
+       $LFS getdirstripe $DIR/$tdir
+       [ $($LFS getdirstripe -H $DIR/$tdir) == "fnv_1a_64,fixed" ] ||
+               error "migration is not finished"
+}
+run_test 230z "resume dir migration with bad hash type"
+
 test_231a()
 {
        # For simplicity this test assumes that max_pages_per_rpc