Whamcloud - gitweb
LU-16717 mdt: resume dir migration with bad_type 43/51243/2
author Lai Siyao <lai.siyao@whamcloud.com>
Fri, 28 Apr 2023 09:22:03 +0000 (05:22 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 2 Aug 2023 06:19:19 +0000 (06:19 +0000)
LFSCK may set the hash type to "none,bad_type" upon migration failure;
set it back to "fnv_1a_64,migrating,bad_type,fixed" to allow the
migration to be resumed. fnv_1a_64 is used because it is the default
hash type: since the hash type given in the original migration command
is no longer known, just try with it.

LFSCK now only adds the "bad_type" flag on such a directory, so that
such a migration can always be resumed in the future.

Add sanity 230z.

Lustre-change: https://review.whamcloud.com/50797
Lustre-commit: 151650e468ab423e831c30d635ea380e0434a122

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I19606aefcb9115e6724843785aea89a1c380e23f
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51243
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lfsck/lfsck_striped_dir.c
lustre/mdt/mdt_reint.c
lustre/tests/sanity.sh

index 63a8754..2aa2c7d 100644 (file)
@@ -1567,7 +1567,11 @@ int lfsck_namespace_repair_bad_name_hash(const struct lu_env *env,
                GOTO(log, rc = 1);
 
        *lmv2 = llmv->ll_lmv;
-       lmv2->lmv_hash_type = LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE;
+       /* only set BAD_TYPE here, do not clear hash type or MIGRATION flag,
+        * so that user can resume dir migration if this is caused by dir
+        * migration failure.
+        */
+       lmv2->lmv_hash_type |= LMV_HASH_FLAG_BAD_TYPE;
        rc = lfsck_namespace_set_lmv_master(env, com, parent, lmv2,
                                            lfsck_dto2fid(shard),
                                            llmv->ll_lmv.lmv_master_mdt_index,
index a2ddbbc..2394887 100644 (file)
@@ -2207,6 +2207,17 @@ close:
        return rc ?: rc2;
 }
 
+/*
+ * LFSCK used to clear hash type and MIGRATION flag upon migration failure.
+ *
+ * Check whether \a lmv looks like the leftover of such a migration: the hash
+ * type is exactly LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE, the migrate
+ * hash is a known hash type, and the migrate offset is greater than zero and
+ * less than the stripe count.
+ *
+ * All fields are converted with le32_to_cpu() before being examined.
+ *
+ * \param[in] lmv      LMV to check
+ *
+ * \retval             true if all of the above conditions hold
+ * \retval             false otherwise
+ */
+static inline bool lmv_is_failed_migration(const struct lmv_mds_md_v1 *lmv)
+{
+       return le32_to_cpu(lmv->lmv_hash_type) ==
+               (LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE) &&
+              lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_migrate_hash)) &&
+              le32_to_cpu(lmv->lmv_migrate_offset) > 0 &&
+              le32_to_cpu(lmv->lmv_migrate_offset) <
+               le32_to_cpu(lmv->lmv_stripe_count);
+}
+
 /*
  * migrate file in below steps:
  *  1. lock parent and its stripes
@@ -2384,6 +2395,27 @@ lock_parent:
                if ((ma->ma_valid & MA_LMV) &&
                    lmv_is_restriping(&ma->ma_lmv->lmv_md_v1))
                        GOTO(unlock_links, rc = -EBUSY);
+               else if (lmv_is_failed_migration(&ma->ma_lmv->lmv_md_v1)) {
+                       struct lu_buf *buf = &info->mti_buf;
+                       struct lmv_mds_md_v1 *lmv = &ma->ma_lmv->lmv_md_v1;
+                       __u32 version = le32_to_cpu(lmv->lmv_layout_version);
+
+                       /* migration failed before, and LFSCK cleared hash type
+                        * and flags, fake it to resume migration.
+                        */
+                       lmv->lmv_hash_type =
+                               cpu_to_le32(LMV_HASH_TYPE_FNV_1A_64 |
+                                           LMV_HASH_FLAG_MIGRATION |
+                                           LMV_HASH_FLAG_BAD_TYPE |
+                                           LMV_HASH_FLAG_FIXED);
+                       lmv->lmv_layout_version = cpu_to_le32(version + 1);
+                       buf->lb_buf = lmv;
+                       buf->lb_len = sizeof(*lmv);
+                       rc = mo_xattr_set(env, mdt_object_child(sobj), buf,
+                                         XATTR_NAME_LMV, LU_XATTR_REPLACE);
+                       mo_invalidate(env, mdt_object_child(sobj));
+                       GOTO(unlock_links, rc = -EALREADY);
+               }
        }
 
        /* if migration HSM is allowed */
index 6cd539a..93f8c3b 100755 (executable)
@@ -20661,6 +20661,36 @@ test_230y() {
 }
 run_test 230y "unlink dir with bad hash type"
 
+test_230z() {
+       (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.15.3) )) ||
+               skip "Need MDS version at least 2.15.3"
+
+       local pid
+
+       test_mkdir -c -1 $DIR/$tdir || error "mkdir $tdir failed"
+       $LFS getdirstripe $DIR/$tdir
+       createmany -d $DIR/$tdir/d 100 || error "createmany failed"
+       $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir &
+       pid=$!
+       sleep 1
+
+       # OBD_FAIL_MIGRATE_BAD_HASH 0x1802, set on mds2 before waiting for the migrate
+       do_facet mds2 lctl set_param fail_loc=0x1802
+
+       wait $pid
+       do_facet mds2 lctl set_param fail_loc=0
+       $LFS getdirstripe $DIR/$tdir
+
+       # resume migration using the same arguments as the interrupted command
+       $LFS migrate -m 1 -c 2 -H fnv_1a_64 $DIR/$tdir ||
+               error "resume migration failed"
+       $LFS getdirstripe $DIR/$tdir
+       [ $($LFS getdirstripe -H $DIR/$tdir) == "fnv_1a_64,fixed" ] ||
+               error "migration is not finished"
+}
+run_test 230z "resume dir migration with bad hash type"
+
 test_231a()
 {
        # For simplicity this test assumes that max_pages_per_rpc