Whamcloud - gitweb
LU-18869 dne: mdt migrate to check layout 85/58585/8
authorAlexander Zarochentsev <alexander.zarochentsev@hpe.com>
Sat, 29 Mar 2025 09:53:41 +0000 (09:53 +0000)
committerOleg Drokin <green@whamcloud.com>
Wed, 16 Apr 2025 20:41:41 +0000 (20:41 +0000)
MDT migrate doesn't check whether the requested dir
layout has already been applied or whether the old
layout conforms to the new one.

Adding a check for that avoids unnecessary
fs operations, especially for repeated migration attempts
after a migration failure.

The following condition and the premature exit from
the migrate procedure:

if (spobj == tpobj)
GOTO(out, rc = -EALREADY);

was removed from mdd_migrate_object() because it
prevented a repeat of the same migrate command from
trying to migrate objects which had been skipped in the
previous migrate attempt as they were open or had
LinkEA overflow.

lod_striping_from_default() should use the named
constant LMV_OFFSET_DEFAULT instead of -1.

HPE-bug-id: LUS-12657
Signed-off-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Change-Id: Iaacffcc1ecf34c5e01cba57a44c5e3ade97d936a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58585
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/dt_object.h
lustre/include/md_object.h
lustre/lod/lod_object.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_object.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_reint.c
lustre/tests/sanity.sh

index a5228ce..d1c7ea0 100644 (file)
@@ -1071,7 +1071,9 @@ struct dt_object_operations {
                                struct thandle *th);
 
        /**
-        * Check whether the file is in PCC-RO state.
+        * Perform additional layout checks before
+        * layout changing op. Currently used for PCC-RO and
+        * dir migration.
         *
         * \param[in] env       execution environment
         * \param[in] dt        DT object
@@ -1079,12 +1081,11 @@ struct dt_object_operations {
         *                      the DT object's layout
         *
         * \retval 0            success
-        * \retval -ne          -EALREADY if the file is already PCC-RO cached;
+        * \retval -ne          -EALREADY if the object conforms to the layout
         *                      Otherwise, return error code
         */
-       int (*do_layout_pccro_check)(const struct lu_env *env,
-                                    struct dt_object *dt,
-                                    struct md_layout_change *mlc);
+       int (*do_layout_check)(const struct lu_env *env, struct dt_object *dt,
+                              struct md_layout_change *mlc);
 };
 
 enum dt_bufs_type {
@@ -3071,14 +3072,14 @@ static inline int dt_layout_change(const struct lu_env *env,
        return o->do_ops->do_layout_change(env, o, mlc, th);
 }
 
-static inline int dt_layout_pccro_check(const struct lu_env *env,
+static inline int dt_layout_check(const struct lu_env *env,
                                        struct dt_object *o,
                                        struct md_layout_change *mlc)
 {
        LASSERT(o);
        LASSERT(o->do_ops);
-       LASSERT(o->do_ops->do_layout_pccro_check);
-       return o->do_ops->do_layout_pccro_check(env, o, mlc);
+       LASSERT(o->do_ops->do_layout_check);
+       return o->do_ops->do_layout_check(env, o, mlc);
 }
 
 int dt_global_init(void);
index bc171fd..cc7c81a 100644 (file)
@@ -279,11 +279,11 @@ struct md_object_operations {
                                 struct md_object *obj,
                                 struct md_layout_change *layout);
        /**
-        * Check whether the file is in PCC-RO state.
+        * Additional layout checks
         */
-       int (*moo_layout_pccro_check)(const struct lu_env *env,
-                                    struct md_object *obj,
-                                    struct md_layout_change *layout);
+       int (*moo_layout_check)(const struct lu_env *env,
+                               struct md_object *obj,
+                               struct md_layout_change *layout);
 };
 
 /**
@@ -492,12 +492,12 @@ static inline int mo_layout_change(const struct lu_env *env,
        return m->mo_ops->moo_layout_change(env, m, layout);
 }
 
-static inline int mo_layout_pccro_check(const struct lu_env *env,
+static inline int mo_layout_check(const struct lu_env *env,
                                        struct md_object *m,
                                        struct md_layout_change *layout)
 {
-       LASSERT(m->mo_ops->moo_layout_pccro_check);
-       return m->mo_ops->moo_layout_pccro_check(env, m, layout);
+       LASSERT(m->mo_ops->moo_layout_check);
+       return m->mo_ops->moo_layout_check(env, m, layout);
 }
 
 static inline int mo_swap_layouts(const struct lu_env *env,
index 10cbafd..fbf8442 100644 (file)
@@ -5656,7 +5656,7 @@ static void lod_striping_from_default(struct lod_object *lo,
                if (lo->ldo_dir_stripe_count == 0)
                        lo->ldo_dir_stripe_count =
                                lds->lds_dir_def_stripe_count;
-               if (lo->ldo_dir_stripe_offset == -1)
+               if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT)
                        lo->ldo_dir_stripe_offset =
                                lds->lds_dir_def_stripe_offset;
                if (lo->ldo_dir_hash_type == LMV_HASH_TYPE_UNKNOWN)
@@ -7928,6 +7928,72 @@ static int lod_layout_pccro_check(const struct lu_env *env,
        return lo->ldo_flr_state & LCM_FL_PCC_RDONLY ? -EALREADY : 0;
 }
 
+/* Check if the dir layout conforms to the requested one */
+static int lod_dir_layout_check(const struct lu_env *env,
+                               struct dt_object *dt,
+                               struct md_layout_change *mlc)
+{
+       struct lmv_user_md_v1 *lum = mlc->mlc_buf.lb_buf;
+       size_t lum_len = mlc->mlc_buf.lb_len;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_device *ld = lu2lod_dev(dt->do_lu.lo_dev);
+       int lum_stripe_count, lum_num_objs;
+       int rc;
+       int i;
+       ENTRY;
+
+       rc = lod_striping_load(env, lo);
+       if (rc)
+               RETURN(rc);
+
+       lum_stripe_count = le32_to_cpu(lum->lum_stripe_count);
+       lum_num_objs = lmv_foreign_to_md_stripes(lum_len);
+
+       if (lmv_hash_is_migrating(lo->ldo_dir_hash_type))
+               lum_stripe_count = lo->ldo_dir_migrate_offset;
+
+       if (lum_num_objs > lum_stripe_count)
+               RETURN(-EINVAL);
+
+       for (i = 0; i < lum_num_objs; i++) {
+               struct lmv_user_mds_data *stripe_desc = lum->lum_objects + i;
+               struct dt_object *stripe_obj = lo->ldo_stripe[i];
+               __u32 lum_mds_idx, dt_mds_idx;
+               int type;
+
+               lum_mds_idx = le32_to_cpu(stripe_desc->lum_mds);
+               rc = lod_fld_lookup(env, ld, lu_object_fid(&stripe_obj->do_lu),
+                                   &dt_mds_idx, &type);
+               if (rc < 0)
+                       RETURN(rc);
+
+               if (lum_mds_idx == LMV_OFFSET_DEFAULT)
+                       continue;
+               if (lum_mds_idx != dt_mds_idx) {
+                       if (!lmv_hash_is_migrating(lo->ldo_dir_hash_type))
+                               RETURN(0);
+                       CERROR("%s: attempt to resume migration, stripe #%d mismatch: %u != %u, rc = %d\n",
+                              dt->do_lu.lo_dev->ld_obd->obd_name, i,
+                              lum_mds_idx, dt_mds_idx, -EPERM);
+                       RETURN(-EPERM);
+               }
+       }
+
+       /* all compatibility checks passed */
+       RETURN(-EALREADY);
+}
+
+static int lod_layout_check(const struct lu_env *env,
+                           struct dt_object *dt,
+                           struct md_layout_change *mlc)
+{
+       if (S_ISDIR(dt->do_lu.lo_header->loh_attr))
+               return lod_dir_layout_check(env, dt, mlc);
+
+       LASSERT(S_ISREG(dt->do_lu.lo_header->loh_attr));
+       return lod_layout_pccro_check(env, dt, mlc);
+}
+
 static struct lod_layout_component *
 lod_locate_comp_hsm(struct lod_object *lo, int *hsm_mirror_id)
 {
@@ -9330,7 +9396,7 @@ const struct dt_object_operations lod_obj_ops = {
        .do_invalidate          = lod_invalidate,
        .do_declare_layout_change = lod_declare_layout_change,
        .do_layout_change       = lod_layout_change,
-       .do_layout_pccro_check  = lod_layout_pccro_check,
+       .do_layout_check        = lod_layout_check,
 };
 
 /**
index 04394fd..b1c1a6d 100644 (file)
@@ -4187,9 +4187,7 @@ static int mdd_migrate_linkea_prepare(const struct lu_env *env,
        /* If there are still links locally, don't migrate this file */
        LASSERT(ldata->ld_leh != NULL);
 
-       /*
-        * If linkEA is overflow, switch to ns-only migrate
-        */
+       /* If linkEA is overflow, switch to ns-only migrate */
        if (unlikely(ldata->ld_leh->leh_overflow_time))
                RETURN(+EOVERFLOW);
 
@@ -4591,38 +4589,80 @@ static int mdd_migrate_create(const struct lu_env *env,
  * here, because this command will decide target MDT in subdir migration in
  * LMV.
  */
-static int mdd_migrate_cmd_check(struct mdd_device *mdd,
+static int mdd_migrate_cmd_check(const struct lu_env *env, struct mdd_device *mdd,
+                                struct mdd_object *sobj,
                                 const struct lmv_mds_md_v1 *lmv,
                                 const struct lmv_user_md_v1 *lum,
-                                const struct lu_name *lname)
+                                size_t lum_len, const struct lu_name *lname)
 {
+       struct mdd_thread_info *info = mdd_env_info(env);
        __u32 lum_stripe_count = lum->lum_stripe_count;
        __u32 lum_hash_type = lum->lum_hash_type &
                              cpu_to_le32(LMV_HASH_TYPE_MASK);
-       __u32 lmv_hash_type = lmv->lmv_hash_type &
-                             cpu_to_le32(LMV_HASH_TYPE_MASK);
+       struct md_layout_change *mlc = &info->mdi_mlc;
+       __u32 lmv_hash_type;
+       int rc = 0;
+       ENTRY;
 
-       if (!lmv_is_sane(lmv))
-               return -EBADF;
+       if (lmv && !lmv_is_sane(lmv))
+               RETURN(-EBADF);
 
-       /* if stripe_count unspecified, set to 1 */
+       /* If stripe_count unspecified, set to 1 */
        if (!lum_stripe_count)
                lum_stripe_count = cpu_to_le32(1);
 
-       /* TODO: check specific MDTs */
-       if (lum_stripe_count != lmv->lmv_migrate_offset ||
-           lum->lum_stripe_offset != lmv->lmv_master_mdt_index ||
-           (lum_hash_type && lum_hash_type != lmv_hash_type)) {
-               CERROR("%s: '"DNAME"' migration was interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration: rc = %d\n",
-                       mdd2obd_dev(mdd)->obd_name, encode_fn_luname(lname),
-                       le32_to_cpu(lmv->lmv_master_mdt_index),
-                       le32_to_cpu(lmv->lmv_migrate_offset),
-                       mdt_hash_name[le32_to_cpu(lmv_hash_type)],
-                       encode_fn_luname(lname), -EPERM);
-               return -EPERM;
+       /* Easy check for plain and single-striped dirs
+        * if the object is on the target MDT already
+        */
+       if (!lmv || lmv->lmv_stripe_count == cpu_to_le32(1)) {
+               struct seq_server_site  *ss = mdd_seq_site(mdd);
+               struct lu_seq_range range = { 0 };
+
+               fld_range_set_type(&range, LU_SEQ_RANGE_MDT);
+               rc = fld_server_lookup(env, ss->ss_server_fld,
+                               fid_seq(mdd_object_fid(sobj)), &range);
+               if (rc)
+                       RETURN(rc);
+
+               if (lum_stripe_count == cpu_to_le32(1) &&
+                   le32_to_cpu(lum->lum_stripe_offset) == range.lsr_index)
+                       RETURN(-EALREADY);
+               RETURN(0);
+       }
+
+       lmv_hash_type = lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK);
+
+       if (lmv_is_migrating(lmv)) {
+               if (lum_stripe_count != lmv->lmv_migrate_offset ||
+                   lum->lum_stripe_offset != lmv->lmv_master_mdt_index ||
+                   (lum_hash_type && lum_hash_type != lmv_hash_type)) {
+                       rc = -EPERM;
+               }
+       } else {
+               /* check at top level if the target layout already applied */
+               if ((lum_hash_type && lum_hash_type != lmv_hash_type) ||
+                   lum->lum_stripe_offset != lmv->lmv_master_mdt_index ||
+                   lum_stripe_count != lmv->lmv_stripe_count)
+                       RETURN(0);
+       }
+
+       if (rc == 0) {
+               mlc->mlc_buf.lb_buf = (void*)lum;
+               mlc->mlc_buf.lb_len = lum_len;
+               rc = mo_layout_check(env, &sobj->mod_obj, mlc);
+       }
+
+       if (rc == -EPERM) {
+               CERROR("%s: '"DNAME"' migration was interrupted, run "
+                      "'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration: rc = %d\n",
+                      mdd2obd_dev(mdd)->obd_name, encode_fn_luname(lname),
+                      le32_to_cpu(lmv->lmv_master_mdt_index),
+                      le32_to_cpu(lmv->lmv_migrate_offset),
+                      mdt_hash_name[le32_to_cpu(lmv_hash_type)],
+                      encode_fn_luname(lname), rc);
        }
 
-       return -EALREADY;
+       RETURN(rc);
 }
 
 /**
@@ -4710,6 +4750,7 @@ retry:
 
        if (S_ISDIR(attr->la_mode) && !spec->sp_migrate_nsonly) {
                struct lmv_user_md_v1 *lum = spec->u.sp_ea.eadata;
+               size_t lum_len = spec->u.sp_ea.eadatalen;
 
                LASSERT(lum);
 
@@ -4724,19 +4765,11 @@ retry:
                        GOTO(out, rc);
 
                lmv = sbuf.lb_buf;
-               if (lmv) {
-                       if (!lmv_is_sane(lmv))
-                               GOTO(out, rc = -EBADF);
-                       if (lmv_is_migrating(lmv)) {
-                               rc = mdd_migrate_cmd_check(mdd, lmv, lum,
-                                                          sname);
-                               GOTO(out, rc);
-                       }
-               }
+               rc = mdd_migrate_cmd_check(env, mdd, sobj, lmv, lum,
+                                          lum_len, sname);
+               if (rc)
+                       GOTO(out, rc);
        } else if (!S_ISDIR(attr->la_mode)) {
-               if (spobj == tpobj)
-                       GOTO(out, rc = -EALREADY);
-
                /* update namespace only if @sobj is on MDT where @tpobj is. */
                if (!mdd_object_remote(tpobj) && !mdd_object_remote(sobj))
                        spec->sp_migrate_nsonly = true;
index 177b4a3..ffc3208 100644 (file)
@@ -744,10 +744,10 @@ mdo_layout_change(const struct lu_env *env, struct mdd_object *obj,
 }
 
 static inline int
-mdo_layout_pccro_check(const struct lu_env *env, struct mdd_object *obj,
+mdo_layout_check(const struct lu_env *env, struct mdd_object *obj,
                       struct md_layout_change *mlc)
 {
-       return dt_layout_pccro_check(env, mdd_object_child(obj), mlc);
+       return dt_layout_check(env, mdd_object_child(obj), mlc);
 }
 
 static inline
index cb0cc41..fa0815b 100644 (file)
@@ -3430,12 +3430,11 @@ out:
        RETURN(rc);
 }
 
-/*  Update the layout for PCC-RO. */
 static int
-mdd_layout_pccro_check(const struct lu_env *env, struct md_object *o,
+mdd_layout_check(const struct lu_env *env, struct md_object *o,
                       struct md_layout_change *mlc)
 {
-       return mdo_layout_pccro_check(env, md2mdd_obj(o), mlc);
+       return mdo_layout_check(env, md2mdd_obj(o), mlc);
 }
 
 /**
@@ -3487,8 +3486,8 @@ out:
 /**
  * Layout change callback for object.
  *
- * This is only used by FLR and PCC-RO for now. In the future, it can be
- * exteneded to handle all layout change.
+ * This is used by FLR and PCC-RO as well as dir migration
+ * and restriping.
  */
 static int
 mdd_layout_change(const struct lu_env *env, struct md_object *o,
@@ -4254,5 +4253,5 @@ const struct md_object_operations mdd_obj_ops = {
        .moo_object_lock        = mdd_object_lock,
        .moo_object_unlock      = mdd_object_unlock,
        .moo_layout_change      = mdd_layout_change,
-       .moo_layout_pccro_check = mdd_layout_pccro_check,
+       .moo_layout_check       = mdd_layout_check,
 };
index fa23671..258d990 100644 (file)
@@ -5073,8 +5073,8 @@ static int mdt_layout_change_pccro(struct mdt_thread_info *info,
        if (rc)
                RETURN(rc);
 
-       rc = mo_layout_pccro_check(info->mti_env,
-                                  mdt_object_child(obj), layout);
+       rc = mo_layout_check(info->mti_env,
+                            mdt_object_child(obj), layout);
        if (rc == -EALREADY)
                RETURN(0);
 
index 2eab5b7..442776d 100644 (file)
@@ -2517,7 +2517,7 @@ put_parent:
 unlock_rename:
        mdt_rename_unlock(info, rename_lh);
 
-       if (rc)
+       if (rc && rc != -EALREADY)
                CERROR("%s: migrate "DFID"/"DNAME" failed: rc = %d\n",
                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
                       encode_fn_luname(&rr->rr_name), rc);
index 1ad09ec..b211b12 100755 (executable)
@@ -19436,9 +19436,15 @@ test_160d() {
        changelog_register || error "changelog_register failed"
 
        mkdir -p $DIR/$tdir/migrate_dir
+
+       local src_mdt=$($LFS getdirstripe -m $DIR/$tdir/migrate_dir)
+       local tgt_mdt=1
+
+       [[ "$src_mdt" == "$tgt_mdt" ]] && tgt_mdt=0
+
        changelog_clear 0 || error "changelog_clear failed"
 
-       $LFS migrate -m 1 $DIR/$tdir/migrate_dir || error "migrate fails"
+       $LFS migrate -m $tgt_mdt $DIR/$tdir/migrate_dir || error "migrate fails"
        changelog_dump | tail -n 5
        local migrates=$(changelog_dump | grep -c "MIGRT")
        [ $migrates -eq 1 ] || error "MIGRATE changelog count $migrates != 1"
@@ -24070,6 +24076,25 @@ test_230c() {
                        [[ "$mdt_index" == "0" ]] ||
                                error "$file is not on MDT0"
                done
+
+               # repeating the migrate should leave the dir's FID the same
+               # and update only the skipped file's FID
+               local old_dir_fid=$($LFS path2fid $migrate_dir)
+               local old_file_fid=$($LFS path2fid $migrate_dir/file)
+
+               $LFS migrate -m 0 $migrate_dir || error "Repeating migrate fails"
+
+               local new_dir_fid=$($LFS path2fid $migrate_dir)
+               local new_file_fid=$($LFS path2fid $migrate_dir/file)
+
+               foo_fid_new=$($LFS path2fid $migrate_dir/foo)
+
+               [[ "$foo_fid_old" != "$foo_fid_new" ]] ||
+                       error "Expecting the skipped file to be migrated, but its FID is the same"
+               [[ "$old_dir_fid" == "$new_dir_fid" ]] ||
+                       error "The top-level dir has been migrated after repeating the same migrate cmd"
+               [[ "$old_file_fid" == "$new_file_fid" ]] ||
+                       error "The migrated file has been migrated again"
        fi
 
        rm -rf $DIR/$tdir || error "rm dir failed after migration"