Whamcloud - gitweb
LU-4684 migrate: shrink dir layout after migration 26/31626/22
authorLai Siyao <lai.siyao@intel.com>
Thu, 8 Mar 2018 03:24:31 +0000 (11:24 +0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 17 Sep 2018 04:05:31 +0000 (04:05 +0000)
Use setxattr(XATTR_NAME_LMV) to shrink the directory layout after migration.
This may change the directory FID, because the directory may be shrunk to a
single stripe; in that case the directory is replaced with its remaining stripe.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I4fd3e83e188ac0c6c4845183c17701276fae94a8
Reviewed-on: https://review.whamcloud.com/31626
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lod/lod_object.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_object.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_reint.c
lustre/mdt/mdt_xattr.c
lustre/utils/liblustreapi.c

index cd38047..dc896ef 100644 (file)
@@ -3337,6 +3337,9 @@ static int lod_declare_xattr_set(const struct lu_env *env,
                        rc = lod_dir_declare_layout_add(env, dt, buf, th);
                else if (strcmp(op, "del") == 0)
                        rc = lod_dir_declare_layout_delete(env, dt, buf, th);
+               else if (strcmp(op, "set") == 0)
+                       rc = lod_sub_declare_xattr_set(env, next, buf,
+                                                      XATTR_NAME_LMV, fl, th);
 
                RETURN(rc);
        } else if (S_ISDIR(mode)) {
@@ -4094,12 +4097,15 @@ static int lod_xattr_set(const struct lu_env *env,
                const char *op = name + strlen(XATTR_NAME_LMV) + 1;
 
                rc = -ENOTSUPP;
-               if (strcmp(op, "del") == 0)
-                       rc = lod_dir_layout_delete(env, dt, buf, th);
                /*
                 * XATTR_NAME_LMV".add" is never called, but only declared,
                 * because lod_xattr_set_lmv() will do the addition.
                 */
+               if (strcmp(op, "del") == 0)
+                       rc = lod_dir_layout_delete(env, dt, buf, th);
+               else if (strcmp(op, "set") == 0)
+                       rc = lod_sub_xattr_set(env, next, buf, XATTR_NAME_LMV,
+                                              fl, th);
 
                RETURN(rc);
        } else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) &&
index aa87adc..e9380ff 100644 (file)
@@ -3309,6 +3309,107 @@ out:
        return rc;
 }
 
+/*
+ * mdd_dir_iterate_stripes() callback: declare on \a handle the update that
+ * mdd_dir_destroy_stripe() applies to the stripe at \a index.
+ *
+ * A stripe at or beyond the stripe count in \a lmu_buf has a ref_del and a
+ * destroy declared. A stripe below that count has an LMV update declared,
+ * unless fewer than two stripes are kept, in which case nothing is declared.
+ */
+static int mdd_dir_declare_destroy_stripe(const struct lu_env *env,
+                                         struct mdd_object *obj,
+                                         struct mdd_object *stripe,
+                                         const struct lu_buf *lmv_buf,
+                                         const struct lu_buf *lmu_buf,
+                                         int index,
+                                         struct thandle *handle)
+{
+       struct lmv_user_md *lmu = lmu_buf->lb_buf;
+       __u32 keep_count = le32_to_cpu(lmu->lum_stripe_count);
+       int rc;
+
+       /* this stripe is going away: unlink and destroy it */
+       if (index >= keep_count) {
+               rc = mdo_declare_ref_del(env, stripe, handle);
+               if (!rc)
+                       rc = mdo_declare_destroy(env, stripe, handle);
+
+               return rc;
+       }
+
+       /* a dir shrunk to a single stripe doesn't update the stripe LMV */
+       if (keep_count < 2)
+               return 0;
+
+       return mdo_declare_xattr_set(env, stripe, lmv_buf,
+                                    XATTR_NAME_LMV".set", 0, handle);
+}
+
+/*
+ * mdd_dir_iterate_stripes() callback used by __mdd_dir_layout_shrink().
+ *
+ * \a lmu_buf holds the lmv_user_md whose lum_stripe_count is the number of
+ * stripes to keep. A stripe whose \a index is below that count has its LMV
+ * rewritten with the new stripe count and a bumped layout version (skipped
+ * when fewer than two stripes are kept); any other stripe is unlinked and
+ * destroyed under its write lock.
+ *
+ * \retval 0 on success, otherwise the value returned by the failing call
+ */
+static int mdd_dir_destroy_stripe(const struct lu_env *env,
+                                 struct mdd_object *obj,
+                                 struct mdd_object *stripe,
+                                 const struct lu_buf *lmv_buf,
+                                 const struct lu_buf *lmu_buf,
+                                 int index,
+                                 struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
+       struct lmv_user_md *lmu = lmu_buf->lb_buf;
+       __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count);
+       int rc;
+
+       ENTRY;
+
+       /* this stripe is kept: update its LMV to describe the new layout */
+       if (index < shrink_offset) {
+               struct lmv_mds_md_v1 *slave_lmv;
+               /* the new stripe LMV is built in the mti_lmv scratch buffer */
+               struct lu_buf slave_buf = {
+                               .lb_buf = &info->mti_lmv.lmv_md_v1,
+                               .lb_len = sizeof(*slave_lmv)
+               };
+               __u32 version = le32_to_cpu(lmv->lmv_layout_version);
+
+               /* if dir will be shrunk to 1-stripe, don't update */
+               if (shrink_offset < 2)
+                       RETURN(0);
+
+               slave_lmv = slave_buf.lb_buf;
+               memset(slave_lmv, 0, sizeof(*slave_lmv));
+               slave_lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE);
+               /* both fields are little-endian, no conversion needed */
+               slave_lmv->lmv_stripe_count = lmu->lum_stripe_count;
+               slave_lmv->lmv_master_mdt_index = cpu_to_le32(index);
+               /* keep the hash type of the master LMV, drop its flag bits */
+               slave_lmv->lmv_hash_type = lmv->lmv_hash_type &
+                                          cpu_to_le32(LMV_HASH_TYPE_MASK);
+               slave_lmv->lmv_layout_version = cpu_to_le32(++version);
+
+               rc = mdo_xattr_set(env, stripe, &slave_buf,
+                                  XATTR_NAME_LMV".set", 0, handle);
+               RETURN(rc);
+       }
+
+       /* this stripe is beyond the new stripe count: unlink and destroy it */
+       mdd_write_lock(env, stripe, MOR_SRC_CHILD);
+       rc = mdo_ref_del(env, stripe, handle);
+       if (!rc)
+               rc = mdo_destroy(env, stripe, handle);
+       mdd_write_unlock(env, stripe);
+
+       RETURN(rc);
+}
+
+/*
+ * mdd_dir_iterate_stripes() callback: check that a stripe which will be
+ * removed by the shrink is empty.
+ *
+ * Stripes whose \a index is below the stripe count in \a lmu_buf are kept and
+ * always pass with 0; for all other stripes the result of mdd_dir_is_empty()
+ * is returned.
+ */
+static int mdd_shrink_stripe_is_empty(const struct lu_env *env,
+                                      struct mdd_object *obj,
+                                      struct mdd_object *stripe,
+                                      const struct lu_buf *lmv_buf,
+                                      const struct lu_buf *lmu_buf,
+                                      int index,
+                                      struct thandle *handle)
+{
+       struct lmv_user_md *lmu = lmu_buf->lb_buf;
+       __u32 keep_count = le32_to_cpu(lmu->lum_stripe_count);
+
+       /* a stripe count of 0 is the default value and stands for 1 */
+       if (keep_count == 0)
+               keep_count = 1;
+
+       /* only the stripes that go away have to be empty */
+       return index < keep_count ? 0 : mdd_dir_is_empty(env, stripe);
+}
+
 /*
  * iterate stripes of striped directory on remote MDT, local striped directory
  * is accessed via LOD.
@@ -3768,7 +3869,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env,
                struct lu_buf lmu_buf = { NULL };
 
                if (lmv) {
-                       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
+                       struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md;
 
                        lmu->lum_stripe_count = 0;
                        lmu_buf.lb_buf = lmu;
@@ -3776,7 +3877,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env,
                }
 
                rc = mdd_dir_declare_layout_delete(env, sobj, sbuf, &lmu_buf,
-                                               handle);
+                                                  handle);
                if (rc)
                        return rc;
 
@@ -3921,7 +4022,7 @@ static int mdd_migrate_create(const struct lu_env *env,
 
                if (sbuf->lb_buf) {
                        struct mdd_thread_info *info = mdd_env_info(env);
-                       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
+                       struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md;
 
                        lmu->lum_stripe_count = 0;
                        lmu_buf.lb_buf = lmu;
@@ -4440,6 +4541,360 @@ out:
        return rc;
 }
 
+/*
+ * Declare on \a handle all updates done by __mdd_dir_layout_shrink(); the
+ * declarations below follow the same order as the updates there.
+ *
+ * \a pobj, \a stripe and \a lname are only used when the directory is shrunk
+ * to a single stripe, i.e. when \a obj is replaced by \a stripe under \a pobj.
+ */
+static int __mdd_dir_declare_layout_shrink(const struct lu_env *env,
+                                          struct mdd_object *pobj,
+                                          struct mdd_object *obj,
+                                          struct mdd_object *stripe,
+                                          struct lu_attr *attr,
+                                          struct lu_buf *lmv_buf,
+                                          const struct lu_buf *lmu_buf,
+                                          struct lu_name *lname,
+                                          struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
+       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
+       struct lu_buf shrink_buf = { .lb_buf = lmu,
+                                    .lb_len = sizeof(*lmu) };
+       int rc;
+
+       LASSERT(lmv);
+
+       /* lmu needs to be altered, but lmu_buf is const: work on a copy */
+       memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu));
+
+       /* shrinking to 1-stripe deletes all stripes from the layout */
+       if (le32_to_cpu(lmu->lum_stripe_count) < 2)
+               lmu->lum_stripe_count = 0;
+
+       rc = mdd_dir_declare_layout_delete(env, obj, lmv_buf, &shrink_buf,
+                                          handle);
+       if (rc)
+               return rc;
+
+       if (lmu->lum_stripe_count == 0) {
+               /* from here on the stripe callbacks must keep stripe 0 */
+               lmu->lum_stripe_count = cpu_to_le32(1);
+
+               rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LMV, handle);
+               if (rc)
+                       return rc;
+       }
+
+       rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle,
+                                    mdd_dir_declare_destroy_stripe);
+       if (rc)
+               return rc;
+
+       /* still striped: only the dir LMV is updated, nothing else to do */
+       if (le32_to_cpu(lmu->lum_stripe_count) > 1)
+               return mdo_declare_xattr_set(env, obj, lmv_buf,
+                                            XATTR_NAME_LMV".set", 0, handle);
+
+       /* below: replace the directory with its remaining stripe */
+       rc = mdo_declare_index_insert(env, stripe, mdo2fid(pobj), S_IFDIR,
+                                     dotdot, handle);
+       if (rc)
+               return rc;
+
+       rc = mdd_iterate_xattrs(env, obj, stripe, false, handle,
+                               mdo_declare_xattr_set);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_xattr_del(env, stripe, XATTR_NAME_LMV, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_attr_set(env, stripe, attr, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_index_delete(env, pobj, lname->ln_name, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_index_insert(env, pobj, mdo2fid(stripe), attr->la_mode,
+                                     lname->ln_name, handle);
+       if (rc)
+               return rc;
+
+       /* __mdd_dir_layout_shrink() calls mdo_ref_del() on obj twice */
+       rc = mdo_declare_ref_del(env, obj, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_ref_del(env, obj, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_destroy(env, obj, handle);
+       if (rc)
+               return rc;
+
+       return rc;
+
+}
+
+/*
+ * after files under \a obj were migrated, shrink old stripes from \a obj,
+ * furthermore, if it becomes a 1-stripe directory, convert it to a normal one.
+ *
+ * \a lmu_buf holds the lmv_user_md whose lum_stripe_count is the number of
+ * stripes to keep, and \a lmv_buf holds the current LMV of \a obj. \a pobj,
+ * \a stripe and \a lname are only used in the 1-stripe case, where \a stripe
+ * replaces \a obj under the name \a lname in \a pobj.
+ *
+ * \retval 0 on success, otherwise the value returned by the failing update
+ */
+static int __mdd_dir_layout_shrink(const struct lu_env *env,
+                                  struct mdd_object *pobj,
+                                  struct mdd_object *obj,
+                                  struct mdd_object *stripe,
+                                  struct lu_attr *attr,
+                                  struct lu_buf *lmv_buf,
+                                  const struct lu_buf *lmu_buf,
+                                  struct lu_name *lname,
+                                  struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
+       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
+       struct lu_buf shrink_buf = { .lb_buf = lmu,
+                                    .lb_len = sizeof(*lmu) };
+       int len = lmv_buf->lb_len;
+       __u32 version = le32_to_cpu(lmv->lmv_layout_version);
+       int rc;
+
+       ENTRY;
+
+       /* lmu needs to be altered, but lmu_buf is const */
+       memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu));
+
+       /*
+        * if dir will be shrunk to 1-stripe, delete all stripes, because it
+        * will be converted to normal dir. Use the same test as
+        * __mdd_dir_declare_layout_shrink(), a count of 0 also means 1.
+        */
+       if (le32_to_cpu(lmu->lum_stripe_count) < 2)
+               lmu->lum_stripe_count = 0;
+
+       /* delete stripes after lum_stripe_count */
+       rc = mdd_dir_layout_delete(env, obj, lmv_buf, &shrink_buf, handle);
+       if (rc)
+               RETURN(rc);
+
+       if (lmu->lum_stripe_count == 0) {
+               /* from here on the stripe callback must keep stripe 0 */
+               lmu->lum_stripe_count = cpu_to_le32(1);
+
+               /* delete LMV to avoid deleting stripes again upon destroy */
+               mdd_write_lock(env, obj, MOR_SRC_CHILD);
+               rc = mdo_xattr_del(env, obj, XATTR_NAME_LMV, handle);
+               mdd_write_unlock(env, obj);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       /* destroy stripes after lum_stripe_count */
+       mdd_write_lock(env, obj, MOR_SRC_PARENT);
+       rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle,
+                                    mdd_dir_destroy_stripe);
+       mdd_write_unlock(env, obj);
+       /* don't touch the layout any further if a stripe update failed */
+       if (rc)
+               RETURN(rc);
+
+       if (le32_to_cpu(lmu->lum_stripe_count) > 1) {
+               /* update dir LMV, that's all if it's still striped. */
+               lmv->lmv_stripe_count = lmu->lum_stripe_count;
+               lmv->lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION);
+               lmv->lmv_migrate_offset = 0;
+               lmv->lmv_migrate_hash = 0;
+               lmv->lmv_layout_version = cpu_to_le32(++version);
+
+               /* only the LMV header is written, restore lb_len afterwards */
+               lmv_buf->lb_len = sizeof(*lmv);
+               rc = mdo_xattr_set(env, obj, lmv_buf, XATTR_NAME_LMV".set", 0,
+                                  handle);
+               lmv_buf->lb_len = len;
+               RETURN(rc);
+       }
+
+       /* replace directory with its remaining stripe */
+       LASSERT(pobj);
+       LASSERT(stripe);
+
+       mdd_write_lock(env, pobj, MOR_SRC_PARENT);
+       mdd_write_lock(env, obj, MOR_SRC_CHILD);
+
+       /* insert dotdot to stripe which points to parent */
+       rc = __mdd_index_insert_only(env, stripe, mdo2fid(pobj), S_IFDIR,
+                                    dotdot, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       /* copy xattrs including linkea */
+       rc = mdd_iterate_xattrs(env, obj, stripe, false, handle, mdo_xattr_set);
+       if (rc)
+               GOTO(out, rc);
+
+       /* delete LMV */
+       rc = mdo_xattr_del(env, stripe, XATTR_NAME_LMV, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       /* don't set nlink from parent */
+       attr->la_valid &= ~LA_NLINK;
+
+       rc = mdo_attr_set(env, stripe, attr, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       /* delete dir name from parent */
+       rc = __mdd_index_delete_only(env, pobj, lname->ln_name, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       /* insert stripe to parent with dir name */
+       rc = __mdd_index_insert_only(env, pobj, mdo2fid(stripe), attr->la_mode,
+                                    lname->ln_name, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       /* destroy dir obj, its reference is dropped twice before that */
+       rc = mdo_ref_del(env, obj, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdo_ref_del(env, obj, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = mdo_destroy(env, obj, handle);
+       if (rc)
+               GOTO(out, rc);
+
+       EXIT;
+out:
+       mdd_write_unlock(env, obj);
+       mdd_write_unlock(env, pobj);
+
+       return rc;
+}
+
+/*
+ * shrink directory stripes to lum_stripe_count specified by the lmv_user_md
+ * in \a lmu_buf.
+ *
+ * All stripes that will be removed must be empty, otherwise -ENOTEMPTY is
+ * returned. If fewer than two stripes remain, the name and parent FID of the
+ * directory are read from its linkea (-EINVAL if it has more than one link
+ * record, -ENODATA if it has none), and the parent and the first stripe are
+ * looked up so that the stripe can replace the directory. The shrink itself
+ * and a CL_LAYOUT changelog record are done in a single transaction.
+ *
+ * \retval -ENOTDIR if \a md_obj is not a directory
+ */
+int mdd_dir_layout_shrink(const struct lu_env *env,
+                         struct md_object *md_obj,
+                         const struct lu_buf *lmu_buf)
+{
+       struct mdd_device *mdd = mdo2mdd(md_obj);
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct mdd_object *obj = md2mdd_obj(md_obj);
+       struct mdd_object *pobj = NULL;
+       struct mdd_object *stripe = NULL;
+       struct lu_attr *attr = &info->mti_pattr;
+       struct lu_fid *fid = &info->mti_fid2;
+       struct lu_name lname = { NULL };
+       struct lu_buf lmv_buf = { NULL };
+       struct lmv_mds_md_v1 *lmv;
+       struct lmv_user_md *lmu;
+       struct thandle *handle;
+       int rc;
+
+       ENTRY;
+
+       rc = mdd_la_get(env, obj, attr);
+       if (rc)
+               RETURN(rc);
+
+       if (!S_ISDIR(attr->la_mode))
+               RETURN(-ENOTDIR);
+
+       /* lmv_buf is released with lu_buf_free() at the end */
+       rc = mdd_stripe_get(env, obj, &lmv_buf, XATTR_NAME_LMV);
+       if (rc < 0)
+               RETURN(rc);
+
+       lmv = lmv_buf.lb_buf;
+       lmu = lmu_buf->lb_buf;
+
+       /* this was checked in MDT */
+       LASSERT(le32_to_cpu(lmu->lum_stripe_count) <
+               le32_to_cpu(lmv->lmv_stripe_count));
+
+       /* no transaction yet, hence the NULL handle */
+       rc = mdd_dir_iterate_stripes(env, obj, &lmv_buf, lmu_buf, NULL,
+                                    mdd_shrink_stripe_is_empty);
+       if (rc < 0)
+               GOTO(out, rc);
+       else if (rc != 0)
+               GOTO(out, rc = -ENOTEMPTY);
+
+       /*
+        * if obj stripe count will be shrunk to 1, we need to convert it to a
+        * normal dir, which will change its fid and update parent namespace,
+        * get obj name and parent fid from linkea.
+        */
+       if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
+               struct linkea_data *ldata = &info->mti_link_data;
+               char *filename = info->mti_name;
+
+               rc = mdd_links_read(env, obj, ldata);
+               if (rc)
+                       GOTO(out, rc);
+
+               /* more than one link record is not supported here */
+               if (ldata->ld_leh->leh_reccount > 1)
+                       GOTO(out, rc = -EINVAL);
+
+               linkea_first_entry(ldata);
+               if (!ldata->ld_lee)
+                       GOTO(out, rc = -ENODATA);
+
+               /* fid holds the parent fid after this call */
+               linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, &lname,
+                                   fid);
+
+               /* Note: lname might miss \0 at the end */
+               snprintf(filename, sizeof(info->mti_name), "%.*s",
+                        lname.ln_namelen, lname.ln_name);
+               lname.ln_name = filename;
+
+               pobj = mdd_object_find(env, mdd, fid);
+               if (IS_ERR(pobj)) {
+                       rc = PTR_ERR(pobj);
+                       pobj = NULL;
+                       GOTO(out, rc);
+               }
+
+               /* fid is reused for the first stripe, which is the one kept */
+               fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[0]);
+
+               stripe = mdd_object_find(env, mdd, fid);
+               if (IS_ERR(stripe)) {
+                       /* clear pobj so that "out" doesn't put anything */
+                       mdd_object_put(env, pobj);
+                       pobj = NULL;
+                       GOTO(out, rc = PTR_ERR(stripe));
+               }
+       }
+
+       handle = mdd_trans_create(env, mdd);
+       if (IS_ERR(handle))
+               GOTO(out, rc = PTR_ERR(handle));
+
+       rc = __mdd_dir_declare_layout_shrink(env, pobj, obj, stripe, attr,
+                                            &lmv_buf, lmu_buf, &lname, handle);
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL, NULL,
+                                        handle);
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       rc = mdd_trans_start(env, mdd, handle);
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       rc = __mdd_dir_layout_shrink(env, pobj, obj, stripe, attr, &lmv_buf,
+                                    lmu_buf, &lname, handle);
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj,
+                                           XATTR_NAME_LMV, handle);
+       GOTO(stop_trans, rc);
+
+stop_trans:
+       rc = mdd_trans_stop(env, mdd, rc, handle);
+out:
+       /* pobj and stripe are either both held or both not held here */
+       if (pobj) {
+               mdd_object_put(env, stripe);
+               mdd_object_put(env, pobj);
+       }
+       lu_buf_free(&lmv_buf);
+       return rc;
+}
+
 const struct md_dir_operations mdd_dir_ops = {
        .mdo_is_subdir     = mdd_is_subdir,
        .mdo_lookup        = mdd_lookup,
index 5ecce16..7d50e68 100644 (file)
@@ -204,6 +204,7 @@ struct mdd_thread_info {
        struct dt_insert_rec      mti_dt_rec;
        struct lfsck_req_local    mti_lrl;
        struct lu_seq_range       mti_range;
+       union lmv_mds_md          mti_lmv;
 };
 
 int mdd_la_get(const struct lu_env *env, struct mdd_object *obj,
@@ -270,6 +271,9 @@ int mdd_links_rename(const struct lu_env *env,
                     struct thandle *handle,
                     struct linkea_data *ldata,
                     int first, int check);
+int mdd_dir_layout_shrink(const struct lu_env *env,
+                         struct md_object *md_obj,
+                         const struct lu_buf *lmu_buf);
 
 struct mdd_thread_info *mdd_env_info(const struct lu_env *env);
 
@@ -351,6 +355,12 @@ int mdd_declare_create_object_internal(const struct lu_env *env,
                                       struct dt_allocation_hint *hint);
 int mdd_stripe_get(const struct lu_env *env, struct mdd_object *obj,
                   struct lu_buf *lmm_buf, const char *name);
+int mdd_changelog_data_store_xattr(const struct lu_env *env,
+                                  struct mdd_device *mdd,
+                                  enum changelog_rec_type type,
+                                  int flags, struct mdd_object *mdd_obj,
+                                  const char *xattr_name,
+                                  struct thandle *handle);
 
 /* mdd_trans.c */
 void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
index a623314..e2c39af 100644 (file)
@@ -942,12 +942,12 @@ int mdd_changelog_data_store(const struct lu_env *env, struct mdd_device *mdd,
        RETURN(rc);
 }
 
-static int mdd_changelog_data_store_xattr(const struct lu_env *env,
-                                         struct mdd_device *mdd,
-                                         enum changelog_rec_type type,
-                                         int flags, struct mdd_object *mdd_obj,
-                                         const char *xattr_name,
-                                         struct thandle *handle)
+int mdd_changelog_data_store_xattr(const struct lu_env *env,
+                                  struct mdd_device *mdd,
+                                  enum changelog_rec_type type,
+                                  int flags, struct mdd_object *mdd_obj,
+                                  const char *xattr_name,
+                                  struct thandle *handle)
 {
        int                              rc;
 
@@ -1849,6 +1849,11 @@ static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
                RETURN(rc);
        }
 
+       if (strcmp(name, XATTR_NAME_LMV) == 0) {
+               rc = mdd_dir_layout_shrink(env, obj, buf);
+               RETURN(rc);
+       }
+
        if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 ||
            strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
                struct posix_acl *acl;
index b1068af..ddd73f7 100644 (file)
@@ -760,6 +760,16 @@ int mdt_remote_object_lock(struct mdt_thread_info *mti,
                           struct mdt_object *o, const struct lu_fid *fid,
                           struct lustre_handle *lh,
                           enum ldlm_mode mode, __u64 ibits, bool cache);
+int mdt_reint_striped_lock(struct mdt_thread_info *info,
+                          struct mdt_object *o,
+                          struct mdt_lock_handle *lh,
+                          __u64 ibits,
+                          struct ldlm_enqueue_info *einfo,
+                          bool cos_incompat);
+void mdt_reint_striped_unlock(struct mdt_thread_info *info,
+                             struct mdt_object *o,
+                             struct mdt_lock_handle *lh,
+                             struct ldlm_enqueue_info *einfo, int decref);
 
 enum mdt_name_flags {
        MNF_FIX_ANON = 1,
@@ -882,6 +892,7 @@ int mdt_links_read(struct mdt_thread_info *info,
                   struct linkea_data *ldata);
 int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
                       struct mdt_body *repbody);
+int mdt_remote_permission(struct mdt_thread_info *info);
 
 static inline struct mdt_device *mdt_dev(struct lu_device *d)
 {
index 821c525..cb77f69 100644 (file)
@@ -244,7 +244,7 @@ static inline int mdt_remote_permission_check(struct mdt_thread_info *info)
  * retval      = 0 remote operation is allowed.
  *              < 0 remote operation is denied.
  */
-static int mdt_remote_permission(struct mdt_thread_info *info)
+int mdt_remote_permission(struct mdt_thread_info *info)
 {
        struct md_op_spec *spec = &info->mti_spec;
        struct lu_attr *attr = &info->mti_attr.ma_attr;
@@ -362,12 +362,12 @@ static int mdt_lock_slaves(struct mdt_thread_info *mti, struct mdt_object *obj,
                              policy);
 }
 
-static inline int mdt_reint_striped_lock(struct mdt_thread_info *info,
-                                        struct mdt_object *o,
-                                        struct mdt_lock_handle *lh,
-                                        __u64 ibits,
-                                        struct ldlm_enqueue_info *einfo,
-                                        bool cos_incompat)
+int mdt_reint_striped_lock(struct mdt_thread_info *info,
+                          struct mdt_object *o,
+                          struct mdt_lock_handle *lh,
+                          __u64 ibits,
+                          struct ldlm_enqueue_info *einfo,
+                          bool cos_incompat)
 {
        int rc;
 
@@ -396,10 +396,10 @@ static inline int mdt_reint_striped_lock(struct mdt_thread_info *info,
        return rc;
 }
 
-static inline void
-mdt_reint_striped_unlock(struct mdt_thread_info *info, struct mdt_object *o,
-                        struct mdt_lock_handle *lh,
-                        struct ldlm_enqueue_info *einfo, int decref)
+void mdt_reint_striped_unlock(struct mdt_thread_info *info,
+                             struct mdt_object *o,
+                             struct mdt_lock_handle *lh,
+                             struct ldlm_enqueue_info *einfo, int decref)
 {
        if (einfo->ei_cbdata)
                mdt_unlock_slaves(info, o, einfo, decref);
index 29271e9..846abe2 100644 (file)
@@ -305,6 +305,160 @@ out:
        return rc;
 }
 
+/*
+ * shrink dir layout after migration
+ *
+ * The lmv_user_md sent by the client in rr_eadata is checked against the LMV
+ * of the directory (migrate offset, master MDT index and, if more than one
+ * stripe is kept, hash type), then the shrink is done by setting
+ * XATTR_NAME_LMV on the child object.
+ *
+ * If the directory is shrunk to a single stripe, the parent found via PFID is
+ * locked as well: the object lock is dropped, the parent is locked and the
+ * object is locked and checked again.
+ *
+ * \retval -EALREADY if the directory has no LMV or is not under migration
+ * \retval -EINVAL if the lmv_user_md is too short or doesn't match the LMV
+ */
+static int mdt_dir_layout_shrink(struct mdt_thread_info *info)
+{
+       const struct lu_env *env = info->mti_env;
+       struct mdt_device *mdt = info->mti_mdt;
+       struct mdt_reint_record *rr = &info->mti_rr;
+       struct lmv_user_md *lmu = rr->rr_eadata;
+       __u32 lum_stripe_count;
+       struct lu_buf *buf = &info->mti_buf;
+       struct lmv_mds_md_v1 *lmv;
+       struct md_attr *ma = &info->mti_attr;
+       struct ldlm_enqueue_info *einfo = &info->mti_einfo[0];
+       struct mdt_object *pobj = NULL;
+       struct mdt_object *obj;
+       struct mdt_lock_handle *lhp = NULL;
+       struct mdt_lock_handle *lhc;
+       int rc;
+
+       ENTRY;
+
+       /* lmu is sent by client, don't read beyond what was received */
+       if (rr->rr_eadatalen < (int)sizeof(*lmu))
+               RETURN(-EINVAL);
+
+       rc = mdt_remote_permission(info);
+       if (rc)
+               RETURN(rc);
+
+       /* mti_big_lmm is used to save LMV, but it may be uninitialized. */
+       if (unlikely(!info->mti_big_lmm)) {
+               info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC);
+               OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize);
+               if (!info->mti_big_lmm)
+                       RETURN(-ENOMEM);
+       }
+
+       obj = mdt_object_find(env, mdt, rr->rr_fid1);
+       if (IS_ERR(obj))
+               RETURN(PTR_ERR(obj));
+
+relock:
+       /* lock object */
+       lhc = &info->mti_lh[MDT_LH_CHILD];
+       mdt_lock_reg_init(lhc, LCK_EX);
+       rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo,
+                                   true);
+       /* on the second pass parent is locked already, release it as well */
+       if (rc)
+               GOTO(unlock_pobj, rc);
+
+       ma->ma_lmv = info->mti_big_lmm;
+       ma->ma_lmv_size = info->mti_big_lmmsize;
+       ma->ma_valid = 0;
+       rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV);
+       if (rc)
+               GOTO(unlock_obj, rc);
+
+       /* user may run 'lfs migrate' multiple times, so it's shrunk already */
+       if (!(ma->ma_valid & MA_LMV))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       lmv = &ma->ma_lmv->lmv_md_v1;
+
+       /* ditto */
+       if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION))
+               GOTO(unlock_obj, rc = -EALREADY);
+
+       /* lum_stripe_count is kept little-endian, 0 stands for 1 */
+       lum_stripe_count = lmu->lum_stripe_count;
+       if (!lum_stripe_count)
+               lum_stripe_count = cpu_to_le32(1);
+
+       if (lmv->lmv_migrate_offset != lum_stripe_count) {
+               CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_migrate_offset),
+                       le32_to_cpu(lmu->lum_stripe_count));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) {
+               CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_master_mdt_index),
+                       le32_to_cpu(lmu->lum_stripe_offset));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       /* compare the count in CPU byte order, not as little-endian value */
+       if (le32_to_cpu(lum_stripe_count) > 1 &&
+           (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) !=
+           lmu->lum_hash_type) {
+               CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n",
+                       mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
+                       le32_to_cpu(lmv->lmv_hash_type),
+                       le32_to_cpu(lmu->lum_hash_type));
+               GOTO(unlock_obj, rc = -EINVAL);
+       }
+
+       if (le32_to_cpu(lmu->lum_stripe_count) < 2 && !pobj) {
+               /*
+                * lock parent because dir will be shrunk to be 1 stripe, which
+                * should be converted to normal directory, but that will
+                * change dir fid and update namespace of parent.
+                */
+               lhp = &info->mti_lh[MDT_LH_PARENT];
+               mdt_lock_reg_init(lhp, LCK_PW);
+
+               /* get parent from PFID */
+               ma->ma_need |= MA_PFID;
+               ma->ma_valid = 0;
+               rc = mdt_attr_get_complex(info, obj, ma);
+               if (rc)
+                       GOTO(unlock_obj, rc);
+
+               if (!(ma->ma_valid & MA_PFID))
+                       GOTO(unlock_obj, rc = -ENOTSUPP);
+
+               pobj = mdt_object_find(env, mdt, &ma->ma_pfid);
+               if (IS_ERR(pobj)) {
+                       rc = PTR_ERR(pobj);
+                       pobj = NULL;
+                       GOTO(unlock_obj, rc);
+               }
+
+               /* drop object lock, parent is locked before the object */
+               mdt_reint_striped_unlock(info, obj, lhc, einfo, 1);
+
+               if (mdt_object_remote(pobj)) {
+                       rc = mdt_remote_object_lock(info, pobj, rr->rr_fid1,
+                                                   &lhp->mlh_rreg_lh, LCK_EX,
+                                                   MDS_INODELOCK_LOOKUP,
+                                                   false);
+                       if (rc != ELDLM_OK) {
+                               /* no lock is held, only drop the reference */
+                               mdt_object_put(env, pobj);
+                               GOTO(put_obj, rc);
+                       }
+                       mdt_object_unlock(info, NULL, lhp, 1);
+               }
+
+               rc = mdt_reint_object_lock(info, pobj, lhp,
+                                          MDS_INODELOCK_UPDATE, true);
+               if (rc) {
+                       /* no lock is held, only drop the reference */
+                       mdt_object_put(env, pobj);
+                       GOTO(put_obj, rc);
+               }
+
+               goto relock;
+       }
+
+       buf->lb_buf = rr->rr_eadata;
+       buf->lb_len = rr->rr_eadatalen;
+       rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0);
+       GOTO(unlock_obj, rc);
+
+unlock_obj:
+       mdt_reint_striped_unlock(info, obj, lhc, einfo, rc);
+unlock_pobj:
+       if (pobj)
+               mdt_object_unlock_put(info, pobj, lhp, rc);
+put_obj:
+       mdt_object_put(env, obj);
+
+       return rc;
+}
+
 int mdt_reint_setxattr(struct mdt_thread_info *info,
                        struct mdt_lock_handle *unused)
 {
@@ -344,6 +498,17 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
        } else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
                    sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) {
 
+               /* setxattr(LMV) with lum is used to shrink dir layout */
+               if (strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
+                       __u32 *magic = rr->rr_eadata;
+
+                       if (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
+                           le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC) {
+                               rc = mdt_dir_layout_shrink(info);
+                               GOTO(out, rc);
+                       }
+               }
+
                if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
                        GOTO(out, rc = -EPERM);
 
index a9cb7f1..f37dca0 100644 (file)
@@ -4476,9 +4476,41 @@ out:
        return ret;
 }
 
+/*
+ * dir migration finished, shrink its stripes
+ *
+ * Non-directory entries are only passed on to cb_common_fini(). For a
+ * directory the layout in param->fp_lmv_md is set as XATTR_NAME_LMV on
+ * \a path; EALREADY from setxattr() is not treated as an error.
+ *
+ * \retval 0 on success, negative errno on failure
+ */
+static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data,
+                              struct dirent64 *de)
+{
+       struct find_param *param = data;
+       struct lmv_user_md *lmu = param->fp_lmv_md;
+       int lmulen = lmv_user_md_size(lmu->lum_stripe_count, lmu->lum_magic);
+       int ret = 0;
+
+       if (de && de->d_type != DT_DIR)
+               goto out;
+
+       if (*dirp) {
+               /*
+                * close it before setxattr because the latter may destroy the
+                * original object, and cause close fail.
+                */
+               ret = closedir(*dirp);
+               *dirp = NULL;
+               if (ret) {
+                       ret = -errno;
+                       goto out;
+               }
+       }
+
+       /* setxattr() returns -1 on failure, the error code is in errno */
+       if (setxattr(path, XATTR_NAME_LMV, lmu, lmulen, 0) < 0) {
+               ret = -errno;
+               if (ret == -EALREADY)
+                       ret = 0;
+       }
+out:
+       cb_common_fini(path, parent, dirp, data, de);
+       return ret;
+}
+
 int llapi_migrate_mdt(char *path, struct find_param *param)
 {
-       return param_callback(path, cb_migrate_mdt_init, cb_common_fini, param);
+       return param_callback(path, cb_migrate_mdt_init, cb_migrate_mdt_fini,
+                             param);
 }
 
 int llapi_mv(char *path, struct find_param *param)