Whamcloud - gitweb
LU-14579 flr: mirror unlink and split race
[fs/lustre-release.git] / lustre / mdd / mdd_dir.c
index 89f7f74..2f69cc7 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/mdd/mdd_dir.c
  *
@@ -70,14 +69,14 @@ mdd_name_check(struct mdd_device *m, const struct lu_name *ln)
 static int
 __mdd_lookup(const struct lu_env *env, struct md_object *pobj,
             const struct lu_attr *pattr, const struct lu_name *lname,
-            struct lu_fid* fid, int mask)
+            struct lu_fid *fid, unsigned int may_mask)
 {
-       const char *name                = lname->ln_name;
-       const struct dt_key *key        = (const struct dt_key *)name;
-       struct mdd_object *mdd_obj      = md2mdd_obj(pobj);
-       struct mdd_device *m            = mdo2mdd(pobj);
-       struct dt_object *dir           = mdd_object_child(mdd_obj);
-        int rc;
+       const char *name = lname->ln_name;
+       const struct dt_key *key = (const struct dt_key *)name;
+       struct mdd_object *mdd_obj = md2mdd_obj(pobj);
+       struct dt_object *dir = mdd_object_child(mdd_obj);
+       int rc;
+
        ENTRY;
 
        if (unlikely(mdd_is_dead_obj(mdd_obj)))
@@ -87,11 +86,12 @@ __mdd_lookup(const struct lu_env *env, struct md_object *pobj,
                RETURN(-ESTALE);
 
        if (mdd_object_remote(mdd_obj)) {
-               CDEBUG(D_INFO, "%s: Object "DFID" locates on remote server\n",
-                      mdd2obd_dev(m)->obd_name, PFID(mdo2fid(mdd_obj)));
+               CDEBUG(D_INFO, "%s: Object "DFID" located on remote server\n",
+                      mdd_obj_dev_name(mdd_obj),
+                      PFID(mdd_object_fid(mdd_obj)));
        }
 
-       rc = mdd_permission_internal_locked(env, mdd_obj, pattr, mask,
+       rc = mdd_permission_internal_locked(env, mdd_obj, pattr, may_mask,
                                            DT_TGT_PARENT);
        if (rc)
                RETURN(rc);
@@ -123,7 +123,7 @@ int mdd_lookup(const struct lu_env *env,
 }
 
 /** Read the link EA into a temp buffer.
- * Uses the mdd_thread_info::mti_big_buf since it is generally large.
+ * Uses the mdd_thread_info::mti_link_buf since it is generally large.
  * A pointer to the buffer is stored in \a ldata::ld_buf.
  *
  * \retval 0 or error
@@ -167,9 +167,9 @@ static int __mdd_links_read(const struct lu_env *env,
        return linkea_init(ldata);
 }
 
-static int mdd_links_read(const struct lu_env *env,
-                         struct mdd_object *mdd_obj,
-                         struct linkea_data *ldata)
+int mdd_links_read(const struct lu_env *env,
+                  struct mdd_object *mdd_obj,
+                  struct linkea_data *ldata)
 {
        int rc;
 
@@ -277,10 +277,10 @@ static int mdd_is_parent(const struct lu_env *env,
        struct lu_fid *pfid;
        int rc;
 
-       LASSERT(!lu_fid_eq(mdo2fid(mo), tfid));
+       LASSERT(!lu_fid_eq(mdd_object_fid(mo), tfid));
        pfid = &mdd_env_info(env)->mti_fid;
 
-       if (mdd_is_root(mdd, mdo2fid(mo)))
+       if (mdd_is_root(mdd, mdd_object_fid(mo)))
                return 0;
 
        if (mdd_is_root(mdd, tfid))
@@ -355,8 +355,7 @@ int mdd_is_subdir(const struct lu_env *env, struct md_object *mo,
  *           -ve        other error
  *
  */
-static int mdd_dir_is_empty(const struct lu_env *env,
-                            struct mdd_object *dir)
+int mdd_dir_is_empty(const struct lu_env *env, struct mdd_object *dir)
 {
        struct dt_it     *it;
        struct dt_object *obj;
@@ -511,7 +510,7 @@ static inline int mdd_is_sticky(const struct lu_env *env,
        if (cattr->la_uid == uc->uc_fsuid)
                return 0;
 
-       return !md_capable(uc, CFS_CAP_FOWNER);
+       return !md_capable(uc, CAP_FOWNER);
 }
 
 static int mdd_may_delete_entry(const struct lu_env *env,
@@ -580,12 +579,11 @@ int mdd_may_delete(const struct lu_env *env, struct mdd_object *tpobj,
        /* additional check for the rename case */
        if (cattr) {
                if (S_ISDIR(cattr->la_mode)) {
-                       struct mdd_device *mdd = mdo2mdd(&tobj->mod_obj);
-
                        if (!S_ISDIR(tattr->la_mode))
                                RETURN(-ENOTDIR);
 
-                       if (lu_fid_eq(mdo2fid(tobj), &mdd->mdd_root_fid))
+                       if (mdd_is_root(mdo2mdd(&tobj->mod_obj),
+                                       mdd_object_fid(tobj)))
                                RETURN(-EBUSY);
                } else if (S_ISDIR(tattr->la_mode))
                        RETURN(-EISDIR);
@@ -800,7 +798,7 @@ int mdd_changelog_write_rec(const struct lu_env *env,
                struct llog_changelog_rec *rec;
 
                mdd = lu2mdd_dev(loghandle->lgh_ctxt->loc_obd->obd_lu_dev);
-               rec = container_of0(r, struct llog_changelog_rec, cr_hdr);
+               rec = container_of(r, struct llog_changelog_rec, cr_hdr);
 
                spin_lock(&mdd->mdd_cl.mc_lock);
                rec->cr.cr_index = mdd->mdd_cl.mc_index + 1;
@@ -1054,7 +1052,7 @@ int mdd_changelog_ns_store(const struct lu_env *env,
                mdd_changelog_rec_ext_jobid(&rec->cr, uc->uc_jobid);
 
        if (likely(target != NULL)) {
-               rec->cr.cr_tfid = *mdo2fid(target);
+               rec->cr.cr_tfid = *mdd_object_fid(target);
                target->mod_cltime = ktime_get();
        } else {
                fid_zero(&rec->cr.cr_tfid);
@@ -1255,7 +1253,7 @@ static inline int mdd_links_del(const struct lu_env *env,
 /** Read the link EA into a temp buffer.
  * Uses the mti_link_buf since it is generally large.
  * \retval IS_ERR err
- * \retval ptr to \a lu_buf (always \a mti_big_buf)
+ * \retval ptr to \a lu_buf (always \a mti_link_buf)
  */
 struct lu_buf *mdd_links_get(const struct lu_env *env,
                             struct mdd_object *mdd_obj)
@@ -1340,7 +1338,7 @@ static int mdd_declare_link(const struct lu_env *env,
                            struct lu_attr *la,
                            struct linkea_data *data)
 {
-       struct lu_fid tfid = *mdo2fid(c);
+       struct lu_fid tfid = *mdd_object_fid(c);
        int rc;
 
        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING3))
@@ -1421,8 +1419,8 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
        /* Note: although this function changes ldata, it comes from
         * thread_info, which is completely temporary and only seen in
         * this function, so we do not need to reset ldata if it fails. */
-       rc = mdd_linkea_prepare(env, mdd_sobj, NULL, NULL, mdo2fid(mdd_tobj),
-                               lname, 0, 0, ldata);
+       rc = mdd_linkea_prepare(env, mdd_sobj, NULL, NULL,
+                               mdd_object_fid(mdd_tobj), lname, 0, 0, ldata);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -1447,7 +1445,7 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
                        GOTO(out_unlock, rc);
        }
 
-       *tfid = *mdo2fid(mdd_sobj);
+       *tfid = *mdd_object_fid(mdd_sobj);
        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DANGLING3))
                tfid->f_oid = cfs_fail_val;
 
@@ -1468,7 +1466,7 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj,
        if (rc == 0)
                /* Note: The failure of links_add should not cause the
                 * link failure, so do not check return value. */
-               mdd_links_add(env, mdd_sobj, mdo2fid(mdd_tobj),
+               mdd_links_add(env, mdd_sobj, mdd_object_fid(mdd_tobj),
                              lname, handle, ldata, 0);
 
        EXIT;
@@ -1476,8 +1474,8 @@ out_unlock:
        mdd_write_unlock(env, mdd_sobj);
        if (rc == 0)
                rc = mdd_changelog_ns_store(env, mdd, CL_HARDLINK, 0, mdd_sobj,
-                                           mdo2fid(mdd_tobj), NULL, NULL,
-                                           lname, NULL, handle);
+                                           mdd_object_fid(mdd_tobj), NULL,
+                                           NULL, lname, NULL, handle);
 stop:
        rc = mdd_trans_stop(env, mdd, rc, handle);
        if (is_vmalloc_addr(ldata->ld_buf))
@@ -1532,7 +1530,7 @@ static int mdd_declare_finish_unlink(const struct lu_env *env,
 /* caller should take a lock before calling */
 int mdd_finish_unlink(const struct lu_env *env,
                      struct mdd_object *obj, struct md_attr *ma,
-                     const struct mdd_object *pobj,
+                     struct mdd_object *pobj,
                      const struct lu_name *lname,
                      struct thandle *th)
 {
@@ -1570,7 +1568,7 @@ int mdd_finish_unlink(const struct lu_env *env,
                }
        } else if (!is_dir) {
                /* old files may not have link ea; ignore errors */
-               mdd_links_del(env, obj, mdo2fid(pobj), lname, th);
+               mdd_links_del(env, obj, mdd_object_fid(pobj), lname, th);
        }
 
        RETURN(rc);
@@ -1767,7 +1765,7 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
                rc = mdo_ref_del(env, mdd_cobj, handle);
                if (rc != 0) {
                        __mdd_index_insert_only(env, mdd_pobj,
-                                               mdo2fid(mdd_cobj),
+                                               mdd_object_fid(mdd_cobj),
                                                mdd_object_type(mdd_cobj),
                                                name, handle);
                        GOTO(cleanup, rc);
@@ -1793,7 +1791,7 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj,
 
        /* It is enough to unlink only the entry */
        if (unlikely(mdd_cobj == NULL))
-               GOTO(stop, rc);
+               GOTO(cleanup, rc);
 
        if (cattr->la_nlink > 0 || mdd_cobj->mod_count > 0) {
                /* update ctime of an unlinked file only if it is still
@@ -1838,8 +1836,8 @@ cleanup:
 
                rc = mdd_changelog_ns_store(env, mdd,
                        is_dir ? CL_RMDIR : CL_UNLINK, cl_flags,
-                       mdd_cobj, mdo2fid(mdd_pobj), NULL, NULL, lname, NULL,
-                       handle);
+                       mdd_cobj, mdd_object_fid(mdd_pobj), NULL, NULL,
+                       lname, NULL, handle);
        }
 
 stop:
@@ -1938,7 +1936,8 @@ static int mdd_create_data(const struct lu_env *env,
        if (rc)
                GOTO(stop, rc);
 
-       rc = mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, son, handle);
+       rc = mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, son, handle,
+                                     NULL);
 
 stop:
        rc = mdd_trans_stop(env, mdd, rc, handle);
@@ -1960,8 +1959,8 @@ static int mdd_declare_object_initialize(const struct lu_env *env,
        if (!S_ISDIR(attr->la_mode))
                RETURN(0);
 
-       rc = mdo_declare_index_insert(env, child, mdo2fid(child), S_IFDIR,
-                                     dot, handle);
+       rc = mdo_declare_index_insert(env, child, mdd_object_fid(child),
+                                     S_IFDIR, dot, handle);
        if (rc != 0)
                RETURN(rc);
 
@@ -1969,8 +1968,8 @@ static int mdd_declare_object_initialize(const struct lu_env *env,
        if (rc != 0)
                RETURN(rc);
 
-       rc = mdo_declare_index_insert(env, child, mdo2fid(parent), S_IFDIR,
-                                     dotdot, handle);
+       rc = mdo_declare_index_insert(env, child, mdd_object_fid(parent),
+                                     S_IFDIR, dotdot, handle);
 
        RETURN(rc);
 }
@@ -1985,9 +1984,9 @@ static int mdd_object_initialize(const struct lu_env *env,
        ENTRY;
 
        if (S_ISDIR(attr->la_mode)) {
-                /* Add "." and ".." for newly created dir */
-                mdo_ref_add(env, child, handle);
-                rc = __mdd_index_insert_only(env, child, mdo2fid(child),
+               /* Add "." and ".." for newly created dir */
+               mdo_ref_add(env, child, handle);
+               rc = __mdd_index_insert_only(env, child, mdd_object_fid(child),
                                             S_IFDIR, dot, handle);
                if (rc == 0)
                        rc = __mdd_index_insert_only(env, child, pfid, S_IFDIR,
@@ -2060,7 +2059,7 @@ static int mdd_create_sanity_check(const struct lu_env *env,
            spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen > 0) {
                const struct lmv_user_md *lum = spec->u.sp_ea.eadata;
 
-               if (!lmv_magic_supported(le32_to_cpu(lum->lum_magic)) &&
+               if (!lmv_user_magic_supported(le32_to_cpu(lum->lum_magic)) &&
                    le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC_V0) {
                        rc = -EINVAL;
                        CERROR("%s: invalid lmv_user_md: magic = %x, "
@@ -2092,7 +2091,7 @@ static int mdd_create_sanity_check(const struct lu_env *env,
                           !lustre_in_group_p(uc,
                                              (cattr->la_valid & LA_GID) ?
                                              cattr->la_gid : pattr->la_gid) &&
-                          !md_capable(uc, CFS_CAP_FSETID)) {
+                          !md_capable(uc, CAP_FSETID)) {
                        cattr->la_mode &= ~S_ISGID;
                        cattr->la_valid |= LA_MODE;
                }
@@ -2150,6 +2149,11 @@ static int mdd_declare_create_object(const struct lu_env *env,
        const struct lu_buf *buf;
        int rc;
 
+#ifdef CONFIG_LUSTRE_FS_POSIX_ACL
+       /* ldiskfs OSD needs this information for credit allocation */
+       if (def_acl_buf)
+               hint->dah_acl_len = def_acl_buf->lb_len;
+#endif
        rc = mdd_declare_create_object_internal(env, p, c, attr, handle, spec,
                                                hint);
        if (rc)
@@ -2188,7 +2192,7 @@ static int mdd_declare_create_object(const struct lu_env *env,
                rc = mdo_declare_xattr_set(env, c, buf,
                                           S_ISDIR(attr->la_mode) ?
                                                XATTR_NAME_LMV : XATTR_NAME_LOV,
-                                          0, handle);
+                                          LU_XATTR_CREATE, handle);
                if (rc)
                        GOTO(out, rc);
 
@@ -2222,6 +2226,16 @@ static int mdd_declare_create_object(const struct lu_env *env,
                if (rc < 0)
                        GOTO(out, rc);
        }
+
+       if (spec->sp_cr_file_encctx != NULL) {
+               buf = mdd_buf_get_const(env, spec->sp_cr_file_encctx,
+                                       spec->sp_cr_file_encctx_size);
+               rc = mdo_declare_xattr_set(env, c, buf,
+                                          LL_XATTR_NAME_ENCRYPTION_CONTEXT, 0,
+                                          handle);
+               if (rc < 0)
+                       GOTO(out, rc);
+       }
 out:
        return rc;
 }
@@ -2259,8 +2273,9 @@ static int mdd_declare_create(const struct lu_env *env, struct mdd_device *mdd,
                struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix;
                enum changelog_rec_type type;
 
-               rc = mdo_declare_index_insert(env, p, mdo2fid(c), attr->la_mode,
-                                             name->ln_name, handle);
+               rc = mdo_declare_index_insert(env, p, mdd_object_fid(c),
+                                             attr->la_mode, name->ln_name,
+                                             handle);
                if (rc != 0)
                        return rc;
 
@@ -2292,6 +2307,7 @@ static int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj,
                        struct lu_buf *acl_buf)
 {
        int     rc;
+
        ENTRY;
 
        if (S_ISLNK(la->la_mode)) {
@@ -2305,9 +2321,12 @@ static int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj,
                           XATTR_NAME_ACL_DEFAULT);
        mdd_read_unlock(env, pobj);
        if (rc > 0) {
+               /* ACL buffer size is not enough, need realloc */
+               if (rc > acl_buf->lb_len)
+                       RETURN(-ERANGE);
+
                /* If there are default ACL, fix mode/ACL by default ACL */
                def_acl_buf->lb_len = rc;
-               LASSERT(def_acl_buf->lb_len <= acl_buf->lb_len);
                memcpy(acl_buf->lb_buf, def_acl_buf->lb_buf, rc);
                acl_buf->lb_len = rc;
                rc = __mdd_fix_mode_acl(env, acl_buf, &la->la_mode);
@@ -2353,7 +2372,8 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj,
         * created in declare phase, they also need to be added to master
         * object as sub-directory entry. So it has to initialize the master
         * object, then set dir striped EA.(in mdo_xattr_set) */
-       rc = mdd_object_initialize(env, mdo2fid(pobj), son, attr, handle);
+       rc = mdd_object_initialize(env, mdd_object_fid(pobj), son, attr,
+                                  handle);
        if (rc != 0)
                GOTO(err_destroy, rc);
 
@@ -2380,7 +2400,7 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj,
                rc = mdo_xattr_set(env, son, buf,
                                   S_ISDIR(attr->la_mode) ? XATTR_NAME_LMV :
                                                            XATTR_NAME_LOV,
-                                  0, handle);
+                                  LU_XATTR_CREATE, handle);
                if (rc != 0)
                        GOTO(err_destroy, rc);
        }
@@ -2441,6 +2461,16 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj,
                        GOTO(err_initlized, rc);
        }
 
+       if (spec->sp_cr_file_encctx != NULL) {
+               buf = mdd_buf_get_const(env, spec->sp_cr_file_encctx,
+                                       spec->sp_cr_file_encctx_size);
+               rc = mdo_xattr_set(env, son, buf,
+                                  LL_XATTR_NAME_ENCRYPTION_CONTEXT, 0,
+                                  handle);
+               if (rc < 0)
+                       GOTO(err_initlized, rc);
+       }
+
 err_initlized:
        if (unlikely(rc != 0)) {
                int rc2;
@@ -2550,22 +2580,23 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
                      const struct lu_name *lname, struct md_object *child,
                      struct md_op_spec *spec, struct md_attr *ma)
 {
-       struct mdd_thread_info  *info = mdd_env_info(env);
-       struct lu_attr          *la = &info->mti_la_for_fix;
-       struct mdd_object       *mdd_pobj = md2mdd_obj(pobj);
-       struct mdd_object       *son = md2mdd_obj(child);
-       struct mdd_device       *mdd = mdo2mdd(pobj);
-       struct lu_attr          *attr = &ma->ma_attr;
-       struct thandle          *handle;
-       struct lu_attr          *pattr = &info->mti_pattr;
-       struct lu_buf           acl_buf;
-       struct lu_buf           def_acl_buf;
-       struct lu_buf           hsm_buf;
-       struct linkea_data      *ldata = &info->mti_link_data;
-       const char              *name = lname->ln_name;
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lu_attr *la = &info->mti_la_for_fix;
+       struct mdd_object *mdd_pobj = md2mdd_obj(pobj);
+       struct mdd_object *son = md2mdd_obj(child);
+       struct mdd_device *mdd = mdo2mdd(pobj);
+       struct lu_attr *attr = &ma->ma_attr;
+       struct thandle *handle;
+       struct lu_attr *pattr = &info->mti_pattr;
+       struct lu_buf acl_buf;
+       struct lu_buf def_acl_buf;
+       struct lu_buf hsm_buf;
+       struct linkea_data *ldata = &info->mti_link_data;
+       const char *name = lname->ln_name;
        struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
-       int                      rc;
-       int                      rc2;
+       int acl_size = LUSTRE_POSIX_ACL_MAX_SIZE_OLD;
+       int rc, rc2;
+
        ENTRY;
 
        rc = mdd_la_get(env, mdd_pobj, pattr);
@@ -2584,12 +2615,25 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
        if (IS_ERR(handle))
                GOTO(out_free, rc = PTR_ERR(handle));
 
-       lu_buf_check_and_alloc(&info->mti_xattr_buf,
-                       MIN(mdd->mdd_dt_conf.ddp_max_ea_size, XATTR_SIZE_MAX));
-       acl_buf = info->mti_xattr_buf;
-       def_acl_buf.lb_buf = info->mti_key;
-       def_acl_buf.lb_len = sizeof(info->mti_key);
+use_bigger_buffer:
+       acl_buf = *lu_buf_check_and_alloc(&info->mti_xattr_buf, acl_size);
+       if (!acl_buf.lb_buf)
+               GOTO(out_stop, rc = -ENOMEM);
+       /* mti_big_buf is also used down below in mdd_changelog_ns_store(),
+        * but def_acl_buf is finished with it before then
+        */
+       def_acl_buf = *lu_buf_check_and_alloc(&info->mti_big_buf, acl_size);
+       if (!def_acl_buf.lb_buf)
+               GOTO(out_stop, rc = -ENOMEM);
+
        rc = mdd_acl_init(env, mdd_pobj, attr, &def_acl_buf, &acl_buf);
+       if (unlikely(rc == -ERANGE &&
+                    acl_size == LUSTRE_POSIX_ACL_MAX_SIZE_OLD)) {
+               /* use maximum-sized xattr buffer for too-big default ACL */
+               acl_size = min_t(unsigned int, mdd->mdd_dt_conf.ddp_max_ea_size,
+                                XATTR_SIZE_MAX);
+               goto use_bigger_buffer;
+       }
        if (rc < 0)
                GOTO(out_stop, rc);
 
@@ -2600,11 +2644,8 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
                 * migrate may create 1-stripe directory, so lod_ah_init()
                 * doesn't adjust stripe count from lmu.
                 */
-               if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) {
-                       info->mti_lmu = *lmu;
-                       info->mti_lmu.lum_stripe_count = 0;
-                       spec->u.sp_ea.eadata = &info->mti_lmu;
-               }
+               if (lmu && lmu->lum_stripe_count == cpu_to_le32(1))
+                       lmu->lum_stripe_count = 0;
        }
 
        mdd_object_make_hint(env, mdd_pobj, son, attr, spec, hint);
@@ -2652,13 +2693,13 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
                rc = mdd_orphan_insert(env, son, handle);
                GOTO(out_volatile, rc);
        } else {
-               rc = __mdd_index_insert(env, mdd_pobj, mdo2fid(son),
-                                     attr->la_mode, name, handle);
+               rc = __mdd_index_insert(env, mdd_pobj, mdd_object_fid(son),
+                                       attr->la_mode, name, handle);
                if (rc != 0)
                        GOTO(err_created, rc);
 
-               mdd_links_add(env, son, mdo2fid(mdd_pobj), lname, handle,
-                             ldata, 1);
+               mdd_links_add(env, son, mdd_object_fid(mdd_pobj), lname,
+                             handle, ldata, 1);
 
                /* update parent directory mtime/ctime */
                *la = *attr;
@@ -2708,14 +2749,14 @@ out_volatile:
                mdd_write_unlock(env, son);
        }
 
-       if (rc == 0 && fid_is_namespace_visible(mdo2fid(son)) &&
+       if (rc == 0 && fid_is_namespace_visible(mdd_object_fid(son)) &&
            likely((spec->sp_cr_flags & MDS_OPEN_VOLATILE) == 0))
                rc = mdd_changelog_ns_store(env, mdd,
                                S_ISDIR(attr->la_mode) ? CL_MKDIR :
                                S_ISREG(attr->la_mode) ? CL_CREATE :
                                S_ISLNK(attr->la_mode) ? CL_SOFTLINK : CL_MKNOD,
-                               0, son, mdo2fid(mdd_pobj), NULL, NULL, lname,
-                               NULL, handle);
+                               0, son, mdd_object_fid(mdd_pobj), NULL, NULL,
+                               lname, NULL, handle);
 out_stop:
        rc2 = mdd_trans_stop(env, mdd, rc, handle);
        if (rc == 0) {
@@ -2743,49 +2784,6 @@ out_free:
        return rc;
 }
 
-/*
- * Get locks on parents in proper order
- * RETURN: < 0 - error, rename_order if successful
- */
-enum rename_order {
-        MDD_RN_SAME,
-        MDD_RN_SRCTGT,
-        MDD_RN_TGTSRC
-};
-
-static int mdd_rename_order(const struct lu_env *env,
-                            struct mdd_device *mdd,
-                            struct mdd_object *src_pobj,
-                           const struct lu_attr *pattr,
-                            struct mdd_object *tgt_pobj)
-{
-        /* order of locking, 1 - tgt-src, 0 - src-tgt*/
-        int rc;
-        ENTRY;
-
-        if (src_pobj == tgt_pobj)
-                RETURN(MDD_RN_SAME);
-
-        /* compared the parent child relationship of src_p&tgt_p */
-        if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(src_pobj))){
-                rc = MDD_RN_SRCTGT;
-        } else if (lu_fid_eq(&mdd->mdd_root_fid, mdo2fid(tgt_pobj))) {
-                rc = MDD_RN_TGTSRC;
-        } else {
-               rc = mdd_is_parent(env, mdd, src_pobj, pattr,
-                                  mdo2fid(tgt_pobj));
-                if (rc == -EREMOTE)
-                        rc = 0;
-
-                if (rc == 1)
-                        rc = MDD_RN_TGTSRC;
-                else if (rc == 0)
-                        rc = MDD_RN_SRCTGT;
-        }
-
-        RETURN(rc);
-}
-
 /* has not mdd_write{read}_lock on any obj yet. */
 static int mdd_rename_sanity_check(const struct lu_env *env,
                                    struct mdd_object *src_pobj,
@@ -2850,28 +2848,27 @@ static int mdd_declare_rename(const struct lu_env *env,
                              struct linkea_data *ldata,
                              struct thandle *handle)
 {
-       struct lu_attr    *la = &mdd_env_info(env)->mti_la_for_fix;
+       struct lu_attr *la = &mdd_env_info(env)->mti_la_for_fix;
        int rc;
 
        LASSERT(ma->ma_attr.la_valid & LA_CTIME);
        la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime;
 
-        LASSERT(mdd_spobj);
-        LASSERT(mdd_tpobj);
-        LASSERT(mdd_sobj);
+       LASSERT(mdd_spobj);
+       LASSERT(mdd_tpobj);
+       LASSERT(mdd_sobj);
 
-        /* name from source dir */
-        rc = mdo_declare_index_delete(env, mdd_spobj, sname->ln_name, handle);
-        if (rc)
-                return rc;
+       /* name from source dir */
+       rc = mdo_declare_index_delete(env, mdd_spobj, sname->ln_name, handle);
+       if (rc)
+               return rc;
 
-        /* .. from source child */
-        if (S_ISDIR(mdd_object_type(mdd_sobj))) {
-                /* source child can be directory,
-                 * counted by source dir's nlink */
-                rc = mdo_declare_ref_del(env, mdd_spobj, handle);
-                if (rc)
-                        return rc;
+       /* .. from source child */
+       if (S_ISDIR(mdd_object_type(mdd_sobj))) {
+               /* source child can be a dir, counted by source dir's nlink */
+               rc = mdo_declare_ref_del(env, mdd_spobj, handle);
+               if (rc)
+                       return rc;
                if (mdd_spobj != mdd_tpobj) {
                        rc = mdo_declare_index_delete(env, mdd_sobj, dotdot,
                                                      handle);
@@ -2879,7 +2876,7 @@ static int mdd_declare_rename(const struct lu_env *env,
                                return rc;
 
                        rc = mdo_declare_index_insert(env, mdd_sobj,
-                                                     mdo2fid(mdd_tpobj),
+                                                     mdd_object_fid(mdd_tpobj),
                                                      S_IFDIR, dotdot, handle);
                        if (rc != 0)
                                return rc;
@@ -2911,7 +2908,7 @@ static int mdd_declare_rename(const struct lu_env *env,
                return rc;
 
        /* new name */
-       rc = mdo_declare_index_insert(env, mdd_tpobj, mdo2fid(mdd_sobj),
+       rc = mdo_declare_index_insert(env, mdd_tpobj, mdd_object_fid(mdd_sobj),
                                      mdd_object_type(mdd_sobj),
                                      tname->ln_name, handle);
        if (rc != 0)
@@ -2959,6 +2956,16 @@ static int mdd_declare_rename(const struct lu_env *env,
         return rc;
 }
 
+static int mdd_migrate_object(const struct lu_env *env,
+                             struct mdd_object *spobj,
+                             struct mdd_object *tpobj,
+                             struct mdd_object *sobj,
+                             struct mdd_object *tobj,
+                             const struct lu_name *sname,
+                             const struct lu_name *tname,
+                             struct md_op_spec *spec,
+                             struct md_attr *ma);
+
 /* src object can be remote that is why we use only fid and type of object */
 static int mdd_rename(const struct lu_env *env,
                       struct md_object *src_pobj, struct md_object *tgt_pobj,
@@ -2980,8 +2987,8 @@ static int mdd_rename(const struct lu_env *env,
        struct lu_attr *tpattr = MDD_ENV_VAR(env, tpattr);
        struct thandle *handle;
        struct linkea_data  *ldata = &mdd_env_info(env)->mti_link_data;
-       const struct lu_fid *tpobj_fid = mdo2fid(mdd_tpobj);
-       const struct lu_fid *spobj_fid = mdo2fid(mdd_spobj);
+       const struct lu_fid *tpobj_fid = mdd_object_fid(mdd_tpobj);
+       const struct lu_fid *spobj_fid = mdd_object_fid(mdd_spobj);
        bool is_dir;
        bool tobj_ref = 0;
        bool tobj_locked = 0;
@@ -3003,6 +3010,31 @@ static int mdd_rename(const struct lu_env *env,
        if (rc)
                GOTO(out_pending, rc);
 
+       /* if rename is cross MDTs, migrate symlink if it doesn't have other
+        * hard links, and target doesn't exist.
+        */
+       if (mdd_object_remote(mdd_sobj) && S_ISLNK(cattr->la_mode) &&
+           cattr->la_nlink == 1 && !tobj) {
+               struct md_op_spec *spec = &mdd_env_info(env)->mti_spec;
+               struct lu_device *ld = &mdd->mdd_md_dev.md_lu_dev;
+               struct lu_fid tfid;
+
+               rc = ld->ld_ops->ldo_fid_alloc(env, ld, &tfid, &tgt_pobj->mo_lu,
+                                              NULL);
+               if (rc < 0)
+                       GOTO(out_pending, rc);
+
+               mdd_tobj = mdd_object_find(env, mdd, &tfid);
+               if (IS_ERR(mdd_tobj))
+                       GOTO(out_pending, rc = PTR_ERR(mdd_tobj));
+
+               memset(spec, 0, sizeof(*spec));
+               rc = mdd_migrate_object(env, mdd_spobj, mdd_tpobj, mdd_sobj,
+                                       mdd_tobj, lsname, ltname, spec, ma);
+               mdd_object_put(env, mdd_tobj);
+               GOTO(out_pending, rc);
+       }
+
        rc = mdd_la_get(env, mdd_spobj, pattr);
        if (rc)
                GOTO(out_pending, rc);
@@ -3031,11 +3063,6 @@ static int mdd_rename(const struct lu_env *env,
        if (rc < 0)
                GOTO(out_pending, rc);
 
-       /* FIXME: Should consider tobj and sobj too in rename_lock. */
-       rc = mdd_rename_order(env, mdd, mdd_spobj, pattr, mdd_tpobj);
-       if (rc < 0)
-               GOTO(out_pending, rc);
-
         handle = mdd_trans_create(env, mdd);
         if (IS_ERR(handle))
                 GOTO(out_pending, rc = PTR_ERR(handle));
@@ -3099,12 +3126,12 @@ static int mdd_rename(const struct lu_env *env,
 
        /* Update the linkEA for the source object */
        mdd_write_lock(env, mdd_sobj, DT_SRC_CHILD);
-       rc = mdd_links_rename(env, mdd_sobj, mdo2fid(mdd_spobj), lsname,
-                             mdo2fid(mdd_tpobj), ltname, handle, ldata,
-                             0, 0);
+       rc = mdd_links_rename(env, mdd_sobj, mdd_object_fid(mdd_spobj),
+                             lsname, mdd_object_fid(mdd_tpobj), ltname,
+                             handle, ldata, 0, 0);
        if (rc == -ENOENT)
                /* Old files might not have EA entry */
-               mdd_links_add(env, mdd_sobj, mdo2fid(mdd_spobj),
+               mdd_links_add(env, mdd_sobj, mdd_object_fid(mdd_spobj),
                              lsname, handle, NULL, 0);
        mdd_write_unlock(env, mdd_sobj);
        /* We don't fail the transaction if the link ea can't be
@@ -3218,9 +3245,9 @@ fixup_tpobj:
                        }
 
                        rc2 = __mdd_index_insert(env, mdd_tpobj,
-                                                 mdo2fid(mdd_tobj),
-                                                 mdd_object_type(mdd_tobj),
-                                                 tname, handle);
+                                                mdd_object_fid(mdd_tobj),
+                                                mdd_object_type(mdd_tobj),
+                                                tname, handle);
                        if (rc2 != 0)
                                CWARN("tp obj fix error: rc = %d\n", rc2);
                }
@@ -3291,7 +3318,8 @@ static int mdd_migrate_sanity_check(const struct lu_env *env,
                mdd_read_lock(env, sobj, DT_SRC_CHILD);
                if (sobj->mod_count > 0) {
                        CDEBUG(D_INFO, "%s: "DFID" is opened, count %d\n",
-                              mdd2obd_dev(mdd)->obd_name, PFID(mdo2fid(sobj)),
+                              mdd_obj_dev_name(sobj),
+                              PFID(mdd_object_fid(sobj)),
                               sobj->mod_count);
                        mdd_read_unlock(env, sobj);
                        RETURN(-EBUSY);
@@ -3307,232 +3335,6 @@ static int mdd_migrate_sanity_check(const struct lu_env *env,
        RETURN(rc);
 }
 
-typedef int (*mdd_dir_stripe_cb)(const struct lu_env *env,
-                                struct mdd_object *obj,
-                                struct mdd_object *stripe,
-                                const struct lu_buf *lmv_buf,
-                                const struct lu_buf *lmu_buf,
-                                int index,
-                                struct thandle *handle);
-
-static int mdd_dir_declare_delete_stripe(const struct lu_env *env,
-                                        struct mdd_object *obj,
-                                        struct mdd_object *stripe,
-                                        const struct lu_buf *lmv_buf,
-                                        const struct lu_buf *lmu_buf,
-                                        int index,
-                                        struct thandle *handle)
-{
-       struct mdd_thread_info *info = mdd_env_info(env);
-       char *stripe_name = info->mti_name;
-       struct lmv_user_md *lmu = lmu_buf->lb_buf;
-       int rc;
-
-       if (index < le32_to_cpu(lmu->lum_stripe_count))
-               return 0;
-
-       rc = mdo_declare_index_delete(env, stripe, dotdot, handle);
-       if (rc)
-               return rc;
-
-       snprintf(stripe_name, sizeof(info->mti_name), DFID":%d",
-                PFID(mdd_object_fid(stripe)), index);
-
-       rc = mdo_declare_index_delete(env, obj, stripe_name, handle);
-       if (rc)
-               return rc;
-
-       rc = mdo_declare_ref_del(env, obj, handle);
-
-       return rc;
-}
-
-/* delete stripe from its master object namespace */
-static int mdd_dir_delete_stripe(const struct lu_env *env,
-                                struct mdd_object *obj,
-                                struct mdd_object *stripe,
-                                const struct lu_buf *lmv_buf,
-                                const struct lu_buf *lmu_buf,
-                                int index,
-                                struct thandle *handle)
-{
-       struct mdd_thread_info *info = mdd_env_info(env);
-       char *stripe_name = info->mti_name;
-       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
-       struct lmv_user_md *lmu = lmu_buf->lb_buf;
-       __u32 del_offset = le32_to_cpu(lmu->lum_stripe_count);
-       int rc;
-
-       ENTRY;
-
-       /* local dir will delete via LOD */
-       LASSERT(mdd_object_remote(obj));
-       LASSERT(del_offset < le32_to_cpu(lmv->lmv_stripe_count));
-
-       if (index < del_offset)
-               RETURN(0);
-
-       mdd_write_lock(env, stripe, DT_SRC_CHILD);
-       rc = __mdd_index_delete_only(env, stripe, dotdot, handle);
-       if (rc)
-               GOTO(out, rc);
-
-       snprintf(stripe_name, sizeof(info->mti_name), DFID":%d",
-                PFID(mdd_object_fid(stripe)), index);
-
-       rc = __mdd_index_delete_only(env, obj, stripe_name, handle);
-       if (rc)
-               GOTO(out, rc);
-
-       rc = mdo_ref_del(env, obj, handle);
-       GOTO(out, rc);
-out:
-       mdd_write_unlock(env, stripe);
-
-       return rc;
-}
-
-static int mdd_dir_declare_destroy_stripe(const struct lu_env *env,
-                                         struct mdd_object *obj,
-                                         struct mdd_object *stripe,
-                                         const struct lu_buf *lmv_buf,
-                                         const struct lu_buf *lmu_buf,
-                                         int index,
-                                         struct thandle *handle)
-{
-       struct lmv_user_md *lmu = lmu_buf->lb_buf;
-       __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count);
-       int rc;
-
-       if (index < shrink_offset) {
-               if (shrink_offset < 2)
-                       return 0;
-               return mdo_declare_xattr_set(env, stripe, lmv_buf,
-                                            XATTR_NAME_LMV".set", 0, handle);
-       }
-
-       rc = mdo_declare_ref_del(env, stripe, handle);
-       if (rc)
-               return rc;
-
-       rc = mdo_declare_destroy(env, stripe, handle);
-
-       return rc;
-}
-
-static int mdd_dir_destroy_stripe(const struct lu_env *env,
-                                 struct mdd_object *obj,
-                                 struct mdd_object *stripe,
-                                 const struct lu_buf *lmv_buf,
-                                 const struct lu_buf *lmu_buf,
-                                 int index,
-                                 struct thandle *handle)
-{
-       struct mdd_thread_info *info = mdd_env_info(env);
-       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
-       struct lmv_user_md *lmu = lmu_buf->lb_buf;
-       __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count);
-       int rc;
-
-       ENTRY;
-
-       /* update remaining stripes' LMV */
-       if (index < shrink_offset) {
-               struct lmv_mds_md_v1 *slave_lmv;
-               struct lu_buf slave_buf = {
-                               .lb_buf = &info->mti_lmv.lmv_md_v1,
-                               .lb_len = sizeof(*slave_lmv)
-               };
-               __u32 version = le32_to_cpu(lmv->lmv_layout_version);
-
-               /* if dir will be shrunk to 1-stripe, don't update */
-               if (shrink_offset < 2)
-                       RETURN(0);
-
-               slave_lmv = slave_buf.lb_buf;
-               memset(slave_lmv, 0, sizeof(*slave_lmv));
-               slave_lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE);
-               slave_lmv->lmv_stripe_count = lmu->lum_stripe_count;
-               slave_lmv->lmv_master_mdt_index = cpu_to_le32(index);
-               slave_lmv->lmv_hash_type = lmv->lmv_hash_type &
-                                          cpu_to_le32(LMV_HASH_TYPE_MASK);
-               slave_lmv->lmv_layout_version = cpu_to_le32(++version);
-
-               rc = mdo_xattr_set(env, stripe, &slave_buf,
-                                  XATTR_NAME_LMV".set", 0, handle);
-               RETURN(rc);
-       }
-
-       mdd_write_lock(env, stripe, DT_SRC_CHILD);
-       rc = mdo_ref_del(env, stripe, handle);
-       if (!rc)
-               rc = mdo_destroy(env, stripe, handle);
-       mdd_write_unlock(env, stripe);
-
-       RETURN(rc);
-}
-
-static int mdd_shrink_stripe_is_empty(const struct lu_env *env,
-                                      struct mdd_object *obj,
-                                      struct mdd_object *stripe,
-                                      const struct lu_buf *lmv_buf,
-                                      const struct lu_buf *lmu_buf,
-                                      int index,
-                                      struct thandle *handle)
-{
-       struct lmv_user_md *lmu = lmu_buf->lb_buf;
-       __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count);
-
-       /* the default value is 0, but it means 1 */
-       if (!shrink_offset)
-               shrink_offset = 1;
-
-       if (index < shrink_offset)
-               return 0;
-
-       return mdd_dir_is_empty(env, stripe);
-}
-
-/*
- * iterate stripes of striped directory on remote MDT, local striped directory
- * is accessed via LOD.
- */
-static int mdd_dir_iterate_stripes(const struct lu_env *env,
-                                  struct mdd_object *obj,
-                                  const struct lu_buf *lmv_buf,
-                                  const struct lu_buf *lmu_buf,
-                                  struct thandle *handle,
-                                  mdd_dir_stripe_cb cb)
-{
-       struct mdd_device *mdd = mdo2mdd(&obj->mod_obj);
-       struct lu_fid *fid = &mdd_env_info(env)->mti_fid2;
-       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
-       struct mdd_object *stripe;
-       int i;
-       int rc;
-
-       ENTRY;
-
-       LASSERT(lmv);
-
-       for (i = 0; i < le32_to_cpu(lmv->lmv_stripe_count); i++) {
-               fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[i]);
-               if (!fid_is_sane(fid))
-                       continue;
-
-               stripe = mdd_object_find(env, mdd, fid);
-               if (IS_ERR(stripe))
-                       RETURN(PTR_ERR(stripe));
-
-               rc = cb(env, obj, stripe, lmv_buf, lmu_buf, i, handle);
-               mdd_object_put(env, stripe);
-               if (rc)
-                       RETURN(rc);
-       }
-
-       RETURN(0);
-}
-
 typedef int (*mdd_xattr_cb)(const struct lu_env *env,
                            struct mdd_object *obj,
                            const struct lu_buf *buf,
@@ -3669,7 +3471,7 @@ static int mdd_declare_update_link(const struct lu_env *env,
 
        rc = mdo_declare_index_delete(env, pobj, lname->ln_name, handle);
        if (!rc)
-               rc = mdo_declare_index_insert(env, pobj, mdo2fid(tobj),
+               rc = mdo_declare_index_insert(env, pobj, mdd_object_fid(tobj),
                                              mdd_object_type(sobj),
                                              lname->ln_name, handle);
        mdd_object_put(env, pobj);
@@ -3708,7 +3510,7 @@ static int mdd_update_link(const struct lu_env *env,
                RETURN(0);
 
        CDEBUG(D_INFO, "update "DFID"/"DNAME":"DFID"\n",
-              PFID(fid), PNAME(lname), PFID(mdo2fid(tobj)));
+              PFID(fid), PNAME(lname), PFID(mdd_object_fid(tobj)));
 
        pobj = mdd_object_find(env, mdd, fid);
        if (IS_ERR(pobj)) {
@@ -3726,7 +3528,7 @@ static int mdd_update_link(const struct lu_env *env,
        mdd_write_lock(env, pobj, DT_TGT_PARENT);
        rc = __mdd_index_delete_only(env, pobj, lname->ln_name, handle);
        if (!rc)
-               rc = __mdd_index_insert_only(env, pobj, mdo2fid(tobj),
+               rc = __mdd_index_insert_only(env, pobj, mdd_object_fid(tobj),
                                             mdd_object_type(sobj),
                                             lname->ln_name, handle);
        mdd_write_unlock(env, pobj);
@@ -3844,14 +3646,15 @@ static int mdd_iterate_linkea(const struct lu_env *env,
  * \retval     1 don't migrate
  * \retval     -errno on failure
  */
-static int migrate_linkea_prepare(const struct lu_env *env,
-                                 struct mdd_device *mdd,
-                                 struct mdd_object *spobj,
-                                 struct mdd_object *tpobj,
-                                 struct mdd_object *sobj,
-                                 const struct lu_name *lname,
-                                 const struct lu_attr *attr,
-                                 struct linkea_data *ldata)
+static int mdd_migrate_linkea_prepare(const struct lu_env *env,
+                                     struct mdd_device *mdd,
+                                     struct mdd_object *spobj,
+                                     struct mdd_object *tpobj,
+                                     struct mdd_object *sobj,
+                                     const struct lu_name *sname,
+                                     const struct lu_name *tname,
+                                     const struct lu_attr *attr,
+                                     struct linkea_data *ldata)
 {
        __u32 source_mdt_index;
        int rc;
@@ -3859,8 +3662,8 @@ static int migrate_linkea_prepare(const struct lu_env *env,
        ENTRY;
 
        memset(ldata, 0, sizeof(*ldata));
-       rc = mdd_linkea_prepare(env, sobj, mdo2fid(spobj), lname,
-                               mdo2fid(tpobj), lname, 1, 0, ldata);
+       rc = mdd_linkea_prepare(env, sobj, mdd_object_fid(spobj), sname,
+                               mdd_object_fid(tpobj), tname, 1, 0, ldata);
        if (rc)
                RETURN(rc);
 
@@ -3882,146 +3685,152 @@ static int migrate_linkea_prepare(const struct lu_env *env,
        if (unlikely(ldata->ld_leh->leh_overflow_time))
                RETURN(-EOVERFLOW);
 
-       rc = mdd_fld_lookup(env, mdd, mdo2fid(sobj), &source_mdt_index);
+       rc = mdd_fld_lookup(env, mdd, mdd_object_fid(sobj), &source_mdt_index);
        if (rc)
                RETURN(rc);
 
-       rc = mdd_iterate_linkea(env, sobj, NULL, lname, mdo2fid(tpobj), ldata,
-                               &source_mdt_index, NULL,
+       rc = mdd_iterate_linkea(env, sobj, NULL, tname, mdd_object_fid(tpobj),
+                               ldata, &source_mdt_index, NULL,
                                mdd_is_link_on_source_mdt);
        RETURN(rc);
 }
 
-static int mdd_dir_declare_layout_delete(const struct lu_env *env,
-                                        struct mdd_object *obj,
-                                        const struct lu_buf *lmv_buf,
-                                        const struct lu_buf *lmu_buf,
-                                        struct thandle *handle)
+/**
+ * Declare the updates that move a dirent from \a spobj to \a tpobj.
+ *
+ * Declares, in order: deletion of \a sname from \a spobj, insertion of
+ * \a tname (referring to \a obj) into \a tpobj, the linkEA update of
+ * \a obj, and a ctime/mtime attr_set on the parent(s). For a directory a
+ * ref_del on \a spobj and a ref_add on \a tpobj are declared as well.
+ *
+ * \param[in] env      execution environment
+ * \param[in] spobj    source parent object
+ * \param[in] tpobj    target parent object
+ * \param[in] obj      object the new dirent refers to
+ * \param[in] sname    name to delete from \a spobj
+ * \param[in] tname    name to insert into \a tpobj
+ * \param[in] attr     attributes whose la_mode gives the file type
+ * \param[in] spattr   not referenced in this function
+ * \param[in] tpattr   not referenced in this function
+ * \param[in] ldata    linkea data passed to mdd_declare_links_add()
+ * \param[in] ma       not referenced in this function
+ * \param[in] handle   transaction handle
+ *
+ * \retval     0 on success
+ * \retval     the first non-zero rc returned by a declaration
+ **/
+static int mdd_declare_migrate_update(const struct lu_env *env,
+                                     struct mdd_object *spobj,
+                                     struct mdd_object *tpobj,
+                                     struct mdd_object *obj,
+                                     const struct lu_name *sname,
+                                     const struct lu_name *tname,
+                                     struct lu_attr *attr,
+                                     struct lu_attr *spattr,
+                                     struct lu_attr *tpattr,
+                                     struct linkea_data *ldata,
+                                     struct md_attr *ma,
+                                     struct thandle *handle)
 {
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lu_attr *la = &info->mti_la_for_fix;
        int rc;
 
-       if (!lmv_buf->lb_buf)
-               rc = mdo_declare_index_delete(env, obj, dotdot, handle);
-       else if (mdd_object_remote(obj))
-               rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, lmu_buf, handle,
-                                            mdd_dir_declare_delete_stripe);
-       else
-               rc = mdo_declare_xattr_set(env, obj, lmu_buf,
-                                          XATTR_NAME_LMV".del", 0, handle);
+       /* the old name will be removed from the source parent */
+       rc = mdo_declare_index_delete(env, spobj, sname->ln_name, handle);
+       if (rc)
+               return rc;
 
-       return rc;
-}
+       /* for a directory, a reference on the source parent is dropped too */
+       if (S_ISDIR(attr->la_mode)) {
+               rc = mdo_declare_ref_del(env, spobj, handle);
+               if (rc)
+                       return rc;
+       }
 
-static int mdd_dir_layout_delete(const struct lu_env *env,
-                                struct mdd_object *obj,
-                                const struct lu_buf *lmv_buf,
-                                const struct lu_buf *lmu_buf,
-                                struct thandle *handle)
-{
-       int rc;
+       /* the new name, of the same file type, goes into the target parent */
+       rc = mdo_declare_index_insert(env, tpobj, mdd_object_fid(obj),
+                                     attr->la_mode & S_IFMT,
+                                     tname->ln_name, handle);
+       if (rc)
+               return rc;
 
-       ENTRY;
+       rc = mdd_declare_links_add(env, obj, handle, ldata);
+       if (rc)
+               return rc;
 
-       mdd_write_lock(env, obj, DT_SRC_PARENT);
-       if (!lmv_buf->lb_buf)
-               /* normal dir */
-               rc = __mdd_index_delete_only(env, obj, dotdot, handle);
-       else if (mdd_object_remote(obj))
-               /* striped, but remote */
-               rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, lmu_buf, handle,
-                                            mdd_dir_delete_stripe);
-       else
-               rc = mdo_xattr_set(env, obj, lmu_buf, XATTR_NAME_LMV".del", 0,
-                                  handle);
-       mdd_write_unlock(env, obj);
+       /* for a directory, the target parent gains a reference */
+       if (S_ISDIR(attr->la_mode)) {
+               rc = mdo_declare_ref_add(env, tpobj, handle);
+               if (rc)
+                       return rc;
+       }
 
-       RETURN(rc);
+       /*
+        * only the valid mask is set for the declaration; the time values
+        * themselves are not filled in by this function.
+        */
+       la->la_valid = LA_CTIME | LA_MTIME;
+       rc = mdo_declare_attr_set(env, spobj, la, handle);
+       if (rc)
+               return rc;
+
+       /* a same-parent move was already covered by the declaration above */
+       if (tpobj != spobj) {
+               rc = mdo_declare_attr_set(env, tpobj, la, handle);
+               if (rc)
+                       return rc;
+       }
+
+       return rc;
 }
 
 static int mdd_declare_migrate_create(const struct lu_env *env,
+                                     struct mdd_object *spobj,
                                      struct mdd_object *tpobj,
                                      struct mdd_object *sobj,
                                      struct mdd_object *tobj,
-                                     const struct lu_name *lname,
+                                     const struct lu_name *sname,
+                                     const struct lu_name *tname,
+                                     struct lu_attr *spattr,
+                                     struct lu_attr *tpattr,
                                      struct lu_attr *attr,
                                      struct lu_buf *sbuf,
                                      struct linkea_data *ldata,
+                                     struct md_attr *ma,
                                      struct md_op_spec *spec,
                                      struct dt_allocation_hint *hint,
                                      struct thandle *handle)
 {
        struct mdd_thread_info *info = mdd_env_info(env);
+       struct md_layout_change *mlc = &info->mti_mlc;
        struct lmv_mds_md_v1 *lmv = sbuf->lb_buf;
        int rc;
 
-       if (S_ISDIR(attr->la_mode)) {
-               struct lu_buf lmu_buf = { NULL };
-
-               if (lmv) {
-                       struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md;
+       ENTRY;
 
-                       lmu->lum_stripe_count = 0;
-                       lmu_buf.lb_buf = lmu;
-                       lmu_buf.lb_len = sizeof(*lmu);
-               }
+       if (S_ISDIR(attr->la_mode)) {
+               struct lmv_user_md *lum = spec->u.sp_ea.eadata;
 
-               rc = mdd_dir_declare_layout_delete(env, sobj, sbuf, &lmu_buf,
-                                                  handle);
+               mlc->mlc_opc = MD_LAYOUT_DETACH;
+               rc = mdo_declare_layout_change(env, sobj, mlc, handle);
                if (rc)
                        return rc;
 
-               if (lmv) {
-                       rc = mdo_declare_xattr_del(env, sobj, XATTR_NAME_LMV,
-                                                  handle);
-                       if (rc)
-                               return rc;
-               }
+               lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_MIGRATION);
+       } else if (S_ISLNK(attr->la_mode)) {
+               spec->u.sp_symname = sbuf->lb_buf;
+       } else if (S_ISREG(attr->la_mode)) {
+               spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE;
+               spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA;
        }
 
+       mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint);
+
        rc = mdd_declare_create(env, mdo2mdd(&tpobj->mod_obj), tpobj, tobj,
-                               lname, attr, handle, spec, ldata, NULL, NULL,
+                               tname, attr, handle, spec, ldata, NULL, NULL,
                                NULL, hint);
        if (rc)
                return rc;
 
-       if (S_ISDIR(attr->la_mode) && mdd_dir_is_empty(env, sobj) != 0) {
+       /*
+        * tobj mode will be used in mdo_declare_layout_change(), but it's not
+        * created yet, copy from sobj.
+        */
+       tobj->mod_obj.mo_lu.lo_header->loh_attr &= ~S_IFMT;
+       tobj->mod_obj.mo_lu.lo_header->loh_attr |=
+               sobj->mod_obj.mo_lu.lo_header->loh_attr & S_IFMT;
+
+       if (S_ISDIR(attr->la_mode)) {
                if (!lmv) {
-                       /*
-                        * if sobj is not striped, fake a 1-stripe LMV, which
-                        * will be used to generate a compound LMV for tobj.
-                        */
+                       /* if sobj is not striped, fake a 1-stripe LMV */
                        LASSERT(sizeof(info->mti_key) >
                                lmv_mds_md_size(1, LMV_MAGIC_V1));
                        lmv = (typeof(lmv))info->mti_key;
                        memset(lmv, 0, sizeof(*lmv));
                        lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_V1);
                        lmv->lmv_stripe_count = cpu_to_le32(1);
-                       fid_le_to_cpu(&lmv->lmv_stripe_fids[0], mdo2fid(sobj));
-                       sbuf->lb_buf = lmv;
-                       sbuf->lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1);
-
-                       rc = mdo_declare_xattr_set(env, tobj, sbuf,
-                                                  XATTR_NAME_LMV".add", 0,
-                                                  handle);
-                       sbuf->lb_buf = NULL;
-                       sbuf->lb_len = 0;
+                       lmv->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_DEFAULT);
+                       fid_le_to_cpu(&lmv->lmv_stripe_fids[0],
+                                     mdd_object_fid(sobj));
+                       mlc->mlc_buf.lb_buf = lmv;
+                       mlc->mlc_buf.lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1);
                } else {
-                       rc = mdo_declare_xattr_set(env, tobj, sbuf,
-                                                  XATTR_NAME_LMV".add", 0,
-                                                  handle);
+                       mlc->mlc_buf = *sbuf;
                }
+               mlc->mlc_opc = MD_LAYOUT_ATTACH;
+               rc = mdo_declare_layout_change(env, tobj, mlc, handle);
                if (rc)
                        return rc;
        }
 
-       /*
-        * tobj mode will be used in lod_declare_xattr_set(), but it's not
-        * createb yet, copy from sobj.
-        */
-       tobj->mod_obj.mo_lu.lo_header->loh_attr &= ~S_IFMT;
-       tobj->mod_obj.mo_lu.lo_header->loh_attr |=
-               sobj->mod_obj.mo_lu.lo_header->loh_attr & S_IFMT;
-
        rc = mdd_iterate_xattrs(env, sobj, tobj, true, handle,
                                mdo_declare_xattr_set);
        if (rc)
@@ -4033,7 +3842,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env,
                handle->th_complex = 1;
 
                /* target may be remote, update PFID via sobj. */
-               fid_buf.lb_buf = (void *)mdo2fid(tobj);
+               fid_buf.lb_buf = (void *)mdd_object_fid(tobj);
                fid_buf.lb_len = sizeof(struct lu_fid);
                rc = mdo_declare_xattr_set(env, sobj, &fid_buf, XATTR_NAME_FID,
                                           0, handle);
@@ -4046,38 +3855,112 @@ static int mdd_declare_migrate_create(const struct lu_env *env,
        }
 
        if (!S_ISDIR(attr->la_mode)) {
-               rc = mdd_iterate_linkea(env, sobj, tobj, lname, mdo2fid(tpobj),
-                                       ldata, NULL, handle,
-                                       mdd_declare_update_link);
+               rc = mdd_iterate_linkea(env, sobj, tobj, tname,
+                                       mdd_object_fid(tpobj), ldata, NULL,
+                                       handle, mdd_declare_update_link);
                if (rc)
                        return rc;
+       }
 
-               if (lmv) {
-                       rc = mdo_declare_xattr_del(env, sobj, XATTR_NAME_LMV,
-                                                  handle);
+       if (!S_ISDIR(attr->la_mode) || lmv) {
+               rc = mdo_declare_ref_del(env, sobj, handle);
+               if (rc)
+                       return rc;
+
+               if (S_ISDIR(attr->la_mode)) {
+                       rc = mdo_declare_ref_del(env, sobj, handle);
                        if (rc)
                                return rc;
                }
+
+               rc = mdo_declare_destroy(env, sobj, handle);
+               if (rc)
+                       return rc;
        }
 
+       rc = mdd_declare_migrate_update(env, spobj, tpobj, tobj, sname, tname,
+                                       attr, spattr, tpattr, ldata, ma,
+                                       handle);
        return rc;
 }
 
 /**
- * Create target, migrate xattrs and update links.
+ * migrate dirent from \a spobj to \a tpobj.
+ *
+ * Delete \a sname from \a spobj, insert \a tname referring to \a obj into
+ * \a tpobj, write \a ldata to \a obj via mdd_links_write(), then update
+ * ctime/mtime of \a spobj (and of \a tpobj when it is a different object)
+ * using the ctime found in \a ma. Processing stops at the first failure.
+ *
+ * NOTE(review): the index delete/insert and linkEA write are not wrapped
+ * in mdd_write_lock() here; presumably the caller provides the required
+ * protection -- confirm.
+ *
+ * \param[in] env      execution environment
+ * \param[in] spobj    source parent object
+ * \param[in] tpobj    target parent object
+ * \param[in] obj      object the new dirent refers to
+ * \param[in] sname    name to delete from \a spobj
+ * \param[in] tname    name to insert into \a tpobj
+ * \param[in] attr     attributes whose la_mode gives the file type
+ * \param[in] spattr   source parent attributes, passed to mdd_update_time()
+ * \param[in] tpattr   target parent attributes, passed to mdd_update_time()
+ * \param[in] ldata    linkea data written to \a obj
+ * \param[in] ma       supplies ma_attr.la_ctime for the parent time update
+ * \param[in] handle   transaction handle
+ *
+ * \retval     0 on success
+ * \retval     the non-zero rc of the first step that failed
+ **/
+static int mdd_migrate_update(const struct lu_env *env,
+                             struct mdd_object *spobj,
+                             struct mdd_object *tpobj,
+                             struct mdd_object *obj,
+                             const struct lu_name *sname,
+                             const struct lu_name *tname,
+                             struct lu_attr *attr,
+                             struct lu_attr *spattr,
+                             struct lu_attr *tpattr,
+                             struct linkea_data *ldata,
+                             struct md_attr *ma,
+                             struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lu_attr *la = &info->mti_la_for_fix;
+       int rc;
+
+       ENTRY;
+
+       CDEBUG(D_INFO, "update "DFID" from "DFID"/%s to "DFID"/%s\n",
+              PFID(mdd_object_fid(obj)), PFID(mdd_object_fid(spobj)),
+              sname->ln_name, PFID(mdd_object_fid(tpobj)), tname->ln_name);
+
+       /*
+        * remove the old name from the source parent; the S_ISDIR() flag
+        * presumably tells the helper to drop spobj's link count as well,
+        * matching the ref_del declared for directories -- confirm.
+        */
+       rc = __mdd_index_delete(env, spobj, sname->ln_name,
+                               S_ISDIR(attr->la_mode), handle);
+       if (rc)
+               RETURN(rc);
+
+       /* insert the new name, of the same file type, into the target parent */
+       rc = __mdd_index_insert(env, tpobj, mdd_object_fid(obj),
+                               attr->la_mode & S_IFMT,
+                               tname->ln_name, handle);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdd_links_write(env, obj, ldata, handle);
+       if (rc)
+               RETURN(rc);
+
+       /* both ctime and mtime of the parents are taken from ma's ctime */
+       la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime;
+       la->la_valid = LA_CTIME | LA_MTIME;
+       mdd_write_lock(env, spobj, DT_SRC_PARENT);
+       rc = mdd_update_time(env, spobj, spattr, la, handle);
+       mdd_write_unlock(env, spobj);
+       if (rc)
+               RETURN(rc);
+
+       if (tpobj != spobj) {
+               /*
+                * NOTE(review): la_valid is set again, presumably because
+                * mdd_update_time() may modify it -- confirm.
+                */
+               la->la_valid = LA_CTIME | LA_MTIME;
+               mdd_write_lock(env, tpobj, DT_TGT_PARENT);
+               rc = mdd_update_time(env, tpobj, tpattr, la, handle);
+               mdd_write_unlock(env, tpobj);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       RETURN(rc);
+}
+
+/**
+ * Migrate file/dir to target MDT.
  *
  * Create target according to \a spec, and then migrate xattrs, if it's
- * directory, migrate source stripes to target, else update fid to target
- * for links.
+ * directory, migrate source stripes to target.
  *
  * \param[in] env      execution environment
+ * \param[in] spobj    source parent object
  * \param[in] tpobj    target parent object
  * \param[in] sobj     source object
  * \param[in] tobj     target object
 * \param[in] sname    source file name
 * \param[in] tname    target file name
+ * \param[in] spattr   source parent attributes
+ * \param[in] tpattr   target parent attributes
  * \param[in] attr     source attributes
  * \param[in] sbuf     source LMV buf
- * \param[in] ldata    source linkea
  * \param[in] spec     migrate create spec
  * \param[in] hint     target creation hint
 * \param[in] handle   transaction handle
@@ -4086,13 +3969,18 @@ static int mdd_declare_migrate_create(const struct lu_env *env,
  * \retval     -errno on failure
  **/
 static int mdd_migrate_create(const struct lu_env *env,
+                             struct mdd_object *spobj,
                              struct mdd_object *tpobj,
                              struct mdd_object *sobj,
                              struct mdd_object *tobj,
-                             const struct lu_name *lname,
+                             const struct lu_name *sname,
+                             const struct lu_name *tname,
+                             struct lu_attr *spattr,
+                             struct lu_attr *tpattr,
                              struct lu_attr *attr,
                              const struct lu_buf *sbuf,
                              struct linkea_data *ldata,
+                             struct md_attr *ma,
                              struct md_op_spec *spec,
                              struct dt_allocation_hint *hint,
                              struct thandle *handle)
@@ -4102,38 +3990,22 @@ static int mdd_migrate_create(const struct lu_env *env,
        ENTRY;
 
        /*
-        * directory will migrate sobj stripes to tobj:
-        * 1. delete stripes from sobj.
-        * 2. add stripes to tobj, see lod_dir_declare_layout_add().
-        * 3. create/attach stripes for tobj, see lod_xattr_set_lmv().
+        * migrate sobj stripes to tobj if it's directory:
+        * 1. detach stripes from sobj.
+        * 2. attach stripes to tobj, see mdd_declare_migrate_mdt().
+        * 3. create stripes for tobj, see lod_xattr_set_lmv().
         */
        if (S_ISDIR(attr->la_mode)) {
-               struct lu_buf lmu_buf = { NULL };
+               struct mdd_thread_info *info = mdd_env_info(env);
+               struct md_layout_change *mlc = &info->mti_mlc;
 
-               if (sbuf->lb_buf) {
-                       struct mdd_thread_info *info = mdd_env_info(env);
-                       struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md;
-
-                       lmu->lum_stripe_count = 0;
-                       lmu_buf.lb_buf = lmu;
-                       lmu_buf.lb_len = sizeof(*lmu);
-               }
+               mlc->mlc_opc = MD_LAYOUT_DETACH;
 
-               rc = mdd_dir_layout_delete(env, sobj, sbuf, &lmu_buf, handle);
+               mdd_write_lock(env, sobj, DT_SRC_PARENT);
+               rc = mdo_layout_change(env, sobj, mlc, handle);
+               mdd_write_unlock(env, sobj);
                if (rc)
                        RETURN(rc);
-
-               /*
-                * delete LMV so that later when destroying sobj it won't delete
-                * stripes again.
-                */
-               if (sbuf->lb_buf) {
-                       mdd_write_lock(env, sobj, DT_SRC_CHILD);
-                       rc = mdo_xattr_del(env, sobj, XATTR_NAME_LMV, handle);
-                       mdd_write_unlock(env, sobj);
-                       if (rc)
-                               RETURN(rc);
-               }
        }
 
        /* don't set nlink from sobj */
@@ -4155,7 +4027,7 @@ static int mdd_migrate_create(const struct lu_env *env,
                struct lu_buf fid_buf;
 
                /* target may be remote, update PFID via sobj. */
-               fid_buf.lb_buf = (void *)mdo2fid(tobj);
+               fid_buf.lb_buf = (void *)mdd_object_fid(tobj);
                fid_buf.lb_len = sizeof(struct lu_fid);
                rc = mdo_xattr_set(env, sobj, &fid_buf, XATTR_NAME_FID, 0,
                                   handle);
@@ -4166,176 +4038,81 @@ static int mdd_migrate_create(const struct lu_env *env,
                mdd_write_lock(env, sobj, DT_SRC_CHILD);
                rc = mdo_xattr_del(env, sobj, XATTR_NAME_LOV, handle);
                mdd_write_unlock(env, sobj);
-               if (rc)
+               /* O_DELAY_CREATE file may not have LOV, ignore -ENODATA */
+               if (rc && rc != -ENODATA)
                        RETURN(rc);
+               rc = 0;
        }
 
-       if (!S_ISDIR(attr->la_mode))
-               rc = mdd_iterate_linkea(env, sobj, tobj, lname, mdo2fid(tpobj),
-                                       ldata, NULL, handle, mdd_update_link);
-
-       RETURN(rc);
-}
-
-static int mdd_declare_migrate_update(const struct lu_env *env,
-                                     struct mdd_object *spobj,
-                                     struct mdd_object *tpobj,
-                                     struct mdd_object *sobj,
-                                     struct mdd_object *tobj,
-                                     const struct lu_name *lname,
-                                     struct lu_attr *attr,
-                                     struct lu_attr *spattr,
-                                     struct lu_attr *tpattr,
-                                     struct linkea_data *ldata,
-                                     bool do_create,
-                                     bool do_destroy,
-                                     struct md_attr *ma,
-                                     struct thandle *handle)
-{
-       struct mdd_thread_info *info = mdd_env_info(env);
-       const struct lu_fid *fid = mdo2fid(do_create ? tobj : sobj);
-       struct lu_attr *la = &info->mti_la_for_fix;
-       int rc;
-
-       rc = mdo_declare_index_delete(env, spobj, lname->ln_name, handle);
-       if (rc)
-               return rc;
-
-       if (S_ISDIR(attr->la_mode)) {
-               rc = mdo_declare_ref_del(env, spobj, handle);
-               if (rc)
-                       return rc;
-       }
-
-       rc = mdo_declare_index_insert(env, tpobj, fid, mdd_object_type(sobj),
-                                     lname->ln_name, handle);
-       if (rc)
-               return rc;
-
-       rc = mdd_declare_links_add(env, do_create ? tobj : sobj, handle, ldata);
-       if (rc)
-               return rc;
-
-       if (S_ISDIR(attr->la_mode)) {
-               rc = mdo_declare_ref_add(env, tpobj, handle);
+       /* update links FID */
+       if (!S_ISDIR(attr->la_mode)) {
+               rc = mdd_iterate_linkea(env, sobj, tobj, tname,
+                                       mdd_object_fid(tpobj), ldata,
+                                       NULL, handle, mdd_update_link);
                if (rc)
-                       return rc;
+                       RETURN(rc);
        }
 
-       la->la_valid = LA_CTIME | LA_MTIME;
-       rc = mdo_declare_attr_set(env, spobj, la, handle);
-       if (rc)
-               return rc;
-
-       if (tpobj != spobj) {
-               rc = mdo_declare_attr_set(env, tpobj, la, handle);
+       /* don't destroy sobj if it's a plain directory */
+       if (!S_ISDIR(attr->la_mode) || sbuf->lb_buf) {
+               mdd_write_lock(env, sobj, DT_SRC_CHILD);
+               rc = mdo_ref_del(env, sobj, handle);
+               if (!rc) {
+                       if (S_ISDIR(attr->la_mode))
+                               rc = mdo_ref_del(env, sobj, handle);
+                       if (!rc)
+                               rc = mdo_destroy(env, sobj, handle);
+               }
+               mdd_write_unlock(env, sobj);
                if (rc)
-                       return rc;
+                       RETURN(rc);
        }
 
-       if (do_create && do_destroy) {
-               rc = mdo_declare_ref_del(env, sobj, handle);
-               if (rc)
-                       return rc;
-
-               rc = mdo_declare_destroy(env, sobj, handle);
-               if (rc)
-                       return rc;
-       }
+       rc = mdd_migrate_update(env, spobj, tpobj, tobj, sname, tname, attr,
+                               spattr, tpattr, ldata, ma, handle);
 
-       return rc;
+       RETURN(rc);
 }
 
-/**
- * migrate dirent from \a spobj to \a tpobj, and destroy \a sobj
- **/
-static int mdd_migrate_update(const struct lu_env *env,
-                             struct mdd_object *spobj,
-                             struct mdd_object *tpobj,
-                             struct mdd_object *sobj,
-                             struct mdd_object *tobj,
-                             const struct lu_name *lname,
-                             struct lu_attr *attr,
-                             struct lu_attr *spattr,
-                             struct lu_attr *tpattr,
-                             struct linkea_data *ldata,
-                             bool do_create,
-                             bool do_destroy,
-                             struct md_attr *ma,
-                             struct thandle *handle)
+/* NB: if the user issued a different migrate command, we can't adjust it
+ * silently here, because this command will decide the target MDT in subdir
+ * migration in LMV.
+ */
+static int mdd_migrate_cmd_check(struct mdd_device *mdd,
+                                const struct lmv_mds_md_v1 *lmv,
+                                const struct lmv_user_md_v1 *lum,
+                                const struct lu_name *lname)
 {
-       struct mdd_thread_info *info = mdd_env_info(env);
-       const struct lu_fid *fid = mdo2fid(do_create ? tobj : sobj);
-       struct lu_attr *la = &info->mti_la_for_fix;
-       int rc;
-
-       ENTRY;
-
-       CDEBUG(D_INFO, "update %s "DFID"/"DFID" to "DFID"/"DFID"\n",
-              lname->ln_name, PFID(mdo2fid(spobj)),
-              PFID(mdo2fid(sobj)), PFID(mdo2fid(tpobj)),
-              PFID(fid));
-
-       rc = __mdd_index_delete(env, spobj, lname->ln_name,
-                               S_ISDIR(attr->la_mode), handle);
-       if (rc)
-               RETURN(rc);
-
-       rc = __mdd_index_insert(env, tpobj, fid, mdd_object_type(sobj),
-                               lname->ln_name, handle);
-       if (rc)
-               RETURN(rc);
-
-       rc = mdd_links_write(env, do_create ? tobj : sobj, ldata, handle);
-       if (rc)
-               RETURN(rc);
-
-       la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime;
-       la->la_valid = LA_CTIME | LA_MTIME;
-       mdd_write_lock(env, spobj, DT_SRC_PARENT);
-       rc = mdd_update_time(env, spobj, spattr, la, handle);
-       mdd_write_unlock(env, spobj);
-       if (rc)
-               RETURN(rc);
-
-       if (tpobj != spobj) {
-               la->la_valid = LA_CTIME | LA_MTIME;
-               mdd_write_lock(env, tpobj, DT_TGT_PARENT);
-               rc = mdd_update_time(env, tpobj, tpattr, la, handle);
-               mdd_write_unlock(env, tpobj);
-               if (rc)
-                       RETURN(rc);
+       __u32 lum_stripe_count = lum->lum_stripe_count;
+       __u32 lmv_hash_type = lmv->lmv_hash_type;
+
+       if (!lmv_is_sane(lmv))
+               return -EBADF;
+
+       /* if stripe_count unspecified, set to 1 */
+       if (!lum_stripe_count)
+               lum_stripe_count = cpu_to_le32(1);
+
+       lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION);
+
+       /* TODO: check specific MDTs */
+       if (lum_stripe_count != lmv->lmv_migrate_offset ||
+           lum->lum_stripe_offset != lmv->lmv_master_mdt_index ||
+           (lum->lum_hash_type && lum->lum_hash_type != lmv_hash_type)) {
+               CERROR("%s: '"DNAME"' migration was interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration.\n",
+                       mdd2obd_dev(mdd)->obd_name, PNAME(lname),
+                       le32_to_cpu(lmv->lmv_master_mdt_index),
+                       le32_to_cpu(lmv->lmv_migrate_offset),
+                       mdt_hash_name[le32_to_cpu(lmv_hash_type)],
+                       PNAME(lname));
+               return -EPERM;
        }
 
-       /*
-        * there are three situations we shouldn't destroy source:
-        * 1. if source is not dir, and it happens to be located on the same MDT
-        *    as target parent.
-        * 2. if source is not dir, and has link on the same MDT where source is
-        *    located.
-        * 3. if source is dir, and it's a normal, non-empty dir.
-        *
-        * the first two situations equals to !do_create, and the 3rd equals to
-        * !do_destroy, so the below condition is actually
-        * !(!do_create || !do_destroy).
-        *
-        * NB, if user has opened source dir before migration, he will get
-        * -ENOENT error when close it later, because source is likely to be
-        *  remote, which can't be moved to orphan list, but except this error
-        *  message, this won't cause any inconsistency or trouble.
-        */
-       if (do_create && do_destroy) {
-               mdd_write_lock(env, sobj, DT_SRC_CHILD);
-               mdo_ref_del(env, sobj, handle);
-               rc = mdo_destroy(env, sobj, handle);
-               mdd_write_unlock(env, sobj);
-       }
-
-       RETURN(rc);
+       return -EALREADY;
 }
 
 /**
- * Migrate directory or file.
+ * Internal function to migrate directory or file between MDTs.
  *
  * migrate source to target in following steps:
  *   1. create target, append source stripes after target's if it's directory,
@@ -4344,231 +4121,110 @@ static int mdd_migrate_update(const struct lu_env *env,
  *      update file linkea, and destroy source if it's not needed any more.
  *
  * \param[in] env      execution environment
- * \param[in] md_pobj  parent master object
- * \param[in] md_sobj  source object
- * \param[in] lname    file name
- * \param[in] md_tobj  target object
+ * \param[in] spobj    source parent object
+ * \param[in] tpobj    target parent object
+ * \param[in] sobj     source object
+ * \param[in] tobj     target object
+ * \param[in] sname    source file name
+ * \param[in] tname    target file name
  * \param[in] spec     target creation spec
  * \param[in] ma       used to update \a pobj mtime and ctime
  *
  * \retval             0 on success
  * \retval             -errno on failure
  */
-static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj,
-                      struct md_object *md_sobj, const struct lu_name *lname,
-                      struct md_object *md_tobj, struct md_op_spec *spec,
-                      struct md_attr *ma)
+static int mdd_migrate_object(const struct lu_env *env,
+                             struct mdd_object *spobj,
+                             struct mdd_object *tpobj,
+                             struct mdd_object *sobj,
+                             struct mdd_object *tobj,
+                             const struct lu_name *sname,
+                             const struct lu_name *tname,
+                             struct md_op_spec *spec,
+                             struct md_attr *ma)
 {
-       struct mdd_device *mdd = mdo2mdd(md_pobj);
        struct mdd_thread_info *info = mdd_env_info(env);
-       struct mdd_object *pobj = md2mdd_obj(md_pobj);
-       struct mdd_object *sobj = md2mdd_obj(md_sobj);
-       struct mdd_object *tobj = md2mdd_obj(md_tobj);
-       struct mdd_object *spobj = NULL;
-       struct mdd_object *tpobj = NULL;
+       struct mdd_device *mdd = mdo2mdd(&spobj->mod_obj);
        struct lu_attr *spattr = &info->mti_pattr;
        struct lu_attr *tpattr = &info->mti_tpattr;
        struct lu_attr *attr = &info->mti_cattr;
        struct linkea_data *ldata = &info->mti_link_data;
        struct dt_allocation_hint *hint = &info->mti_hint;
-       struct lu_fid *fid = &info->mti_fid2;
-       struct lu_buf pbuf = { NULL };
        struct lu_buf sbuf = { NULL };
-       struct lmv_mds_md_v1 *plmv;
+       struct lmv_mds_md_v1 *lmv;
        struct thandle *handle;
-       bool do_create = true;
-       bool do_destroy = true;
        int rc;
+
        ENTRY;
 
        rc = mdd_la_get(env, sobj, attr);
        if (rc)
                RETURN(rc);
 
-       /* locate source and target stripe on pobj, which are the real parent */
-       rc = mdd_stripe_get(env, pobj, &pbuf, XATTR_NAME_LMV);
-       if (rc < 0 && rc != -ENODATA)
-               RETURN(rc);
-
-       plmv = pbuf.lb_buf;
-       if (plmv) {
-               __u32 hash_type = le32_to_cpu(plmv->lmv_hash_type);
-               __u32 count = le32_to_cpu(plmv->lmv_stripe_count);
-               int index;
-
-               /* locate target parent stripe */
-               if (hash_type & LMV_HASH_FLAG_MIGRATION) {
-                       /*
-                        * fail check here to make sure top dir migration
-                        * succeed.
-                        */
-                       if (OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_ENTRIES, 0))
-                               GOTO(out, rc = -EIO);
-                       hash_type &= ~LMV_HASH_FLAG_MIGRATION;
-                       count = le32_to_cpu(plmv->lmv_migrate_offset);
-               }
-               index = lmv_name_to_stripe_index(hash_type, count,
-                                                lname->ln_name,
-                                                lname->ln_namelen);
-               if (index < 0)
-                       GOTO(out, rc = index);
-
-               fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]);
-               tpobj = mdd_object_find(env, mdd, fid);
-               if (IS_ERR(tpobj))
-                       GOTO(out, rc = PTR_ERR(tpobj));
-
-               /* locate source parent stripe */
-               if (le32_to_cpu(plmv->lmv_hash_type) &
-                   LMV_HASH_FLAG_MIGRATION) {
-                       hash_type = le32_to_cpu(plmv->lmv_migrate_hash);
-                       count = le32_to_cpu(plmv->lmv_stripe_count) -
-                               le32_to_cpu(plmv->lmv_migrate_offset);
-
-                       index = lmv_name_to_stripe_index(hash_type, count,
-                                                        lname->ln_name,
-                                                        lname->ln_namelen);
-                       if (index < 0) {
-                               mdd_object_put(env, tpobj);
-                               GOTO(out, rc = index);
-                       }
-
-                       index += le32_to_cpu(plmv->lmv_migrate_offset);
-                       fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]);
-                       spobj = mdd_object_find(env, mdd, fid);
-                       if (IS_ERR(spobj)) {
-                               mdd_object_put(env, tpobj);
-                               GOTO(out, rc = PTR_ERR(spobj));
-                       }
-               } else {
-                       spobj = tpobj;
-                       mdd_object_get(spobj);
-               }
-       } else {
-               tpobj = pobj;
-               spobj = pobj;
-               mdd_object_get(tpobj);
-               mdd_object_get(spobj);
-       }
-
        rc = mdd_la_get(env, spobj, spattr);
        if (rc)
-               GOTO(out, rc);
+               RETURN(rc);
 
        rc = mdd_la_get(env, tpobj, tpattr);
        if (rc)
-               GOTO(out, rc);
+               RETURN(rc);
 
-       if (S_ISDIR(attr->la_mode)) {
-               struct lmv_user_md_v1 *lmu = spec->u.sp_ea.eadata;
+       if (S_ISDIR(attr->la_mode) && !spec->sp_migrate_nsonly) {
+               struct lmv_user_md_v1 *lum = spec->u.sp_ea.eadata;
 
-               LASSERT(lmu);
+               LASSERT(lum);
 
-               /*
-                * if user use default value '0' for stripe_count, we need to
+               /* if user uses default value '0' for stripe_count, we need to
                 * adjust it to '1' to create a 1-stripe directory.
                 */
-               if (lmu->lum_stripe_count == 0) {
-                       /* eadata is from request, don't alter it */
-                       info->mti_lmu = *lmu;
-                       info->mti_lmu.lum_stripe_count = cpu_to_le32(1);
-                       spec->u.sp_ea.eadata = &info->mti_lmu;
-                       lmu = spec->u.sp_ea.eadata;
-               }
+               if (lum->lum_stripe_count == 0)
+                       lum->lum_stripe_count = cpu_to_le32(1);
 
                rc = mdd_stripe_get(env, sobj, &sbuf, XATTR_NAME_LMV);
-               if (rc == -ENODATA) {
-                       if (mdd_dir_is_empty(env, sobj) == 0) {
-                               /*
-                                * if sobj is empty, and target is not striped,
-                                * create target as a normal directory.
-                                */
-                               if (le32_to_cpu(lmu->lum_stripe_count) == 1) {
-                                       info->mti_lmu = *lmu;
-                                       info->mti_lmu.lum_stripe_count = 0;
-                                       spec->u.sp_ea.eadata = &info->mti_lmu;
-                                       lmu = spec->u.sp_ea.eadata;
-                               }
-                       } else {
-                               /*
-                                * sobj is not striped dir, if it's not empty,
-                                * it will be migrated to be a stripe of target,
-                                * don't destroy it after migration.
-                                */
-                               do_destroy = false;
-                       }
-               } else if (rc) {
+               if (rc && rc != -ENODATA)
                        GOTO(out, rc);
-               } else {
-                       struct lmv_mds_md_v1 *lmv = sbuf.lb_buf;
-
-                       if (le32_to_cpu(lmv->lmv_hash_type) &
-                           LMV_HASH_FLAG_MIGRATION) {
-                               __u32 lum_stripe_count = lmu->lum_stripe_count;
-                               __u32 lmv_hash_type = lmv->lmv_hash_type &
-                                       cpu_to_le32(LMV_HASH_TYPE_MASK);
-
-                               if (!lum_stripe_count)
-                                       lum_stripe_count = cpu_to_le32(1);
-
-                               /* TODO: check specific MDTs */
-                               if (lmv->lmv_migrate_offset !=
-                                   lum_stripe_count ||
-                                   lmv->lmv_master_mdt_index !=
-                                   lmu->lum_stripe_offset ||
-                                   (lmv_hash_type != 0 &&
-                                    lmv_hash_type != lmu->lum_hash_type)) {
-                                       CERROR("%s: \'"DNAME"\' migration was "
-                                               "interrupted, run \'lfs migrate "
-                                               "-m %d -c %d -H %d "DNAME"\' to "
-                                               "finish migration.\n",
-                                               mdd2obd_dev(mdd)->obd_name,
-                                               PNAME(lname),
-                                               le32_to_cpu(
-                                                   lmv->lmv_master_mdt_index),
-                                               le32_to_cpu(
-                                                   lmv->lmv_migrate_offset),
-                                               le32_to_cpu(lmv_hash_type),
-                                               PNAME(lname));
-                                       GOTO(out, rc = -EPERM);
-                               }
-                               GOTO(out, rc = -EALREADY);
+
+               lmv = sbuf.lb_buf;
+               if (lmv) {
+                       if (!lmv_is_sane(lmv))
+                               GOTO(out, rc = -EBADF);
+                       if (lmv_is_migrating(lmv)) {
+                               rc = mdd_migrate_cmd_check(mdd, lmv, lum,
+                                                          sname);
+                               GOTO(out, rc);
                        }
                }
-       } else if (!mdd_object_remote(tpobj)) {
-               /*
-                * if source is already on MDT where target parent is located,
-                * no need to create, just update namespace.
-                */
-               do_create = false;
-       } else if (S_ISLNK(attr->la_mode)) {
-               lu_buf_check_and_alloc(&sbuf, attr->la_size + 1);
-               if (!sbuf.lb_buf)
-                       GOTO(out, rc = -ENOMEM);
-               rc = mdd_readlink(env, &sobj->mod_obj, &sbuf);
-               if (rc <= 0) {
-                       rc = rc ?: -EFAULT;
-                       CERROR("%s: "DFID" readlink failed: rc = %d\n",
-                              mdd2obd_dev(mdd)->obd_name,
-                              PFID(mdo2fid(sobj)), rc);
-                       GOTO(out, rc);
+       } else if (!S_ISDIR(attr->la_mode)) {
+               if (spobj == tpobj)
+                       GOTO(out, rc = -EALREADY);
+
+               /* update namespace only if @sobj is on MDT where @tpobj is. */
+               if (!mdd_object_remote(tpobj) && !mdd_object_remote(sobj))
+                       spec->sp_migrate_nsonly = true;
+
+               if (S_ISLNK(attr->la_mode)) {
+                       lu_buf_check_and_alloc(&sbuf, attr->la_size + 1);
+                       if (!sbuf.lb_buf)
+                               GOTO(out, rc = -ENOMEM);
+
+                       rc = mdd_readlink(env, &sobj->mod_obj, &sbuf);
+                       if (rc <= 0) {
+                               rc = rc ?: -EFAULT;
+                               CERROR("%s: "DFID" readlink failed: rc = %d\n",
+                                      mdd2obd_dev(mdd)->obd_name,
+                                      PFID(mdd_object_fid(sobj)), rc);
+                               GOTO(out, rc);
+                       }
                }
-               spec->u.sp_symname = sbuf.lb_buf;
-       } else if (S_ISREG(attr->la_mode)) {
-               spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE;
-               spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA;
        }
 
-       /*
-        * if sobj has link on the same MDT, no need to create, just update
-        * namespace, and it will be a remote file on target parent, which is
-        * similar to rename.
-        */
-       rc = migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, lname, attr,
-                                   ldata);
+       /* linkea needs update upon FID or parent stripe change */
+       rc = mdd_migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, sname,
+                                       tname, attr, ldata);
        if (rc > 0)
-               do_create = false;
-       else if (rc)
+               /* update namespace only if @sobj has link on its MDT. */
+               spec->sp_migrate_nsonly = true;
+       else if (rc < 0)
                GOTO(out, rc);
 
        rc = mdd_migrate_sanity_check(env, mdd, spobj, tpobj, sobj, tobj,
@@ -4576,115 +4232,182 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj,
        if (rc)
                GOTO(out, rc);
 
-       mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint);
-
        handle = mdd_trans_create(env, mdd);
        if (IS_ERR(handle))
                GOTO(out, rc = PTR_ERR(handle));
 
-       if (do_create) {
-               rc = mdd_declare_migrate_create(env, tpobj, sobj, tobj, lname,
-                                               attr, &sbuf, ldata, spec, hint,
-                                               handle);
-               if (rc)
-                       GOTO(stop_trans, rc);
-       }
+       if (spec->sp_migrate_nsonly)
+               rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, sname,
+                                               tname, attr, spattr, tpattr,
+                                               ldata, ma, handle);
+       else
+               rc = mdd_declare_migrate_create(env, spobj, tpobj, sobj, tobj,
+                                               sname, tname, spattr, tpattr,
+                                               attr, &sbuf, ldata, ma, spec,
+                                               hint, handle);
+       if (rc)
+               GOTO(stop, rc);
+
+       rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, tname, sname,
+                                        handle);
+       if (rc)
+               GOTO(stop, rc);
+
+       rc = mdd_trans_start(env, mdd, handle);
+       if (rc)
+               GOTO(stop, rc);
+
+       if (spec->sp_migrate_nsonly)
+               rc = mdd_migrate_update(env, spobj, tpobj, sobj, sname, tname,
+                                       attr, spattr, tpattr, ldata, ma,
+                                       handle);
+       else
+               rc = mdd_migrate_create(env, spobj, tpobj, sobj, tobj, sname,
+                                       tname, spattr, tpattr, attr, &sbuf,
+                                       ldata, ma, spec, hint, handle);
+       if (rc)
+               GOTO(stop, rc);
+
+       rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0,
+                                   spec->sp_migrate_nsonly ? sobj : tobj,
+                                   mdd_object_fid(spobj), mdd_object_fid(sobj),
+                                   mdd_object_fid(tpobj), tname, sname,
+                                   handle);
+       if (rc)
+               GOTO(stop, rc);
+       EXIT;
+
+stop:
+       rc = mdd_trans_stop(env, mdd, rc, handle);
+out:
+       lu_buf_free(&sbuf);
+
+       return rc;
+}
+
+/**
+ * Migrate directory or file between MDTs.
+ *
+ * \param[in] env      execution environment
+ * \param[in] md_pobj  parent master object
+ * \param[in] md_sobj  source object
+ * \param[in] lname    file name
+ * \param[in] md_tobj  target object
+ * \param[in] spec     target creation spec
+ * \param[in] ma       used to update \a pobj mtime and ctime
+ *
+ * \retval             0 on success
+ * \retval             -errno on failure
+ */
+static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj,
+                      struct md_object *md_sobj, const struct lu_name *lname,
+                      struct md_object *md_tobj, struct md_op_spec *spec,
+                      struct md_attr *ma)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct mdd_device *mdd = mdo2mdd(md_pobj);
+       struct mdd_object *pobj = md2mdd_obj(md_pobj);
+       struct mdd_object *sobj = md2mdd_obj(md_sobj);
+       struct mdd_object *tobj = md2mdd_obj(md_tobj);
+       struct mdd_object *spobj = NULL;
+       struct mdd_object *tpobj = NULL;
+       struct lu_buf pbuf = { NULL };
+       struct lu_fid *fid = &info->mti_fid2;
+       struct lmv_mds_md_v1 *lmv;
+       int rc;
+
+       ENTRY;
+
+       /* locate source and target stripe on pobj, which are the real parent */
+       rc = mdd_stripe_get(env, pobj, &pbuf, XATTR_NAME_LMV);
+       if (rc < 0 && rc != -ENODATA)
+               RETURN(rc);
+
+       lmv = pbuf.lb_buf;
+       if (lmv) {
+               int index;
+
+               if (!lmv_is_sane(lmv))
+                       GOTO(out, rc = -EBADF);
+
+               /* locate target parent stripe */
+               /* fail check here to make sure top dir migration succeeds. */
+               if (lmv_is_migrating(lmv) &&
+                   OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_ENTRIES, 0))
+                       GOTO(out, rc = -EIO);
 
-       rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, tobj, lname,
-                                       attr, spattr, tpattr, ldata, do_create,
-                                       do_destroy, ma, handle);
-       if (rc)
-               GOTO(stop_trans, rc);
+               index = lmv_name_to_stripe_index(lmv, lname->ln_name,
+                                                lname->ln_namelen);
+               if (index < 0)
+                       GOTO(out, rc = index);
 
-       rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, lname, NULL,
-                                        handle);
-       if (rc)
-               GOTO(stop_trans, rc);
+               fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]);
+               tpobj = mdd_object_find(env, mdd, fid);
+               if (IS_ERR(tpobj))
+                       GOTO(out, rc = PTR_ERR(tpobj));
 
-       rc = mdd_trans_start(env, mdd, handle);
-       if (rc)
-               GOTO(stop_trans, rc);
+               /* locate source parent stripe */
+               if (lmv_is_layout_changing(lmv)) {
+                       index = lmv_name_to_stripe_index_old(lmv,
+                                                            lname->ln_name,
+                                                            lname->ln_namelen);
+                       if (index < 0)
+                               GOTO(out, rc = index);
 
-       if (do_create) {
-               rc = mdd_migrate_create(env, tpobj, sobj, tobj, lname, attr,
-                                       &sbuf, ldata, spec, hint, handle);
-               if (rc)
-                       GOTO(stop_trans, rc);
-       }
+                       fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]);
+                       spobj = mdd_object_find(env, mdd, fid);
+                       if (IS_ERR(spobj))
+                               GOTO(out, rc = PTR_ERR(spobj));
 
-       rc = mdd_migrate_update(env, spobj, tpobj, sobj, tobj, lname, attr,
-                               spattr, tpattr, ldata, do_create, do_destroy,
-                               ma, handle);
-       if (rc)
-               GOTO(stop_trans, rc);
+                       /* parent stripe unchanged */
+                       if (spobj == tpobj) {
+                               if (!lmv_is_restriping(lmv))
+                                       GOTO(out, rc = -EINVAL);
+                               GOTO(out, rc = -EALREADY);
+                       }
+               } else {
+                       spobj = tpobj;
+                       mdd_object_get(spobj);
+               }
+       } else {
+               tpobj = pobj;
+               spobj = pobj;
+               mdd_object_get(tpobj);
+               mdd_object_get(spobj);
+       }
 
-       rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj,
-                                   mdo2fid(spobj), mdo2fid(sobj),
-                                   mdo2fid(tpobj), lname, lname, handle);
-       if (rc)
-               GOTO(stop_trans, rc);
+       rc = mdd_migrate_object(env, spobj, tpobj, sobj, tobj, lname, lname,
+                               spec, ma);
+       GOTO(out, rc);
 
-       EXIT;
-stop_trans:
-       rc = mdd_trans_stop(env, mdd, rc, handle);
 out:
-       if (spobj && !IS_ERR(spobj))
+       if (!IS_ERR_OR_NULL(spobj))
                mdd_object_put(env, spobj);
-       if (tpobj && !IS_ERR(tpobj))
+       if (!IS_ERR_OR_NULL(tpobj))
                mdd_object_put(env, tpobj);
-       lu_buf_free(&sbuf);
        lu_buf_free(&pbuf);
+
        return rc;
 }
 
-static int __mdd_dir_declare_layout_shrink(const struct lu_env *env,
-                                          struct mdd_object *pobj,
-                                          struct mdd_object *obj,
-                                          struct mdd_object *stripe,
-                                          struct lu_attr *attr,
-                                          struct lu_buf *lmv_buf,
-                                          const struct lu_buf *lmu_buf,
-                                          struct lu_name *lname,
-                                          struct thandle *handle)
+static int mdd_declare_1sd_collapse(const struct lu_env *env,
+                                   struct mdd_object *pobj,
+                                   struct mdd_object *obj,
+                                   struct mdd_object *stripe,
+                                   struct lu_attr *attr,
+                                   struct md_layout_change *mlc,
+                                   struct lu_name *lname,
+                                   struct thandle *handle)
 {
-       struct mdd_thread_info *info = mdd_env_info(env);
-       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
-       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
-       struct lu_buf shrink_buf = { .lb_buf = lmu,
-                                    .lb_len = sizeof(*lmu) };
        int rc;
 
-       LASSERT(lmv);
-
-       memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu));
-
-       if (le32_to_cpu(lmu->lum_stripe_count) < 2)
-               lmu->lum_stripe_count = 0;
-
-       rc = mdd_dir_declare_layout_delete(env, obj, lmv_buf, &shrink_buf,
-                                          handle);
-       if (rc)
-               return rc;
-
-       if (lmu->lum_stripe_count == 0) {
-               lmu->lum_stripe_count = cpu_to_le32(1);
-
-               rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LMV, handle);
-               if (rc)
-                       return rc;
-       }
-
-       rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle,
-                                    mdd_dir_declare_destroy_stripe);
+       mlc->mlc_opc = MD_LAYOUT_DETACH;
+       rc = mdo_declare_layout_change(env, obj, mlc, handle);
        if (rc)
                return rc;
 
-       if (le32_to_cpu(lmu->lum_stripe_count) > 1)
-               return mdo_declare_xattr_set(env, obj, lmv_buf,
-                                            XATTR_NAME_LMV".set", 0, handle);
-
-       rc = mdo_declare_index_insert(env, stripe, mdo2fid(pobj), S_IFDIR,
-                                     dotdot, handle);
+       rc = mdo_declare_index_insert(env, stripe, mdd_object_fid(pobj),
+                                     S_IFDIR, dotdot, handle);
        if (rc)
                return rc;
 
@@ -4705,8 +4428,8 @@ static int __mdd_dir_declare_layout_shrink(const struct lu_env *env,
        if (rc)
                return rc;
 
-       rc = mdo_declare_index_insert(env, pobj, mdo2fid(stripe), attr->la_mode,
-                                     lname->ln_name, handle);
+       rc = mdo_declare_index_insert(env, pobj, mdd_object_fid(stripe),
+                                     attr->la_mode, lname->ln_name, handle);
        if (rc)
                return rc;
 
@@ -4723,91 +4446,37 @@ static int __mdd_dir_declare_layout_shrink(const struct lu_env *env,
                return rc;
 
        return rc;
-
 }
 
-/*
- * after files under \a obj were migrated, shrink old stripes from \a obj,
- * furthermore, if it becomes a 1-stripe directory, convert it to a normal one.
- */
-static int __mdd_dir_layout_shrink(const struct lu_env *env,
-                                  struct mdd_object *pobj,
-                                  struct mdd_object *obj,
-                                  struct mdd_object *stripe,
-                                  struct lu_attr *attr,
-                                  struct lu_buf *lmv_buf,
-                                  const struct lu_buf *lmu_buf,
-                                  struct lu_name *lname,
-                                  struct thandle *handle)
+/* transform one-stripe directory to a plain directory */
+static int mdd_1sd_collapse(const struct lu_env *env,
+                           struct mdd_object *pobj,
+                           struct mdd_object *obj,
+                           struct mdd_object *stripe,
+                           struct lu_attr *attr,
+                           struct md_layout_change *mlc,
+                           struct lu_name *lname,
+                           struct thandle *handle)
 {
-       struct mdd_thread_info *info = mdd_env_info(env);
-       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
-       struct lmv_user_md *lmu = (typeof(lmu))info->mti_key;
-       struct lu_buf shrink_buf = { .lb_buf = lmu,
-                                    .lb_len = sizeof(*lmu) };
-       int len = lmv_buf->lb_len;
-       __u32 version = le32_to_cpu(lmv->lmv_layout_version);
        int rc;
 
        ENTRY;
 
-       /* lmu needs to be altered, but lmu_buf is const */
-       memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu));
-
-       /*
-        * if dir will be shrunk to 1-stripe, delete all stripes, because it
-        * will be converted to normal dir.
-        */
-       if (le32_to_cpu(lmu->lum_stripe_count) == 1)
-               lmu->lum_stripe_count = 0;
-
-       /* delete stripes after lmu_stripe_count */
-       rc = mdd_dir_layout_delete(env, obj, lmv_buf, &shrink_buf, handle);
-       if (rc)
-               RETURN(rc);
-
-       if (lmu->lum_stripe_count == 0) {
-               lmu->lum_stripe_count = cpu_to_le32(1);
-
-               /* delete LMV to avoid deleting stripes again upon destroy */
-               mdd_write_lock(env, obj, DT_SRC_CHILD);
-               rc = mdo_xattr_del(env, obj, XATTR_NAME_LMV, handle);
-               mdd_write_unlock(env, obj);
-               if (rc)
-                       RETURN(rc);
-       }
+       /* replace 1-stripe directory with its stripe */
+       mlc->mlc_opc = MD_LAYOUT_DETACH;
 
-       /* destroy stripes after lmu_stripe_count */
        mdd_write_lock(env, obj, DT_SRC_PARENT);
-       rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle,
-                                    mdd_dir_destroy_stripe);
+       rc = mdo_layout_change(env, obj, mlc, handle);
        mdd_write_unlock(env, obj);
-
-       if (le32_to_cpu(lmu->lum_stripe_count) > 1) {
-               /* update dir LMV, that's all if it's still striped. */
-               lmv->lmv_stripe_count = lmu->lum_stripe_count;
-               lmv->lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION);
-               lmv->lmv_migrate_offset = 0;
-               lmv->lmv_migrate_hash = 0;
-               lmv->lmv_layout_version = cpu_to_le32(++version);
-
-               lmv_buf->lb_len = sizeof(*lmv);
-               rc = mdo_xattr_set(env, obj, lmv_buf, XATTR_NAME_LMV".set", 0,
-                                  handle);
-               lmv_buf->lb_len = len;
+       if (rc)
                RETURN(rc);
-       }
-
-       /* replace directory with its remaining stripe */
-       LASSERT(pobj);
-       LASSERT(stripe);
 
        mdd_write_lock(env, pobj, DT_SRC_PARENT);
        mdd_write_lock(env, obj, DT_SRC_CHILD);
 
        /* insert dotdot to stripe which points to parent */
-       rc = __mdd_index_insert_only(env, stripe, mdo2fid(pobj), S_IFDIR,
-                                    dotdot, handle);
+       rc = __mdd_index_insert_only(env, stripe, mdd_object_fid(pobj),
+                                    S_IFDIR, dotdot, handle);
        if (rc)
                GOTO(out, rc);
 
@@ -4834,8 +4503,8 @@ static int __mdd_dir_layout_shrink(const struct lu_env *env,
                GOTO(out, rc);
 
        /* insert stripe to parent with dir name */
-       rc = __mdd_index_insert_only(env, pobj, mdo2fid(stripe), attr->la_mode,
-                                    lname->ln_name, handle);
+       rc = __mdd_index_insert_only(env, pobj, mdd_object_fid(stripe),
+                                    attr->la_mode, lname->ln_name, handle);
        if (rc)
                GOTO(out, rc);
 
@@ -4861,11 +4530,11 @@ out:
 }
 
 /*
- * shrink directory stripes to lum_stripe_count specified by lum_mds_md.
+ * shrink directory stripes after migration/merge
  */
 int mdd_dir_layout_shrink(const struct lu_env *env,
                          struct md_object *md_obj,
-                         const struct lu_buf *lmu_buf)
+                         struct md_layout_change *mlc)
 {
        struct mdd_device *mdd = mdo2mdd(md_obj);
        struct mdd_thread_info *info = mdd_env_info(env);
@@ -4895,25 +4564,25 @@ int mdd_dir_layout_shrink(const struct lu_env *env,
                RETURN(rc);
 
        lmv = lmv_buf.lb_buf;
-       lmu = lmu_buf->lb_buf;
+       if (!lmv_is_sane(lmv))
+               RETURN(-EBADF);
+
+       lmu = mlc->mlc_buf.lb_buf;
 
-       /* this was checked in MDT */
+       /* adjust the default value '0' to '1' */
+       if (lmu->lum_stripe_count == 0)
+               lmu->lum_stripe_count = cpu_to_le32(1);
+
+       /* these were checked in MDT */
        LASSERT(le32_to_cpu(lmu->lum_stripe_count) <
                le32_to_cpu(lmv->lmv_stripe_count));
+       LASSERT(!lmv_is_splitting(lmv));
+       LASSERT(lmv_is_migrating(lmv) || lmv_is_merging(lmv));
 
-       rc = mdd_dir_iterate_stripes(env, obj, &lmv_buf, lmu_buf, NULL,
-                                    mdd_shrink_stripe_is_empty);
-       if (rc < 0)
-               GOTO(out, rc);
-       else if (rc != 0)
-               GOTO(out, rc = -ENOTEMPTY);
-
-       /*
-        * if obj stripe count will be shrunk to 1, we need to convert it to a
-        * normal dir, which will change its fid and update parent namespace,
-        * get obj name and parent fid from linkea.
+       /* if dir stripe count will be shrunk to 1, it needs to be transformed
+        * to a plain dir, which will cause FID change and namespace update.
         */
-       if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
+       if (le32_to_cpu(lmu->lum_stripe_count) == 1) {
                struct linkea_data *ldata = &info->mti_link_data;
                char *filename = info->mti_name;
 
@@ -4957,11 +4626,18 @@ int mdd_dir_layout_shrink(const struct lu_env *env,
        if (IS_ERR(handle))
                GOTO(out, rc = PTR_ERR(handle));
 
-       rc = __mdd_dir_declare_layout_shrink(env, pobj, obj, stripe, attr,
-                                            &lmv_buf, lmu_buf, &lname, handle);
+       mlc->mlc_opc = MD_LAYOUT_SHRINK;
+       rc = mdo_declare_layout_change(env, obj, mlc, handle);
        if (rc)
                GOTO(stop_trans, rc);
 
+       if (le32_to_cpu(lmu->lum_stripe_count) == 1) {
+               rc = mdd_declare_1sd_collapse(env, pobj, obj, stripe, attr, mlc,
+                                             &lname, handle);
+               if (rc)
+                       GOTO(stop_trans, rc);
+       }
+
        rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL, NULL,
                                         handle);
        if (rc)
@@ -4971,11 +4647,20 @@ int mdd_dir_layout_shrink(const struct lu_env *env,
        if (rc)
                GOTO(stop_trans, rc);
 
-       rc = __mdd_dir_layout_shrink(env, pobj, obj, stripe, attr, &lmv_buf,
-                                    lmu_buf, &lname, handle);
+       mdd_write_lock(env, obj, DT_SRC_PARENT);
+       mlc->mlc_opc = MD_LAYOUT_SHRINK;
+       rc = mdo_layout_change(env, obj, mlc, handle);
+       mdd_write_unlock(env, obj);
        if (rc)
                GOTO(stop_trans, rc);
 
+       if (le32_to_cpu(lmu->lum_stripe_count) == 1) {
+               rc = mdd_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname,
+                                     handle);
+               if (rc)
+                       GOTO(stop_trans, rc);
+       }
+
        rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj,
                                            XATTR_NAME_LMV, handle);
        GOTO(stop_trans, rc);
@@ -4991,6 +4676,257 @@ out:
        return rc;
 }
 
+/*
+ * Declare creation of \a tobj under \a pobj, attaching \a obj to it as the
+ * only stripe of a 1-stripe LMV, and the xattr copy from \a obj to \a tobj.
+ *
+ * The caller is expected to have zeroed lum_stripe_count in the create spec
+ * before calling this helper and to restore it afterwards.
+ */
+static int mdd_dir_declare_plain_target(const struct lu_env *env,
+                                        struct mdd_object *pobj,
+                                        struct mdd_object *obj,
+                                        struct mdd_object *tobj,
+                                        struct md_layout_change *mlc,
+                                        struct dt_allocation_hint *hint,
+                                        struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct linkea_data *ldata = &info->mti_link_data;
+       struct lmv_mds_md_v1 *lmv;
+       int rc;
+
+       mdd_object_make_hint(env, pobj, tobj, mlc->mlc_attr, mlc->mlc_spec,
+                            hint);
+       rc = mdd_declare_create(env, mdo2mdd(&pobj->mod_obj), pobj, tobj,
+                               mlc->mlc_name, mlc->mlc_attr, handle,
+                               mlc->mlc_spec, ldata, NULL, NULL, NULL, hint);
+       if (rc)
+               return rc;
+
+       /* tobj mode will be used in lod_declare_xattr_set(), but it's not
+        * created yet.
+        */
+       tobj->mod_obj.mo_lu.lo_header->loh_attr |= S_IFDIR;
+
+       /* build a 1-stripe LMV in the per-thread key buffer; all fields are
+        * stored little-endian, so the stripe FID is converted from CPU order
+        */
+       lmv = (typeof(lmv))info->mti_key;
+       memset(lmv, 0, sizeof(*lmv));
+       lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_V1);
+       lmv->lmv_stripe_count = cpu_to_le32(1);
+       lmv->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_DEFAULT);
+       fid_cpu_to_le(&lmv->lmv_stripe_fids[0], mdd_object_fid(obj));
+
+       mlc->mlc_opc = MD_LAYOUT_ATTACH;
+       mlc->mlc_buf.lb_buf = lmv;
+       mlc->mlc_buf.lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1);
+       rc = mdo_declare_layout_change(env, tobj, mlc, handle);
+       if (rc)
+               return rc;
+
+       return mdd_iterate_xattrs(env, obj, tobj, true, handle,
+                                 mdo_declare_xattr_set);
+}
+
+/**
+ * Declare all updates needed to split plain directory \a obj.
+ *
+ * The declarations are issued in this order: detach \a obj, prepare its
+ * linkea, create \a tobj with \a obj attached as single stripe and xattrs
+ * copied, split \a tobj, replace the name entry of \a pobj with the FID of
+ * \a tobj, set ctime/mtime on \a obj and \a pobj, and store a CL_MIGRATE
+ * changelog record.
+ *
+ * lum_stripe_count of the create spec is temporarily zeroed while \a tobj
+ * creation is declared; it is restored on both success and failure so the
+ * caller's spec is never left modified. On return mlc->mlc_buf points to an
+ * LMV built in the per-thread key buffer and mlc->mlc_opc has been changed.
+ *
+ * \param[in] env      execution environment
+ * \param[in] mdd      device used for the changelog declaration
+ * \param[in] pobj     parent of the directory being split
+ * \param[in] obj      plain directory being split
+ * \param[in] tobj     target object which replaces \a obj in \a pobj
+ * \param[in,out] mlc  layout change descriptor (name, attr, spec, buf)
+ * \param[out] hint    allocation hint filled for \a tobj creation
+ * \param[in] handle   transaction handle
+ *
+ * \retval 0           on success
+ * \retval others      error code returned by the failing declaration
+ */
+static int mdd_dir_declare_split_plain(const struct lu_env *env,
+                                       struct mdd_device *mdd,
+                                       struct mdd_object *pobj,
+                                       struct mdd_object *obj,
+                                       struct mdd_object *tobj,
+                                       struct md_layout_change *mlc,
+                                       struct dt_allocation_hint *hint,
+                                       struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       const struct lu_name *lname = mlc->mlc_name;
+       struct lu_attr *la = &info->mti_la_for_fix;
+       struct lmv_user_md_v1 *lum = mlc->mlc_spec->u.sp_ea.eadata;
+       struct linkea_data *ldata = &info->mti_link_data;
+       __u32 count;
+       int rc;
+
+       mlc->mlc_opc = MD_LAYOUT_DETACH;
+       rc = mdo_declare_layout_change(env, obj, mlc, handle);
+       if (rc)
+               return rc;
+
+       memset(ldata, 0, sizeof(*ldata));
+       rc = mdd_linkea_prepare(env, obj, NULL, NULL, mdd_object_fid(pobj),
+                               lname, 1, 0, ldata);
+       if (rc)
+               return rc;
+
+       /* declare tobj with stripe count 0, and always put the requested
+        * count back, even if one of the declarations failed.
+        */
+       count = lum->lum_stripe_count;
+       lum->lum_stripe_count = 0;
+       rc = mdd_dir_declare_plain_target(env, pobj, obj, tobj, mlc, hint,
+                                         handle);
+       lum->lum_stripe_count = count;
+       if (rc)
+               return rc;
+
+       mlc->mlc_opc = MD_LAYOUT_SPLIT;
+       rc = mdo_declare_layout_change(env, tobj, mlc, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_index_delete(env, pobj, lname->ln_name, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_index_insert(env, pobj, mdd_object_fid(tobj),
+                                     S_IFDIR, lname->ln_name, handle);
+       if (rc)
+               return rc;
+
+       la->la_valid = LA_CTIME | LA_MTIME;
+       rc = mdo_declare_attr_set(env, obj, la, handle);
+       if (rc)
+               return rc;
+
+       rc = mdo_declare_attr_set(env, pobj, la, handle);
+       if (rc)
+               return rc;
+
+       rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, lname, NULL,
+                                        handle);
+       return rc;
+}
+
+/**
+ * plain directory split:
+ * 1. create \a tobj as plain directory.
+ * 2. append \a obj as first stripe of \a tobj.
+ * 3. migrate xattrs from \a obj to \a tobj.
+ * 4. split \a tobj to specific stripe count.
+ *
+ * As executed below, within the already started transaction \a handle:
+ * the linkea of \a obj is read, \a obj is detached (MD_LAYOUT_DETACH),
+ * \a tobj is created under \a pobj, xattrs are copied from \a obj to
+ * \a tobj, the saved linkea is written to \a tobj, the name entry in
+ * \a pobj is deleted and re-inserted with the FID of \a tobj, ctime/mtime
+ * are updated, and a CL_MIGRATE changelog record is stored.
+ *
+ * NOTE(review): steps 2 and 4 are not issued explicitly in this function
+ * (no MD_LAYOUT_ATTACH/MD_LAYOUT_SPLIT call here, only in the declare
+ * phase); presumably they are carried out underneath mdd_create_object()
+ * from mlc->mlc_spec -- confirm.
+ *
+ * NOTE(review): the ctime/mtime update of \a tobj is performed while holding
+ * the write lock of \a obj, not of \a tobj -- confirm this is intended.
+ *
+ * \param[in] env      execution environment
+ * \param[in] mdd      device used to store the changelog record
+ * \param[in] pobj     parent of the directory being split
+ * \param[in] obj      plain directory being split
+ * \param[in] tobj     target object which replaces \a obj in \a pobj
+ * \param[in,out] mlc  layout change descriptor; mlc_opc is overwritten and
+ *                     LA_NLINK is cleared from mlc_attr->la_valid
+ * \param[in] hint     allocation hint passed to mdd_create_object()
+ * \param[in] handle   transaction handle
+ *
+ * \retval 0           on success
+ * \retval others      error code returned by the failing step
+ */
+static int mdd_dir_split_plain(const struct lu_env *env,
+                               struct mdd_device *mdd,
+                               struct mdd_object *pobj,
+                               struct mdd_object *obj,
+                               struct mdd_object *tobj,
+                               struct md_layout_change *mlc,
+                               struct dt_allocation_hint *hint,
+                               struct thandle *handle)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct lu_attr *pattr = &info->mti_pattr;
+       struct lu_attr *la = &info->mti_la_for_fix;
+       const struct lu_name *lname = mlc->mlc_name;
+       struct linkea_data *ldata = &info->mti_link_data;
+       int rc;
+
+       ENTRY;
+
+       /* copy linkea out and set on target later (it is written to tobj
+        * by mdd_links_write() below, after the xattr copy)
+        */
+       rc = mdd_links_read(env, obj, ldata);
+       if (rc)
+               RETURN(rc);
+
+       mlc->mlc_opc = MD_LAYOUT_DETACH;
+       rc = mdo_layout_change(env, obj, mlc, handle);
+       if (rc)
+               RETURN(rc);
+
+       /* don't set nlink from obj: drop LA_NLINK from the attributes that
+        * are used to create tobj
+        */
+       mlc->mlc_attr->la_valid &= ~LA_NLINK;
+
+       rc = mdd_create_object(env, pobj, tobj, mlc->mlc_attr, mlc->mlc_spec,
+                              NULL, NULL, NULL, hint, handle, false);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdd_iterate_xattrs(env, obj, tobj, true, handle, mdo_xattr_set);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdd_links_write(env, tobj, ldata, handle);
+       if (rc)
+               RETURN(rc);
+
+       rc = __mdd_index_delete(env, pobj, lname->ln_name, true, handle);
+       if (rc)
+               RETURN(rc);
+
+       rc = __mdd_index_insert(env, pobj, mdd_object_fid(tobj), S_IFDIR,
+                               lname->ln_name, handle);
+       if (rc)
+               RETURN(rc);
+
+       la->la_ctime = la->la_mtime = mlc->mlc_attr->la_mtime;
+       la->la_valid = LA_CTIME | LA_MTIME;
+
+       mdd_write_lock(env, obj, DT_SRC_CHILD);
+       rc = mdd_update_time(env, tobj, mlc->mlc_attr, la, handle);
+       mdd_write_unlock(env, obj);
+       if (rc)
+               RETURN(rc);
+
+       rc = mdd_la_get(env, pobj, pattr);
+       if (rc)
+               RETURN(rc);
+
+       la->la_valid = LA_CTIME | LA_MTIME;
+
+       mdd_write_lock(env, pobj, DT_SRC_PARENT);
+       rc = mdd_update_time(env, pobj, pattr, la, handle);
+       mdd_write_unlock(env, pobj);
+       if (rc)
+               RETURN(rc);
+
+       /* FID changes, record it as CL_MIGRATE: the record is stored for
+        * tobj, with the FID of obj passed alongside and the same parent
+        * and name on both sides
+        */
+       rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj,
+                                   mdd_object_fid(pobj), mdd_object_fid(obj),
+                                   mdd_object_fid(pobj), lname, lname, handle);
+       RETURN(rc);
+}
+
+/**
+ * Split directory \a o in a single transaction.
+ *
+ * The LMV xattr of the directory is probed first: if it is absent (-ENODATA)
+ * the directory is plain and is handled by mdd_dir_declare_split_plain() and
+ * mdd_dir_split_plain(), using mlc->mlc_parent and mlc->mlc_target.
+ * Otherwise an MD_LAYOUT_SPLIT layout change is declared on the directory,
+ * its LMV xattr is set with LU_XATTR_CREATE under the object write lock,
+ * and a CL_LAYOUT changelog record is stored.
+ *
+ * \param[in] env      execution environment
+ * \param[in] o        directory to split
+ * \param[in,out] mlc  layout change descriptor; mlc_opc is overwritten
+ *
+ * \retval             value returned by mdd_trans_stop() once the
+ *                     transaction was created, or the earlier error from
+ *                     the LMV probe or transaction creation
+ */
+int mdd_dir_layout_split(const struct lu_env *env, struct md_object *o,
+                        struct md_layout_change *mlc)
+{
+       struct mdd_thread_info *info = mdd_env_info(env);
+       struct mdd_device *mdd = mdo2mdd(o);
+       struct mdd_object *obj = md2mdd_obj(o);
+       struct mdd_object *pobj = md2mdd_obj(mlc->mlc_parent);
+       struct mdd_object *tobj = md2mdd_obj(mlc->mlc_target);
+       struct dt_allocation_hint *hint = &info->mti_hint;
+       struct thandle *th;
+       bool plain;
+       int rc;
+
+       ENTRY;
+
+       LASSERT(S_ISDIR(mdd_object_type(obj)));
+
+       /* a directory without LMV is a plain one */
+       rc = mdo_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LMV);
+       if (rc < 0 && rc != -ENODATA)
+               RETURN(rc);
+       plain = (rc == -ENODATA);
+
+       th = mdd_trans_create(env, mdd);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* declaration phase */
+       if (!plain) {
+               mlc->mlc_opc = MD_LAYOUT_SPLIT;
+               rc = mdo_declare_layout_change(env, obj, mlc, th);
+               if (rc)
+                       GOTO(stop_trans, rc);
+
+               rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL,
+                                                NULL, th);
+       } else {
+               rc = mdd_dir_declare_split_plain(env, mdd, pobj, obj, tobj, mlc,
+                                                hint, th);
+       }
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       rc = mdd_trans_start(env, mdd, th);
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       /* execution phase */
+       if (!plain) {
+               mdd_write_lock(env, obj, DT_TGT_CHILD);
+               rc = mdo_xattr_set(env, obj, NULL, XATTR_NAME_LMV,
+                                  LU_XATTR_CREATE, th);
+               mdd_write_unlock(env, obj);
+               if (rc)
+                       GOTO(stop_trans, rc);
+
+               rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj,
+                                                   XATTR_NAME_LMV, th);
+       } else {
+               rc = mdd_dir_split_plain(env, mdd, pobj, obj, tobj, mlc, hint,
+                                        th);
+       }
+       if (rc)
+               GOTO(stop_trans, rc);
+
+       EXIT;
+
+stop_trans:
+       return mdd_trans_stop(env, mdd, rc, th);
+}
+
 const struct md_dir_operations mdd_dir_ops = {
        .mdo_is_subdir     = mdd_is_subdir,
        .mdo_lookup        = mdd_lookup,