X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdd%2Fmdd_dir.c;h=c0e27bf1fa2c691be0e431fcb37d259a8030106c;hb=f3d03bc38a3afdef83635d578ee0b2ffdd985685;hp=b3e439738529f172dfc1db72ae55e8f3240af038;hpb=0a1cf8da806962d663f23ed813764e4011a36ee7;p=fs%2Flustre-release.git diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index b3e4397..c0e27bf 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -124,7 +124,7 @@ int mdd_lookup(const struct lu_env *env, } /** Read the link EA into a temp buffer. - * Uses the mdd_thread_info::mti_big_buf since it is generally large. + * Uses the mdd_thread_info::mti_link_buf since it is generally large. * A pointer to the buffer is stored in \a ldata::ld_buf. * * \retval 0 or error @@ -168,9 +168,9 @@ static int __mdd_links_read(const struct lu_env *env, return linkea_init(ldata); } -static int mdd_links_read(const struct lu_env *env, - struct mdd_object *mdd_obj, - struct linkea_data *ldata) +int mdd_links_read(const struct lu_env *env, + struct mdd_object *mdd_obj, + struct linkea_data *ldata) { int rc; @@ -356,8 +356,7 @@ int mdd_is_subdir(const struct lu_env *env, struct md_object *mo, * -ve other error * */ -static int mdd_dir_is_empty(const struct lu_env *env, - struct mdd_object *dir) +int mdd_dir_is_empty(const struct lu_env *env, struct mdd_object *dir) { struct dt_it *it; struct dt_object *obj; @@ -800,7 +799,7 @@ int mdd_changelog_write_rec(const struct lu_env *env, struct llog_changelog_rec *rec; mdd = lu2mdd_dev(loghandle->lgh_ctxt->loc_obd->obd_lu_dev); - rec = container_of0(r, struct llog_changelog_rec, cr_hdr); + rec = container_of(r, struct llog_changelog_rec, cr_hdr); spin_lock(&mdd->mdd_cl.mc_lock); rec->cr.cr_index = mdd->mdd_cl.mc_index + 1; @@ -1255,7 +1254,7 @@ static inline int mdd_links_del(const struct lu_env *env, /** Read the link EA into a temp buffer. * Uses the name_buf since it is generally large. * \retval IS_ERR err - * \retval ptr to \a lu_buf (always \a mti_big_buf) + * \retval ptr to \a lu_buf (always \a mti_link_buf) */ struct lu_buf *mdd_links_get(const struct lu_env *env, struct mdd_object *mdd_obj) @@ -1938,7 +1937,8 @@ static int mdd_create_data(const struct lu_env *env, if (rc) GOTO(stop, rc); - rc = mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, son, handle); + rc = mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, son, handle, + NULL); stop: rc = mdd_trans_stop(env, mdd, rc, handle); @@ -2188,7 +2188,7 @@ static int mdd_declare_create_object(const struct lu_env *env, rc = mdo_declare_xattr_set(env, c, buf, S_ISDIR(attr->la_mode) ? XATTR_NAME_LMV : XATTR_NAME_LOV, - 0, handle); + LU_XATTR_CREATE, handle); if (rc) GOTO(out, rc); @@ -2222,6 +2222,16 @@ static int mdd_declare_create_object(const struct lu_env *env, if (rc < 0) GOTO(out, rc); } + + if (spec->sp_cr_file_encctx != NULL) { + buf = mdd_buf_get_const(env, spec->sp_cr_file_encctx, + spec->sp_cr_file_encctx_size); + rc = mdo_declare_xattr_set(env, c, buf, + LL_XATTR_NAME_ENCRYPTION_CONTEXT, 0, + handle); + if (rc < 0) + GOTO(out, rc); + } out: return rc; } @@ -2293,6 +2303,7 @@ static int mdd_acl_init(const struct lu_env *env, struct mdd_object *pobj, struct lu_buf *acl_buf) { int rc; + ENTRY; if (S_ISLNK(la->la_mode)) { @@ -2382,7 +2393,7 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj, rc = mdo_xattr_set(env, son, buf, S_ISDIR(attr->la_mode) ? XATTR_NAME_LMV : XATTR_NAME_LOV, - 0, handle); + LU_XATTR_CREATE, handle); if (rc != 0) GOTO(err_destroy, rc); } @@ -2443,6 +2454,16 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj, GOTO(err_initlized, rc); } + if (spec->sp_cr_file_encctx != NULL) { + buf = mdd_buf_get_const(env, spec->sp_cr_file_encctx, + spec->sp_cr_file_encctx_size); + rc = mdo_xattr_set(env, son, buf, + LL_XATTR_NAME_ENCRYPTION_CONTEXT, 0, + handle); + if (rc < 0) + GOTO(err_initlized, rc); + } + err_initlized: if (unlikely(rc != 0)) { int rc2; @@ -2552,22 +2573,23 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj, const struct lu_name *lname, struct md_object *child, struct md_op_spec *spec, struct md_attr *ma) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lu_attr *la = &info->mti_la_for_fix; - struct mdd_object *mdd_pobj = md2mdd_obj(pobj); - struct mdd_object *son = md2mdd_obj(child); - struct mdd_device *mdd = mdo2mdd(pobj); - struct lu_attr *attr = &ma->ma_attr; - struct thandle *handle; - struct lu_attr *pattr = &info->mti_pattr; - struct lu_buf acl_buf; - struct lu_buf def_acl_buf; - struct lu_buf hsm_buf; - struct linkea_data *ldata = &info->mti_link_data; - const char *name = lname->ln_name; + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la_for_fix; + struct mdd_object *mdd_pobj = md2mdd_obj(pobj); + struct mdd_object *son = md2mdd_obj(child); + struct mdd_device *mdd = mdo2mdd(pobj); + struct lu_attr *attr = &ma->ma_attr; + struct thandle *handle; + struct lu_attr *pattr = &info->mti_pattr; + struct lu_buf acl_buf; + struct lu_buf def_acl_buf; + struct lu_buf hsm_buf; + struct linkea_data *ldata = &info->mti_link_data; + const char *name = lname->ln_name; struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint; - int rc; - int rc2; + int acl_size = LUSTRE_POSIX_ACL_MAX_SIZE_OLD; + int rc, rc2; + ENTRY; rc = mdd_la_get(env, mdd_pobj, pattr); @@ -2586,13 +2608,25 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj, if (IS_ERR(handle)) GOTO(out_free, rc = PTR_ERR(handle)); - lu_buf_check_and_alloc(&info->mti_xattr_buf, - min_t(unsigned int, mdd->mdd_dt_conf.ddp_max_ea_size, - XATTR_SIZE_MAX)); - acl_buf = info->mti_xattr_buf; - def_acl_buf.lb_buf = info->mti_key; - def_acl_buf.lb_len = sizeof(info->mti_key); +use_bigger_buffer: + acl_buf = *lu_buf_check_and_alloc(&info->mti_xattr_buf, acl_size); + if (!acl_buf.lb_buf) + GOTO(out_stop, rc = -ENOMEM); + /* mti_big_buf is also used down below in mdd_changelog_ns_store(), + * but def_acl_buf is finished with it before then + */ + def_acl_buf = *lu_buf_check_and_alloc(&info->mti_big_buf, acl_size); + if (!def_acl_buf.lb_buf) + GOTO(out_stop, rc = -ENOMEM); + rc = mdd_acl_init(env, mdd_pobj, attr, &def_acl_buf, &acl_buf); + if (unlikely(rc == -ERANGE && + acl_size == LUSTRE_POSIX_ACL_MAX_SIZE_OLD)) { + /* use maximum-sized xattr buffer for too-big default ACL */ + acl_size = min_t(unsigned int, mdd->mdd_dt_conf.ddp_max_ea_size, + XATTR_SIZE_MAX); + goto use_bigger_buffer; + } if (rc < 0) GOTO(out_stop, rc); @@ -2603,11 +2637,8 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj, * migrate may create 1-stripe directory, so lod_ah_init() * doesn't adjust stripe count from lmu. */ - if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) { - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = 0; - spec->u.sp_ea.eadata = &info->mti_lmu; - } + if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) + lmu->lum_stripe_count = 0; } mdd_object_make_hint(env, mdd_pobj, son, attr, spec, hint); @@ -2746,49 +2777,6 @@ out_free: return rc; } -/* - * Get locks on parents in proper order - * RETURN: < 0 - error, rename_order if successful - */ -enum rename_order { - MDD_RN_SAME, - MDD_RN_SRCTGT, - MDD_RN_TGTSRC -}; - -static int mdd_rename_order(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *src_pobj, - const struct lu_attr *pattr, - struct mdd_object *tgt_pobj) -{ - /* order of locking, 1 - tgt-src, 0 - src-tgt*/ - int rc; - - ENTRY; - if (src_pobj == tgt_pobj) - RETURN(MDD_RN_SAME); - - /* compared the parent child relationship of src_p & tgt_p */ - if (lu_fid_eq(&mdd->mdd_root_fid, mdd_object_fid(src_pobj))) { - rc = MDD_RN_SRCTGT; - } else if (lu_fid_eq(&mdd->mdd_root_fid, mdd_object_fid(tgt_pobj))) { - rc = MDD_RN_TGTSRC; - } else { - rc = mdd_is_parent(env, mdd, src_pobj, pattr, - mdd_object_fid(tgt_pobj)); - if (rc == -EREMOTE) - rc = 0; - - if (rc == 1) - rc = MDD_RN_TGTSRC; - else if (rc == 0) - rc = MDD_RN_SRCTGT; - } - - RETURN(rc); -} - /* has not mdd_write{read}_lock on any obj yet. */ static int mdd_rename_sanity_check(const struct lu_env *env, struct mdd_object *src_pobj, @@ -2961,6 +2949,16 @@ static int mdd_declare_rename(const struct lu_env *env, return rc; } +static int mdd_migrate_object(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *sobj, + struct mdd_object *tobj, + const struct lu_name *sname, + const struct lu_name *tname, + struct md_op_spec *spec, + struct md_attr *ma); + /* src object can be remote that is why we use only fid and type of object */ static int mdd_rename(const struct lu_env *env, struct md_object *src_pobj, struct md_object *tgt_pobj, @@ -3005,6 +3003,31 @@ static int mdd_rename(const struct lu_env *env, if (rc) GOTO(out_pending, rc); + /* if rename is cross MDTs, migrate symlink if it doesn't have other + * hard links, and target doesn't exist. + */ + if (mdd_object_remote(mdd_sobj) && S_ISLNK(cattr->la_mode) && + cattr->la_nlink == 1 && !tobj) { + struct md_op_spec *spec = &mdd_env_info(env)->mti_spec; + struct lu_device *ld = &mdd->mdd_md_dev.md_lu_dev; + struct lu_fid tfid; + + rc = ld->ld_ops->ldo_fid_alloc(env, ld, &tfid, &tgt_pobj->mo_lu, + NULL); + if (rc < 0) + GOTO(out_pending, rc); + + mdd_tobj = mdd_object_find(env, mdd, &tfid); + if (IS_ERR(mdd_tobj)) + GOTO(out_pending, rc = PTR_ERR(mdd_tobj)); + + memset(spec, 0, sizeof(*spec)); + rc = mdd_migrate_object(env, mdd_spobj, mdd_tpobj, mdd_sobj, + mdd_tobj, lsname, ltname, spec, ma); + mdd_object_put(env, mdd_tobj); + GOTO(out_pending, rc); + } + rc = mdd_la_get(env, mdd_spobj, pattr); if (rc) GOTO(out_pending, rc); @@ -3033,11 +3056,6 @@ static int mdd_rename(const struct lu_env *env, if (rc < 0) GOTO(out_pending, rc); - /* FIXME: Should consider tobj and sobj too in rename_lock. */ - rc = mdd_rename_order(env, mdd, mdd_spobj, pattr, mdd_tpobj); - if (rc < 0) - GOTO(out_pending, rc); - handle = mdd_trans_create(env, mdd); if (IS_ERR(handle)) GOTO(out_pending, rc = PTR_ERR(handle)); @@ -3310,232 +3328,6 @@ static int mdd_migrate_sanity_check(const struct lu_env *env, RETURN(rc); } -typedef int (*mdd_dir_stripe_cb)(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle); - -static int mdd_dir_declare_delete_stripe(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle) -{ - struct mdd_thread_info *info = mdd_env_info(env); - char *stripe_name = info->mti_name; - struct lmv_user_md *lmu = lmu_buf->lb_buf; - int rc; - - if (index < le32_to_cpu(lmu->lum_stripe_count)) - return 0; - - rc = mdo_declare_index_delete(env, stripe, dotdot, handle); - if (rc) - return rc; - - snprintf(stripe_name, sizeof(info->mti_name), DFID":%d", - PFID(mdd_object_fid(stripe)), index); - - rc = mdo_declare_index_delete(env, obj, stripe_name, handle); - if (rc) - return rc; - - rc = mdo_declare_ref_del(env, obj, handle); - - return rc; -} - -/* delete stripe from its master object namespace */ -static int mdd_dir_delete_stripe(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle) -{ - struct mdd_thread_info *info = mdd_env_info(env); - char *stripe_name = info->mti_name; - struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; - struct lmv_user_md *lmu = lmu_buf->lb_buf; - __u32 del_offset = le32_to_cpu(lmu->lum_stripe_count); - int rc; - - ENTRY; - - /* local dir will delete via LOD */ - LASSERT(mdd_object_remote(obj)); - LASSERT(del_offset < le32_to_cpu(lmv->lmv_stripe_count)); - - if (index < del_offset) - RETURN(0); - - mdd_write_lock(env, stripe, DT_SRC_CHILD); - rc = __mdd_index_delete_only(env, stripe, dotdot, handle); - if (rc) - GOTO(out, rc); - - snprintf(stripe_name, sizeof(info->mti_name), DFID":%d", - PFID(mdd_object_fid(stripe)), index); - - rc = __mdd_index_delete_only(env, obj, stripe_name, handle); - if (rc) - GOTO(out, rc); - - rc = mdo_ref_del(env, obj, handle); - GOTO(out, rc); -out: - mdd_write_unlock(env, stripe); - - return rc; -} - -static int mdd_dir_declare_destroy_stripe(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle) -{ - struct lmv_user_md *lmu = lmu_buf->lb_buf; - __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); - int rc; - - if (index < shrink_offset) { - if (shrink_offset < 2) - return 0; - return mdo_declare_xattr_set(env, stripe, lmv_buf, - XATTR_NAME_LMV".set", 0, handle); - } - - rc = mdo_declare_ref_del(env, stripe, handle); - if (rc) - return rc; - - rc = mdo_declare_destroy(env, stripe, handle); - - return rc; -} - -static int mdd_dir_destroy_stripe(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle) -{ - struct mdd_thread_info *info = mdd_env_info(env); - struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; - struct lmv_user_md *lmu = lmu_buf->lb_buf; - __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); - int rc; - - ENTRY; - - /* update remaining stripes' LMV */ - if (index < shrink_offset) { - struct lmv_mds_md_v1 *slave_lmv; - struct lu_buf slave_buf = { - .lb_buf = &info->mti_lmv.lmv_md_v1, - .lb_len = sizeof(*slave_lmv) - }; - __u32 version = le32_to_cpu(lmv->lmv_layout_version); - - /* if dir will be shrunk to 1-stripe, don't update */ - if (shrink_offset < 2) - RETURN(0); - - slave_lmv = slave_buf.lb_buf; - memset(slave_lmv, 0, sizeof(*slave_lmv)); - slave_lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE); - slave_lmv->lmv_stripe_count = lmu->lum_stripe_count; - slave_lmv->lmv_master_mdt_index = cpu_to_le32(index); - slave_lmv->lmv_hash_type = lmv->lmv_hash_type & - cpu_to_le32(LMV_HASH_TYPE_MASK); - slave_lmv->lmv_layout_version = cpu_to_le32(++version); - - rc = mdo_xattr_set(env, stripe, &slave_buf, - XATTR_NAME_LMV".set", 0, handle); - RETURN(rc); - } - - mdd_write_lock(env, stripe, DT_SRC_CHILD); - rc = mdo_ref_del(env, stripe, handle); - if (!rc) - rc = mdo_destroy(env, stripe, handle); - mdd_write_unlock(env, stripe); - - RETURN(rc); -} - -static int mdd_shrink_stripe_is_empty(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_object *stripe, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - int index, - struct thandle *handle) -{ - struct lmv_user_md *lmu = lmu_buf->lb_buf; - __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); - - /* the default value is 0, but it means 1 */ - if (!shrink_offset) - shrink_offset = 1; - - if (index < shrink_offset) - return 0; - - return mdd_dir_is_empty(env, stripe); -} - -/* - * iterate stripes of striped directory on remote MDT, local striped directory - * is accessed via LOD. - */ -static int mdd_dir_iterate_stripes(const struct lu_env *env, - struct mdd_object *obj, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - struct thandle *handle, - mdd_dir_stripe_cb cb) -{ - struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); - struct lu_fid *fid = &mdd_env_info(env)->mti_fid2; - struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; - struct mdd_object *stripe; - int i; - int rc; - - ENTRY; - - LASSERT(lmv); - - for (i = 0; i < le32_to_cpu(lmv->lmv_stripe_count); i++) { - fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[i]); - if (!fid_is_sane(fid)) - continue; - - stripe = mdd_object_find(env, mdd, fid); - if (IS_ERR(stripe)) - RETURN(PTR_ERR(stripe)); - - rc = cb(env, obj, stripe, lmv_buf, lmu_buf, i, handle); - mdd_object_put(env, stripe); - if (rc) - RETURN(rc); - } - - RETURN(0); -} - typedef int (*mdd_xattr_cb)(const struct lu_env *env, struct mdd_object *obj, const struct lu_buf *buf, @@ -3847,14 +3639,15 @@ static int mdd_iterate_linkea(const struct lu_env *env, * \retval 1 don't migrate * \retval -errno on failure */ -static int migrate_linkea_prepare(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - const struct lu_name *lname, - const struct lu_attr *attr, - struct linkea_data *ldata) +static int mdd_migrate_linkea_prepare(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *sobj, + const struct lu_name *sname, + const struct lu_name *tname, + const struct lu_attr *attr, + struct linkea_data *ldata) { __u32 source_mdt_index; int rc; @@ -3862,8 +3655,8 @@ static int migrate_linkea_prepare(const struct lu_env *env, ENTRY; memset(ldata, 0, sizeof(*ldata)); - rc = mdd_linkea_prepare(env, sobj, mdd_object_fid(spobj), lname, - mdd_object_fid(tpobj), lname, 1, 0, ldata); + rc = mdd_linkea_prepare(env, sobj, mdd_object_fid(spobj), sname, + mdd_object_fid(tpobj), tname, 1, 0, ldata); if (rc) RETURN(rc); @@ -3889,110 +3682,128 @@ static int migrate_linkea_prepare(const struct lu_env *env, if (rc) RETURN(rc); - rc = mdd_iterate_linkea(env, sobj, NULL, lname, mdd_object_fid(tpobj), + rc = mdd_iterate_linkea(env, sobj, NULL, tname, mdd_object_fid(tpobj), ldata, &source_mdt_index, NULL, mdd_is_link_on_source_mdt); RETURN(rc); } -static int mdd_dir_declare_layout_delete(const struct lu_env *env, - struct mdd_object *obj, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - struct thandle *handle) +static int mdd_declare_migrate_update(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *obj, + const struct lu_name *sname, + const struct lu_name *tname, + struct lu_attr *attr, + struct lu_attr *spattr, + struct lu_attr *tpattr, + struct linkea_data *ldata, + struct md_attr *ma, + struct thandle *handle) { + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la_for_fix; int rc; - if (!lmv_buf->lb_buf) - rc = mdo_declare_index_delete(env, obj, dotdot, handle); - else if (mdd_object_remote(obj)) - rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, lmu_buf, handle, - mdd_dir_declare_delete_stripe); - else - rc = mdo_declare_xattr_set(env, obj, lmu_buf, - XATTR_NAME_LMV".del", 0, handle); + rc = mdo_declare_index_delete(env, spobj, sname->ln_name, handle); + if (rc) + return rc; - return rc; -} + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_del(env, spobj, handle); + if (rc) + return rc; + } -static int mdd_dir_layout_delete(const struct lu_env *env, - struct mdd_object *obj, - const struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - struct thandle *handle) -{ - int rc; + rc = mdo_declare_index_insert(env, tpobj, mdd_object_fid(obj), + attr->la_mode & S_IFMT, + tname->ln_name, handle); + if (rc) + return rc; - ENTRY; + rc = mdd_declare_links_add(env, obj, handle, ldata); + if (rc) + return rc; - mdd_write_lock(env, obj, DT_SRC_PARENT); - if (!lmv_buf->lb_buf) - /* normal dir */ - rc = __mdd_index_delete_only(env, obj, dotdot, handle); - else if (mdd_object_remote(obj)) - /* striped, but remote */ - rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, lmu_buf, handle, - mdd_dir_delete_stripe); - else - rc = mdo_xattr_set(env, obj, lmu_buf, XATTR_NAME_LMV".del", 0, - handle); - mdd_write_unlock(env, obj); + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_add(env, tpobj, handle); + if (rc) + return rc; + } - RETURN(rc); + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdo_declare_attr_set(env, spobj, la, handle); + if (rc) + return rc; + + if (tpobj != spobj) { + rc = mdo_declare_attr_set(env, tpobj, la, handle); + if (rc) + return rc; + } + + return rc; } static int mdd_declare_migrate_create(const struct lu_env *env, + struct mdd_object *spobj, struct mdd_object *tpobj, struct mdd_object *sobj, struct mdd_object *tobj, - const struct lu_name *lname, + const struct lu_name *sname, + const struct lu_name *tname, + struct lu_attr *spattr, + struct lu_attr *tpattr, struct lu_attr *attr, struct lu_buf *sbuf, struct linkea_data *ldata, + struct md_attr *ma, struct md_op_spec *spec, struct dt_allocation_hint *hint, struct thandle *handle) { struct mdd_thread_info *info = mdd_env_info(env); + struct md_layout_change *mlc = &info->mti_mlc; struct lmv_mds_md_v1 *lmv = sbuf->lb_buf; int rc; - if (S_ISDIR(attr->la_mode)) { - struct lu_buf lmu_buf = { NULL }; - - if (lmv) { - struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md; + ENTRY; - lmu->lum_stripe_count = 0; - lmu_buf.lb_buf = lmu; - lmu_buf.lb_len = sizeof(*lmu); - } + if (S_ISDIR(attr->la_mode)) { + struct lmv_user_md *lum = spec->u.sp_ea.eadata; - rc = mdd_dir_declare_layout_delete(env, sobj, sbuf, &lmu_buf, - handle); + mlc->mlc_opc = MD_LAYOUT_DETACH; + rc = mdo_declare_layout_change(env, sobj, mlc, handle); if (rc) return rc; - if (lmv) { - rc = mdo_declare_xattr_del(env, sobj, XATTR_NAME_LMV, - handle); - if (rc) - return rc; - } + lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_MIGRATION); + } else if (S_ISLNK(attr->la_mode)) { + spec->u.sp_symname = sbuf->lb_buf; + } else if (S_ISREG(attr->la_mode)) { + spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE; + spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA; } + mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint); + rc = mdd_declare_create(env, mdo2mdd(&tpobj->mod_obj), tpobj, tobj, - lname, attr, handle, spec, ldata, NULL, NULL, + tname, attr, handle, spec, ldata, NULL, NULL, NULL, hint); if (rc) return rc; - if (S_ISDIR(attr->la_mode) && mdd_dir_is_empty(env, sobj) != 0) { + /* + * tobj mode will be used in mdo_declare_layout_change(), but it's not + * createb yet, copy from sobj. + */ + tobj->mod_obj.mo_lu.lo_header->loh_attr &= ~S_IFMT; + tobj->mod_obj.mo_lu.lo_header->loh_attr |= + sobj->mod_obj.mo_lu.lo_header->loh_attr & S_IFMT; + + if (S_ISDIR(attr->la_mode)) { if (!lmv) { - /* - * if sobj is not striped, fake a 1-stripe LMV, which - * will be used to generate a compound LMV for tobj. - */ + /* if sobj is not striped, fake a 1-stripe LMV */ LASSERT(sizeof(info->mti_key) > lmv_mds_md_size(1, LMV_MAGIC_V1)); lmv = (typeof(lmv))info->mti_key; @@ -4002,31 +3813,17 @@ static int mdd_declare_migrate_create(const struct lu_env *env, lmv->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_DEFAULT); fid_le_to_cpu(&lmv->lmv_stripe_fids[0], mdd_object_fid(sobj)); - sbuf->lb_buf = lmv; - sbuf->lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1); - - rc = mdo_declare_xattr_set(env, tobj, sbuf, - XATTR_NAME_LMV".add", 0, - handle); - sbuf->lb_buf = NULL; - sbuf->lb_len = 0; + mlc->mlc_buf.lb_buf = lmv; + mlc->mlc_buf.lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1); } else { - rc = mdo_declare_xattr_set(env, tobj, sbuf, - XATTR_NAME_LMV".add", 0, - handle); + mlc->mlc_buf = *sbuf; } + mlc->mlc_opc = MD_LAYOUT_ATTACH; + rc = mdo_declare_layout_change(env, tobj, mlc, handle); if (rc) return rc; } - /* - * tobj mode will be used in lod_declare_xattr_set(), but it's not - * createb yet, copy from sobj. - */ - tobj->mod_obj.mo_lu.lo_header->loh_attr &= ~S_IFMT; - tobj->mod_obj.mo_lu.lo_header->loh_attr |= - sobj->mod_obj.mo_lu.lo_header->loh_attr & S_IFMT; - rc = mdd_iterate_xattrs(env, sobj, tobj, true, handle, mdo_declare_xattr_set); if (rc) @@ -4051,38 +3848,112 @@ static int mdd_declare_migrate_create(const struct lu_env *env, } if (!S_ISDIR(attr->la_mode)) { - rc = mdd_iterate_linkea(env, sobj, tobj, lname, + rc = mdd_iterate_linkea(env, sobj, tobj, tname, mdd_object_fid(tpobj), ldata, NULL, handle, mdd_declare_update_link); if (rc) return rc; + } - if (lmv) { - rc = mdo_declare_xattr_del(env, sobj, XATTR_NAME_LMV, - handle); + if (!S_ISDIR(attr->la_mode) || lmv) { + rc = mdo_declare_ref_del(env, sobj, handle); + if (rc) + return rc; + + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_del(env, sobj, handle); if (rc) return rc; } + + rc = mdo_declare_destroy(env, sobj, handle); + if (rc) + return rc; } + rc = mdd_declare_migrate_update(env, spobj, tpobj, tobj, sname, tname, + attr, spattr, tpattr, ldata, ma, + handle); return rc; } /** - * Create target, migrate xattrs and update links. + * migrate dirent from \a spobj to \a tpobj. + **/ +static int mdd_migrate_update(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *obj, + const struct lu_name *sname, + const struct lu_name *tname, + struct lu_attr *attr, + struct lu_attr *spattr, + struct lu_attr *tpattr, + struct linkea_data *ldata, + struct md_attr *ma, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la_for_fix; + int rc; + + ENTRY; + + CDEBUG(D_INFO, "update "DFID" from "DFID"/%s to "DFID"/%s\n", + PFID(mdd_object_fid(obj)), PFID(mdd_object_fid(spobj)), + sname->ln_name, PFID(mdd_object_fid(tpobj)), tname->ln_name); + + rc = __mdd_index_delete(env, spobj, sname->ln_name, + S_ISDIR(attr->la_mode), handle); + if (rc) + RETURN(rc); + + rc = __mdd_index_insert(env, tpobj, mdd_object_fid(obj), + attr->la_mode & S_IFMT, + tname->ln_name, handle); + if (rc) + RETURN(rc); + + rc = mdd_links_write(env, obj, ldata, handle); + if (rc) + RETURN(rc); + + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + la->la_valid = LA_CTIME | LA_MTIME; + mdd_write_lock(env, spobj, DT_SRC_PARENT); + rc = mdd_update_time(env, spobj, spattr, la, handle); + mdd_write_unlock(env, spobj); + if (rc) + RETURN(rc); + + if (tpobj != spobj) { + la->la_valid = LA_CTIME | LA_MTIME; + mdd_write_lock(env, tpobj, DT_TGT_PARENT); + rc = mdd_update_time(env, tpobj, tpattr, la, handle); + mdd_write_unlock(env, tpobj); + if (rc) + RETURN(rc); + } + + RETURN(rc); +} + +/** + * Migrate file/dir to target MDT. * * Create target according to \a spec, and then migrate xattrs, if it's - * directory, migrate source stripes to target, else update fid to target - * for links. + * directory, migrate source stripes to target. * * \param[in] env execution environment + * \param[in] spobj source parent object * \param[in] tpobj target parent object * \param[in] sobj source object * \param[in] tobj target object * \param[in] lname file name + * \param[in] spattr source parent attributes + * \param[in] tpattr target parent attributes * \param[in] attr source attributes * \param[in] sbuf source LMV buf - * \param[in] ldata source linkea * \param[in] spec migrate create spec * \param[in] hint target creation hint * \param[in] handle tranasction handle @@ -4091,13 +3962,18 @@ static int mdd_declare_migrate_create(const struct lu_env *env, * \retval -errno on failure **/ static int mdd_migrate_create(const struct lu_env *env, + struct mdd_object *spobj, struct mdd_object *tpobj, struct mdd_object *sobj, struct mdd_object *tobj, - const struct lu_name *lname, + const struct lu_name *sname, + const struct lu_name *tname, + struct lu_attr *spattr, + struct lu_attr *tpattr, struct lu_attr *attr, const struct lu_buf *sbuf, struct linkea_data *ldata, + struct md_attr *ma, struct md_op_spec *spec, struct dt_allocation_hint *hint, struct thandle *handle) @@ -4107,38 +3983,22 @@ static int mdd_migrate_create(const struct lu_env *env, ENTRY; /* - * directory will migrate sobj stripes to tobj: - * 1. delete stripes from sobj. - * 2. add stripes to tobj, see lod_dir_declare_layout_add(). - * 3. create/attach stripes for tobj, see lod_xattr_set_lmv(). + * migrate sobj stripes to tobj if it's directory: + * 1. detach stripes from sobj. + * 2. attach stripes to tobj, see mdd_declare_migrate_mdt(). + * 3. create stripes for tobj, see lod_xattr_set_lmv(). */ if (S_ISDIR(attr->la_mode)) { - struct lu_buf lmu_buf = { NULL }; - - if (sbuf->lb_buf) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md; + struct mdd_thread_info *info = mdd_env_info(env); + struct md_layout_change *mlc = &info->mti_mlc; - lmu->lum_stripe_count = 0; - lmu_buf.lb_buf = lmu; - lmu_buf.lb_len = sizeof(*lmu); - } + mlc->mlc_opc = MD_LAYOUT_DETACH; - rc = mdd_dir_layout_delete(env, sobj, sbuf, &lmu_buf, handle); + mdd_write_lock(env, sobj, DT_SRC_PARENT); + rc = mdo_layout_change(env, sobj, mlc, handle); + mdd_write_unlock(env, sobj); if (rc) RETURN(rc); - - /* - * delete LMV so that later when destroying sobj it won't delete - * stripes again. - */ - if (sbuf->lb_buf) { - mdd_write_lock(env, sobj, DT_SRC_CHILD); - rc = mdo_xattr_del(env, sobj, XATTR_NAME_LMV, handle); - mdd_write_unlock(env, sobj); - if (rc) - RETURN(rc); - } } /* don't set nlink from sobj */ @@ -4171,177 +4031,86 @@ static int mdd_migrate_create(const struct lu_env *env, mdd_write_lock(env, sobj, DT_SRC_CHILD); rc = mdo_xattr_del(env, sobj, XATTR_NAME_LOV, handle); mdd_write_unlock(env, sobj); - if (rc) + /* O_DELAY_CREATE file may not have LOV, ignore -ENODATA */ + if (rc && rc != -ENODATA) RETURN(rc); + rc = 0; } - if (!S_ISDIR(attr->la_mode)) - rc = mdd_iterate_linkea(env, sobj, tobj, lname, + /* update links FID */ + if (!S_ISDIR(attr->la_mode)) { + rc = mdd_iterate_linkea(env, sobj, tobj, tname, mdd_object_fid(tpobj), ldata, NULL, handle, mdd_update_link); - - RETURN(rc); -} - -static int mdd_declare_migrate_update(const struct lu_env *env, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - struct mdd_object *tobj, - const struct lu_name *lname, - struct lu_attr *attr, - struct lu_attr *spattr, - struct lu_attr *tpattr, - struct linkea_data *ldata, - bool do_create, - bool do_destroy, - struct md_attr *ma, - struct thandle *handle) -{ - struct mdd_thread_info *info = mdd_env_info(env); - const struct lu_fid *fid = mdd_object_fid(do_create ? tobj : sobj); - struct lu_attr *la = &info->mti_la_for_fix; - int rc; - - rc = mdo_declare_index_delete(env, spobj, lname->ln_name, handle); - if (rc) - return rc; - - if (S_ISDIR(attr->la_mode)) { - rc = mdo_declare_ref_del(env, spobj, handle); if (rc) - return rc; + RETURN(rc); } - rc = mdo_declare_index_insert(env, tpobj, fid, mdd_object_type(sobj), - lname->ln_name, handle); - if (rc) - return rc; - - rc = mdd_declare_links_add(env, do_create ? tobj : sobj, handle, ldata); - if (rc) - return rc; - - if (S_ISDIR(attr->la_mode)) { - rc = mdo_declare_ref_add(env, tpobj, handle); + /* don't destroy sobj if it's plain directory */ + if (!S_ISDIR(attr->la_mode) || sbuf->lb_buf) { + mdd_write_lock(env, sobj, DT_SRC_CHILD); + rc = mdo_ref_del(env, sobj, handle); + if (!rc) { + if (S_ISDIR(attr->la_mode)) + rc = mdo_ref_del(env, sobj, handle); + if (!rc) + rc = mdo_destroy(env, sobj, handle); + } + mdd_write_unlock(env, sobj); if (rc) - return rc; + RETURN(rc); } - la->la_valid = LA_CTIME | LA_MTIME; - rc = mdo_declare_attr_set(env, spobj, la, handle); - if (rc) - return rc; + rc = mdd_migrate_update(env, spobj, tpobj, tobj, sname, tname, attr, + spattr, tpattr, ldata, ma, handle); - if (tpobj != spobj) { - rc = mdo_declare_attr_set(env, tpobj, la, handle); - if (rc) - return rc; - } - - if (do_create && do_destroy) { - rc = mdo_declare_ref_del(env, sobj, handle); - if (rc) - return rc; - - rc = mdo_declare_destroy(env, sobj, handle); - if (rc) - return rc; - } - - return rc; + RETURN(rc); } -/** - * migrate dirent from \a spobj to \a tpobj, and destroy \a sobj - **/ -static int mdd_migrate_update(const struct lu_env *env, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - struct mdd_object *tobj, - const struct lu_name *lname, - struct lu_attr *attr, - struct lu_attr *spattr, - struct lu_attr *tpattr, - struct linkea_data *ldata, - bool do_create, - bool do_destroy, - struct md_attr *ma, - struct thandle *handle) +/* NB: if user issued different migrate command, we can't adjust it silently + * here, because this command will decide target MDT in subdir migration in + * LMV. + */ +static int mdd_migrate_cmd_check(struct mdd_device *mdd, + const struct lmv_mds_md_v1 *lmv, + const struct lmv_user_md_v1 *lum, + const struct lu_name *lname) { - struct mdd_thread_info *info = mdd_env_info(env); - const struct lu_fid *fid = mdd_object_fid(do_create ? tobj : sobj); - struct lu_attr *la = &info->mti_la_for_fix; - int rc; - - ENTRY; - - CDEBUG(D_INFO, "update %s "DFID"/"DFID" to "DFID"/"DFID"\n", - lname->ln_name, PFID(mdd_object_fid(spobj)), - PFID(mdd_object_fid(sobj)), PFID(mdd_object_fid(tpobj)), - PFID(fid)); - - rc = __mdd_index_delete(env, spobj, lname->ln_name, - S_ISDIR(attr->la_mode), handle); - if (rc) - RETURN(rc); - - rc = __mdd_index_insert(env, tpobj, fid, mdd_object_type(sobj), - lname->ln_name, handle); - if (rc) - RETURN(rc); - - rc = mdd_links_write(env, do_create ? tobj : sobj, ldata, handle); - if (rc) - RETURN(rc); - - la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; - la->la_valid = LA_CTIME | LA_MTIME; - mdd_write_lock(env, spobj, DT_SRC_PARENT); - rc = mdd_update_time(env, spobj, spattr, la, handle); - mdd_write_unlock(env, spobj); - if (rc) - RETURN(rc); - - if (tpobj != spobj) { - la->la_valid = LA_CTIME | LA_MTIME; - mdd_write_lock(env, tpobj, DT_TGT_PARENT); - rc = mdd_update_time(env, tpobj, tpattr, la, handle); - mdd_write_unlock(env, tpobj); - if (rc) - RETURN(rc); - } - - /* - * there are three situations we shouldn't destroy source: - * 1. if source is not dir, and it happens to be located on the same MDT - * as target parent. - * 2. if source is not dir, and has link on the same MDT where source is - * located. - * 3. if source is dir, and it's a normal, non-empty dir. - * - * the first two situations equals to !do_create, and the 3rd equals to - * !do_destroy, so the below condition is actually - * !(!do_create || !do_destroy). - * - * NB, if user has opened source dir before migration, he will get - * -ENOENT error when close it later, because source is likely to be - * remote, which can't be moved to orphan list, but except this error - * message, this won't cause any inconsistency or trouble. - */ - if (do_create && do_destroy) { - mdd_write_lock(env, sobj, DT_SRC_CHILD); - mdo_ref_del(env, sobj, handle); - rc = mdo_destroy(env, sobj, handle); - mdd_write_unlock(env, sobj); + __u32 lum_stripe_count = lum->lum_stripe_count; + __u32 lmv_hash_type = lmv->lmv_hash_type; + char *mdt_hash_name[] = { "none", + LMV_HASH_NAME_ALL_CHARS, + LMV_HASH_NAME_FNV_1A_64, + LMV_HASH_NAME_CRUSH, + }; + + if (!lmv_is_sane(lmv)) + return -EBADF; + + /* if stripe_count unspecified, set to 1 */ + if (!lum_stripe_count) + lum_stripe_count = cpu_to_le32(1); + + lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION); + + /* TODO: check specific MDTs */ + if (lum_stripe_count != lmv->lmv_migrate_offset || + lum->lum_stripe_offset != lmv->lmv_master_mdt_index || + (lum->lum_hash_type && lum->lum_hash_type != lmv_hash_type)) { + CERROR("%s: '"DNAME"' migration was interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration.\n", + mdd2obd_dev(mdd)->obd_name, PNAME(lname), + le32_to_cpu(lmv->lmv_master_mdt_index), + le32_to_cpu(lmv->lmv_migrate_offset), + mdt_hash_name[le32_to_cpu(lmv_hash_type)], + PNAME(lname)); + return -EPERM; } - RETURN(rc); + return -EALREADY; } /** - * Migrate directory or file. + * Internal function to migrate directory or file between MDTs. * * migrate source to target in following steps: * 1. create target, append source stripes after target's if it's directory, @@ -4350,231 +4119,110 @@ static int mdd_migrate_update(const struct lu_env *env, * update file linkea, and destroy source if it's not needed any more. * * \param[in] env execution environment - * \param[in] md_pobj parent master object - * \param[in] md_sobj source object - * \param[in] lname file name - * \param[in] md_tobj target object + * \param[in] spobj source parent object + * \param[in] tpobj target parent object + * \param[in] sobj source object + * \param[in] tobj target object + * \param[in] sname source file name + * \param[in] tname target file name * \param[in] spec target creation spec * \param[in] ma used to update \a pobj mtime and ctime * * \retval 0 on success * \retval -errno on failure */ -static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, - struct md_object *md_sobj, const struct lu_name *lname, - struct md_object *md_tobj, struct md_op_spec *spec, - struct md_attr *ma) +static int mdd_migrate_object(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *sobj, + struct mdd_object *tobj, + const struct lu_name *sname, + const struct lu_name *tname, + struct md_op_spec *spec, + struct md_attr *ma) { - struct mdd_device *mdd = mdo2mdd(md_pobj); struct mdd_thread_info *info = mdd_env_info(env); - struct mdd_object *pobj = md2mdd_obj(md_pobj); - struct mdd_object *sobj = md2mdd_obj(md_sobj); - struct mdd_object *tobj = md2mdd_obj(md_tobj); - struct mdd_object *spobj = NULL; - struct mdd_object *tpobj = NULL; + struct mdd_device *mdd = mdo2mdd(&spobj->mod_obj); struct lu_attr *spattr = &info->mti_pattr; struct lu_attr *tpattr = &info->mti_tpattr; struct lu_attr *attr = &info->mti_cattr; struct linkea_data *ldata = &info->mti_link_data; struct dt_allocation_hint *hint = &info->mti_hint; - struct lu_fid *fid = &info->mti_fid2; - struct lu_buf pbuf = { NULL }; struct lu_buf sbuf = { NULL }; - struct lmv_mds_md_v1 *plmv; + struct lmv_mds_md_v1 *lmv; struct thandle *handle; - bool do_create = true; - bool do_destroy = true; int rc; + ENTRY; rc = mdd_la_get(env, sobj, attr); if (rc) RETURN(rc); - /* locate source and target stripe on pobj, which are the real parent */ - rc = mdd_stripe_get(env, pobj, &pbuf, XATTR_NAME_LMV); - if (rc < 0 && rc != -ENODATA) - RETURN(rc); - - plmv = pbuf.lb_buf; - if (plmv) { - __u32 hash_type = le32_to_cpu(plmv->lmv_hash_type); - __u32 count = le32_to_cpu(plmv->lmv_stripe_count); - int index; - - /* locate target parent stripe */ - if (hash_type & LMV_HASH_FLAG_MIGRATION) { - /* - * fail check here to make sure top dir migration - * succeed. - */ - if (OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_ENTRIES, 0)) - GOTO(out, rc = -EIO); - hash_type &= ~LMV_HASH_FLAG_MIGRATION; - count = le32_to_cpu(plmv->lmv_migrate_offset); - } - index = lmv_name_to_stripe_index(hash_type, count, - lname->ln_name, - lname->ln_namelen); - if (index < 0) - GOTO(out, rc = index); - - fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]); - tpobj = mdd_object_find(env, mdd, fid); - if (IS_ERR(tpobj)) - GOTO(out, rc = PTR_ERR(tpobj)); - - /* locate source parent stripe */ - if (le32_to_cpu(plmv->lmv_hash_type) & - LMV_HASH_FLAG_MIGRATION) { - hash_type = le32_to_cpu(plmv->lmv_migrate_hash); - count = le32_to_cpu(plmv->lmv_stripe_count) - - le32_to_cpu(plmv->lmv_migrate_offset); - - index = lmv_name_to_stripe_index(hash_type, count, - lname->ln_name, - lname->ln_namelen); - if (index < 0) { - mdd_object_put(env, tpobj); - GOTO(out, rc = index); - } - - index += le32_to_cpu(plmv->lmv_migrate_offset); - fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]); - spobj = mdd_object_find(env, mdd, fid); - if (IS_ERR(spobj)) { - mdd_object_put(env, tpobj); - GOTO(out, rc = PTR_ERR(spobj)); - } - } else { - spobj = tpobj; - mdd_object_get(spobj); - } - } else { - tpobj = pobj; - spobj = pobj; - mdd_object_get(tpobj); - mdd_object_get(spobj); - } - rc = mdd_la_get(env, spobj, spattr); if (rc) - GOTO(out, rc); + RETURN(rc); rc = mdd_la_get(env, tpobj, tpattr); if (rc) - GOTO(out, rc); + RETURN(rc); - if (S_ISDIR(attr->la_mode)) { - struct lmv_user_md_v1 *lmu = spec->u.sp_ea.eadata; + if (S_ISDIR(attr->la_mode) && !spec->sp_migrate_nsonly) { + struct lmv_user_md_v1 *lum = spec->u.sp_ea.eadata; - LASSERT(lmu); + LASSERT(lum); - /* - * if user use default value '0' for stripe_count, we need to + /* if user use default value '0' for stripe_count, we need to * adjust it to '1' to create a 1-stripe directory. */ - if (lmu->lum_stripe_count == 0) { - /* eadata is from request, don't alter it */ - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = cpu_to_le32(1); - spec->u.sp_ea.eadata = &info->mti_lmu; - lmu = spec->u.sp_ea.eadata; - } + if (lum->lum_stripe_count == 0) + lum->lum_stripe_count = cpu_to_le32(1); rc = mdd_stripe_get(env, sobj, &sbuf, XATTR_NAME_LMV); - if (rc == -ENODATA) { - if (mdd_dir_is_empty(env, sobj) == 0) { - /* - * if sobj is empty, and target is not striped, - * create target as a normal directory. - */ - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = 0; - spec->u.sp_ea.eadata = &info->mti_lmu; - lmu = spec->u.sp_ea.eadata; - } - } else { - /* - * sobj is not striped dir, if it's not empty, - * it will be migrated to be a stripe of target, - * don't destroy it after migration. - */ - do_destroy = false; - } - } else if (rc) { + if (rc && rc != -ENODATA) GOTO(out, rc); - } else { - struct lmv_mds_md_v1 *lmv = sbuf.lb_buf; - - if (le32_to_cpu(lmv->lmv_hash_type) & - LMV_HASH_FLAG_MIGRATION) { - __u32 lum_stripe_count = lmu->lum_stripe_count; - __u32 lmv_hash_type = lmv->lmv_hash_type & - cpu_to_le32(LMV_HASH_TYPE_MASK); - - if (!lum_stripe_count) - lum_stripe_count = cpu_to_le32(1); - - /* TODO: check specific MDTs */ - if (lmv->lmv_migrate_offset != - lum_stripe_count || - lmv->lmv_master_mdt_index != - lmu->lum_stripe_offset || - (lmu->lum_hash_type && - lmv_hash_type != lmu->lum_hash_type)) { - CERROR("%s: \'"DNAME"\' migration was " - "interrupted, run \'lfs migrate " - "-m %d -c %d -H %d "DNAME"\' to " - "finish migration.\n", - mdd2obd_dev(mdd)->obd_name, - PNAME(lname), - le32_to_cpu( - lmv->lmv_master_mdt_index), - le32_to_cpu( - lmv->lmv_migrate_offset), - le32_to_cpu(lmv_hash_type), - PNAME(lname)); - GOTO(out, rc = -EPERM); - } - GOTO(out, rc = -EALREADY); + + lmv = sbuf.lb_buf; + if (lmv) { + if (!lmv_is_sane(lmv)) + GOTO(out, rc = -EBADF); + if (lmv_is_migrating(lmv)) { + rc = mdd_migrate_cmd_check(mdd, lmv, lum, + sname); + GOTO(out, rc); } } - } else if (!mdd_object_remote(tpobj)) { - /* - * if source is already on MDT where target parent is located, - * no need to create, just update namespace. - */ - do_create = false; - } else if (S_ISLNK(attr->la_mode)) { - lu_buf_check_and_alloc(&sbuf, attr->la_size + 1); - if (!sbuf.lb_buf) - GOTO(out, rc = -ENOMEM); - rc = mdd_readlink(env, &sobj->mod_obj, &sbuf); - if (rc <= 0) { - rc = rc ?: -EFAULT; - CERROR("%s: "DFID" readlink failed: rc = %d\n", - mdd_obj_dev_name(sobj), - PFID(mdd_object_fid(sobj)), rc); - GOTO(out, rc); + } else if (!S_ISDIR(attr->la_mode)) { + if (spobj == tpobj) + GOTO(out, rc = -EALREADY); + + /* update namespace only if @sobj is on MDT where @tpobj is. */ + if (!mdd_object_remote(tpobj) && !mdd_object_remote(sobj)) + spec->sp_migrate_nsonly = true; + + if (S_ISLNK(attr->la_mode)) { + lu_buf_check_and_alloc(&sbuf, attr->la_size + 1); + if (!sbuf.lb_buf) + GOTO(out, rc = -ENOMEM); + + rc = mdd_readlink(env, &sobj->mod_obj, &sbuf); + if (rc <= 0) { + rc = rc ?: -EFAULT; + CERROR("%s: "DFID" readlink failed: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, + PFID(mdd_object_fid(sobj)), rc); + GOTO(out, rc); + } } - spec->u.sp_symname = sbuf.lb_buf; - } else if (S_ISREG(attr->la_mode)) { - spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE; - spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA; } - /* - * if sobj has link on the same MDT, no need to create, just update - * namespace, and it will be a remote file on target parent, which is - * similar to rename. - */ - rc = migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, lname, attr, - ldata); + /* linkea needs update upon FID or parent stripe change */ + rc = mdd_migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, sname, + tname, attr, ldata); if (rc > 0) - do_create = false; - else if (rc) + /* update namespace only if @sobj has link on its MDT. */ + spec->sp_migrate_nsonly = true; + else if (rc < 0) GOTO(out, rc); rc = mdd_migrate_sanity_check(env, mdd, spobj, tpobj, sobj, tobj, @@ -4582,114 +4230,180 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, if (rc) GOTO(out, rc); - mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint); - handle = mdd_trans_create(env, mdd); if (IS_ERR(handle)) GOTO(out, rc = PTR_ERR(handle)); - if (do_create) { - rc = mdd_declare_migrate_create(env, tpobj, sobj, tobj, lname, - attr, &sbuf, ldata, spec, hint, - handle); - if (rc) - GOTO(stop_trans, rc); - } + if (spec->sp_migrate_nsonly) + rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, sname, + tname, attr, spattr, tpattr, + ldata, ma, handle); + else + rc = mdd_declare_migrate_create(env, spobj, tpobj, sobj, tobj, + sname, tname, spattr, tpattr, + attr, &sbuf, ldata, ma, spec, + hint, handle); + if (rc) + GOTO(stop, rc); + + rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, tname, sname, + handle); + if (rc) + GOTO(stop, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc) + GOTO(stop, rc); + + if (spec->sp_migrate_nsonly) + rc = mdd_migrate_update(env, spobj, tpobj, sobj, sname, tname, + attr, spattr, tpattr, ldata, ma, + handle); + else + rc = mdd_migrate_create(env, spobj, tpobj, sobj, tobj, sname, + tname, spattr, tpattr, attr, &sbuf, + ldata, ma, spec, hint, handle); + if (rc) + GOTO(stop, rc); + + rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, + spec->sp_migrate_nsonly ? sobj : tobj, + mdd_object_fid(spobj), mdd_object_fid(sobj), + mdd_object_fid(tpobj), tname, sname, + handle); + if (rc) + GOTO(stop, rc); + EXIT; + +stop: + rc = mdd_trans_stop(env, mdd, rc, handle); +out: + lu_buf_free(&sbuf); + + return rc; +} + +/** + * Migrate directory or file between MDTs. + * + * \param[in] env execution environment + * \param[in] md_pobj parent master object + * \param[in] md_sobj source object + * \param[in] lname file name + * \param[in] md_tobj target object + * \param[in] spec target creation spec + * \param[in] ma used to update \a pobj mtime and ctime + * + * \retval 0 on success + * \retval -errno on failure + */ +static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, + struct md_object *md_sobj, const struct lu_name *lname, + struct md_object *md_tobj, struct md_op_spec *spec, + struct md_attr *ma) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct mdd_device *mdd = mdo2mdd(md_pobj); + struct mdd_object *pobj = md2mdd_obj(md_pobj); + struct mdd_object *sobj = md2mdd_obj(md_sobj); + struct mdd_object *tobj = md2mdd_obj(md_tobj); + struct mdd_object *spobj = NULL; + struct mdd_object *tpobj = NULL; + struct lu_buf pbuf = { NULL }; + struct lu_fid *fid = &info->mti_fid2; + struct lmv_mds_md_v1 *lmv; + int rc; + + ENTRY; + + /* locate source and target stripe on pobj, which are the real parent */ + rc = mdd_stripe_get(env, pobj, &pbuf, XATTR_NAME_LMV); + if (rc < 0 && rc != -ENODATA) + RETURN(rc); + + lmv = pbuf.lb_buf; + if (lmv) { + int index; + + if (!lmv_is_sane(lmv)) + GOTO(out, rc = -EBADF); + + /* locate target parent stripe */ + /* fail check here to make sure top dir migration succeed. */ + if (lmv_is_migrating(lmv) && + OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_ENTRIES, 0)) + GOTO(out, rc = -EIO); + + index = lmv_name_to_stripe_index(lmv, lname->ln_name, + lname->ln_namelen); + if (index < 0) + GOTO(out, rc = index); - rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, tobj, lname, - attr, spattr, tpattr, ldata, do_create, - do_destroy, ma, handle); - if (rc) - GOTO(stop_trans, rc); + fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]); + tpobj = mdd_object_find(env, mdd, fid); + if (IS_ERR(tpobj)) + GOTO(out, rc = PTR_ERR(tpobj)); - rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, lname, NULL, - handle); - if (rc) - GOTO(stop_trans, rc); + /* locate source parent stripe */ + if (lmv_is_layout_changing(lmv)) { + index = lmv_name_to_stripe_index_old(lmv, + lname->ln_name, + lname->ln_namelen); + if (index < 0) + GOTO(out, rc = index); - rc = mdd_trans_start(env, mdd, handle); - if (rc) - GOTO(stop_trans, rc); + fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]); + spobj = mdd_object_find(env, mdd, fid); + if (IS_ERR(spobj)) + GOTO(out, rc = PTR_ERR(spobj)); - if (do_create) { - rc = mdd_migrate_create(env, tpobj, sobj, tobj, lname, attr, - &sbuf, ldata, spec, hint, handle); - if (rc) - GOTO(stop_trans, rc); + /* parent stripe unchanged */ + if (spobj == tpobj) { + if (!lmv_is_restriping(lmv)) + GOTO(out, rc = -EINVAL); + GOTO(out, rc = -EALREADY); + } + } else { + spobj = tpobj; + mdd_object_get(spobj); + } + } else { + tpobj = pobj; + spobj = pobj; + mdd_object_get(tpobj); + mdd_object_get(spobj); } - rc = mdd_migrate_update(env, spobj, tpobj, sobj, tobj, lname, attr, - spattr, tpattr, ldata, do_create, do_destroy, - ma, handle); - if (rc) - GOTO(stop_trans, rc); - - rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj, - mdd_object_fid(spobj), mdd_object_fid(sobj), - mdd_object_fid(tpobj), lname, lname, - handle); - if (rc) - GOTO(stop_trans, rc); + rc = mdd_migrate_object(env, spobj, tpobj, sobj, tobj, lname, lname, + spec, ma); + GOTO(out, rc); - EXIT; -stop_trans: - rc = mdd_trans_stop(env, mdd, rc, handle); out: - if (spobj && !IS_ERR(spobj)) + if (!IS_ERR_OR_NULL(spobj)) mdd_object_put(env, spobj); - if (tpobj && !IS_ERR(tpobj)) + if (!IS_ERR_OR_NULL(tpobj)) mdd_object_put(env, tpobj); - lu_buf_free(&sbuf); lu_buf_free(&pbuf); + return rc; } -static int __mdd_dir_declare_layout_shrink(const struct lu_env *env, - struct mdd_object *pobj, - struct mdd_object *obj, - struct mdd_object *stripe, - struct lu_attr *attr, - struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - struct lu_name *lname, - struct thandle *handle) +static int mdd_declare_1sd_collapse(const struct lu_env *env, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *stripe, + struct lu_attr *attr, + struct md_layout_change *mlc, + struct lu_name *lname, + struct thandle *handle) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; - struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; - struct lu_buf shrink_buf = { .lb_buf = lmu, - .lb_len = sizeof(*lmu) }; int rc; - LASSERT(lmv); - - memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu)); - - if (le32_to_cpu(lmu->lum_stripe_count) < 2) - lmu->lum_stripe_count = 0; - - rc = mdd_dir_declare_layout_delete(env, obj, lmv_buf, &shrink_buf, - handle); - if (rc) - return rc; - - if (lmu->lum_stripe_count == 0) { - lmu->lum_stripe_count = cpu_to_le32(1); - - rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LMV, handle); - if (rc) - return rc; - } - - rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle, - mdd_dir_declare_destroy_stripe); + mlc->mlc_opc = MD_LAYOUT_DETACH; + rc = mdo_declare_layout_change(env, obj, mlc, handle); if (rc) return rc; - if (le32_to_cpu(lmu->lum_stripe_count) > 1) - return mdo_declare_xattr_set(env, obj, lmv_buf, - XATTR_NAME_LMV".set", 0, handle); - rc = mdo_declare_index_insert(env, stripe, mdd_object_fid(pobj), S_IFDIR, dotdot, handle); if (rc) @@ -4730,84 +4444,30 @@ static int __mdd_dir_declare_layout_shrink(const struct lu_env *env, return rc; return rc; - } -/* - * after files under \a obj were migrated, shrink old stripes from \a obj, - * furthermore, if it becomes a 1-stripe directory, convert it to a normal one. - */ -static int __mdd_dir_layout_shrink(const struct lu_env *env, - struct mdd_object *pobj, - struct mdd_object *obj, - struct mdd_object *stripe, - struct lu_attr *attr, - struct lu_buf *lmv_buf, - const struct lu_buf *lmu_buf, - struct lu_name *lname, - struct thandle *handle) +/* transform one-stripe directory to a plain directory */ +static int mdd_1sd_collapse(const struct lu_env *env, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *stripe, + struct lu_attr *attr, + struct md_layout_change *mlc, + struct lu_name *lname, + struct thandle *handle) { - struct mdd_thread_info *info = mdd_env_info(env); - struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; - struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; - struct lu_buf shrink_buf = { .lb_buf = lmu, - .lb_len = sizeof(*lmu) }; - int len = lmv_buf->lb_len; - __u32 version = le32_to_cpu(lmv->lmv_layout_version); int rc; ENTRY; - /* lmu needs to be altered, but lmu_buf is const */ - memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu)); - - /* - * if dir will be shrunk to 1-stripe, delete all stripes, because it - * will be converted to normal dir. - */ - if (le32_to_cpu(lmu->lum_stripe_count) == 1) - lmu->lum_stripe_count = 0; - - /* delete stripes after lmu_stripe_count */ - rc = mdd_dir_layout_delete(env, obj, lmv_buf, &shrink_buf, handle); - if (rc) - RETURN(rc); - - if (lmu->lum_stripe_count == 0) { - lmu->lum_stripe_count = cpu_to_le32(1); - - /* delete LMV to avoid deleting stripes again upon destroy */ - mdd_write_lock(env, obj, DT_SRC_CHILD); - rc = mdo_xattr_del(env, obj, XATTR_NAME_LMV, handle); - mdd_write_unlock(env, obj); - if (rc) - RETURN(rc); - } + /* replace 1-stripe directory with its stripe */ + mlc->mlc_opc = MD_LAYOUT_DETACH; - /* destroy stripes after lmu_stripe_count */ mdd_write_lock(env, obj, DT_SRC_PARENT); - rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle, - mdd_dir_destroy_stripe); + rc = mdo_layout_change(env, obj, mlc, handle); mdd_write_unlock(env, obj); - - if (le32_to_cpu(lmu->lum_stripe_count) > 1) { - /* update dir LMV, that's all if it's still striped. */ - lmv->lmv_stripe_count = lmu->lum_stripe_count; - lmv->lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION); - lmv->lmv_migrate_offset = 0; - lmv->lmv_migrate_hash = 0; - lmv->lmv_layout_version = cpu_to_le32(++version); - - lmv_buf->lb_len = sizeof(*lmv); - rc = mdo_xattr_set(env, obj, lmv_buf, XATTR_NAME_LMV".set", 0, - handle); - lmv_buf->lb_len = len; + if (rc) RETURN(rc); - } - - /* replace directory with its remaining stripe */ - LASSERT(pobj); - LASSERT(stripe); mdd_write_lock(env, pobj, DT_SRC_PARENT); mdd_write_lock(env, obj, DT_SRC_CHILD); @@ -4868,11 +4528,11 @@ out: } /* - * shrink directory stripes to lum_stripe_count specified by lum_mds_md. + * shrink directory stripes after migration/merge */ int mdd_dir_layout_shrink(const struct lu_env *env, struct md_object *md_obj, - const struct lu_buf *lmu_buf) + struct md_layout_change *mlc) { struct mdd_device *mdd = mdo2mdd(md_obj); struct mdd_thread_info *info = mdd_env_info(env); @@ -4902,25 +4562,25 @@ int mdd_dir_layout_shrink(const struct lu_env *env, RETURN(rc); lmv = lmv_buf.lb_buf; - lmu = lmu_buf->lb_buf; + if (!lmv_is_sane(lmv)) + RETURN(-EBADF); + + lmu = mlc->mlc_buf.lb_buf; - /* this was checked in MDT */ + /* adjust the default value '0' to '1' */ + if (lmu->lum_stripe_count == 0) + lmu->lum_stripe_count = cpu_to_le32(1); + + /* these were checked in MDT */ LASSERT(le32_to_cpu(lmu->lum_stripe_count) < le32_to_cpu(lmv->lmv_stripe_count)); + LASSERT(!lmv_is_splitting(lmv)); + LASSERT(lmv_is_migrating(lmv) || lmv_is_merging(lmv)); - rc = mdd_dir_iterate_stripes(env, obj, &lmv_buf, lmu_buf, NULL, - mdd_shrink_stripe_is_empty); - if (rc < 0) - GOTO(out, rc); - else if (rc != 0) - GOTO(out, rc = -ENOTEMPTY); - - /* - * if obj stripe count will be shrunk to 1, we need to convert it to a - * normal dir, which will change its fid and update parent namespace, - * get obj name and parent fid from linkea. + /* if dir stripe count will be shrunk to 1, it needs to be transformed + * to a plain dir, which will cause FID change and namespace update. */ - if (le32_to_cpu(lmu->lum_stripe_count) < 2) { + if (le32_to_cpu(lmu->lum_stripe_count) == 1) { struct linkea_data *ldata = &info->mti_link_data; char *filename = info->mti_name; @@ -4964,11 +4624,18 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (IS_ERR(handle)) GOTO(out, rc = PTR_ERR(handle)); - rc = __mdd_dir_declare_layout_shrink(env, pobj, obj, stripe, attr, - &lmv_buf, lmu_buf, &lname, handle); + mlc->mlc_opc = MD_LAYOUT_SHRINK; + rc = mdo_declare_layout_change(env, obj, mlc, handle); if (rc) GOTO(stop_trans, rc); + if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + rc = mdd_declare_1sd_collapse(env, pobj, obj, stripe, attr, mlc, + &lname, handle); + if (rc) + GOTO(stop_trans, rc); + } + rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL, NULL, handle); if (rc) @@ -4978,11 +4645,20 @@ int mdd_dir_layout_shrink(const struct lu_env *env, if (rc) GOTO(stop_trans, rc); - rc = __mdd_dir_layout_shrink(env, pobj, obj, stripe, attr, &lmv_buf, - lmu_buf, &lname, handle); + mdd_write_lock(env, obj, DT_SRC_PARENT); + mlc->mlc_opc = MD_LAYOUT_SHRINK; + rc = mdo_layout_change(env, obj, mlc, handle); + mdd_write_unlock(env, obj); if (rc) GOTO(stop_trans, rc); + if (le32_to_cpu(lmu->lum_stripe_count) == 1) { + rc = mdd_1sd_collapse(env, pobj, obj, stripe, attr, mlc, &lname, + handle); + if (rc) + GOTO(stop_trans, rc); + } + rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj, XATTR_NAME_LMV, handle); GOTO(stop_trans, rc); @@ -4998,6 +4674,257 @@ out: return rc; } +static int mdd_dir_declare_split_plain(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *tobj, + struct md_layout_change *mlc, + struct dt_allocation_hint *hint, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + const struct lu_name *lname = mlc->mlc_name; + struct lu_attr *la = &info->mti_la_for_fix; + struct lmv_user_md_v1 *lum = mlc->mlc_spec->u.sp_ea.eadata; + struct linkea_data *ldata = &info->mti_link_data; + struct lmv_mds_md_v1 *lmv; + __u32 count; + int rc; + + mlc->mlc_opc = MD_LAYOUT_DETACH; + rc = mdo_declare_layout_change(env, obj, mlc, handle); + if (rc) + return rc; + + memset(ldata, 0, sizeof(*ldata)); + rc = mdd_linkea_prepare(env, obj, NULL, NULL, mdd_object_fid(pobj), + lname, 1, 0, ldata); + if (rc) + return rc; + + count = lum->lum_stripe_count; + lum->lum_stripe_count = 0; + mdd_object_make_hint(env, pobj, tobj, mlc->mlc_attr, mlc->mlc_spec, + hint); + rc = mdd_declare_create(env, mdo2mdd(&pobj->mod_obj), pobj, tobj, + lname, mlc->mlc_attr, handle, mlc->mlc_spec, + ldata, NULL, NULL, NULL, hint); + if (rc) + return rc; + + /* tobj mode will be used in lod_declare_xattr_set(), but it's not + * created yet. + */ + tobj->mod_obj.mo_lu.lo_header->loh_attr |= S_IFDIR; + + lmv = (typeof(lmv))info->mti_key; + memset(lmv, 0, sizeof(*lmv)); + lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_V1); + lmv->lmv_stripe_count = cpu_to_le32(1); + lmv->lmv_hash_type = cpu_to_le32(LMV_HASH_TYPE_DEFAULT); + fid_le_to_cpu(&lmv->lmv_stripe_fids[0], mdd_object_fid(obj)); + + mlc->mlc_opc = MD_LAYOUT_ATTACH; + mlc->mlc_buf.lb_buf = lmv; + mlc->mlc_buf.lb_len = lmv_mds_md_size(1, LMV_MAGIC_V1); + rc = mdo_declare_layout_change(env, tobj, mlc, handle); + if (rc) + return rc; + + rc = mdd_iterate_xattrs(env, obj, tobj, true, handle, + mdo_declare_xattr_set); + if (rc) + return rc; + + lum->lum_stripe_count = count; + mlc->mlc_opc = MD_LAYOUT_SPLIT; + rc = mdo_declare_layout_change(env, tobj, mlc, handle); + if (rc) + return rc; + + rc = mdo_declare_index_delete(env, pobj, lname->ln_name, handle); + if (rc) + return rc; + + rc = mdo_declare_index_insert(env, pobj, mdd_object_fid(tobj), + S_IFDIR, lname->ln_name, handle); + if (rc) + return rc; + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdo_declare_attr_set(env, obj, la, handle); + if (rc) + return rc; + + rc = mdo_declare_attr_set(env, pobj, la, handle); + if (rc) + return rc; + + rc = mdd_declare_changelog_store(env, mdd, CL_MIGRATE, lname, NULL, + handle); + return rc; +} + +/** + * plain directory split: + * 1. create \a tobj as plain directory. + * 2. append \a obj as first stripe of \a tobj. + * 3. migrate xattrs from \a obj to \a tobj. + * 4. split \a tobj to specific stripe count. + */ +static int mdd_dir_split_plain(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *tobj, + struct md_layout_change *mlc, + struct dt_allocation_hint *hint, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *pattr = &info->mti_pattr; + struct lu_attr *la = &info->mti_la_for_fix; + const struct lu_name *lname = mlc->mlc_name; + struct linkea_data *ldata = &info->mti_link_data; + int rc; + + ENTRY; + + /* copy linkea out and set on target later */ + rc = mdd_links_read(env, obj, ldata); + if (rc) + RETURN(rc); + + mlc->mlc_opc = MD_LAYOUT_DETACH; + rc = mdo_layout_change(env, obj, mlc, handle); + if (rc) + RETURN(rc); + + /* don't set nlink from obj */ + mlc->mlc_attr->la_valid &= ~LA_NLINK; + + rc = mdd_create_object(env, pobj, tobj, mlc->mlc_attr, mlc->mlc_spec, + NULL, NULL, NULL, hint, handle, false); + if (rc) + RETURN(rc); + + rc = mdd_iterate_xattrs(env, obj, tobj, true, handle, mdo_xattr_set); + if (rc) + RETURN(rc); + + rc = mdd_links_write(env, tobj, ldata, handle); + if (rc) + RETURN(rc); + + rc = __mdd_index_delete(env, pobj, lname->ln_name, true, handle); + if (rc) + RETURN(rc); + + rc = __mdd_index_insert(env, pobj, mdd_object_fid(tobj), S_IFDIR, + lname->ln_name, handle); + if (rc) + RETURN(rc); + + la->la_ctime = la->la_mtime = mlc->mlc_attr->la_mtime; + la->la_valid = LA_CTIME | LA_MTIME; + + mdd_write_lock(env, obj, DT_SRC_CHILD); + rc = mdd_update_time(env, tobj, mlc->mlc_attr, la, handle); + mdd_write_unlock(env, obj); + if (rc) + RETURN(rc); + + rc = mdd_la_get(env, pobj, pattr); + if (rc) + RETURN(rc); + + la->la_valid = LA_CTIME | LA_MTIME; + + mdd_write_lock(env, pobj, DT_SRC_PARENT); + rc = mdd_update_time(env, pobj, pattr, la, handle); + mdd_write_unlock(env, pobj); + if (rc) + RETURN(rc); + + /* FID changes, record it as CL_MIGRATE */ + rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj, + mdd_object_fid(pobj), mdd_object_fid(obj), + mdd_object_fid(pobj), lname, lname, handle); + RETURN(rc); +} + +int mdd_dir_layout_split(const struct lu_env *env, struct md_object *o, + struct md_layout_change *mlc) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct mdd_device *mdd = mdo2mdd(o); + struct mdd_object *obj = md2mdd_obj(o); + struct mdd_object *pobj = md2mdd_obj(mlc->mlc_parent); + struct mdd_object *tobj = md2mdd_obj(mlc->mlc_target); + struct dt_allocation_hint *hint = &info->mti_hint; + bool is_plain = false; + struct thandle *handle; + int rc; + + ENTRY; + + LASSERT(S_ISDIR(mdd_object_type(obj))); + + rc = mdo_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LMV); + if (rc == -ENODATA) + is_plain = true; + else if (rc < 0) + RETURN(rc); + + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + RETURN(PTR_ERR(handle)); + + if (is_plain) { + rc = mdd_dir_declare_split_plain(env, mdd, pobj, obj, tobj, mlc, + hint, handle); + } else { + mlc->mlc_opc = MD_LAYOUT_SPLIT; + rc = mdo_declare_layout_change(env, obj, mlc, handle); + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL, + NULL, handle); + } + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc) + GOTO(stop_trans, rc); + + if (is_plain) { + rc = mdd_dir_split_plain(env, mdd, pobj, obj, tobj, mlc, hint, + handle); + } else { + mdd_write_lock(env, obj, DT_TGT_CHILD); + rc = mdo_xattr_set(env, obj, NULL, XATTR_NAME_LMV, + LU_XATTR_CREATE, handle); + mdd_write_unlock(env, obj); + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj, + XATTR_NAME_LMV, handle); + } + if (rc) + GOTO(stop_trans, rc); + + EXIT; + +stop_trans: + rc = mdd_trans_stop(env, mdd, rc, handle); + + return rc; +} + const struct md_dir_operations mdd_dir_ops = { .mdo_is_subdir = mdd_is_subdir, .mdo_lookup = mdd_lookup,