From 3f608461b387df056c9563d4c2879b05fb54a5a5 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Sat, 15 Feb 2020 21:26:36 +0800 Subject: [PATCH] LU-11025 dne: refactor dir migration Tidy up directory migration to make it easier to support directory restripe. Signed-off-by: Lai Siyao Change-Id: Ibad46d8cf8f984e5f0062792c651b9753c2fb9a6 Reviewed-on: https://review.whamcloud.com/37712 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Hongchao Zhang --- lustre/mdd/mdd_dir.c | 621 +++++++++++++++++++++++----------------------- lustre/mdd/mdd_internal.h | 1 - lustre/mdt/mdt_handler.c | 1 - lustre/mdt/mdt_internal.h | 1 + lustre/mdt/mdt_lib.c | 2 + lustre/mdt/mdt_lproc.c | 1 + lustre/mdt/mdt_open.c | 42 +--- lustre/mdt/mdt_reint.c | 5 +- lustre/mdt/mdt_xattr.c | 43 ++-- 9 files changed, 337 insertions(+), 380 deletions(-) diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index c490331..c978a46 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -2603,11 +2603,8 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj, * migrate may create 1-stripe directory, so lod_ah_init() * doesn't adjust stripe count from lmu. */ - if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) { - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = 0; - spec->u.sp_ea.eadata = &info->mti_lmu; - } + if (lmu && lmu->lum_stripe_count == cpu_to_le32(1)) + lmu->lum_stripe_count = 0; } mdd_object_make_hint(env, mdd_pobj, son, attr, spec, hint); @@ -3621,14 +3618,14 @@ static int mdd_iterate_linkea(const struct lu_env *env, * \retval 1 don't migrate * \retval -errno on failure */ -static int migrate_linkea_prepare(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - const struct lu_name *lname, - const struct lu_attr *attr, - struct linkea_data *ldata) +static int mdd_migrate_linkea_prepare(const struct lu_env *env, + struct mdd_device *mdd, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *sobj, + const struct lu_name *lname, + const struct lu_attr *attr, + struct linkea_data *ldata) { __u32 source_mdt_index; int rc; @@ -3669,14 +3666,74 @@ static int migrate_linkea_prepare(const struct lu_env *env, RETURN(rc); } +static int mdd_declare_migrate_update(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *obj, + const struct lu_name *lname, + struct lu_attr *attr, + struct lu_attr *spattr, + struct lu_attr *tpattr, + struct linkea_data *ldata, + struct md_attr *ma, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la_for_fix; + int rc; + + rc = mdo_declare_index_delete(env, spobj, lname->ln_name, handle); + if (rc) + return rc; + + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_del(env, spobj, handle); + if (rc) + return rc; + } + + rc = mdo_declare_index_insert(env, tpobj, mdd_object_fid(obj), + attr->la_mode & S_IFMT, + lname->ln_name, handle); + if (rc) + return rc; + + rc = mdd_declare_links_add(env, obj, handle, ldata); + if (rc) + return rc; + + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_add(env, tpobj, handle); + if (rc) + return rc; + } + + la->la_valid = LA_CTIME | LA_MTIME; + rc = mdo_declare_attr_set(env, spobj, la, handle); + if (rc) + return rc; + + if (tpobj != spobj) { + rc = mdo_declare_attr_set(env, tpobj, la, handle); + if (rc) + return rc; + } + + return rc; +} + static int mdd_declare_migrate_create(const struct lu_env *env, + struct mdd_object *spobj, struct mdd_object *tpobj, struct mdd_object *sobj, struct mdd_object *tobj, const struct lu_name *lname, + struct lu_attr *spattr, + struct lu_attr *tpattr, struct lu_attr *attr, struct lu_buf *sbuf, struct linkea_data *ldata, + struct md_attr *ma, struct md_op_spec *spec, struct dt_allocation_hint *hint, struct thandle *handle) @@ -3686,13 +3743,26 @@ static int mdd_declare_migrate_create(const struct lu_env *env, struct lmv_mds_md_v1 *lmv = sbuf->lb_buf; int rc; + ENTRY; + if (S_ISDIR(attr->la_mode)) { + struct lmv_user_md *lum = spec->u.sp_ea.eadata; + mlc->mlc_opc = MD_LAYOUT_DETACH; rc = mdo_declare_layout_change(env, sobj, mlc, handle); if (rc) return rc; + + lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_MIGRATION); + } else if (S_ISLNK(attr->la_mode)) { + spec->u.sp_symname = sbuf->lb_buf; + } else if (S_ISREG(attr->la_mode)) { + spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE; + spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA; } + mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint); + rc = mdd_declare_create(env, mdo2mdd(&tpobj->mod_obj), tpobj, tobj, lname, attr, handle, spec, ldata, NULL, NULL, NULL, hint); @@ -3707,7 +3777,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, tobj->mod_obj.mo_lu.lo_header->loh_attr |= sobj->mod_obj.mo_lu.lo_header->loh_attr & S_IFMT; - if (S_ISDIR(attr->la_mode) && mdd_dir_is_empty(env, sobj) != 0) { + if (S_ISDIR(attr->la_mode)) { if (!lmv) { /* if sobj is not striped, fake a 1-stripe LMV */ LASSERT(sizeof(info->mti_key) > @@ -3761,24 +3831,103 @@ static int mdd_declare_migrate_create(const struct lu_env *env, return rc; } + if (!S_ISDIR(attr->la_mode) || lmv) { + rc = mdo_declare_ref_del(env, sobj, handle); + if (rc) + return rc; + + if (S_ISDIR(attr->la_mode)) { + rc = mdo_declare_ref_del(env, sobj, handle); + if (rc) + return rc; + } + + rc = mdo_declare_destroy(env, sobj, handle); + if (rc) + return rc; + } + + rc = mdd_declare_migrate_update(env, spobj, tpobj, tobj, lname, attr, + spattr, tpattr, ldata, ma, handle); return rc; } /** - * Create target, migrate xattrs and update links. + * migrate dirent from \a spobj to \a tpobj. + **/ +static int mdd_migrate_update(const struct lu_env *env, + struct mdd_object *spobj, + struct mdd_object *tpobj, + struct mdd_object *obj, + const struct lu_name *lname, + struct lu_attr *attr, + struct lu_attr *spattr, + struct lu_attr *tpattr, + struct linkea_data *ldata, + struct md_attr *ma, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lu_attr *la = &info->mti_la_for_fix; + int rc; + + ENTRY; + + CDEBUG(D_INFO, "update "DFID"/%s to "DFID"/"DFID"\n", + PFID(mdd_object_fid(spobj)), lname->ln_name, + PFID(mdd_object_fid(tpobj)), PFID(mdd_object_fid(obj))); + + rc = __mdd_index_delete(env, spobj, lname->ln_name, + S_ISDIR(attr->la_mode), handle); + if (rc) + RETURN(rc); + + rc = __mdd_index_insert(env, tpobj, mdd_object_fid(obj), + attr->la_mode & S_IFMT, + lname->ln_name, handle); + if (rc) + RETURN(rc); + + rc = mdd_links_write(env, obj, ldata, handle); + if (rc) + RETURN(rc); + + la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; + la->la_valid = LA_CTIME | LA_MTIME; + mdd_write_lock(env, spobj, DT_SRC_PARENT); + rc = mdd_update_time(env, spobj, spattr, la, handle); + mdd_write_unlock(env, spobj); + if (rc) + RETURN(rc); + + if (tpobj != spobj) { + la->la_valid = LA_CTIME | LA_MTIME; + mdd_write_lock(env, tpobj, DT_TGT_PARENT); + rc = mdd_update_time(env, tpobj, tpattr, la, handle); + mdd_write_unlock(env, tpobj); + if (rc) + RETURN(rc); + } + + RETURN(rc); +} + +/** + * Migrate file/dir to target MDT. * * Create target according to \a spec, and then migrate xattrs, if it's - * directory, migrate source stripes to target, else update fid to target - * for links. + * directory, migrate source stripes to target. * * \param[in] env execution environment + * \param[in] spobj source parent object * \param[in] tpobj target parent object * \param[in] sobj source object * \param[in] tobj target object * \param[in] lname file name + * \param[in] spattr source parent attributes + * \param[in] tpattr target parent attributes * \param[in] attr source attributes * \param[in] sbuf source LMV buf - * \param[in] ldata source linkea * \param[in] spec migrate create spec * \param[in] hint target creation hint * \param[in] handle tranasction handle @@ -3787,13 +3936,17 @@ static int mdd_declare_migrate_create(const struct lu_env *env, * \retval -errno on failure **/ static int mdd_migrate_create(const struct lu_env *env, + struct mdd_object *spobj, struct mdd_object *tpobj, struct mdd_object *sobj, struct mdd_object *tobj, const struct lu_name *lname, + struct lu_attr *spattr, + struct lu_attr *tpattr, struct lu_attr *attr, const struct lu_buf *sbuf, struct linkea_data *ldata, + struct md_attr *ma, struct md_op_spec *spec, struct dt_allocation_hint *hint, struct thandle *handle) @@ -3803,10 +3956,10 @@ static int mdd_migrate_create(const struct lu_env *env, ENTRY; /* - * directory will migrate sobj stripes to tobj: - * 1. delete stripes from sobj. - * 2. add stripes to tobj, see lod_dir_declare_layout_add(). - * 3. create/attach stripes for tobj, see lod_xattr_set_lmv(). + * migrate sobj stripes to tobj if it's directory: + * 1. detach stripes from sobj. + * 2. attach stripes to tobj, see mdd_declare_migrate_mdt(). + * 3. create stripes for tobj, see lod_xattr_set_lmv(). */ if (S_ISDIR(attr->la_mode)) { struct mdd_thread_info *info = mdd_env_info(env); @@ -3851,181 +4004,78 @@ static int mdd_migrate_create(const struct lu_env *env, mdd_write_lock(env, sobj, DT_SRC_CHILD); rc = mdo_xattr_del(env, sobj, XATTR_NAME_LOV, handle); mdd_write_unlock(env, sobj); - if (rc) + /* O_DELAY_CREATE file may not have LOV, ignore -ENODATA */ + if (rc && rc != -ENODATA) RETURN(rc); + rc = 0; } - if (!S_ISDIR(attr->la_mode)) + /* update links FID */ + if (!S_ISDIR(attr->la_mode)) { rc = mdd_iterate_linkea(env, sobj, tobj, lname, mdd_object_fid(tpobj), ldata, NULL, handle, mdd_update_link); - - RETURN(rc); -} - -static int mdd_declare_migrate_update(const struct lu_env *env, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - struct mdd_object *tobj, - const struct lu_name *lname, - struct lu_attr *attr, - struct lu_attr *spattr, - struct lu_attr *tpattr, - struct linkea_data *ldata, - bool do_create, - bool do_destroy, - struct md_attr *ma, - struct thandle *handle) -{ - struct mdd_thread_info *info = mdd_env_info(env); - const struct lu_fid *fid = mdd_object_fid(do_create ? tobj : sobj); - struct lu_attr *la = &info->mti_la_for_fix; - int rc; - - rc = mdo_declare_index_delete(env, spobj, lname->ln_name, handle); - if (rc) - return rc; - - if (S_ISDIR(attr->la_mode)) { - rc = mdo_declare_ref_del(env, spobj, handle); - if (rc) - return rc; - } - - rc = mdo_declare_index_insert(env, tpobj, fid, mdd_object_type(sobj), - lname->ln_name, handle); - if (rc) - return rc; - - rc = mdd_declare_links_add(env, do_create ? tobj : sobj, handle, ldata); - if (rc) - return rc; - - if (S_ISDIR(attr->la_mode)) { - rc = mdo_declare_ref_add(env, tpobj, handle); - if (rc) - return rc; - } - - la->la_valid = LA_CTIME | LA_MTIME; - rc = mdo_declare_attr_set(env, spobj, la, handle); - if (rc) - return rc; - - if (tpobj != spobj) { - rc = mdo_declare_attr_set(env, tpobj, la, handle); if (rc) - return rc; + RETURN(rc); } - if (do_create && do_destroy) { - rc = mdo_declare_ref_del(env, sobj, handle); - if (rc) - return rc; - - if (S_ISDIR(attr->la_mode)) { - rc = mdo_declare_ref_del(env, sobj, handle); - if (rc) - return rc; + /* don't destroy sobj if it's plain directory */ + if (!S_ISDIR(attr->la_mode) || sbuf->lb_buf) { + mdd_write_lock(env, sobj, DT_SRC_CHILD); + rc = mdo_ref_del(env, sobj, handle); + if (!rc) { + if (S_ISDIR(attr->la_mode)) + rc = mdo_ref_del(env, sobj, handle); + if (!rc) + rc = mdo_destroy(env, sobj, handle); } - - rc = mdo_declare_destroy(env, sobj, handle); + mdd_write_unlock(env, sobj); if (rc) - return rc; + RETURN(rc); } - return rc; + rc = mdd_migrate_update(env, spobj, tpobj, tobj, lname, attr, + spattr, tpattr, ldata, ma, handle); + + RETURN(rc); } -/** - * migrate dirent from \a spobj to \a tpobj, and destroy \a sobj - **/ -static int mdd_migrate_update(const struct lu_env *env, - struct mdd_object *spobj, - struct mdd_object *tpobj, - struct mdd_object *sobj, - struct mdd_object *tobj, - const struct lu_name *lname, - struct lu_attr *attr, - struct lu_attr *spattr, - struct lu_attr *tpattr, - struct linkea_data *ldata, - bool do_create, - bool do_destroy, - struct md_attr *ma, - struct thandle *handle) +static int mdd_migrate_cmd_check(struct mdd_device *mdd, + const struct lmv_mds_md_v1 *lmv, + const struct lmv_user_md_v1 *lum, + const struct lu_name *lname) { - struct mdd_thread_info *info = mdd_env_info(env); - const struct lu_fid *fid = mdd_object_fid(do_create ? tobj : sobj); - struct lu_attr *la = &info->mti_la_for_fix; - int rc; - - ENTRY; - - CDEBUG(D_INFO, "update %s "DFID"/"DFID" to "DFID"/"DFID"\n", - lname->ln_name, PFID(mdd_object_fid(spobj)), - PFID(mdd_object_fid(sobj)), PFID(mdd_object_fid(tpobj)), - PFID(fid)); - - rc = __mdd_index_delete(env, spobj, lname->ln_name, - S_ISDIR(attr->la_mode), handle); - if (rc) - RETURN(rc); - - rc = __mdd_index_insert(env, tpobj, fid, mdd_object_type(sobj), - lname->ln_name, handle); - if (rc) - RETURN(rc); - - rc = mdd_links_write(env, do_create ? tobj : sobj, ldata, handle); - if (rc) - RETURN(rc); - - la->la_ctime = la->la_mtime = ma->ma_attr.la_ctime; - la->la_valid = LA_CTIME | LA_MTIME; - mdd_write_lock(env, spobj, DT_SRC_PARENT); - rc = mdd_update_time(env, spobj, spattr, la, handle); - mdd_write_unlock(env, spobj); - if (rc) - RETURN(rc); - - if (tpobj != spobj) { - la->la_valid = LA_CTIME | LA_MTIME; - mdd_write_lock(env, tpobj, DT_TGT_PARENT); - rc = mdd_update_time(env, tpobj, tpattr, la, handle); - mdd_write_unlock(env, tpobj); - if (rc) - RETURN(rc); + __u32 lum_stripe_count = lum->lum_stripe_count; + __u32 lmv_hash_type = lmv->lmv_hash_type; + char *mdt_hash_name[] = { "none", + LMV_HASH_NAME_ALL_CHARS, + LMV_HASH_NAME_FNV_1A_64, + LMV_HASH_NAME_CRUSH, + }; + + if (!lmv_is_sane(lmv)) + return -EBADF; + + /* if stripe_count unspecified, set to 1 */ + if (!lum_stripe_count) + lum_stripe_count = cpu_to_le32(1); + + lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION); + + /* TODO: check specific MDTs */ + if (lum_stripe_count != lmv->lmv_migrate_offset || + lum->lum_stripe_offset != lmv->lmv_master_mdt_index || + (lum->lum_hash_type && lum->lum_hash_type != lmv_hash_type)) { + CERROR("%s: '"DNAME"' migration was interrupted, run 'lfs migrate -m %d -c %d -H %s "DNAME"' to finish migration.\n", + mdd2obd_dev(mdd)->obd_name, PNAME(lname), + le32_to_cpu(lmv->lmv_master_mdt_index), + le32_to_cpu(lmv->lmv_migrate_offset), + mdt_hash_name[le32_to_cpu(lmv_hash_type)], + PNAME(lname)); + return -EPERM; } - /* - * there are three situations we shouldn't destroy source: - * 1. if source is not dir, and it happens to be located on the same MDT - * as target parent. - * 2. if source is not dir, and has link on the same MDT where source is - * located. - * 3. if source is dir, and it's a normal, non-empty dir. - * - * the first two situations equals to !do_create, and the 3rd equals to - * !do_destroy, so the below condition is actually - * !(!do_create || !do_destroy). - * - * NB, if user has opened source dir before migration, he will get - * -ENOENT error when close it later, because source is likely to be - * remote, which can't be moved to orphan list, but except this error - * message, this won't cause any inconsistency or trouble. - */ - if (do_create && do_destroy) { - mdd_write_lock(env, sobj, DT_SRC_CHILD); - mdo_ref_del(env, sobj, handle); - if (S_ISDIR(attr->la_mode)) - mdo_ref_del(env, sobj, handle); - rc = mdo_destroy(env, sobj, handle); - mdd_write_unlock(env, sobj); - } - - RETURN(rc); + return -EALREADY; } /** @@ -4068,11 +4118,11 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, struct lu_fid *fid = &info->mti_fid2; struct lu_buf pbuf = { NULL }; struct lu_buf sbuf = { NULL }; - struct lmv_mds_md_v1 *plmv; + struct lmv_mds_md_v1 *lmv; struct thandle *handle; - bool do_create = true; - bool do_destroy = true; + bool nsonly = false; int rc; + ENTRY; rc = mdd_la_get(env, sobj, attr); @@ -4084,36 +4134,36 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, if (rc < 0 && rc != -ENODATA) RETURN(rc); - plmv = pbuf.lb_buf; - if (plmv) { - __u32 hash_type = le32_to_cpu(plmv->lmv_hash_type); + lmv = pbuf.lb_buf; + if (lmv) { + __u32 hash_type = le32_to_cpu(lmv->lmv_hash_type); int index; + /* locate target parent stripe */ /* fail check here to make sure top dir migration succeed. */ if ((hash_type & LMV_HASH_FLAG_MIGRATION) && OBD_FAIL_CHECK_RESET(OBD_FAIL_MIGRATE_ENTRIES, 0)) GOTO(out, rc = -EIO); - index = lmv_name_to_stripe_index(plmv, lname->ln_name, + index = lmv_name_to_stripe_index(lmv, lname->ln_name, lname->ln_namelen); if (index < 0) GOTO(out, rc = index); - fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]); + fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]); tpobj = mdd_object_find(env, mdd, fid); if (IS_ERR(tpobj)) GOTO(out, rc = PTR_ERR(tpobj)); /* locate source parent stripe */ - if (le32_to_cpu(plmv->lmv_hash_type) & - LMV_HASH_FLAG_LAYOUT_CHANGE) { - index = lmv_name_to_stripe_index_old(plmv, + if (hash_type & LMV_HASH_FLAG_LAYOUT_CHANGE) { + index = lmv_name_to_stripe_index_old(lmv, lname->ln_name, lname->ln_namelen); if (index < 0) GOTO(out, rc = index); - fid_le_to_cpu(fid, &plmv->lmv_stripe_fids[index]); + fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[index]); spobj = mdd_object_find(env, mdd, fid); if (IS_ERR(spobj)) GOTO(out, rc = PTR_ERR(spobj)); @@ -4137,115 +4187,58 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, GOTO(out, rc); if (S_ISDIR(attr->la_mode)) { - struct lmv_user_md_v1 *lmu = spec->u.sp_ea.eadata; + struct lmv_user_md_v1 *lum = spec->u.sp_ea.eadata; - LASSERT(lmu); + LASSERT(lum); - /* - * if user use default value '0' for stripe_count, we need to + /* if user use default value '0' for stripe_count, we need to * adjust it to '1' to create a 1-stripe directory. */ - if (lmu->lum_stripe_count == 0) { - /* eadata is from request, don't alter it */ - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = cpu_to_le32(1); - spec->u.sp_ea.eadata = &info->mti_lmu; - lmu = spec->u.sp_ea.eadata; - } + if (lum->lum_stripe_count == 0) + lum->lum_stripe_count = cpu_to_le32(1); rc = mdd_stripe_get(env, sobj, &sbuf, XATTR_NAME_LMV); - if (rc == -ENODATA) { - if (mdd_dir_is_empty(env, sobj) == 0) { - /* - * if sobj is empty, and target is not striped, - * create target as a normal directory. - */ - if (le32_to_cpu(lmu->lum_stripe_count) == 1) { - info->mti_lmu = *lmu; - info->mti_lmu.lum_stripe_count = 0; - spec->u.sp_ea.eadata = &info->mti_lmu; - lmu = spec->u.sp_ea.eadata; - } - } else { - /* - * sobj is not striped dir, if it's not empty, - * it will be migrated to be a stripe of target, - * don't destroy it after migration. - */ - do_destroy = false; - } - } else if (rc) { + if (rc && rc != -ENODATA) GOTO(out, rc); - } else { - struct lmv_mds_md_v1 *lmv = sbuf.lb_buf; - - if (le32_to_cpu(lmv->lmv_hash_type) & - LMV_HASH_FLAG_MIGRATION) { - __u32 lum_stripe_count = lmu->lum_stripe_count; - __u32 lmv_hash_type = lmv->lmv_hash_type & - cpu_to_le32(LMV_HASH_TYPE_MASK); - - if (!lum_stripe_count) - lum_stripe_count = cpu_to_le32(1); - - /* TODO: check specific MDTs */ - if (lmv->lmv_migrate_offset != - lum_stripe_count || - lmv->lmv_master_mdt_index != - lmu->lum_stripe_offset || - (lmu->lum_hash_type && - lmv_hash_type != lmu->lum_hash_type)) { - CERROR("%s: \'"DNAME"\' migration was " - "interrupted, run \'lfs migrate " - "-m %d -c %d -H %d "DNAME"\' to " - "finish migration.\n", - mdd2obd_dev(mdd)->obd_name, - PNAME(lname), - le32_to_cpu( - lmv->lmv_master_mdt_index), - le32_to_cpu( - lmv->lmv_migrate_offset), - le32_to_cpu(lmv_hash_type), - PNAME(lname)); - GOTO(out, rc = -EPERM); - } - GOTO(out, rc = -EALREADY); - } - } - } else if (!mdd_object_remote(tpobj)) { - /* - * if source is already on MDT where target parent is located, - * no need to create, just update namespace. - */ - do_create = false; - } else if (S_ISLNK(attr->la_mode)) { - lu_buf_check_and_alloc(&sbuf, attr->la_size + 1); - if (!sbuf.lb_buf) - GOTO(out, rc = -ENOMEM); - rc = mdd_readlink(env, &sobj->mod_obj, &sbuf); - if (rc <= 0) { - rc = rc ?: -EFAULT; - CERROR("%s: "DFID" readlink failed: rc = %d\n", - mdd_obj_dev_name(sobj), - PFID(mdd_object_fid(sobj)), rc); + + lmv = sbuf.lb_buf; + if (lmv && + (le32_to_cpu(lmv->lmv_hash_type) & + LMV_HASH_FLAG_MIGRATION)) { + rc = mdd_migrate_cmd_check(mdd, lmv, lum, lname); GOTO(out, rc); } - spec->u.sp_symname = sbuf.lb_buf; - } else if (S_ISREG(attr->la_mode)) { - spec->sp_cr_flags |= MDS_OPEN_DELAY_CREATE; - spec->sp_cr_flags &= ~MDS_OPEN_HAS_EA; + } else { + if (spobj == tpobj) + GOTO(out, rc = -EALREADY); + + /* update namespace only if @sobj is on MDT where @tpobj is. */ + if (!mdd_object_remote(tpobj)) + nsonly = true; + + if (S_ISLNK(attr->la_mode)) { + lu_buf_check_and_alloc(&sbuf, attr->la_size + 1); + if (!sbuf.lb_buf) + GOTO(out, rc = -ENOMEM); + + rc = mdd_readlink(env, &sobj->mod_obj, &sbuf); + if (rc <= 0) { + rc = rc ?: -EFAULT; + CERROR("%s: "DFID" readlink failed: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, + PFID(mdd_object_fid(sobj)), rc); + GOTO(out, rc); + } + } } - /* - * if sobj has link on the same MDT, no need to create, just update - * namespace, and it will be a remote file on target parent, which is - * similar to rename. - */ - rc = migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, lname, attr, - ldata); + /* linkea needs update upon FID or parent stripe change */ + rc = mdd_migrate_linkea_prepare(env, mdd, spobj, tpobj, sobj, lname, + attr, ldata); if (rc > 0) - do_create = false; - else if (rc) + /* update namespace only if @sobj has link on its MDT. */ + nsonly = true; + else if (rc < 0) GOTO(out, rc); rc = mdd_migrate_sanity_check(env, mdd, spobj, tpobj, sobj, tobj, @@ -4253,23 +4246,19 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, if (rc) GOTO(out, rc); - mdd_object_make_hint(env, tpobj, tobj, attr, spec, hint); - handle = mdd_trans_create(env, mdd); if (IS_ERR(handle)) GOTO(out, rc = PTR_ERR(handle)); - if (do_create) { - rc = mdd_declare_migrate_create(env, tpobj, sobj, tobj, lname, - attr, &sbuf, ldata, spec, hint, + if (nsonly) + rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, lname, + attr, spattr, tpattr, ldata, ma, + handle); + else + rc = mdd_declare_migrate_create(env, spobj, tpobj, sobj, tobj, + lname, spattr, tpattr, attr, + &sbuf, ldata, ma, spec, hint, handle); - if (rc) - GOTO(stop_trans, rc); - } - - rc = mdd_declare_migrate_update(env, spobj, tpobj, sobj, tobj, lname, - attr, spattr, tpattr, ldata, do_create, - do_destroy, ma, handle); if (rc) GOTO(stop_trans, rc); @@ -4282,36 +4271,34 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *md_pobj, if (rc) GOTO(stop_trans, rc); - if (do_create) { - rc = mdd_migrate_create(env, tpobj, sobj, tobj, lname, attr, - &sbuf, ldata, spec, hint, handle); - if (rc) - GOTO(stop_trans, rc); - } - - rc = mdd_migrate_update(env, spobj, tpobj, sobj, tobj, lname, attr, - spattr, tpattr, ldata, do_create, do_destroy, - ma, handle); + if (nsonly) + rc = mdd_migrate_update(env, spobj, tpobj, sobj, lname, attr, + spattr, tpattr, ldata, ma, handle); + else + rc = mdd_migrate_create(env, spobj, tpobj, sobj, tobj, lname, + spattr, tpattr, attr, &sbuf, ldata, ma, + spec, hint, handle); if (rc) GOTO(stop_trans, rc); - rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, tobj, - mdd_object_fid(spobj), mdd_object_fid(sobj), - mdd_object_fid(tpobj), lname, lname, - handle); + rc = mdd_changelog_ns_store(env, mdd, CL_MIGRATE, 0, + nsonly ? sobj : tobj, mdd_object_fid(spobj), + mdd_object_fid(sobj), mdd_object_fid(tpobj), + lname, lname, handle); if (rc) GOTO(stop_trans, rc); - EXIT; + stop_trans: rc = mdd_trans_stop(env, mdd, rc, handle); out: - if (spobj && !IS_ERR(spobj)) + if (!IS_ERR_OR_NULL(spobj)) mdd_object_put(env, spobj); - if (tpobj && !IS_ERR(tpobj)) + if (!IS_ERR_OR_NULL(tpobj)) mdd_object_put(env, tpobj); lu_buf_free(&sbuf); lu_buf_free(&pbuf); + return rc; } diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index b26fe1b..5e5c727 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -203,7 +203,6 @@ struct mdd_thread_info { struct lu_buf mti_link_buf; /* buf for link ea */ struct lu_buf mti_xattr_buf; struct obdo mti_oa; - struct lmv_user_md mti_lmu; struct dt_allocation_hint mti_hint; struct dt_object_format mti_dof; struct linkea_data mti_link_data; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 742ea68..bfec8e1 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -3748,7 +3748,6 @@ void mdt_thread_info_init(struct ptlrpc_request *req, info->mti_spec.no_create = 0; info->mti_spec.sp_rm_entry = 0; info->mti_spec.sp_permitted = 0; - info->mti_spec.sp_migrate_close = 0; info->mti_spec.u.sp_ea.eadata = NULL; info->mti_spec.u.sp_ea.eadatalen = 0; diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index f27b67f..e1d903d 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -1273,6 +1273,7 @@ enum mdt_stat_idx { LPROC_MDT_IO_READ, LPROC_MDT_IO_WRITE, LPROC_MDT_IO_PUNCH, + LPROC_MDT_MIGRATE, LPROC_MDT_LAST, }; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 71290c2..ff76d12 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -1520,6 +1520,8 @@ static int mdt_migrate_unpack(struct mdt_thread_info *info) RETURN(rc); spec->sp_migrate_close = 1; + } else { + spec->sp_migrate_close = 0; } /* lustre version > 2.11 migration packs lum */ diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 17fe08b..7b99d27 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -1227,6 +1227,7 @@ static const char * const mdt_stats[] = { [LPROC_MDT_IO_READ] = "read_bytes", [LPROC_MDT_IO_WRITE] = "write_bytes", [LPROC_MDT_IO_PUNCH] = "punch", + [LPROC_MDT_MIGRATE] = "migrate", }; void mdt_stats_counter_init(struct lprocfs_stats *stats) diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 967d04a..5b36b78 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -1395,7 +1395,6 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (result < 0) GOTO(out, result); -again: lh = &info->mti_lh[MDT_LH_PARENT]; mdt_lock_pdo_init(lh, (open_flags & MDS_OPEN_CREAT) ? LCK_PW : LCK_PR, &rr->rr_name); @@ -1427,48 +1426,11 @@ again: PFID(mdt_object_fid(parent)), PNAME(&rr->rr_name), PFID(child_fid)); - if (result != 0 && result != -ENOENT && result != -ESTALE) + if (result != 0 && result != -ENOENT) GOTO(out_parent, result); - if (result == -ENOENT || result == -ESTALE) { - /* If the object is dead, let's check if the object - * is being migrated to a new object */ - if (result == -ESTALE) { - struct lu_buf lmv_buf; - - lmv_buf.lb_buf = info->mti_xattr_buf; - lmv_buf.lb_len = sizeof(info->mti_xattr_buf); - rc = mo_xattr_get(info->mti_env, - mdt_object_child(parent), - &lmv_buf, XATTR_NAME_LMV); - if (rc > 0) { - struct lmv_mds_md_v1 *lmv; - - lmv = lmv_buf.lb_buf; - if (le32_to_cpu(lmv->lmv_hash_type) & - LMV_HASH_FLAG_MIGRATION) { - /* Get the new parent FID and retry */ - mdt_object_unlock_put(info, parent, - lh, 1); - mdt_lock_handle_init(lh); - fid_le_to_cpu( - (struct lu_fid *)rr->rr_fid1, - &lmv->lmv_stripe_fids[1]); - goto again; - } - } - } - + if (result == -ENOENT) { mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG); - if (result == -ESTALE) { - /* - * -ESTALE means the parent is a dead(unlinked) dir, so - * it should return -ENOENT to in accordance with the - * original mds implementaion. - */ - GOTO(out_parent, result = -ENOENT); - } - if (!(open_flags & MDS_OPEN_CREAT)) GOTO(out_parent, result); if (mdt_rdonly(req->rq_export)) diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index c3b5401..90c6005 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2148,7 +2148,10 @@ lock_parent: rc = mdo_migrate(env, mdt_object_child(pobj), mdt_object_child(sobj), &rr->rr_name, - mdt_object_child(tobj), &info->mti_spec, ma); + mdt_object_child(tobj), + &info->mti_spec, ma); + if (!rc) + mdt_counter_incr(req, LPROC_MDT_MIGRATE); EXIT; mdt_object_unlock(info, tobj, lht, rc); diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index d73f812..e1f81f3 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -410,34 +410,37 @@ static int mdt_dir_layout_update(struct mdt_thread_info *info) lmv = &ma->ma_lmv->lmv_md_v1; /* ditto */ - if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION)) + if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_LAYOUT_CHANGE)) GOTO(unlock_obj, rc = -EALREADY); lum_stripe_count = lmu->lum_stripe_count; if (!lum_stripe_count) lum_stripe_count = cpu_to_le32(1); - if (lmv->lmv_migrate_offset != lum_stripe_count) { - CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n", - mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), - lmv->lmv_migrate_offset, lmu->lum_stripe_count); - GOTO(unlock_obj, rc = -EINVAL); - } + if ((le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION)) { + if (lmv->lmv_migrate_offset != lum_stripe_count) { + CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_migrate_offset, lmu->lum_stripe_count); + GOTO(unlock_obj, rc = -EINVAL); + } - if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) { - CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n", - mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), - lmv->lmv_master_mdt_index, lmu->lum_stripe_offset); - GOTO(unlock_obj, rc = -EINVAL); - } + if (lmu->lum_stripe_offset != lmv->lmv_master_mdt_index) { + CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_master_mdt_index, + lmu->lum_stripe_offset); + GOTO(unlock_obj, rc = -EINVAL); + } - if (lum_stripe_count > 1 && lmu->lum_hash_type && - (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) != - lmu->lum_hash_type) { - CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n", - mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), - lmv->lmv_hash_type, lmu->lum_hash_type); - GOTO(unlock_obj, rc = -EINVAL); + if (lum_stripe_count > 1 && lmu->lum_hash_type && + (lmv->lmv_hash_type & ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION)) + != lmu->lum_hash_type) { + CERROR("%s: "DFID" migrate mdt hash mismatch %u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_hash_type, lmu->lum_hash_type); + GOTO(unlock_obj, rc = -EINVAL); + } } mlc->mlc_opc = MD_LAYOUT_SHRINK; -- 1.8.3.1