From 0a83d948f37bec7fca6e9aa30f59f26354273b23 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 8 Mar 2018 11:24:31 +0800 Subject: [PATCH] LU-4684 migrate: shrink dir layout after migration Use setxattr(XATTR_NAME_LMV) to shrink dir layout after migration, this may change dir fid because it may shrink dir to 1-stripe, if so replace this dir with its stripe. Signed-off-by: Lai Siyao Change-Id: I4fd3e83e188ac0c6c4845183c17701276fae94a8 Reviewed-on: https://review.whamcloud.com/31626 Reviewed-by: Andreas Dilger Reviewed-by: Fan Yong Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/lod/lod_object.c | 10 +- lustre/mdd/mdd_dir.c | 461 +++++++++++++++++++++++++++++++++++++++++++- lustre/mdd/mdd_internal.h | 10 + lustre/mdd/mdd_object.c | 17 +- lustre/mdt/mdt_internal.h | 11 ++ lustre/mdt/mdt_reint.c | 22 +-- lustre/mdt/mdt_xattr.c | 165 ++++++++++++++++ lustre/utils/liblustreapi.c | 34 +++- 8 files changed, 707 insertions(+), 23 deletions(-) diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index cd38047..dc896ef 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3337,6 +3337,9 @@ static int lod_declare_xattr_set(const struct lu_env *env, rc = lod_dir_declare_layout_add(env, dt, buf, th); else if (strcmp(op, "del") == 0) rc = lod_dir_declare_layout_delete(env, dt, buf, th); + else if (strcmp(op, "set") == 0) + rc = lod_sub_declare_xattr_set(env, next, buf, + XATTR_NAME_LMV, fl, th); RETURN(rc); } else if (S_ISDIR(mode)) { @@ -4094,12 +4097,15 @@ static int lod_xattr_set(const struct lu_env *env, const char *op = name + strlen(XATTR_NAME_LMV) + 1; rc = -ENOTSUPP; - if (strcmp(op, "del") == 0) - rc = lod_dir_layout_delete(env, dt, buf, th); /* * XATTR_NAME_LMV".add" is never called, but only declared, * because lod_xattr_set_lmv() will do the addition. 
*/ + if (strcmp(op, "del") == 0) + rc = lod_dir_layout_delete(env, dt, buf, th); + else if (strcmp(op, "set") == 0) + rc = lod_sub_xattr_set(env, next, buf, XATTR_NAME_LMV, + fl, th); RETURN(rc); } else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) && diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index aa87adc..e9380ff 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -3309,6 +3309,107 @@ out: return rc; } +static int mdd_dir_declare_destroy_stripe(const struct lu_env *env, + struct mdd_object *obj, + struct mdd_object *stripe, + const struct lu_buf *lmv_buf, + const struct lu_buf *lmu_buf, + int index, + struct thandle *handle) +{ + struct lmv_user_md *lmu = lmu_buf->lb_buf; + __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); + int rc; + + if (index < shrink_offset) { + if (shrink_offset < 2) + return 0; + return mdo_declare_xattr_set(env, stripe, lmv_buf, + XATTR_NAME_LMV".set", 0, handle); + } + + rc = mdo_declare_ref_del(env, stripe, handle); + if (rc) + return rc; + + rc = mdo_declare_destroy(env, stripe, handle); + + return rc; +} + +static int mdd_dir_destroy_stripe(const struct lu_env *env, + struct mdd_object *obj, + struct mdd_object *stripe, + const struct lu_buf *lmv_buf, + const struct lu_buf *lmu_buf, + int index, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; + struct lmv_user_md *lmu = lmu_buf->lb_buf; + __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); + int rc; + + ENTRY; + + /* update remaining stripes' LMV */ + if (index < shrink_offset) { + struct lmv_mds_md_v1 *slave_lmv; + struct lu_buf slave_buf = { + .lb_buf = &info->mti_lmv.lmv_md_v1, + .lb_len = sizeof(*slave_lmv) + }; + __u32 version = le32_to_cpu(lmv->lmv_layout_version); + + /* if dir will be shrunk to 1-stripe, don't update */ + if (shrink_offset < 2) + RETURN(0); + + slave_lmv = slave_buf.lb_buf; + memset(slave_lmv, 0, sizeof(*slave_lmv)); + slave_lmv->lmv_magic = 
cpu_to_le32(LMV_MAGIC_STRIPE); + slave_lmv->lmv_stripe_count = lmu->lum_stripe_count; + slave_lmv->lmv_master_mdt_index = cpu_to_le32(index); + slave_lmv->lmv_hash_type = lmv->lmv_hash_type & + cpu_to_le32(LMV_HASH_TYPE_MASK); + slave_lmv->lmv_layout_version = cpu_to_le32(++version); + + rc = mdo_xattr_set(env, stripe, &slave_buf, + XATTR_NAME_LMV".set", 0, handle); + RETURN(rc); + } + + mdd_write_lock(env, stripe, MOR_SRC_CHILD); + rc = mdo_ref_del(env, stripe, handle); + if (!rc) + rc = mdo_destroy(env, stripe, handle); + mdd_write_unlock(env, stripe); + + RETURN(rc); +} + +static int mdd_shrink_stripe_is_empty(const struct lu_env *env, + struct mdd_object *obj, + struct mdd_object *stripe, + const struct lu_buf *lmv_buf, + const struct lu_buf *lmu_buf, + int index, + struct thandle *handle) +{ + struct lmv_user_md *lmu = lmu_buf->lb_buf; + __u32 shrink_offset = le32_to_cpu(lmu->lum_stripe_count); + + /* the default value is 0, but it means 1 */ + if (!shrink_offset) + shrink_offset = 1; + + if (index < shrink_offset) + return 0; + + return mdd_dir_is_empty(env, stripe); +} + /* * iterate stripes of striped directory on remote MDT, local striped directory * is accessed via LOD. 
@@ -3768,7 +3869,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, struct lu_buf lmu_buf = { NULL }; if (lmv) { - struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; + struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md; lmu->lum_stripe_count = 0; lmu_buf.lb_buf = lmu; @@ -3776,7 +3877,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, } rc = mdd_dir_declare_layout_delete(env, sobj, sbuf, &lmu_buf, - handle); + handle); if (rc) return rc; @@ -3921,7 +4022,7 @@ static int mdd_migrate_create(const struct lu_env *env, if (sbuf->lb_buf) { struct mdd_thread_info *info = mdd_env_info(env); - struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; + struct lmv_user_md *lmu = &info->mti_lmv.lmv_user_md; lmu->lum_stripe_count = 0; lmu_buf.lb_buf = lmu; @@ -4440,6 +4541,360 @@ out: return rc; } +static int __mdd_dir_declare_layout_shrink(const struct lu_env *env, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *stripe, + struct lu_attr *attr, + struct lu_buf *lmv_buf, + const struct lu_buf *lmu_buf, + struct lu_name *lname, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; + struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; + struct lu_buf shrink_buf = { .lb_buf = lmu, + .lb_len = sizeof(*lmu) }; + int rc; + + LASSERT(lmv); + + memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu)); + + if (le32_to_cpu(lmu->lum_stripe_count) < 2) + lmu->lum_stripe_count = 0; + + rc = mdd_dir_declare_layout_delete(env, obj, lmv_buf, &shrink_buf, + handle); + if (rc) + return rc; + + if (lmu->lum_stripe_count == 0) { + lmu->lum_stripe_count = cpu_to_le32(1); + + rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LMV, handle); + if (rc) + return rc; + } + + rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle, + mdd_dir_declare_destroy_stripe); + if (rc) + return rc; + + if (le32_to_cpu(lmu->lum_stripe_count) > 1) + return mdo_declare_xattr_set(env, 
obj, lmv_buf, + XATTR_NAME_LMV".set", 0, handle); + + rc = mdo_declare_index_insert(env, stripe, mdo2fid(pobj), S_IFDIR, + dotdot, handle); + if (rc) + return rc; + + rc = mdd_iterate_xattrs(env, obj, stripe, false, handle, + mdo_declare_xattr_set); + if (rc) + return rc; + + rc = mdo_declare_xattr_del(env, stripe, XATTR_NAME_LMV, handle); + if (rc) + return rc; + + rc = mdo_declare_attr_set(env, stripe, attr, handle); + if (rc) + return rc; + + rc = mdo_declare_index_delete(env, pobj, lname->ln_name, handle); + if (rc) + return rc; + + rc = mdo_declare_index_insert(env, pobj, mdo2fid(stripe), attr->la_mode, + lname->ln_name, handle); + if (rc) + return rc; + + rc = mdo_declare_ref_del(env, obj, handle); + if (rc) + return rc; + + rc = mdo_declare_ref_del(env, obj, handle); + if (rc) + return rc; + + rc = mdo_declare_destroy(env, obj, handle); + if (rc) + return rc; + + return rc; + +} + +/* + * after files under \a obj were migrated, shrink old stripes from \a obj, + * furthermore, if it becomes a 1-stripe directory, convert it to a normal one. + */ +static int __mdd_dir_layout_shrink(const struct lu_env *env, + struct mdd_object *pobj, + struct mdd_object *obj, + struct mdd_object *stripe, + struct lu_attr *attr, + struct lu_buf *lmv_buf, + const struct lu_buf *lmu_buf, + struct lu_name *lname, + struct thandle *handle) +{ + struct mdd_thread_info *info = mdd_env_info(env); + struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf; + struct lmv_user_md *lmu = (typeof(lmu))info->mti_key; + struct lu_buf shrink_buf = { .lb_buf = lmu, + .lb_len = sizeof(*lmu) }; + int len = lmv_buf->lb_len; + __u32 version = le32_to_cpu(lmv->lmv_layout_version); + int rc; + + ENTRY; + + /* lmu needs to be altered, but lmu_buf is const */ + memcpy(lmu, lmu_buf->lb_buf, sizeof(*lmu)); + + /* + * if dir will be shrunk to 1-stripe, delete all stripes, because it + * will be converted to normal dir. 
+	 */
+	if (le32_to_cpu(lmu->lum_stripe_count) == 1)
+		lmu->lum_stripe_count = 0;
+
+	/* delete stripes after lmu_stripe_count */
+	rc = mdd_dir_layout_delete(env, obj, lmv_buf, &shrink_buf, handle);
+	if (rc)
+		RETURN(rc);
+
+	if (lmu->lum_stripe_count == 0) {
+		lmu->lum_stripe_count = cpu_to_le32(1);
+
+		/* delete LMV to avoid deleting stripes again upon destroy */
+		mdd_write_lock(env, obj, MOR_SRC_CHILD);
+		rc = mdo_xattr_del(env, obj, XATTR_NAME_LMV, handle);
+		mdd_write_unlock(env, obj);
+		if (rc)
+			RETURN(rc);
+	}
+
+	/* destroy stripes after lmu_stripe_count */
+	mdd_write_lock(env, obj, MOR_SRC_PARENT);
+	rc = mdd_dir_iterate_stripes(env, obj, lmv_buf, &shrink_buf, handle,
+				     mdd_dir_destroy_stripe);
+	mdd_write_unlock(env, obj);
+	/*
+	 * stop here on failure: rc is reassigned by the steps below, so an
+	 * error from destroying a stripe would otherwise be silently lost.
+	 */
+	if (rc)
+		RETURN(rc);
+
+	if (le32_to_cpu(lmu->lum_stripe_count) > 1) {
+		/* update dir LMV, that's all if it's still striped. */
+		lmv->lmv_stripe_count = lmu->lum_stripe_count;
+		lmv->lmv_hash_type &= ~cpu_to_le32(LMV_HASH_FLAG_MIGRATION);
+		lmv->lmv_migrate_offset = 0;
+		lmv->lmv_migrate_hash = 0;
+		lmv->lmv_layout_version = cpu_to_le32(++version);
+
+		/* only the LMV header is written; restore lb_len afterwards */
+		lmv_buf->lb_len = sizeof(*lmv);
+		rc = mdo_xattr_set(env, obj, lmv_buf, XATTR_NAME_LMV".set", 0,
+				   handle);
+		lmv_buf->lb_len = len;
+		RETURN(rc);
+	}
+
+	/* replace directory with its remaining stripe */
+	LASSERT(pobj);
+	LASSERT(stripe);
+
+	mdd_write_lock(env, pobj, MOR_SRC_PARENT);
+	mdd_write_lock(env, obj, MOR_SRC_CHILD);
+
+	/* insert dotdot to stripe which points to parent */
+	rc = __mdd_index_insert_only(env, stripe, mdo2fid(pobj), S_IFDIR,
+				     dotdot, handle);
+	if (rc)
+		GOTO(out, rc);
+
+	/* copy xattrs including linkea */
+	rc = mdd_iterate_xattrs(env, obj, stripe, false, handle, mdo_xattr_set);
+	if (rc)
+		GOTO(out, rc);
+
+	/* delete LMV */
+	rc = mdo_xattr_del(env, stripe, XATTR_NAME_LMV, handle);
+	if (rc)
+		GOTO(out, rc);
+
+	/* don't set nlink from parent */
+	attr->la_valid &= ~LA_NLINK;
+
+	rc = mdo_attr_set(env, stripe, attr, handle);
+	if (rc)
+		GOTO(out, rc);
+
+	/* delete dir
name from parent */ + rc = __mdd_index_delete_only(env, pobj, lname->ln_name, handle); + if (rc) + GOTO(out, rc); + + /* insert stripe to parent with dir name */ + rc = __mdd_index_insert_only(env, pobj, mdo2fid(stripe), attr->la_mode, + lname->ln_name, handle); + if (rc) + GOTO(out, rc); + + /* destroy dir obj */ + rc = mdo_ref_del(env, obj, handle); + if (rc) + GOTO(out, rc); + + rc = mdo_ref_del(env, obj, handle); + if (rc) + GOTO(out, rc); + + rc = mdo_destroy(env, obj, handle); + if (rc) + GOTO(out, rc); + + EXIT; +out: + mdd_write_unlock(env, obj); + mdd_write_unlock(env, pobj); + + return rc; +} + +/* + * shrink directory stripes to lum_stripe_count specified by lum_mds_md. + */ +int mdd_dir_layout_shrink(const struct lu_env *env, + struct md_object *md_obj, + const struct lu_buf *lmu_buf) +{ + struct mdd_device *mdd = mdo2mdd(md_obj); + struct mdd_thread_info *info = mdd_env_info(env); + struct mdd_object *obj = md2mdd_obj(md_obj); + struct mdd_object *pobj = NULL; + struct mdd_object *stripe = NULL; + struct lu_attr *attr = &info->mti_pattr; + struct lu_fid *fid = &info->mti_fid2; + struct lu_name lname = { NULL }; + struct lu_buf lmv_buf = { NULL }; + struct lmv_mds_md_v1 *lmv; + struct lmv_user_md *lmu; + struct thandle *handle; + int rc; + + ENTRY; + + rc = mdd_la_get(env, obj, attr); + if (rc) + RETURN(rc); + + if (!S_ISDIR(attr->la_mode)) + RETURN(-ENOTDIR); + + rc = mdd_stripe_get(env, obj, &lmv_buf, XATTR_NAME_LMV); + if (rc < 0) + RETURN(rc); + + lmv = lmv_buf.lb_buf; + lmu = lmu_buf->lb_buf; + + /* this was checked in MDT */ + LASSERT(le32_to_cpu(lmu->lum_stripe_count) < + le32_to_cpu(lmv->lmv_stripe_count)); + + rc = mdd_dir_iterate_stripes(env, obj, &lmv_buf, lmu_buf, NULL, + mdd_shrink_stripe_is_empty); + if (rc < 0) + GOTO(out, rc); + else if (rc != 0) + GOTO(out, rc = -ENOTEMPTY); + + /* + * if obj stripe count will be shrunk to 1, we need to convert it to a + * normal dir, which will change its fid and update parent namespace, + * get obj 
name and parent fid from linkea. + */ + if (le32_to_cpu(lmu->lum_stripe_count) < 2) { + struct linkea_data *ldata = &info->mti_link_data; + char *filename = info->mti_name; + + rc = mdd_links_read(env, obj, ldata); + if (rc) + GOTO(out, rc); + + if (ldata->ld_leh->leh_reccount > 1) + GOTO(out, rc = -EINVAL); + + linkea_first_entry(ldata); + if (!ldata->ld_lee) + GOTO(out, rc = -ENODATA); + + linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, &lname, + fid); + + /* Note: lname might miss \0 at the end */ + snprintf(filename, sizeof(info->mti_name), "%.*s", + lname.ln_namelen, lname.ln_name); + lname.ln_name = filename; + + pobj = mdd_object_find(env, mdd, fid); + if (IS_ERR(pobj)) { + rc = PTR_ERR(pobj); + pobj = NULL; + GOTO(out, rc); + } + + fid_le_to_cpu(fid, &lmv->lmv_stripe_fids[0]); + + stripe = mdd_object_find(env, mdd, fid); + if (IS_ERR(stripe)) { + mdd_object_put(env, pobj); + pobj = NULL; + GOTO(out, rc = PTR_ERR(stripe)); + } + } + + handle = mdd_trans_create(env, mdd); + if (IS_ERR(handle)) + GOTO(out, rc = PTR_ERR(handle)); + + rc = __mdd_dir_declare_layout_shrink(env, pobj, obj, stripe, attr, + &lmv_buf, lmu_buf, &lname, handle); + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_declare_changelog_store(env, mdd, CL_LAYOUT, NULL, NULL, + handle); + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_trans_start(env, mdd, handle); + if (rc) + GOTO(stop_trans, rc); + + rc = __mdd_dir_layout_shrink(env, pobj, obj, stripe, attr, &lmv_buf, + lmu_buf, &lname, handle); + if (rc) + GOTO(stop_trans, rc); + + rc = mdd_changelog_data_store_xattr(env, mdd, CL_LAYOUT, 0, obj, + XATTR_NAME_LMV, handle); + GOTO(stop_trans, rc); + +stop_trans: + rc = mdd_trans_stop(env, mdd, rc, handle); +out: + if (pobj) { + mdd_object_put(env, stripe); + mdd_object_put(env, pobj); + } + lu_buf_free(&lmv_buf); + return rc; +} + const struct md_dir_operations mdd_dir_ops = { .mdo_is_subdir = mdd_is_subdir, .mdo_lookup = mdd_lookup, diff --git a/lustre/mdd/mdd_internal.h 
b/lustre/mdd/mdd_internal.h index 5ecce16..7d50e68 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -204,6 +204,7 @@ struct mdd_thread_info { struct dt_insert_rec mti_dt_rec; struct lfsck_req_local mti_lrl; struct lu_seq_range mti_range; + union lmv_mds_md mti_lmv; }; int mdd_la_get(const struct lu_env *env, struct mdd_object *obj, @@ -270,6 +271,9 @@ int mdd_links_rename(const struct lu_env *env, struct thandle *handle, struct linkea_data *ldata, int first, int check); +int mdd_dir_layout_shrink(const struct lu_env *env, + struct md_object *md_obj, + const struct lu_buf *lmu_buf); struct mdd_thread_info *mdd_env_info(const struct lu_env *env); @@ -351,6 +355,12 @@ int mdd_declare_create_object_internal(const struct lu_env *env, struct dt_allocation_hint *hint); int mdd_stripe_get(const struct lu_env *env, struct mdd_object *obj, struct lu_buf *lmm_buf, const char *name); +int mdd_changelog_data_store_xattr(const struct lu_env *env, + struct mdd_device *mdd, + enum changelog_rec_type type, + int flags, struct mdd_object *mdd_obj, + const char *xattr_name, + struct thandle *handle); /* mdd_trans.c */ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index a623314..e2c39af 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -942,12 +942,12 @@ int mdd_changelog_data_store(const struct lu_env *env, struct mdd_device *mdd, RETURN(rc); } -static int mdd_changelog_data_store_xattr(const struct lu_env *env, - struct mdd_device *mdd, - enum changelog_rec_type type, - int flags, struct mdd_object *mdd_obj, - const char *xattr_name, - struct thandle *handle) +int mdd_changelog_data_store_xattr(const struct lu_env *env, + struct mdd_device *mdd, + enum changelog_rec_type type, + int flags, struct mdd_object *mdd_obj, + const char *xattr_name, + struct thandle *handle) { int rc; @@ -1849,6 +1849,11 @@ static int mdd_xattr_set(const struct lu_env 
*env, struct md_object *obj, RETURN(rc); } + if (strcmp(name, XATTR_NAME_LMV) == 0) { + rc = mdd_dir_layout_shrink(env, obj, buf); + RETURN(rc); + } + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0 || strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { struct posix_acl *acl; diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index b1068af..ddd73f7 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -760,6 +760,16 @@ int mdt_remote_object_lock(struct mdt_thread_info *mti, struct mdt_object *o, const struct lu_fid *fid, struct lustre_handle *lh, enum ldlm_mode mode, __u64 ibits, bool cache); +int mdt_reint_striped_lock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + __u64 ibits, + struct ldlm_enqueue_info *einfo, + bool cos_incompat); +void mdt_reint_striped_unlock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + struct ldlm_enqueue_info *einfo, int decref); enum mdt_name_flags { MNF_FIX_ANON = 1, @@ -882,6 +892,7 @@ int mdt_links_read(struct mdt_thread_info *info, struct linkea_data *ldata); int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req, struct mdt_body *repbody); +int mdt_remote_permission(struct mdt_thread_info *info); static inline struct mdt_device *mdt_dev(struct lu_device *d) { diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 821c525..cb77f69 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -244,7 +244,7 @@ static inline int mdt_remote_permission_check(struct mdt_thread_info *info) * retval = 0 remote operation is allowed. * < 0 remote operation is denied. 
*/ -static int mdt_remote_permission(struct mdt_thread_info *info) +int mdt_remote_permission(struct mdt_thread_info *info) { struct md_op_spec *spec = &info->mti_spec; struct lu_attr *attr = &info->mti_attr.ma_attr; @@ -362,12 +362,12 @@ static int mdt_lock_slaves(struct mdt_thread_info *mti, struct mdt_object *obj, policy); } -static inline int mdt_reint_striped_lock(struct mdt_thread_info *info, - struct mdt_object *o, - struct mdt_lock_handle *lh, - __u64 ibits, - struct ldlm_enqueue_info *einfo, - bool cos_incompat) +int mdt_reint_striped_lock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + __u64 ibits, + struct ldlm_enqueue_info *einfo, + bool cos_incompat) { int rc; @@ -396,10 +396,10 @@ static inline int mdt_reint_striped_lock(struct mdt_thread_info *info, return rc; } -static inline void -mdt_reint_striped_unlock(struct mdt_thread_info *info, struct mdt_object *o, - struct mdt_lock_handle *lh, - struct ldlm_enqueue_info *einfo, int decref) +void mdt_reint_striped_unlock(struct mdt_thread_info *info, + struct mdt_object *o, + struct mdt_lock_handle *lh, + struct ldlm_enqueue_info *einfo, int decref) { if (einfo->ei_cbdata) mdt_unlock_slaves(info, o, einfo, decref); diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index 29271e9..846abe2 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -305,6 +305,160 @@ out: return rc; } +/* shrink dir layout after migration */ +static int mdt_dir_layout_shrink(struct mdt_thread_info *info) +{ + const struct lu_env *env = info->mti_env; + struct mdt_device *mdt = info->mti_mdt; + struct mdt_reint_record *rr = &info->mti_rr; + struct lmv_user_md *lmu = rr->rr_eadata; + __u32 lum_stripe_count = lmu->lum_stripe_count; + struct lu_buf *buf = &info->mti_buf; + struct lmv_mds_md_v1 *lmv; + struct md_attr *ma = &info->mti_attr; + struct ldlm_enqueue_info *einfo = &info->mti_einfo[0]; + struct mdt_object *pobj = NULL; + struct mdt_object *obj; + struct 
mdt_lock_handle *lhp = NULL; + struct mdt_lock_handle *lhc; + int rc; + + ENTRY; + + rc = mdt_remote_permission(info); + if (rc) + RETURN(rc); + + /* mti_big_lmm is used to save LMV, but it may be uninitialized. */ + if (unlikely(!info->mti_big_lmm)) { + info->mti_big_lmmsize = lmv_mds_md_size(64, LMV_MAGIC); + OBD_ALLOC(info->mti_big_lmm, info->mti_big_lmmsize); + if (!info->mti_big_lmm) + RETURN(-ENOMEM); + } + + obj = mdt_object_find(env, mdt, rr->rr_fid1); + if (IS_ERR(obj)) + RETURN(PTR_ERR(obj)); + +relock: + /* lock object */ + lhc = &info->mti_lh[MDT_LH_CHILD]; + mdt_lock_reg_init(lhc, LCK_EX); + rc = mdt_reint_striped_lock(info, obj, lhc, MDS_INODELOCK_FULL, einfo, + true); + if (rc) + GOTO(put_obj, rc); + + ma->ma_lmv = info->mti_big_lmm; + ma->ma_lmv_size = info->mti_big_lmmsize; + ma->ma_valid = 0; + rc = mdt_stripe_get(info, obj, ma, XATTR_NAME_LMV); + if (rc) + GOTO(unlock_obj, rc); + + /* user may run 'lfs migrate' multiple times, so it's shrunk already */ + if (!(ma->ma_valid & MA_LMV)) + GOTO(unlock_obj, rc = -EALREADY); + + lmv = &ma->ma_lmv->lmv_md_v1; + + /* ditto */ + if (!(le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_MIGRATION)) + GOTO(unlock_obj, rc = -EALREADY); + + lum_stripe_count = lmu->lum_stripe_count; + if (!lum_stripe_count) + lum_stripe_count = cpu_to_le32(1); + + if (lmv->lmv_migrate_offset != lum_stripe_count) { + CERROR("%s: "DFID" migrate mdt count mismatch %u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_migrate_offset, lmu->lum_stripe_count); + GOTO(unlock_obj, rc = -EINVAL); + } + + if (lmv->lmv_master_mdt_index != lmu->lum_stripe_offset) { + CERROR("%s: "DFID" migrate mdt index mismatch %u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_master_mdt_index, lmu->lum_stripe_offset); + GOTO(unlock_obj, rc = -EINVAL); + } + + if (lum_stripe_count > 1 && + (lmv->lmv_hash_type & cpu_to_le32(LMV_HASH_TYPE_MASK)) != + lmu->lum_hash_type) { + CERROR("%s: "DFID" migrate mdt hash mismatch 
%u != %u\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + lmv->lmv_hash_type, lmu->lum_hash_type); + GOTO(unlock_obj, rc = -EINVAL); + } + + if (le32_to_cpu(lmu->lum_stripe_count) < 2 && !pobj) { + /* + * lock parent because dir will be shrunk to be 1 stripe, which + * should be converted to normal directory, but that will + * change dir fid and update namespace of parent. + */ + lhp = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_reg_init(lhp, LCK_PW); + + /* get parent from PFID */ + ma->ma_need |= MA_PFID; + ma->ma_valid = 0; + rc = mdt_attr_get_complex(info, obj, ma); + if (rc) + GOTO(unlock_obj, rc); + + if (!(ma->ma_valid & MA_PFID)) + GOTO(unlock_obj, rc = -ENOTSUPP); + + pobj = mdt_object_find(env, mdt, &ma->ma_pfid); + if (IS_ERR(pobj)) { + rc = PTR_ERR(pobj); + pobj = NULL; + GOTO(unlock_obj, rc); + } + + mdt_reint_striped_unlock(info, obj, lhc, einfo, 1); + + if (mdt_object_remote(pobj)) { + rc = mdt_remote_object_lock(info, pobj, rr->rr_fid1, + &lhp->mlh_rreg_lh, LCK_EX, + MDS_INODELOCK_LOOKUP, + false); + if (rc != ELDLM_OK) { + mdt_object_put(env, pobj); + GOTO(put_obj, rc); + } + mdt_object_unlock(info, NULL, lhp, 1); + } + + rc = mdt_reint_object_lock(info, pobj, lhp, + MDS_INODELOCK_UPDATE, true); + if (rc) { + mdt_object_put(env, pobj); + GOTO(put_obj, rc); + } + + goto relock; + } + + buf->lb_buf = rr->rr_eadata; + buf->lb_len = rr->rr_eadatalen; + rc = mo_xattr_set(env, mdt_object_child(obj), buf, XATTR_NAME_LMV, 0); + GOTO(unlock_obj, rc); + +unlock_obj: + mdt_reint_striped_unlock(info, obj, lhc, einfo, rc); + if (pobj) + mdt_object_unlock_put(info, pobj, lhp, rc); +put_obj: + mdt_object_put(env, obj); + + return rc; +} + int mdt_reint_setxattr(struct mdt_thread_info *info, struct mdt_lock_handle *unused) { @@ -344,6 +498,17 @@ int mdt_reint_setxattr(struct mdt_thread_info *info, } else if (strncmp(xattr_name, XATTR_TRUSTED_PREFIX, sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0) { + /* setxattr(LMV) with lum is used to shrink dir layout */ + if 
(strcmp(xattr_name, XATTR_NAME_LMV) == 0) {
+			__u32 *magic = rr->rr_eadata;
+
+			/*
+			 * The value is dereferenced here and then read as a
+			 * whole lmv_user_md by mdt_dir_layout_shrink(), so
+			 * only take the shrink path when it is present and
+			 * large enough; anything else falls through to the
+			 * regular trusted.* handling below.
+			 */
+			if (magic && rr->rr_eadatalen >= sizeof(*magic) &&
+			    (le32_to_cpu(*magic) == LMV_USER_MAGIC ||
+			     le32_to_cpu(*magic) == LMV_USER_MAGIC_SPECIFIC)) {
+				if (rr->rr_eadatalen <
+				    sizeof(struct lmv_user_md))
+					GOTO(out, rc = -EINVAL);
+
+				rc = mdt_dir_layout_shrink(info);
+				GOTO(out, rc);
+			}
+		}
+
 		if (!md_capable(mdt_ucred(info), CFS_CAP_SYS_ADMIN))
 			GOTO(out, rc = -EPERM);
 
diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c
index a9cb7f1..f37dca0 100644
--- a/lustre/utils/liblustreapi.c
+++ b/lustre/utils/liblustreapi.c
@@ -4476,9 +4476,45 @@ out:
 	return ret;
 }
 
+/* dir migration finished, shrink its stripes */
+static int cb_migrate_mdt_fini(char *path, DIR *parent, DIR **dirp, void *data,
+			       struct dirent64 *de)
+{
+	struct find_param *param = data;
+	struct lmv_user_md *lmu = param->fp_lmv_md;
+	int lmulen = lmv_user_md_size(lmu->lum_stripe_count, lmu->lum_magic);
+	int ret = 0;
+
+	if (de && de->d_type != DT_DIR)
+		goto out;
+
+	if (*dirp) {
+		/*
+		 * close it before setxattr because the latter may destroy the
+		 * original object, and cause close fail.
+		 */
+		ret = closedir(*dirp);
+		*dirp = NULL;
+		if (ret)
+			goto out;
+	}
+
+	ret = setxattr(path, XATTR_NAME_LMV, lmu, lmulen, 0);
+	/*
+	 * setxattr(2) returns -1 and reports the reason in errno; EALREADY
+	 * means the layout was already shrunk, which is not an error.
+	 */
+	if (ret < 0)
+		ret = errno == EALREADY ? 0 : -errno;
+out:
+	cb_common_fini(path, parent, dirp, data, de);
+	return ret;
+}
+
 int llapi_migrate_mdt(char *path, struct find_param *param)
 {
-	return param_callback(path, cb_migrate_mdt_init, cb_common_fini, param);
+	return param_callback(path, cb_migrate_mdt_init, cb_migrate_mdt_fini,
+			      param);
 }
 
 int llapi_mv(char *path, struct find_param *param)
-- 
1.8.3.1