From 55ca00c3d1cd8635258ccbda27ee3f0f9b2966a8 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 18 Apr 2019 18:01:47 +0800 Subject: [PATCH] LU-11213 ptlrpc: intent_getattr fetches default LMV intent_getattr fetches the default LMV and caches it on the client, where it is used for subdirectory creation. * Add RMF_DEFAULT_MDT_MD in intent_getattr reply. * Save default LMV in ll_inode_info->lli_default_lsm_md, and replace lli_def_stripe_offset with it. * Take a LOOKUP lock when setting the default LMV, so that clients update their cached default LMV. * Improve mdt_object_striped() to read the LMV xattr from the bottom device, which avoids reading stripe FIDs. Signed-off-by: Lai Siyao Change-Id: Idb369db2c514a9c5108390f70d9284b3a87d26db Reviewed-on: https://review.whamcloud.com/34802 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin --- lustre/include/lustre_lmv.h | 13 +++++-- lustre/include/lustre_req_layout.h | 1 + lustre/include/md_object.h | 2 + lustre/include/obd.h | 14 +++++-- lustre/llite/llite_internal.h | 19 ++-------- lustre/llite/llite_lib.c | 75 ++++++++++++++++++++++++++++++++++---- lustre/llite/namei.c | 60 +++++++++++++++++++++--------- lustre/lmv/lmv_obd.c | 42 ++++++++++++++++----- lustre/lod/lod_object.c | 7 ++++ lustre/mdc/mdc_locks.c | 21 ++++++----- lustre/mdc/mdc_request.c | 44 ++++++++++++++++------ lustre/mdt/mdt_handler.c | 46 ++++++++++++++++++----- lustre/mdt/mdt_reint.c | 61 +++++++++++++++++++++++-------- lustre/ptlrpc/layout.c | 8 +++- 14 files changed, 312 insertions(+), 101 deletions(-) diff --git a/lustre/include/lustre_lmv.h b/lustre/include/lustre_lmv.h index 3410b92..e683e6c 100644 --- a/lustre/include/lustre_lmv.h +++ b/lustre/include/lustre_lmv.h @@ -74,10 +74,12 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2) lsm2->lsm_md_pool_name) != 0) return false; - for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) { - if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid, - 
&lsm2->lsm_md_oinfo[idx].lmo_fid)) - return false; + if (lsm1->lsm_md_magic == LMV_MAGIC_V1) { + for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) { + if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid, + &lsm2->lsm_md_oinfo[idx].lmo_fid)) + return false; + } } return true; @@ -94,6 +96,9 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm) lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset, lsm->lsm_md_migrate_hash, lsm->lsm_md_pool_name); + if (lsm->lsm_md_magic != LMV_MAGIC_V1) + return; + for (i = 0; i < lsm->lsm_md_stripe_count; i++) CDEBUG(mask, "stripe[%d] "DFID"\n", i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid)); diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h index 5dbfab9..dd75309 100644 --- a/lustre/include/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -269,6 +269,7 @@ extern struct req_msg_field RMF_DLM_GL_DESC; extern struct req_msg_field RMF_LDLM_INTENT; extern struct req_msg_field RMF_LAYOUT_INTENT; extern struct req_msg_field RMF_MDT_MD; +extern struct req_msg_field RMF_DEFAULT_MDT_MD; extern struct req_msg_field RMF_REC_REINT; extern struct req_msg_field RMF_EADATA; extern struct req_msg_field RMF_EAVALS; diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index fb8c67f..d245dd1 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -130,9 +130,11 @@ struct md_attr { struct md_som ma_som; struct lov_mds_md *ma_lmm; union lmv_mds_md *ma_lmv; + struct lmv_user_md *ma_default_lmv; void *ma_acl; int ma_lmm_size; int ma_lmv_size; + int ma_default_lmv_size; int ma_acl_size; }; diff --git a/lustre/include/obd.h b/lustre/include/obd.h index b25c9d4..fdb2ecd 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -838,6 +838,14 @@ enum md_cli_flags { CLI_MIGRATE = 1 << 4, }; +enum md_op_code { + LUSTRE_OPC_MKDIR = 0, + LUSTRE_OPC_SYMLINK = 1, + LUSTRE_OPC_MKNOD = 2, + LUSTRE_OPC_CREATE = 3, + LUSTRE_OPC_ANY = 5, +}; + /** * GETXATTR is 
not included as only a couple of fields in the reply body * is filled, but not FID which is needed for common intent handling in @@ -855,6 +863,7 @@ struct md_op_data { struct lu_fid op_fid4; /* to the operation locks. */ u32 op_mds; /* what mds server open will go to */ __u32 op_mode; + enum md_op_code op_code; struct lustre_handle op_open_handle; s64 op_mod_time; const char *op_name; @@ -863,6 +872,7 @@ struct md_op_data { struct rw_semaphore *op_mea2_sem; struct lmv_stripe_md *op_mea1; struct lmv_stripe_md *op_mea2; + struct lmv_stripe_md *op_default_mea1; /* default LMV */ __u32 op_suppgids[2]; __u32 op_fsuid; __u32 op_fsgid; @@ -896,9 +906,6 @@ struct md_op_data { void *op_file_secctx; __u32 op_file_secctx_size; - /* default stripe offset */ - __u32 op_default_stripe_offset; - __u32 op_projid; /* Used by readdir */ @@ -1041,6 +1048,7 @@ struct lustre_md { struct lmv_stripe_md *lmv; struct lmv_foreign_md *lfm; }; + struct lmv_stripe_md *default_lmv; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *posix_acl; #endif diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 1315c5f..e824afb 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -169,12 +169,8 @@ struct ll_inode_info { struct rw_semaphore lli_lsm_sem; /* directory stripe information */ struct lmv_stripe_md *lli_lsm_md; - /* default directory stripe offset. This is extracted - * from the "dmv" xattr in order to decide which MDT to - * create a subdirectory on. The MDS itself fetches - * "dmv" and gets the rest of the default layout itself - * (count, hash, etc). 
*/ - __u32 lli_def_stripe_offset; + /* directory default LMV */ + struct lmv_stripe_md *lli_default_lsm_md; }; /* for non-directory */ @@ -984,19 +980,12 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize); -enum { - LUSTRE_OPC_MKDIR = 0, - LUSTRE_OPC_SYMLINK = 1, - LUSTRE_OPC_MKNOD = 2, - LUSTRE_OPC_CREATE = 3, - LUSTRE_OPC_ANY = 5, -}; - void ll_unlock_md_op_lsm(struct md_op_data *op_data); struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, struct inode *i2, const char *name, size_t namelen, - __u32 mode, __u32 opc, void *data); + __u32 mode, enum md_op_code opc, + void *data); void ll_finish_md_op_data(struct md_op_data *op_data); int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg); void ll_compute_rootsquash_state(struct ll_sb_info *sbi); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 91eb088..8e6512b 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -965,7 +965,6 @@ void ll_lli_init(struct ll_inode_info *lli) spin_lock_init(&lli->lli_sa_lock); lli->lli_opendir_pid = 0; lli->lli_sa_enabled = 0; - lli->lli_def_stripe_offset = -1; init_rwsem(&lli->lli_lsm_sem); } else { mutex_init(&lli->lli_size_mutex); @@ -1291,10 +1290,15 @@ void ll_dir_clear_lsm_md(struct inode *inode) LASSERT(S_ISDIR(inode->i_mode)); - if (lli->lli_lsm_md != NULL) { + if (lli->lli_lsm_md) { lmv_free_memmd(lli->lli_lsm_md); lli->lli_lsm_md = NULL; } + + if (lli->lli_default_lsm_md) { + lmv_free_memmd(lli->lli_default_lsm_md); + lli->lli_default_lsm_md = NULL; + } } static struct inode *ll_iget_anon_dir(struct super_block *sb, @@ -1396,6 +1400,46 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) return 0; } +static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md) +{ + struct ll_inode_info 
*lli = ll_i2info(inode); + + if (!md->default_lmv) { + /* clear default lsm */ + if (lli->lli_default_lsm_md) { + down_write(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md) { + lmv_free_memmd(lli->lli_default_lsm_md); + lli->lli_default_lsm_md = NULL; + } + up_write(&lli->lli_lsm_sem); + } + } else if (lli->lli_default_lsm_md) { + /* update default lsm if it changes */ + down_read(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md && + !lsm_md_eq(lli->lli_default_lsm_md, md->default_lmv)) { + up_read(&lli->lli_lsm_sem); + down_write(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md) + lmv_free_memmd(lli->lli_default_lsm_md); + lli->lli_default_lsm_md = md->default_lmv; + lsm_md_dump(D_INODE, md->default_lmv); + md->default_lmv = NULL; + up_write(&lli->lli_lsm_sem); + } else { + up_read(&lli->lli_lsm_sem); + } + } else { + /* init default lsm */ + down_write(&lli->lli_lsm_sem); + lli->lli_default_lsm_md = md->default_lmv; + lsm_md_dump(D_INODE, md->default_lmv); + md->default_lmv = NULL; + up_write(&lli->lli_lsm_sem); + } +} + static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); @@ -1408,6 +1452,10 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md, PFID(ll_inode2fid(inode))); + /* update default LMV */ + if (md->default_lmv) + ll_update_default_lsm_md(inode, md); + /* * no striped information from request, lustre_md from req does not * include stripeEA, see ll_md_setattr() @@ -2422,7 +2470,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, { struct ll_sb_info *sbi = NULL; struct lustre_md md = { NULL }; + bool default_lmv_deleted = false; int rc; + ENTRY; LASSERT(*inode || sb); @@ -2432,6 +2482,15 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, if (rc != 0) GOTO(out, rc); + /* + * clear default_lmv only if intent_getattr reply doesn't contain it. 
+ * but it needs to be done after iget, check this early because + * ll_update_lsm_md() may change md. + */ + if (it && (it->it_op & (IT_LOOKUP | IT_GETATTR)) && + S_ISDIR(md.body->mbo_mode) && !md.default_lmv) + default_lmv_deleted = true; + if (*inode) { rc = ll_update_inode(*inode, &md); if (rc != 0) @@ -2495,6 +2554,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, LDLM_LOCK_PUT(lock); } + if (default_lmv_deleted) + ll_update_default_lsm_md(*inode, &md); + GOTO(out, rc = 0); out: @@ -2571,7 +2633,8 @@ void ll_unlock_md_op_lsm(struct md_op_data *op_data) struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, struct inode *i2, const char *name, size_t namelen, - __u32 mode, __u32 opc, void *data) + __u32 mode, enum md_op_code opc, + void *data) { LASSERT(i1 != NULL); @@ -2595,15 +2658,13 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, ll_i2gids(op_data->op_suppgids, i1, i2); op_data->op_fid1 = *ll_inode2fid(i1); - op_data->op_default_stripe_offset = -1; + op_data->op_code = opc; if (S_ISDIR(i1->i_mode)) { down_read(&ll_i2info(i1)->lli_lsm_sem); op_data->op_mea1_sem = &ll_i2info(i1)->lli_lsm_sem; op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md; - if (opc == LUSTRE_OPC_MKDIR) - op_data->op_default_stripe_offset = - ll_i2info(i1)->lli_def_stripe_offset; + op_data->op_default_mea1 = ll_i2info(i1)->lli_default_lsm_md; } if (i2) { diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 80bfd7b..27e7bff 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -256,8 +256,6 @@ void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel) } if (bits & MDS_INODELOCK_XATTR) { - if (S_ISDIR(inode->i_mode)) - ll_i2info(inode)->lli_def_stripe_offset = -1; ll_xattr_cache_destroy(inode); bits &= ~MDS_INODELOCK_XATTR; } @@ -1271,13 +1269,11 @@ again: from_kuid(&init_user_ns, current_fsuid()), from_kgid(&init_user_ns, current_fsgid()), cfs_curproc_cap_pack(), rdev, &request); - if (err < 0 && 
err != -EREMOTE) - GOTO(err_exit, err); - - /* If the client doesn't know where to create a subdirectory (or - * in case of a race that sends the RPC to the wrong MDS), the - * MDS will return -EREMOTE and the client will fetch the layout - * of the directory, then create the directory on the right MDT. */ +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 14, 58, 0) + /* + * server < 2.12.58 doesn't pack default LMV in intent_getattr reply, + * fetch default LMV here. + */ if (unlikely(err == -EREMOTE)) { struct ll_inode_info *lli = ll_i2info(dir); struct lmv_user_md *lum; @@ -1289,24 +1285,54 @@ again: err2 = ll_dir_getstripe(dir, (void **)&lum, &lumsize, &request, OBD_MD_DEFAULT_MEA); + ll_finish_md_op_data(op_data); + op_data = NULL; if (err2 == 0) { - /* Update stripe_offset and retry */ - lli->lli_def_stripe_offset = lum->lum_stripe_offset; - } else if (err2 == -ENODATA && - lli->lli_def_stripe_offset != -1) { - /* If there are no default stripe EA on the MDT, but the + struct lustre_md md = { NULL }; + + md.body = req_capsule_server_get(&request->rq_pill, + &RMF_MDT_BODY); + if (!md.body) + GOTO(err_exit, err = -EPROTO); + + OBD_ALLOC_PTR(md.default_lmv); + if (!md.default_lmv) + GOTO(err_exit, err = -ENOMEM); + + md.default_lmv->lsm_md_magic = lum->lum_magic; + md.default_lmv->lsm_md_stripe_count = + lum->lum_stripe_count; + md.default_lmv->lsm_md_master_mdt_index = + lum->lum_stripe_offset; + md.default_lmv->lsm_md_hash_type = lum->lum_hash_type; + + err = ll_update_inode(dir, &md); + md_free_lustre_md(sbi->ll_md_exp, &md); + if (err) + GOTO(err_exit, err); + } else if (err2 == -ENODATA && lli->lli_default_lsm_md) { + /* + * If there are no default stripe EA on the MDT, but the * client has default stripe, then it probably means - * default stripe EA has just been deleted. */ - lli->lli_def_stripe_offset = -1; + * default stripe EA has just been deleted. 
+ */ + down_write(&lli->lli_lsm_sem); + if (lli->lli_default_lsm_md) + OBD_FREE_PTR(lli->lli_default_lsm_md); + lli->lli_default_lsm_md = NULL; + up_write(&lli->lli_lsm_sem); } else { GOTO(err_exit, err); } ptlrpc_req_finished(request); request = NULL; - ll_finish_md_op_data(op_data); goto again; } +#endif + + if (err < 0) + GOTO(err_exit, err); ll_update_times(request, dir); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 8278969..5110b22 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1187,13 +1187,12 @@ static int lmv_placement_policy(struct obd_device *obd, le32_to_cpu(lum->lum_magic != LMV_MAGIC_FOREIGN) && le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) { *mds = le32_to_cpu(lum->lum_stripe_offset); - } else if (op_data->op_default_stripe_offset != (__u32)-1) { - *mds = op_data->op_default_stripe_offset; + } else if (op_data->op_code == LUSTRE_OPC_MKDIR && + op_data->op_default_mea1 && + op_data->op_default_mea1->lsm_md_master_mdt_index != + (__u32)-1) { + *mds = op_data->op_default_mea1->lsm_md_master_mdt_index; op_data->op_mds = *mds; - /* Correct the stripe offset in lum */ - if (lum != NULL && - le32_to_cpu(lum->lum_magic != LMV_MAGIC_FOREIGN)) - lum->lum_stripe_offset = cpu_to_le32(*mds); } else { *mds = op_data->op_mds; } @@ -3029,6 +3028,18 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, RETURN(rc); } +static inline int lmv_unpack_user_md(struct obd_export *exp, + struct lmv_stripe_md *lsm, + const struct lmv_user_md *lmu) +{ + lsm->lsm_md_magic = le32_to_cpu(lmu->lum_magic); + lsm->lsm_md_stripe_count = le32_to_cpu(lmu->lum_stripe_count); + lsm->lsm_md_master_mdt_index = le32_to_cpu(lmu->lum_stripe_offset); + lsm->lsm_md_hash_type = le32_to_cpu(lmu->lum_hash_type); + + return 0; +} + static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, const union lmv_mds_md *lmm, size_t lmm_size) { @@ -3055,11 +3066,15 @@ static int lmv_unpackmd(struct obd_export *exp, struct 
lmv_stripe_md **lsmp, RETURN(0); } - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - if (lsm->lsm_md_oinfo[i].lmo_root) - iput(lsm->lsm_md_oinfo[i].lmo_root); + if (lsm->lsm_md_magic == LMV_MAGIC) { + for (i = 0; i < lsm->lsm_md_stripe_count; i++) { + if (lsm->lsm_md_oinfo[i].lmo_root) + iput(lsm->lsm_md_oinfo[i].lmo_root); + } + lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count); + } else { + lsm_size = lmv_stripe_md_size(0); } - lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count); OBD_FREE(lsm, lsm_size); *lsmp = NULL; RETURN(0); @@ -3117,6 +3132,9 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, case LMV_MAGIC_V1: rc = lmv_unpack_md_v1(exp, lsm, &lmm->lmv_md_v1); break; + case LMV_USER_MAGIC: + rc = lmv_unpack_user_md(exp, lsm, &lmm->lmv_user_md); + break; default: CERROR("%s: unrecognized magic %x\n", exp->exp_obd->obd_name, le32_to_cpu(lmm->lmv_magic)); @@ -3243,6 +3261,10 @@ int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) struct lmv_tgt_desc *tgt = lmv->tgts[0]; ENTRY; + if (md->default_lmv) { + lmv_free_memmd(md->default_lmv); + md->default_lmv = NULL; + } if (md->lmv != NULL) { lmv_free_memmd(md->lmv); md->lmv = NULL; diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index c781ed5..0304a05 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -4831,6 +4831,13 @@ static void lod_striping_from_default(struct lod_object *lo, if (lo->ldo_dir_hash_type == 0) lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type; + /* + * "space" hash type can only be set in default LMV, child use + * FNV_1A_64 in case it's striped. 
+ */ + if (lo->ldo_dir_hash_type == LMV_HASH_TYPE_SPACE) + lo->ldo_dir_hash_type = LMV_HASH_TYPE_FNV_1A_64; + CDEBUG(D_LAYOUT, "striping from default dir: count:%hu, " "offset:%u, hash_type:%u\n", lo->ldo_dir_stripe_count, lo->ldo_dir_stripe_offset, diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index f8dc200..c946fa2 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -508,15 +508,16 @@ static struct ptlrpc_request * mdc_intent_getattr_pack(struct obd_export *exp, struct lookup_intent *it, struct md_op_data *op_data, __u32 acl_bufsize) { - struct ptlrpc_request *req; - struct obd_device *obddev = class_exp2obd(exp); - u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | - OBD_MD_FLMODEASIZE | OBD_MD_FLDIREA | - OBD_MD_MEA | OBD_MD_FLACL; - struct ldlm_intent *lit; - int rc; - __u32 easize; - bool have_secctx = false; + struct ptlrpc_request *req; + struct obd_device *obddev = class_exp2obd(exp); + u64 valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE | + OBD_MD_FLDIREA | OBD_MD_MEA | OBD_MD_FLACL | + OBD_MD_DEFAULT_MEA; + struct ldlm_intent *lit; + __u32 easize; + bool have_secctx = false; + int rc; + ENTRY; req = ptlrpc_request_alloc(class_exp2cliimp(exp), @@ -556,6 +557,8 @@ mdc_intent_getattr_pack(struct obd_export *exp, struct lookup_intent *it, req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, easize); req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, acl_bufsize); + req_capsule_set_size(&req->rq_pill, &RMF_DEFAULT_MDT_MD, RCL_SERVER, + sizeof(struct lmv_user_md)); if (have_secctx) { char *secctx_name; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index bc5ac2f..291bb03 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -584,14 +584,14 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - lmv_size = md->body->mbo_eadatasize; - if (lmv_size == 0) { - CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, " - "but eadatasize 
0\n"); - RETURN(-EPROTO); - } - if (md->body->mbo_valid & OBD_MD_MEA) { + lmv_size = md->body->mbo_eadatasize; + if (lmv_size == 0) { + CDEBUG(D_INFO, "OBD_MD_FLDIREA is set, " + "but eadatasize 0\n"); + RETURN(-EPROTO); + } + lmv = req_capsule_server_sized_get(pill, &RMF_MDT_MD, lmv_size); if (lmv == NULL) @@ -601,21 +601,43 @@ int mdc_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, if (rc < 0) GOTO(out, rc); - if (rc < (typeof(rc))sizeof(*md->lmv)) { - struct lmv_foreign_md *lfm = md->lfm; + if (rc < (int)sizeof(*md->lmv)) { + struct lmv_foreign_md *lfm = md->lfm; /* short (< sizeof(struct lmv_stripe_md)) * foreign LMV case */ if (lfm->lfm_magic != LMV_MAGIC_FOREIGN) { CDEBUG(D_INFO, - "size too small: rc < sizeof(*md->lmv) (%d < %d)\n", + "lmv size too small: %d < %d\n", rc, (int)sizeof(*md->lmv)); GOTO(out, rc = -EPROTO); } } } - } + + /* since 2.12.58 intent_getattr fetches default LMV */ + if (md->body->mbo_valid & OBD_MD_DEFAULT_MEA) { + lmv_size = sizeof(struct lmv_user_md); + lmv = req_capsule_server_sized_get(pill, + &RMF_DEFAULT_MDT_MD, + lmv_size); + if (!lmv) + GOTO(out, rc = -EPROTO); + + rc = md_unpackmd(md_exp, &md->default_lmv, lmv, + lmv_size); + if (rc < 0) + GOTO(out, rc); + + if (rc < (int)sizeof(*md->default_lmv)) { + CDEBUG(D_INFO, + "default lmv size too small: %d < %d\n", + rc, (int)sizeof(*md->default_lmv)); + GOTO(out, rc = -EPROTO); + } + } + } rc = 0; if (md->body->mbo_valid & OBD_MD_FLACL) { diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index e6855bf..3717336 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -933,8 +933,8 @@ int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o, buf->lb_len = ma->ma_lmv_size; LASSERT(!(ma->ma_valid & MA_LMV)); } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) { - buf->lb_buf = ma->ma_lmv; - buf->lb_len = ma->ma_lmv_size; + buf->lb_buf = ma->ma_default_lmv; + buf->lb_len = ma->ma_default_lmv_size; LASSERT(!(ma->ma_valid & 
MA_LMV_DEF)); } else { return -EINVAL; @@ -967,7 +967,7 @@ got: ma->ma_lmv_size = rc; ma->ma_valid |= MA_LMV; } else if (strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) { - ma->ma_lmv_size = rc; + ma->ma_default_lmv_size = rc; ma->ma_valid |= MA_LMV_DEF; } @@ -1188,18 +1188,41 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, req->rq_export->exp_client_uuid.uuid); } - /* If it is dir object and client require MEA, then we got MEA */ + /* from 2.12.58 intent_getattr pack default LMV in reply */ if (S_ISDIR(lu_object_attr(&next->mo_lu)) && - (reqbody->mbo_valid & (OBD_MD_MEA | OBD_MD_DEFAULT_MEA))) { + ((reqbody->mbo_valid & (OBD_MD_MEA | OBD_MD_DEFAULT_MEA)) == + (OBD_MD_MEA | OBD_MD_DEFAULT_MEA)) && + req_capsule_has_field(&req->rq_pill, &RMF_DEFAULT_MDT_MD, + RCL_SERVER)) { + ma->ma_lmv = buffer->lb_buf; + ma->ma_lmv_size = buffer->lb_len; + ma->ma_default_lmv = req_capsule_server_get(pill, + &RMF_DEFAULT_MDT_MD); + ma->ma_default_lmv_size = req_capsule_get_size(pill, + &RMF_DEFAULT_MDT_MD, + RCL_SERVER); + ma->ma_need = MA_INODE; + if (ma->ma_lmv_size > 0) + ma->ma_need |= MA_LMV; + if (ma->ma_default_lmv_size > 0) + ma->ma_need |= MA_LMV_DEF; + } else if (S_ISDIR(lu_object_attr(&next->mo_lu)) && + (reqbody->mbo_valid & (OBD_MD_MEA | OBD_MD_DEFAULT_MEA))) { + /* If it is dir and client require MEA, then we got MEA */ /* Assumption: MDT_MD size is enough for lmv size. 
*/ ma->ma_lmv = buffer->lb_buf; ma->ma_lmv_size = buffer->lb_len; ma->ma_need = MA_INODE; if (ma->ma_lmv_size > 0) { - if (reqbody->mbo_valid & OBD_MD_MEA) + if (reqbody->mbo_valid & OBD_MD_MEA) { ma->ma_need |= MA_LMV; - else if (reqbody->mbo_valid & OBD_MD_DEFAULT_MEA) + } else if (reqbody->mbo_valid & OBD_MD_DEFAULT_MEA) { ma->ma_need |= MA_LMV_DEF; + ma->ma_default_lmv = buffer->lb_buf; + ma->ma_lmv = NULL; + ma->ma_default_lmv_size = buffer->lb_len; + ma->ma_lmv_size = 0; + } } } else { ma->ma_lmm = buffer->lb_buf; @@ -1271,8 +1294,13 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, if (!mdt_is_striped_client(req->rq_export)) RETURN(-ENOTSUPP); LASSERT(S_ISDIR(la->la_mode)); - mdt_dump_lmv(D_INFO, ma->ma_lmv); - repbody->mbo_eadatasize = ma->ma_lmv_size; + /* + * when ll_dir_getstripe() gets default LMV, it + * checks mbo_eadatasize. + */ + if (!(ma->ma_valid & MA_LMV)) + repbody->mbo_eadatasize = + ma->ma_default_lmv_size; repbody->mbo_valid |= (OBD_MD_FLDIREA | OBD_MD_DEFAULT_MEA); } diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 2971d6e..97672f3 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -250,17 +250,25 @@ static int mdt_unlock_slaves(struct mdt_thread_info *mti, static inline int mdt_object_striped(struct mdt_thread_info *mti, struct mdt_object *obj) { + struct lu_device *bottom_dev; + struct lu_object *bottom_obj; int rc; if (!S_ISDIR(obj->mot_header.loh_attr)) return 0; - rc = mo_xattr_get(mti->mti_env, mdt_object_child(obj), &LU_BUF_NULL, + /* getxattr from bottom obj to avoid reading in shard FIDs */ + bottom_dev = dt2lu_dev(mti->mti_mdt->mdt_bottom); + bottom_obj = lu_object_find_slice(mti->mti_env, bottom_dev, + mdt_object_fid(obj), NULL); + if (IS_ERR(bottom_obj)) + return PTR_ERR(bottom_obj); + + rc = dt_xattr_get(mti->mti_env, lu2dt(bottom_obj), &LU_BUF_NULL, XATTR_NAME_LMV); - if (rc <= 0) - return rc == -ENODATA ? 
0 : rc; + lu_object_put(mti->mti_env, bottom_obj); - return 1; + return (rc > 0) ? 1 : (rc == -ENODATA) ? 0 : rc; } /** @@ -728,6 +736,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, struct lu_buf *buf = &info->mti_buf; struct lu_ucred *uc = mdt_ucred(info); struct mdt_lock_handle *lh; + const char *name; + __u64 lockpart = MDS_INODELOCK_XATTR; /* reject if either remote or striped dir is disabled */ if (ma->ma_valid & MA_LMV) { @@ -741,27 +751,48 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, GOTO(out_put, rc = -EPERM); } + if (!S_ISDIR(lu_object_attr(&mo->mot_obj))) + GOTO(out_put, rc = -ENOTDIR); + if (ma->ma_attr.la_valid != 0) GOTO(out_put, rc = -EPROTO); - lh = &info->mti_lh[MDT_LH_PARENT]; - mdt_lock_reg_init(lh, LCK_PW); - - rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR); - if (rc != 0) - GOTO(out_put, rc); - if (ma->ma_valid & MA_LOV) { buf->lb_buf = ma->ma_lmm; buf->lb_len = ma->ma_lmm_size; + name = XATTR_NAME_LOV; } else { - buf->lb_buf = ma->ma_lmv; + struct lmv_user_md *lmu = &ma->ma_lmv->lmv_user_md; + + buf->lb_buf = lmu; buf->lb_len = ma->ma_lmv_size; + + if (le32_to_cpu(lmu->lum_hash_type) == + LMV_HASH_TYPE_SPACE) { + /* + * only allow setting "space" hash type for + * plain directory. + */ + rc = mdt_object_striped(info, mo); + if (rc) + GOTO(out_put, + rc = (rc == 1) ? -EPERM : rc); + } + + name = XATTR_NAME_DEFAULT_LMV; + /* force client to update dir default layout */ + lockpart |= MDS_INODELOCK_LOOKUP; } + + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_reg_init(lh, LCK_PW); + + rc = mdt_object_lock(info, mo, lh, lockpart); + if (rc != 0) + GOTO(out_put, rc); + rc = mo_xattr_set(info->mti_env, mdt_object_child(mo), buf, - (ma->ma_valid & MA_LOV) ? 
- XATTR_NAME_LOV : XATTR_NAME_DEFAULT_LMV, - 0); + name, 0); mdt_object_unlock(info, mo, lh, rc); if (rc) diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index b578ee8..7bada20 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -465,7 +465,8 @@ static const struct req_msg_field *ldlm_intent_getattr_server[] = { &RMF_MDT_MD, &RMF_ACL, &RMF_CAPA1, - &RMF_FILE_SECCTX + &RMF_FILE_SECCTX, + &RMF_DEFAULT_MDT_MD }; static const struct req_msg_field *ldlm_intent_create_client[] = { @@ -1080,6 +1081,11 @@ struct req_msg_field RMF_MDT_MD = DEFINE_MSGF("mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, NULL); EXPORT_SYMBOL(RMF_MDT_MD); +struct req_msg_field RMF_DEFAULT_MDT_MD = + DEFINE_MSGF("default_mdt_md", RMF_F_NO_SIZE_CHECK, MIN_MD_SIZE, NULL, + NULL); +EXPORT_SYMBOL(RMF_DEFAULT_MDT_MD); + struct req_msg_field RMF_REC_REINT = DEFINE_MSGF("rec_reint", 0, sizeof(struct mdt_rec_reint), lustre_swab_mdt_rec_reint, NULL); -- 1.8.3.1