From e7d0868f3ca655baa02f054ac73fd8314c442e53 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sat, 12 Apr 2014 19:36:23 -0400 Subject: [PATCH] LU-4008 mdt: Shrink default LOVEA reply buffers Instead of allocating maximum possible LOVEA for intent requests, allocate a sane default to fit up to 100 stripes; should the size be bigger, the buffer reallocation code will kick in. Additionally, we stopped sending unlink cookies or EA data to the clients from unlinks/renames quite a while ago, so we no longer need to allocate reply buffers for them in mdt_close() and other such places. Also clean up unused fields related to these buffers in the md_attr structure. Lustre-commit: 00260466f9c84f21e6db55d5dd6e87e8a9906438 Lustre-change: http://review.whamcloud.com/9322 Change-Id: I06b9f5665ec2616cd9a5d483872369d710400d22 Signed-off-by: Oleg Drokin Signed-off-by: James Simmons Reviewed-on: http://review.whamcloud.com/9942 Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Mike Pershin --- lustre/include/lustre/lustre_idl.h | 6 ++++ lustre/include/md_object.h | 2 -- lustre/mdt/mdt_handler.c | 69 +++++++++++++++++++++++++++++++++----- lustre/mdt/mdt_internal.h | 1 - lustre/mdt/mdt_lib.c | 26 +++++++------- lustre/mdt/mdt_open.c | 14 +++----- 6 files changed, 82 insertions(+), 36 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index c433fca..9c38c41 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1639,6 +1639,12 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) +/* This is the default MDT reply size allocated, should the striping be bigger, + * it will be reallocated in mdt_fix_reply. 
+ * 100 stripes is a bit less than 2.5k of data */ +#define DEF_REP_MD_SIZE (sizeof(struct lov_mds_md) + \ + 100 * sizeof(struct lov_ost_data)) + #define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" #define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" #define XATTR_USER_PREFIX "user." diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 5f3f8f6..b28c91b 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -138,13 +138,11 @@ struct md_attr { struct lov_mds_md *ma_lmm; struct lmv_stripe_md *ma_lmv; void *ma_acl; - struct llog_cookie *ma_cookie; struct lustre_capa *ma_capa; struct md_som_data *ma_som; int ma_lmm_size; int ma_lmv_size; int ma_acl_size; - int ma_cookie_size; __u16 ma_layout_gen; }; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 9aae1eb..acdc067 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -471,6 +471,32 @@ void mdt_client_compatibility(struct mdt_thread_info *info) EXIT; } +int mdt_attr_get_eabuf_size(struct mdt_thread_info *info, struct mdt_object *o) +{ + const struct lu_env *env = info->mti_env; + int rc, rc2; + + rc = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL, + XATTR_NAME_LOV); + + if (rc == -ENODATA) + rc = 0; + + if (rc < 0) + goto out; + + /* Is it a directory? 
Let's check for the LMV as well */ + if (S_ISDIR(lu_object_attr(&mdt_object_child(o)->mo_lu))) { + rc2 = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL, + XATTR_NAME_LMV); + if ((rc2 < 0 && rc2 != -ENODATA) || (rc2 > rc)) + rc = rc2; + } + +out: + return rc; +} + static int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o, char *name) { @@ -723,13 +749,15 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, GOTO(out, rc = 0); } - buffer->lb_len = reqbody->eadatasize; - if (buffer->lb_len > 0) { + if (reqbody->eadatasize > 0) { buffer->lb_buf = req_capsule_server_get(pill, &RMF_MDT_MD); if (buffer->lb_buf == NULL) GOTO(out, rc = -EPROTO); + buffer->lb_len = req_capsule_get_size(pill, &RMF_MDT_MD, + RCL_SERVER); } else { buffer->lb_buf = NULL; + buffer->lb_len = 0; ma_need &= ~(MA_LOV | MA_LMV); CDEBUG(D_INFO, "%s: RPC from %s: does not need LOVEA.\n", mdt_obd_name(info->mti_mdt), @@ -998,11 +1026,34 @@ int mdt_getattr(struct mdt_thread_info *info) mode = lu_object_attr(&obj->mot_obj); - /* old clients may not report needed easize, use max value then */ - req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - reqbody->eadatasize == 0 ? - info->mti_mdt->mdt_max_mdsize : - reqbody->eadatasize); + /* Readlink */ + if (reqbody->valid & OBD_MD_LINKNAME) { + /* No easy way to know how long is the symlink, but it cannot + * be more than PATH_MAX, so we allocate +1 */ + rc = PATH_MAX + 1; + + /* A special case for fs ROOT: getattr there might fetch + * default EA for entire fs, not just for this dir! + */ + } else if (lu_fid_eq(mdt_object_fid(obj), + &info->mti_mdt->mdt_md_root_fid) && + (reqbody->valid & OBD_MD_FLDIREA) && + (lustre_msg_get_opc(mdt_info_req(info)->rq_reqmsg) == + MDS_GETATTR)) { + /* Should the default striping be bigger, mdt_fix_reply + * will reallocate */ + rc = DEF_REP_MD_SIZE; + } else { + /* Hopefully no race in EA change for either file or directory? 
+ */ + rc = mdt_attr_get_eabuf_size(info, obj); + } + + if (rc < 0) + GOTO(out_shrink, rc); + + /* Size the MDT_MD reply buffer to the length computed above */ + req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, rc); rc = req_capsule_server_pack(pill); if (unlikely(rc != 0)) @@ -1792,7 +1843,7 @@ static int mdt_reint_internal(struct mdt_thread_info *info, /* for replay (no_create) lmm is not needed, client has it already */ if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_rr.rr_eadatalen); + DEF_REP_MD_SIZE); /* llog cookies are always 0, the field is kept for compatibility */ if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) @@ -2957,7 +3008,7 @@ static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags) /* Pack reply. */ if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_body->eadatasize); + DEF_REP_MD_SIZE); if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) req_capsule_set_size(pill, &RMF_LOGCOOKIES, RCL_SERVER, 0); diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index a4e5fdd..2f5c792 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -194,7 +194,6 @@ struct mdt_device { /* these values should be updated from lov if necessary. * or should be placed somewhere else. 
*/ int mdt_max_mdsize; - int mdt_max_cookiesize; int mdt_max_ea_size; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index b3f9524..340a541 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -606,16 +606,19 @@ int mdt_fix_reply(struct mdt_thread_info *info) * buffers before growing it */ if (info->mti_big_lmm_used) { LASSERT(req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)); - md_packed = req_capsule_get_size(pill, &RMF_MDT_MD, - RCL_SERVER); - LASSERT(md_packed > 0); - /* buffer must be allocated separately */ - LASSERT(info->mti_attr.ma_lmm != - req_capsule_server_get(pill, &RMF_MDT_MD)); - req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER); + /* free big lmm if md_size is not needed */ - if (md_size == 0) + if (md_size == 0) { info->mti_big_lmm_used = 0; + } else { + md_packed = req_capsule_get_size(pill, &RMF_MDT_MD, + RCL_SERVER); + LASSERT(md_packed > 0); + /* buffer must be allocated separately */ + LASSERT(info->mti_attr.ma_lmm != + req_capsule_server_get(pill, &RMF_MDT_MD)); + req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER); + } } else if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) { req_capsule_shrink(pill, &RMF_MDT_MD, md_size, RCL_SERVER); } @@ -695,12 +698,7 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, } repbody->eadatasize = 0; - if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) { - repbody->aclsize = ma->ma_cookie_size; - repbody->valid |= OBD_MD_FLCOOKIE; - } - - if (info->mti_mdt->mdt_opts.mo_oss_capa && + if (info->mti_mdt->mdt_lut.lut_oss_capa && exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA && repbody->valid & OBD_MD_FLEASIZE) { struct lustre_capa *capa; diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index 0fe23bc..9b2546a 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -1873,7 +1873,6 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (created) { ma->ma_need = 0; ma->ma_valid = 0; - 
ma->ma_cookie_size = 0; rc = mdo_unlink(info->mti_env, mdt_object_child(parent), mdt_object_child(child), @@ -2276,10 +2275,10 @@ int mdt_close(struct mdt_thread_info *info) LASSERT(info->mti_ioepoch); - req_capsule_set_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_mdt->mdt_max_mdsize); - req_capsule_set_size(info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER, - info->mti_mdt->mdt_max_cookiesize); + /* These fields are no longer used and are left for compatibility. + * size is always zero */ + req_capsule_set_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER, 0); + req_capsule_set_size(info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER, 0); rc = req_capsule_server_pack(info->mti_pill); if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) { mdt_client_compatibility(info); @@ -2298,11 +2297,6 @@ int mdt_close(struct mdt_thread_info *info) ma->ma_lmm_size = req_capsule_get_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER); - ma->ma_cookie = req_capsule_server_get(info->mti_pill, - &RMF_LOGCOOKIES); - ma->ma_cookie_size = req_capsule_get_size(info->mti_pill, - &RMF_LOGCOOKIES, - RCL_SERVER); ma->ma_need = MA_INODE | MA_LOV | MA_COOKIE; repbody->eadatasize = 0; repbody->aclsize = 0; -- 1.8.3.1