From: Oleg Drokin Date: Thu, 20 Feb 2014 05:00:15 +0000 (-0500) Subject: LU-4008 mdt: Shrink default LOVEA reply buffers X-Git-Tag: 2.5.58~26 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=00260466f9c84f21e6db55d5dd6e87e8a9906438 LU-4008 mdt: Shrink default LOVEA reply buffers Instead of allocating the maximum possible LOVEA for intent requests, allocate a sane default that fits up to 100 stripes; should the size be bigger, the buffer reallocation code will kick in. Additionally, for quite a while now we have not sent unlink cookies or EA data to the clients from unlinks/renames, so we no longer need to allocate reply buffers for them in mdt_close() and other such places. Also clean up unused fields related to these buffers in md_attr structure. Change-Id: I4c5bfb5f2d39653d5612c90967d5e1ac83d441e6 Signed-off-by: Oleg Drokin Reviewed-on: http://review.whamcloud.com/9322 Tested-by: Jenkins Reviewed-by: Matt Ezell Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Mike Pershin --- diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index aa2c175..eccf646 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1699,6 +1699,12 @@ static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi, #define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) +/* This is the default MDT reply size allocated, should the striping be bigger, + * it will be reallocated in mdt_fix_reply. + * 100 stripes is a bit less than 2.5k of data */ +#define DEF_REP_MD_SIZE (sizeof(struct lov_mds_md) + \ + 100 * sizeof(struct lov_ost_data)) + #define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" #define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" #define XATTR_USER_PREFIX "user." 
diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index caf1e16..ba88aed 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -139,13 +139,11 @@ struct md_attr { struct lov_mds_md *ma_lmm; union lmv_mds_md *ma_lmv; void *ma_acl; - struct llog_cookie *ma_cookie; struct lustre_capa *ma_capa; struct md_som_data *ma_som; int ma_lmm_size; int ma_lmv_size; int ma_acl_size; - int ma_cookie_size; __u16 ma_layout_gen; }; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index fb3ba0a..abb526a 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -481,6 +481,32 @@ void mdt_client_compatibility(struct mdt_thread_info *info) EXIT; } +int mdt_attr_get_eabuf_size(struct mdt_thread_info *info, struct mdt_object *o) +{ + const struct lu_env *env = info->mti_env; + int rc, rc2; + + rc = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL, + XATTR_NAME_LOV); + + if (rc == -ENODATA) + rc = 0; + + if (rc < 0) + goto out; + + /* Is it a directory? 
Let's check for the LMV as well */ + if (S_ISDIR(lu_object_attr(&mdt_object_child(o)->mo_lu))) { + rc2 = mo_xattr_get(env, mdt_object_child(o), &LU_BUF_NULL, + XATTR_NAME_LMV); + if ((rc2 < 0 && rc2 != -ENODATA) || (rc2 > rc)) + rc = rc2; + } + +out: + return rc; +} + static int mdt_big_xattr_get(struct mdt_thread_info *info, struct mdt_object *o, const char *name) { @@ -767,13 +793,15 @@ static int mdt_getattr_internal(struct mdt_thread_info *info, GOTO(out, rc = 0); } - buffer->lb_len = reqbody->eadatasize; - if (buffer->lb_len > 0) { + if (reqbody->eadatasize > 0) { buffer->lb_buf = req_capsule_server_get(pill, &RMF_MDT_MD); if (buffer->lb_buf == NULL) GOTO(out, rc = -EPROTO); + buffer->lb_len = req_capsule_get_size(pill, &RMF_MDT_MD, + RCL_SERVER); } else { buffer->lb_buf = NULL; + buffer->lb_len = 0; ma_need &= ~(MA_LOV | MA_LMV); CDEBUG(D_INFO, "%s: RPC from %s: does not need LOVEA.\n", mdt_obd_name(info->mti_mdt), @@ -1053,11 +1081,34 @@ int mdt_getattr(struct tgt_session_info *tsi) mode = lu_object_attr(&obj->mot_obj); + /* Readlink */ + if (reqbody->valid & OBD_MD_LINKNAME) { + /* No easy way to know how long is the symlink, but it cannot + * be more than PATH_MAX, so we allocate +1 */ + rc = PATH_MAX + 1; + + /* A special case for fs ROOT: getattr there might fetch + * default EA for entire fs, not just for this dir! + */ + } else if (lu_fid_eq(mdt_object_fid(obj), + &info->mti_mdt->mdt_md_root_fid) && + (reqbody->valid & OBD_MD_FLDIREA) && + (lustre_msg_get_opc(mdt_info_req(info)->rq_reqmsg) == + MDS_GETATTR)) { + /* Should the default striping be bigger, mdt_fix_reply + * will reallocate */ + rc = DEF_REP_MD_SIZE; + } else { + /* Hopefully no race in EA change for either file or directory? + */ + rc = mdt_attr_get_eabuf_size(info, obj); + } + + if (rc < 0) + GOTO(out_shrink, rc); + /* old clients may not report needed easize, use max value then */ - req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - reqbody->eadatasize == 0 ? 
- info->mti_mdt->mdt_max_mdsize : - reqbody->eadatasize); + req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, rc); rc = req_capsule_server_pack(pill); if (unlikely(rc != 0)) @@ -1768,7 +1819,7 @@ static int mdt_reint_internal(struct mdt_thread_info *info, /* for replay (no_create) lmm is not needed, client has it already */ if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_rr.rr_eadatalen); + DEF_REP_MD_SIZE); /* llog cookies are always 0, the field is kept for compatibility */ if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) @@ -2702,7 +2753,7 @@ static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags) /* Pack reply. */ if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_body->eadatasize); + DEF_REP_MD_SIZE); if (req_capsule_has_field(pill, &RMF_LOGCOOKIES, RCL_SERVER)) req_capsule_set_size(pill, &RMF_LOGCOOKIES, RCL_SERVER, 0); diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 7970b86..6a19f9f 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -193,7 +193,6 @@ struct mdt_device { /* these values should be updated from lov if necessary. * or should be placed somewhere else. 
*/ int mdt_max_mdsize; - int mdt_max_cookiesize; int mdt_max_ea_size; diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 0b0339c..ae30238 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -628,16 +628,19 @@ int mdt_fix_reply(struct mdt_thread_info *info) * buffers before growing it */ if (info->mti_big_lmm_used) { LASSERT(req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)); - md_packed = req_capsule_get_size(pill, &RMF_MDT_MD, - RCL_SERVER); - LASSERT(md_packed > 0); - /* buffer must be allocated separately */ - LASSERT(info->mti_attr.ma_lmm != - req_capsule_server_get(pill, &RMF_MDT_MD)); - req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER); + /* free big lmm if md_size is not needed */ - if (md_size == 0) + if (md_size == 0) { info->mti_big_lmm_used = 0; + } else { + md_packed = req_capsule_get_size(pill, &RMF_MDT_MD, + RCL_SERVER); + LASSERT(md_packed > 0); + /* buffer must be allocated separately */ + LASSERT(info->mti_attr.ma_lmm != + req_capsule_server_get(pill, &RMF_MDT_MD)); + req_capsule_shrink(pill, &RMF_MDT_MD, 0, RCL_SERVER); + } } else if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) { req_capsule_shrink(pill, &RMF_MDT_MD, md_size, RCL_SERVER); } @@ -725,11 +728,6 @@ int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo, } repbody->eadatasize = 0; - if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) { - repbody->aclsize = ma->ma_cookie_size; - repbody->valid |= OBD_MD_FLCOOKIE; - } - if (info->mti_mdt->mdt_lut.lut_oss_capa && exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA && repbody->valid & OBD_MD_FLEASIZE) { diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index fc67526..62933b8 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -1892,7 +1892,6 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (created) { ma->ma_need = 0; ma->ma_valid = 0; - ma->ma_cookie_size = 0; rc = mdo_unlink(info->mti_env, 
mdt_object_child(parent), mdt_object_child(child), @@ -2296,10 +2295,12 @@ int mdt_close(struct tgt_session_info *tsi) LASSERT(info->mti_ioepoch); + /* These fields are no longer used and are left for compatibility. + * size is always zero */ req_capsule_set_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER, - info->mti_mdt->mdt_max_mdsize); + 0); req_capsule_set_size(info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER, - info->mti_mdt->mdt_max_cookiesize); + 0); rc = req_capsule_server_pack(info->mti_pill); if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) { mdt_client_compatibility(info); @@ -2318,11 +2319,6 @@ int mdt_close(struct tgt_session_info *tsi) ma->ma_lmm_size = req_capsule_get_size(info->mti_pill, &RMF_MDT_MD, RCL_SERVER); - ma->ma_cookie = req_capsule_server_get(info->mti_pill, - &RMF_LOGCOOKIES); - ma->ma_cookie_size = req_capsule_get_size(info->mti_pill, - &RMF_LOGCOOKIES, - RCL_SERVER); ma->ma_need = MA_INODE | MA_LOV | MA_COOKIE; repbody->eadatasize = 0; repbody->aclsize = 0;