From: Brian Behlendorf Date: Tue, 14 May 2013 22:39:04 +0000 (-0700) Subject: LU-3338 llite: Limit reply buffer size X-Git-Tag: 2.5.57~4 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=006f258300e38ffb2cb45dfd374914a9624c07db LU-3338 llite: Limit reply buffer size When allocating a reply buffer for the striping information, don't assume the unlikely worst case. Instead, assume the common case and size the buffer based on the observed default ea/cookie size. The default size is initialized to a single stripe and allowed to grow up to an entire page if needed. This means that for smallish filesystems (fewer than ~21 OSTs), where the worst-case striping information can fit in a single page, there is effectively no change. Only for larger filesystems will the default be less than the maximum. This has a number of advantages. * By limiting the default reply buffer size we avoid always vmalloc()'ing the buffer because it exceeds four pages in size and instead kmalloc() it. This prevents the client from thrashing on the global vmalloc() spin lock. * A reply buffer of exactly the right size (no larger) is allocated in the overflow case. These larger reply buffers are still unlikely to exceed the 16k limit where a vmalloc() will occur. * Saves memory in the common case. Wide-striped files that exceed the default are expected to be the exception. The reason this patch works is that the ptlrpc layer is smart enough to reallocate the reply buffer when an overflow occurs. Therefore the client doesn't have to drop the incoming reply and send a new request with a larger reply buffer. It's also worth mentioning that reply buffers always contain a significant amount of extra padding because they are rounded up to the nearest power of two. This means that even files striped wider than the default have a good chance of fitting in the allocated reply buffer. 
Also remove client eadatasize check in mdt xattr packing because as said above client can handle -EOVERFLOW. Change-Id: Ic6bab2aca208db1f4abcd87974a5879d06d0f2f1 Signed-off-by: Brian Behlendorf Signed-off-by: Lai Siyao Reviewed-on: http://review.whamcloud.com/6339 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Bob Glossman --- diff --git a/lustre/include/lustre_mdc.h b/lustre/include/lustre_mdc.h index 4f69667..3d343b1 100644 --- a/lustre/include/lustre_mdc.h +++ b/lustre/include/lustre_mdc.h @@ -167,18 +167,27 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, EXIT; } +/* Update the maximum observed easize and cookiesize. The default easize + * and cookiesize is initialized to the minimum value but allowed to grow + * up to a single page in size if required to handle the common case. + */ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, - struct mdt_body *body) + struct mdt_body *body) { - if (body->valid & OBD_MD_FLMODEASIZE) { - if (exp->exp_obd->u.cli.cl_max_mds_easize < body->max_mdsize) - exp->exp_obd->u.cli.cl_max_mds_easize = - body->max_mdsize; - if (exp->exp_obd->u.cli.cl_max_mds_cookiesize < - body->max_cookiesize) - exp->exp_obd->u.cli.cl_max_mds_cookiesize = - body->max_cookiesize; - } + if (body->valid & OBD_MD_FLMODEASIZE) { + struct client_obd *cli = &exp->exp_obd->u.cli; + + if (cli->cl_max_mds_easize < body->max_mdsize) { + cli->cl_max_mds_easize = body->max_mdsize; + cli->cl_default_mds_easize = + min_t(__u32, body->max_mdsize, PAGE_CACHE_SIZE); + } + if (cli->cl_max_mds_cookiesize < body->max_cookiesize) { + cli->cl_max_mds_cookiesize = body->max_cookiesize; + cli->cl_default_mds_cookiesize = + min_t(__u32, body->max_cookiesize, PAGE_CACHE_SIZE); + } + } } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 79924f9..d262f63 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -262,11 +262,12 @@ struct client_obd { struct obd_uuid 
cl_target_uuid; struct obd_import *cl_import; /* ptlrpc connection state */ int cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_diskmd() all the time. */ - int cl_default_mds_easize; - int cl_max_mds_easize; - int cl_max_mds_cookiesize; + /* max_mds_easize is purely a performance thing so we don't have to + * call obd_size_diskmd() all the time. */ + int cl_default_mds_easize; + int cl_max_mds_easize; + int cl_default_mds_cookiesize; + int cl_max_mds_cookiesize; enum lustre_sec_part cl_sp_me; enum lustre_sec_part cl_sp_to; @@ -482,6 +483,7 @@ struct lmv_obd { int max_easize; int max_def_easize; int max_cookiesize; + int max_def_cookiesize; int server_timeout; int tgts_size; /* size of tgts array */ @@ -889,7 +891,10 @@ enum obd_cleanup_stage { #define KEY_LOCK_TO_STRIPE "lock_to_stripe" #define KEY_LOVDESC "lovdesc" #define KEY_LOV_IDX "lov_idx" -#define KEY_MAX_EASIZE "max_easize" +#define KEY_MAX_EASIZE "max_easize" +#define KEY_DEFAULT_EASIZE "default_easize" +#define KEY_MAX_COOKIESIZE "max_cookiesize" +#define KEY_DEFAULT_COOKIESIZE "default_cookiesize" #define KEY_MDS_CONN "mds_conn" #define KEY_MGSSEC "mgssec" #define KEY_NEXT_ID "next_id" @@ -1279,7 +1284,7 @@ struct md_ops { const struct lu_fid *, struct ptlrpc_request **); - int (*m_init_ea_size)(struct obd_export *, int, int, int); + int (*m_init_ea_size)(struct obd_export *, int, int, int, int); int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, struct obd_export *, struct obd_export *, diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 2f71625..aeb94ba 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1954,13 +1954,14 @@ static inline ldlm_mode_t md_lock_match(struct obd_export *exp, __u64 flags, } static inline int md_init_ea_size(struct obd_export *exp, int easize, - int def_asize, int cookiesize) + int def_asize, int cookiesize, + int def_cookiesize) { - ENTRY; - 
EXP_CHECK_MD_OP(exp, init_ea_size); - EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); - RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, - cookiesize)); + ENTRY; + EXP_CHECK_MD_OP(exp, init_ea_size); + EXP_MD_COUNTER_INCREMENT(exp, init_ea_size); + RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize, + cookiesize, def_cookiesize)); } static inline int md_get_remote_perm(struct obd_export *exp, diff --git a/lustre/lclient/lcommon_misc.c b/lustre/lclient/lcommon_misc.c index 2ba559e..39df79a 100644 --- a/lustre/lclient/lcommon_misc.c +++ b/lustre/lclient/lcommon_misc.c @@ -52,12 +52,12 @@ * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */ int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) { - struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; - __u32 valsize = sizeof(struct lov_desc); - int rc, easize, def_easize, cookiesize; - struct lov_desc desc; - __u16 stripes; - ENTRY; + struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC_V3 }; + __u32 valsize = sizeof(struct lov_desc); + int rc, easize, def_easize, cookiesize; + struct lov_desc desc; + __u16 stripes, def_stripes; + ENTRY; rc = obd_get_info(NULL, dt_exp, sizeof(KEY_LOVDESC), KEY_LOVDESC, &valsize, &desc, NULL); @@ -68,16 +68,21 @@ int cl_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) lsm.lsm_stripe_count = stripes; easize = obd_size_diskmd(dt_exp, &lsm); - lsm.lsm_stripe_count = desc.ld_default_stripe_count; - def_easize = obd_size_diskmd(dt_exp, &lsm); + def_stripes = min_t(__u32, desc.ld_default_stripe_count, + LOV_MAX_STRIPE_COUNT); + lsm.lsm_stripe_count = def_stripes; + def_easize = obd_size_diskmd(dt_exp, &lsm); - cookiesize = stripes * sizeof(struct llog_cookie); + cookiesize = stripes * sizeof(struct llog_cookie); - CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", - easize, cookiesize); + /* default cookiesize is 0 because from 2.4 server doesn't send + * llog cookies to client. 
*/ + CDEBUG(D_HA, + "updating def/max_easize: %d/%d def/max_cookiesize: 0/%d\n", + def_easize, easize, cookiesize); - rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize); - RETURN(rc); + rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize, 0); + RETURN(rc); } /** diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index a1ccfce..1b3473c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -541,7 +541,7 @@ int ll_dir_getstripe(struct inode *inode, void **plmm, int *plmm_size, struct md_op_data *op_data; ENTRY; - rc = ll_get_max_mdsize(sbi, &lmm_size); + rc = ll_get_default_mdsize(sbi, &lmm_size); if (rc) RETURN(rc); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index d66f8f2..b92443f 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1539,9 +1539,9 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, struct md_op_data *op_data; int rc, lmmsize; - rc = ll_get_max_mdsize(sbi, &lmmsize); - if (rc) - RETURN(rc); + rc = ll_get_default_mdsize(sbi, &lmmsize); + if (rc) + RETURN(rc); op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, strlen(filename), lmmsize, @@ -3268,12 +3268,12 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it, struct md_op_data *op_data; int ealen = 0; - if (S_ISREG(inode->i_mode)) { - rc = ll_get_max_mdsize(sbi, &ealen); - if (rc) - RETURN(rc); - valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; - } + if (S_ISREG(inode->i_mode)) { + rc = ll_get_default_mdsize(sbi, &ealen); + if (rc) + RETURN(rc); + valid |= OBD_MD_FLEASIZE | OBD_MD_FLMODEASIZE; + } op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, ealen, LUSTRE_OPC_ANY, @@ -3755,7 +3755,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) * layout here. 
Please note that we can't use the LVB buffer in * completion AST because it doesn't have a large enough buffer */ oc = ll_mdscapa_get(inode); - rc = ll_get_max_mdsize(sbi, &lmmsize); + rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc == 0) rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, OBD_MD_FLXATTR, XATTR_NAME_LOV, NULL, 0, @@ -3765,7 +3765,7 @@ static int ll_layout_fetch(struct inode *inode, struct ldlm_lock *lock) RETURN(rc); body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - if (body == NULL || body->eadatasize > lmmsize) + if (body == NULL) GOTO(out, rc = -EPROTO); lmmsize = body->eadatasize; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index e94ff22..dae4db5 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -925,6 +925,9 @@ void lustre_dump_dentry(struct dentry *, int recur); void lustre_dump_inode(struct inode *); int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); +int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); +int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *max_cookiesize); +int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *default_cookiesize); int ll_process_config(struct lustre_cfg *lcfg); struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, struct inode *i2, @@ -1200,11 +1203,6 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode) return fid; } -static inline int ll_mds_max_easize(struct super_block *sb) -{ - return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize; -} - static inline __u64 ll_file_maxbytes(struct inode *inode) { return ll_i2info(inode)->lli_maxbytes; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index e744390..2edfe43 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -632,16 +632,55 @@ out: int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) { - int size, rc; + int 
size, rc; - *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); - size = sizeof(int); - rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), - KEY_MAX_EASIZE, &size, lmmsize, NULL); - if (rc) - CERROR("Get max mdsize error rc %d \n", rc); + *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), + KEY_MAX_EASIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get max mdsize error rc %d\n", rc); - RETURN(rc); + RETURN(rc); +} + +int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE), + KEY_DEFAULT_EASIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get default mdsize error rc %d\n", rc); + + RETURN(rc); +} + +int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_COOKIESIZE), + KEY_MAX_COOKIESIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get max cookiesize error rc %d\n", rc); + + RETURN(rc); +} + +int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *lmmsize) +{ + int size, rc; + + size = sizeof(int); + rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_COOKIESIZE), + KEY_DEFAULT_COOKIESIZE, &size, lmmsize, NULL); + if (rc) + CERROR("Get default cookiesize error rc %d\n", rc); + + RETURN(rc); } void ll_dump_inode(struct inode *inode) diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 713e846..be04a1c 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -77,9 +77,9 @@ struct inode *search_inode_for_lustre(struct super_block *sb, if (inode) RETURN(inode); - rc = ll_get_max_mdsize(sbi, &eadatalen); - if (rc) - RETURN(ERR_PTR(rc)); + rc = ll_get_default_mdsize(sbi, &eadatalen); + if (rc) + RETURN(ERR_PTR(rc)); /* Because inode is NULL, ll_prep_md_op_data can not * be used here. 
So we allocate op_data ourselves */ @@ -314,7 +314,7 @@ static struct dentry *ll_get_parent(struct dentry *dchild) ll_get_fsname(dir->i_sb, NULL, 0), PFID(ll_inode2fid(dir))); - rc = ll_get_max_mdsize(sbi, &lmmsize); + rc = ll_get_default_mdsize(sbi, &lmmsize); if (rc != 0) RETURN(ERR_PTR(rc)); diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 18f1406..9f59cdc 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -723,20 +723,65 @@ static ssize_t ll_lazystatfs_seq_write(struct file *file, const char *buffer, } LPROC_SEQ_FOPS(ll_lazystatfs); -static int ll_maxea_size_seq_show(struct seq_file *m, void *v) +static int ll_max_easize_seq_show(struct seq_file *m, void *v) { struct super_block *sb = m->private; struct ll_sb_info *sbi = ll_s2sbi(sb); unsigned int ealen; int rc; - rc = ll_get_max_mdsize(sbi, &ealen); - if (rc) - return rc; + rc = ll_get_max_mdsize(sbi, &ealen); + if (rc) + return rc; + + return seq_printf(m, "%u\n", ealen); +} +LPROC_SEQ_FOPS_RO(ll_max_easize); + +static int ll_defult_easize_seq_show(struct seq_file *m, void *v) +{ + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); + unsigned int ealen; + int rc; + + rc = ll_get_default_mdsize(sbi, &ealen); + if (rc) + return rc; return seq_printf(m, "%u\n", ealen); } -LPROC_SEQ_FOPS_RO(ll_maxea_size); +LPROC_SEQ_FOPS_RO(ll_defult_easize); + +static int ll_max_cookiesize_seq_show(struct seq_file *m, void *v) +{ + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); + unsigned int cookielen; + int rc; + + rc = ll_get_max_cookiesize(sbi, &cookielen); + if (rc) + return rc; + + return seq_printf(m, "%u\n", cookielen); +} +LPROC_SEQ_FOPS_RO(ll_max_cookiesize); + +static int ll_defult_cookiesize_seq_show(struct seq_file *m, void *v) +{ + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); + unsigned int cookielen; + int rc; + + rc = ll_get_default_cookiesize(sbi, &cookielen); + if (rc) + 
return rc; + + return seq_printf(m, "%u\n", cookielen); +} +LPROC_SEQ_FOPS_RO(ll_defult_cookiesize); static int ll_sbi_flags_seq_show(struct seq_file *m, void *v) { @@ -825,7 +870,13 @@ struct lprocfs_seq_vars lprocfs_llite_obd_vars[] = { { .name = "lazystatfs", .fops = &ll_lazystatfs_fops }, { .name = "max_easize", - .fops = &ll_maxea_size_fops }, + .fops = &ll_max_easize_fops }, + { .name = "default_easize", + .fops = &ll_defult_easize_fops }, + { .name = "max_cookiesize", + .fops = &ll_max_cookiesize_fops }, + { .name = "default_cookiesize", + .fops = &ll_defult_cookiesize_fops }, { .name = "sbi_flags", .fops = &ll_sbi_flags_fops }, { .name = "xattr_cache", diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 26ee849..b6397d4 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -362,7 +362,7 @@ static void lmv_set_timeouts(struct obd_device *obd) } static int lmv_init_ea_size(struct obd_export *exp, int easize, - int def_easize, int cookiesize) + int def_easize, int cookiesize, int def_cookiesize) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -383,11 +383,15 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, lmv->max_cookiesize = cookiesize; change = 1; } - if (change == 0) - RETURN(0); + if (lmv->max_def_cookiesize < def_cookiesize) { + lmv->max_def_cookiesize = def_cookiesize; + change = 1; + } + if (change == 0) + RETURN(0); - if (lmv->connected == 0) - RETURN(0); + if (lmv->connected == 0) + RETURN(0); for (i = 0; i < lmv->desc.ld_tgt_count; i++) { struct lmv_tgt_desc *tgt = lmv->tgts[i]; @@ -398,7 +402,7 @@ static int lmv_init_ea_size(struct obd_export *exp, int easize, } rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize, - cookiesize); + cookiesize, def_cookiesize); if (rc) { CERROR("%s: obd_init_ea_size() failed on MDT target %d:" " rc = %d.\n", obd->obd_name, i, rc); @@ -486,12 +490,12 @@ int lmv_connect_mdc(struct obd_device *obd, struct lmv_tgt_desc *tgt) tgt->ltd_exp = mdc_exp; 
lmv->desc.ld_active_tgt_count++; - md_init_ea_size(tgt->ltd_exp, lmv->max_easize, - lmv->max_def_easize, lmv->max_cookiesize); + md_init_ea_size(tgt->ltd_exp, lmv->max_easize, lmv->max_def_easize, + lmv->max_cookiesize, lmv->max_def_cookiesize); - CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", - mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, - cfs_atomic_read(&obd->obd_refcount)); + CDEBUG(D_CONFIG, "Connected to %s(%s) successfully (%d)\n", + mdc_obd->obd_name, mdc_obd->obd_uuid.uuid, + atomic_read(&obd->obd_refcount)); #ifdef __KERNEL__ lmv_proc_dir = obd->obd_proc_private; @@ -614,16 +618,15 @@ static int lmv_add_target(struct obd_device *obd, struct obd_uuid *uuidp, lmv->desc.ld_tgt_count--; memset(tgt, 0, sizeof(*tgt)); spin_unlock(&lmv->lmv_lock); - } else { - int easize = sizeof(struct lmv_stripe_md) + - lmv->desc.ld_tgt_count * - sizeof(struct lu_fid); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); - } - } + } else { + int easize = sizeof(struct lmv_stripe_md) + + lmv->desc.ld_tgt_count * sizeof(struct lu_fid); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); + } + } - lmv_init_unlock(lmv); - RETURN(rc); + lmv_init_unlock(lmv); + RETURN(rc); } int lmv_check_connect(struct obd_device *obd) @@ -675,7 +678,7 @@ int lmv_check_connect(struct obd_device *obd) class_export_put(lmv->exp); lmv->connected = 1; easize = lmv_mds_md_size(lmv->desc.ld_tgt_count, LMV_MAGIC); - lmv_init_ea_size(obd->obd_self_export, easize, 0, 0); + lmv_init_ea_size(obd->obd_self_export, easize, 0, 0, 0); lmv_init_unlock(lmv); RETURN(0); @@ -2685,12 +2688,16 @@ static int lmv_get_info(const struct lu_env *env, struct obd_export *exp, if (!obd_get_info(env, tgt->ltd_exp, keylen, key, vallen, val, NULL)) RETURN(0); - } - RETURN(-EINVAL); - } else if (KEY_IS(KEY_MAX_EASIZE) || KEY_IS(KEY_CONN_DATA)) { - rc = lmv_check_connect(obd); - if (rc) - RETURN(rc); + } + RETURN(-EINVAL); + } else if (KEY_IS(KEY_MAX_EASIZE) || + KEY_IS(KEY_DEFAULT_EASIZE) || + 
KEY_IS(KEY_MAX_COOKIESIZE) || + KEY_IS(KEY_DEFAULT_COOKIESIZE) || + KEY_IS(KEY_CONN_DATA)) { + rc = lmv_check_connect(obd); + if (rc) + RETURN(rc); /* * Forwarding this request to first MDS, it should know LOV diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 23694fd..b6ca65b 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -440,12 +440,12 @@ static struct ptlrpc_request *mdc_intent_unlink_pack(struct obd_export *exp, /* pack the intended request */ mdc_unlink_pack(req, op_data); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - obddev->u.cli.cl_max_mds_cookiesize); - ptlrpc_request_set_replen(req); - RETURN(req); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obddev->u.cli.cl_default_mds_easize); + req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, + obddev->u.cli.cl_default_mds_cookiesize); + ptlrpc_request_set_replen(req); + RETURN(req); } static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp, @@ -482,17 +482,17 @@ static struct ptlrpc_request *mdc_intent_getattr_pack(struct obd_export *exp, lit = req_capsule_client_get(&req->rq_pill, &RMF_LDLM_INTENT); lit->opc = (__u64)it->it_op; - /* pack the intended request */ - mdc_getattr_pack(req, valid, it->it_flags, op_data, - obddev->u.cli.cl_max_mds_easize); + /* pack the intended request */ + mdc_getattr_pack(req, valid, it->it_flags, op_data, + obddev->u.cli.cl_default_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obddev->u.cli.cl_max_mds_easize); - if (client_is_remote(exp)) - req_capsule_set_size(&req->rq_pill, &RMF_ACL, RCL_SERVER, - sizeof(struct mdt_remote_perm)); - ptlrpc_request_set_replen(req); - RETURN(req); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obddev->u.cli.cl_default_mds_easize); + if (client_is_remote(exp)) + req_capsule_set_size(&req->rq_pill, 
&RMF_ACL, RCL_SERVER, + sizeof(struct mdt_remote_perm)); + ptlrpc_request_set_replen(req); + RETURN(req); } static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp, @@ -529,7 +529,7 @@ static struct ptlrpc_request *mdc_intent_layout_pack(struct obd_export *exp, layout->li_opc = LAYOUT_INTENT_ACCESS; req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER, - obd->u.cli.cl_max_mds_easize); + obd->u.cli.cl_default_mds_easize); ptlrpc_request_set_replen(req); RETURN(req); } diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 56e2d74..cb09729 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -362,13 +362,13 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } - mdc_unlink_pack(req, op_data); + mdc_unlink_pack(req, op_data); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_max_mds_cookiesize); - ptlrpc_request_set_replen(req); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obd->u.cli.cl_default_mds_easize); + req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, + obd->u.cli.cl_default_mds_cookiesize); + ptlrpc_request_set_replen(req); *request = req; @@ -479,11 +479,11 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, mdc_rename_pack(req, op_data, old, oldlen, new, newlen); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_max_mds_cookiesize); - ptlrpc_request_set_replen(req); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obd->u.cli.cl_default_mds_easize); + req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, + obd->u.cli.cl_default_mds_cookiesize); + ptlrpc_request_set_replen(req); rc = mdc_reint(req, obd->u.cli.cl_rpc_lock, LUSTRE_IMP_FULL); 
*request = req; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 4e540d6..ee3a654 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -926,10 +926,10 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, mdc_close_pack(req, op_data); - req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, - obd->u.cli.cl_max_mds_easize); - req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, - obd->u.cli.cl_max_mds_cookiesize); + req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER, + obd->u.cli.cl_default_mds_easize); + req_capsule_set_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_SERVER, + obd->u.cli.cl_default_mds_cookiesize); ptlrpc_request_set_replen(req); @@ -2826,22 +2826,50 @@ int mdc_set_info_async(const struct lu_env *env, } int mdc_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val, - struct lov_stripe_md *lsm) + __u32 keylen, void *key, __u32 *vallen, void *val, + struct lov_stripe_md *lsm) { - int rc = -EINVAL; + int rc = -EINVAL; - if (KEY_IS(KEY_MAX_EASIZE)) { - int mdsize, *max_easize; + if (KEY_IS(KEY_MAX_EASIZE)) { + int mdsize, *max_easize; - if (*vallen != sizeof(int)) - RETURN(-EINVAL); - mdsize = *(int*)val; - if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize) - exp->exp_obd->u.cli.cl_max_mds_easize = mdsize; - max_easize = val; - *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; - RETURN(0); + if (*vallen != sizeof(int)) + RETURN(-EINVAL); + mdsize = *(int *)val; + if (mdsize > exp->exp_obd->u.cli.cl_max_mds_easize) + exp->exp_obd->u.cli.cl_max_mds_easize = mdsize; + max_easize = val; + *max_easize = exp->exp_obd->u.cli.cl_max_mds_easize; + RETURN(0); + } else if (KEY_IS(KEY_DEFAULT_EASIZE)) { + int *default_easize; + + if (*vallen != sizeof(int)) + RETURN(-EINVAL); + default_easize = val; + *default_easize = exp->exp_obd->u.cli.cl_default_mds_easize; + RETURN(0); + } else if (KEY_IS(KEY_MAX_COOKIESIZE)) { + int mdsize, 
*max_cookiesize; + + if (*vallen != sizeof(int)) + RETURN(-EINVAL); + mdsize = *(int *)val; + if (mdsize > exp->exp_obd->u.cli.cl_max_mds_cookiesize) + exp->exp_obd->u.cli.cl_max_mds_cookiesize = mdsize; + max_cookiesize = val; + *max_cookiesize = exp->exp_obd->u.cli.cl_max_mds_cookiesize; + RETURN(0); + } else if (KEY_IS(KEY_DEFAULT_COOKIESIZE)) { + int *default_cookiesize; + + if (*vallen != sizeof(int)) + RETURN(-EINVAL); + default_cookiesize = val; + *default_cookiesize = + exp->exp_obd->u.cli.cl_default_mds_cookiesize; + RETURN(0); } else if (KEY_IS(KEY_CONN_DATA)) { struct obd_import *imp = class_exp2cliimp(exp); struct obd_connect_data *data = val; @@ -3131,26 +3159,33 @@ err_rpc_lock: } /* Initialize the default and maximum LOV EA and cookie sizes. This allows - * us to make MDS RPCs with large enough reply buffers to hold the - * maximum-sized (= maximum striped) EA and cookie without having to - * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */ + * us to make MDS RPCs with large enough reply buffers to hold a default + * sized EA and cookie without having to calculate this (via a call into the + * LOV + OSCs) each time we make an RPC. The maximum size is also tracked + * but not used to avoid wastefully vmalloc()'ing large reply buffers when + * a large number of stripes is possible. If a larger reply buffer is + * required it will be reallocated in the ptlrpc layer due to overflow. 
+ */ static int mdc_init_ea_size(struct obd_export *exp, int easize, - int def_easize, int cookiesize) + int def_easize, int cookiesize, int def_cookiesize) { - struct obd_device *obd = exp->exp_obd; - struct client_obd *cli = &obd->u.cli; - ENTRY; + struct obd_device *obd = exp->exp_obd; + struct client_obd *cli = &obd->u.cli; + ENTRY; - if (cli->cl_max_mds_easize < easize) - cli->cl_max_mds_easize = easize; + if (cli->cl_max_mds_easize < easize) + cli->cl_max_mds_easize = easize; - if (cli->cl_default_mds_easize < def_easize) - cli->cl_default_mds_easize = def_easize; + if (cli->cl_default_mds_easize < def_easize) + cli->cl_default_mds_easize = def_easize; - if (cli->cl_max_mds_cookiesize < cookiesize) - cli->cl_max_mds_cookiesize = cookiesize; + if (cli->cl_max_mds_cookiesize < cookiesize) + cli->cl_max_mds_cookiesize = cookiesize; - RETURN(0); + if (cli->cl_default_mds_cookiesize < def_cookiesize) + cli->cl_default_mds_cookiesize = def_cookiesize; + + RETURN(0); } static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) diff --git a/lustre/mdt/mdt_xattr.c b/lustre/mdt/mdt_xattr.c index fba6c2d..55f0520 100644 --- a/lustre/mdt/mdt_xattr.c +++ b/lustre/mdt/mdt_xattr.c @@ -101,10 +101,6 @@ static int mdt_getxattr_pack_reply(struct mdt_thread_info * info) RETURN(size); } - if (info->mti_body->eadatasize != 0 && - info->mti_body->eadatasize < size) - RETURN(-ERANGE); - req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, info->mti_body->eadatasize == 0 ? 0 : size); rc = req_capsule_server_pack(pill);