From: Ned Bass Date: Wed, 17 Dec 2014 00:05:42 +0000 (-0800) Subject: LU-5549 mdc: cl_default_mds_easize not refreshed X-Git-Tag: 2.7.50~14 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e709e661395af42471f5ecf3c87f2fa1d3031fbb LU-5549 mdc: cl_default_mds_easize not refreshed The client_obd::cl_default_mds_easize field should track the largest observed EA size advertised by the MDT, subject to a reasonable upper bound. The MDC uses cl_default_mds_easize to calculate the initial size of request buffers. The default value should be small enough to avoid wasted memory and excessive use of vmalloc(), yet large enough to accommodate the common use case. In the current code, the default value is only updated if client_obd::cl_max_mds_easize is strictly less than mdt_body::mbo_max_mdsize. This condition is almost never met, because client_obd::cl_max_mds_easize is computed at client mount-time based on the number of OSTs in the filesystem, so the MDT won't ever observe and advertise an EA size larger than that. As a result, client_obd::cl_default_mds_easize indefinitely retains its initial value, which is computed at client mount-time based on the filesystem's default stripe width. Any getattr() requests for widely striped files will consequently allocate a request buffer that is too small, forcing reallocations on both the client and server side. To avoid this, update client_obd::cl_default_mds_easize independently of the value of client_obd::cl_max_mds_easize. In addition, this patch includes these changes: - Add comments to the client_obd structure to clarify what the cl_{default,max}_mds_{cookie,ea}size values mean. - Prevent mdc_get_info() from storing uninitialized data in client_obd::cl_max_mds_cookiesize. - Use 4096 as an upper bound for the default values. The former bound of PAGE_CACHE_SIZE is too large on 64k-page platforms (i.e. PPC), so it fails to prevent the vmalloc() spinlock contention described in LU-3338. 
The new value was chosen to be large enough to accommodate common use cases while staying well below the 16k threshold at which allocations start using vmalloc(). - Add test case 27E to ./lustre/tests/sanity.sh. Signed-off-by: Ned Bass Signed-off-by: Kyle Blatter Change-Id: I363017844d6af3e6b67b7c03bd206226f9495116 Reviewed-on: http://review.whamcloud.com/11614 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_mdc.h b/lustre/include/lustre_mdc.h index c0f070f..7b09164 100644 --- a/lustre/include/lustre_mdc.h +++ b/lustre/include/lustre_mdc.h @@ -165,28 +165,42 @@ static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, EXIT; } -/* Update the maximum observed easize and cookiesize. The default easize - * and cookiesize is initialized to the minimum value but allowed to grow - * up to a single page in size if required to handle the common case. +/** + * Update the maximum possible easize and cookiesize. + * + * The values are learned from ptlrpc replies sent by the MDT. The + * default easize and cookiesize are initialized to the minimum value but + * allowed to grow up to OBD_MAX_DEFAULT_{EA,COOKIE}_SIZE if required to + * handle the common case. 
+ * + * \see client_obd::cl_default_mds_easize and + * client_obd::cl_default_mds_cookiesize + * + * \param[in] exp export for MDC device + * \param[in] body body of ptlrpc reply from MDT + * */ static inline void mdc_update_max_ea_from_body(struct obd_export *exp, struct mdt_body *body) { if (body->mbo_valid & OBD_MD_FLMODEASIZE) { struct client_obd *cli = &exp->exp_obd->u.cli; + __u32 def_easize; + __u32 def_cookiesize; - if (cli->cl_max_mds_easize < body->mbo_max_mdsize) { + if (cli->cl_max_mds_easize < body->mbo_max_mdsize) cli->cl_max_mds_easize = body->mbo_max_mdsize; - cli->cl_default_mds_easize = - min_t(__u32, body->mbo_max_mdsize, - PAGE_CACHE_SIZE); - } - if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize) { + + def_easize = min_t(__u32, body->mbo_max_mdsize, + OBD_MAX_DEFAULT_EA_SIZE); + cli->cl_default_mds_easize = def_easize; + + if (cli->cl_max_mds_cookiesize < body->mbo_max_cookiesize) cli->cl_max_mds_cookiesize = body->mbo_max_cookiesize; - cli->cl_default_mds_cookiesize = - min_t(__u32, body->mbo_max_cookiesize, - PAGE_CACHE_SIZE); - } + + def_cookiesize = min_t(__u32, body->mbo_max_cookiesize, + OBD_MAX_DEFAULT_COOKIE_SIZE); + cli->cl_default_mds_cookiesize = def_cookiesize; } } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index cea637d..de554b8 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -205,23 +205,59 @@ enum { NUM_SYNC_ON_CANCEL_STATES }; +/* + * Limit reply buffer size for striping data to one x86_64 page. This + * value is chosen to fit the striping data for common use cases while + * staying well below the limit at which the buffer must be backed by + * vmalloc(). Excessive use of vmalloc() may cause spinlock contention + * on the MDS. 
+ */ +#define OBD_MAX_DEFAULT_EA_SIZE 4096 +#define OBD_MAX_DEFAULT_COOKIE_SIZE 4096 + struct mdc_rpc_lock; struct obd_import; struct client_obd { struct rw_semaphore cl_sem; - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ + struct obd_uuid cl_target_uuid; + struct obd_import *cl_import; /* ptlrpc connection state */ size_t cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_diskmd() all the time. */ + + /* Cache maximum and default values for easize and cookiesize. This is + * strictly a performance optimization to minimize calls to + * obd_size_diskmd(). The default values are used to calculate the + * initial size of a request buffer. The ptlrpc layer will resize the + * buffer as needed to accommodate a larger reply from the + * server. The default values should be small enough to avoid wasted + * memory and excessive use of vmalloc(), yet large enough to avoid + * reallocating the buffer in the common use case. */ + + /* Default EA size for striping attributes. It is initialized at + * mount-time based on the default stripe width of the filesystem, + * then it tracks the largest observed EA size advertised by + * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE. */ __u32 cl_default_mds_easize; + + /* Maximum possible EA size computed at mount-time based on + * the number of OSTs in the filesystem. May be increased at + * run-time if a larger observed size is advertised by the MDT. */ __u32 cl_max_mds_easize; + + /* Default cookie size for llog cookies (see struct llog_cookie). It is + * initialized to zero at mount-time, then it tracks the largest + * observed cookie size advertised by the MDT, up to a maximum value of + * OBD_MAX_DEFAULT_COOKIE_SIZE. 
Note that llog_cookies are not + * used by clients communicating with MDS versions 2.4.0 and later.*/ __u32 cl_default_mds_cookiesize; + + /* Maximum possible cookie size computed at mount-time based on + * the number of OSTs in the filesystem. May be increased at + * run-time if a larger observed size is advertised by the MDT. */ __u32 cl_max_mds_cookiesize; - enum lustre_sec_part cl_sp_me; - enum lustre_sec_part cl_sp_to; - struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ + enum lustre_sec_part cl_sp_me; + enum lustre_sec_part cl_sp_to; + struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ /* the grant values are protected by loi_list_lock below */ unsigned long cl_dirty_pages; /* all _dirty_ in pages */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f0ee6b0..7d332ab 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -692,7 +692,8 @@ int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize) { int rc; - if (lmmsize < sizeof(struct lov_mds_md) || lmmsize > PAGE_CACHE_SIZE) + if (lmmsize < sizeof(struct lov_mds_md) || + lmmsize > OBD_MAX_DEFAULT_EA_SIZE) return -EINVAL; rc = obd_set_info_async(NULL, sbi->ll_md_exp, diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index dc22bff..0cc48d7 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -2664,13 +2664,10 @@ static int mdc_get_info(const struct lu_env *env, struct obd_export *exp, *default_easize = exp->exp_obd->u.cli.cl_default_mds_easize; RETURN(0); } else if (KEY_IS(KEY_MAX_COOKIESIZE)) { - __u32 mdsize, *max_cookiesize; + __u32 *max_cookiesize; if (*vallen != sizeof(int)) RETURN(-EINVAL); - mdsize = *(int *)val; - if (mdsize > exp->exp_obd->u.cli.cl_max_mds_cookiesize) - exp->exp_obd->u.cli.cl_max_mds_cookiesize = mdsize; max_cookiesize = val; *max_cookiesize = exp->exp_obd->u.cli.cl_max_mds_cookiesize; RETURN(0); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 79535c2d..03c9cc9 
100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2037,6 +2037,35 @@ test_27D() { } run_test 27D "validate llapi_layout API" +# Verify that default_easize is increased from its initial value after +# accessing a widely striped file. +test_27E() { + [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return + + # 72 bytes is the minimum space required to store striping + # information for a file striped across one OST: + # (sizeof(struct lov_user_md_v3) + + # sizeof(struct lov_user_ost_data_v1)) + local min_easize=72 + $LCTL set_param -n llite.*.default_easize $min_easize || + error "lctl set_param failed" + local easize=$($LCTL get_param -n llite.*.default_easize) + + [ $easize -eq $min_easize ] || + error "failed to set default_easize" + + $LFS setstripe -c $OSTCOUNT $DIR/$tfile || + error "setstripe failed" + cat $DIR/$tfile + rm $DIR/$tfile + + easize=$($LCTL get_param -n llite.*.default_easize) + + [ $easize -gt $min_easize ] || + error "default_easize not updated" +} +run_test 27E "check that default extended attribute size properly increases" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091