From e38f1189a41d0ec6d3acce5266c4c1544ae9267c Mon Sep 17 00:00:00 2001 From: Li Xi Date: Wed, 2 Jul 2014 11:07:20 +0800 Subject: [PATCH] LU-4933 osc: Automatically increase the max_dirty_mb When the RPC size or the max RPCs in flight is increased, the effective limit may instead be max_dirty_mb. This patch automatically increases the max_dirty_mb value at connection time and whenever the related values are tuned manually through the proc file system. This patch also changes the unit of "cl_dirty" and "cl_dirty_max" in client_obd from bytes to pages, renaming them to "cl_dirty_pages" and "cl_dirty_max_pages". Lustre-change: http://review.whamcloud.com/10446/ Lustre-commit: bdc5bb52c55470cf8020933f80e327c397810603 Signed-off-by: Li Xi Signed-off-by: Hongchao Zhang Change-Id: I8480122b7370247b17de81d731c2f2b5f67892ce Reviewed-on: http://review.whamcloud.com/10937 Tested-by: Jenkins Reviewed-by: Jinshan Xiong Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: James Simmons --- lustre/include/obd.h | 35 ++++++++++++++++++++++++----- lustre/ldlm/ldlm_lib.c | 12 +++++----- lustre/osc/lproc_osc.c | 25 ++++++++++++--------- lustre/osc/osc_cache.c | 26 +++++++++++----------- lustre/osc/osc_request.c | 58 ++++++++++++++++++++++++++---------------------- lustre/ptlrpc/import.c | 7 +++--- 6 files changed, 98 insertions(+), 65 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index d75ca84..428bc25 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -329,12 +329,12 @@ struct client_obd { enum lustre_sec_part cl_sp_to; struct sptlrpc_flavor cl_flvr_mgc; /* fixed flavor of mgc->mgs */ - /* the grant values are protected by loi_list_lock below */ - long cl_dirty; /* all _dirty_ in bytes */ - long cl_dirty_max; /* allowed w/o rpc */ - long cl_dirty_transit; /* dirty synchronous */ - long cl_avail_grant; /* bytes of credit for ost */ - long cl_lost_grant; /* lost credits (trunc) */ + /* the grant values are protected by loi_list_lock below */ + long cl_dirty_pages; /* all _dirty_ in pages */ + long cl_dirty_max_pages; /* allowed 
w/o rpc */ + long cl_dirty_transit; /* dirty synchronous */ + long cl_avail_grant; /* bytes of credit for ost */ + long cl_lost_grant; /* lost credits (trunc) */ /* since we allocate grant by blocks, we don't know how many grant will * be used to add a page into cache. As a solution, we reserve maximum @@ -1611,4 +1611,27 @@ static inline int cli_brw_size(struct obd_device *obd) return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; } +/* when RPC size or the max RPCs in flight is increased, the max dirty pages + * of the client should be increased accordingly to avoid sending fragmented + * RPCs over the network when the client runs out of the maximum dirty space + * when so many RPCs are being generated. + */ +static inline void client_adjust_max_dirty(struct client_obd *cli) +{ + /* initializing */ + if (cli->cl_dirty_max_pages <= 0) + cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024) + >> PAGE_CACHE_SHIFT; + else { + long dirty_max = cli->cl_max_rpcs_in_flight * + cli->cl_max_pages_per_rpc; + + if (dirty_max > cli->cl_dirty_max_pages) + cli->cl_dirty_max_pages = dirty_max; + } + + if (cli->cl_dirty_max_pages > totalram_pages / 8) + cli->cl_dirty_max_pages = totalram_pages / 8; +} + #endif /* __OBD_H */ diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index c66df04..3d27f17 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -339,12 +339,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), sizeof(server_uuid))); - cli->cl_dirty = 0; - cli->cl_avail_grant = 0; - /* FIXME: Should limit this for the sum of all cl_dirty_max. */ - cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; - if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8) - cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3); + cli->cl_dirty_pages = 0; + cli->cl_avail_grant = 0; + /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. 
*/ + /* cl_dirty_max_pages may be changed at connect time in + * ptlrpc_connect_interpret(). */ + client_adjust_max_dirty(cli); CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters); CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list); CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index b43dd5b..e05d51c 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -109,12 +109,13 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, if (pool && val > cli->cl_max_rpcs_in_flight) pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight); - client_obd_list_lock(&cli->cl_loi_list_lock); - cli->cl_max_rpcs_in_flight = val; - client_obd_list_unlock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_max_rpcs_in_flight = val; + client_adjust_max_dirty(cli); + client_obd_list_unlock(&cli->cl_loi_list_lock); - LPROCFS_CLIMP_EXIT(dev); - return count; + LPROCFS_CLIMP_EXIT(dev); + return count; } static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count, @@ -125,11 +126,11 @@ static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count, long val; int mult; - client_obd_list_lock(&cli->cl_loi_list_lock); - val = cli->cl_dirty_max; - client_obd_list_unlock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); + val = cli->cl_dirty_max_pages; + client_obd_list_unlock(&cli->cl_loi_list_lock); - mult = 1 << 20; + mult = 1 << (20 - PAGE_CACHE_SHIFT); return lprocfs_read_frac_helper(page, count, val, mult); } @@ -151,7 +152,7 @@ static int osc_wr_max_dirty_mb(struct file *file, const char *buffer, return -ERANGE; client_obd_list_lock(&cli->cl_loi_list_lock); - cli->cl_dirty_max = (obd_count)(pages_number << PAGE_CACHE_SHIFT); + cli->cl_dirty_max_pages = pages_number; osc_wake_cache_waiters(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -208,7 +209,8 @@ static int osc_rd_cur_dirty_bytes(char *page, char **start, 
off_t off, int rc; client_obd_list_lock(&cli->cl_loi_list_lock); - rc = snprintf(page, count, "%lu\n", cli->cl_dirty); + rc = snprintf(page, count, "%lu\n", + cli->cl_dirty_pages << PAGE_CACHE_SHIFT); client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } @@ -490,6 +492,7 @@ static int lprocfs_osc_wr_max_pages_per_rpc(struct file *file, } client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_max_pages_per_rpc = val; + client_adjust_max_dirty(cli); client_obd_list_unlock(&cli->cl_loi_list_lock); LPROCFS_CLIMP_EXIT(dev); diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index c5ee26b..c8d8567 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -1320,7 +1320,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, "dropped: %ld avail: %ld, reserved: %ld, flight: %d } " \ "lru {in list: %d, left: %d, waiters: %d }" fmt, \ __tmp->cl_import->imp_obd->obd_name, \ - __tmp->cl_dirty, __tmp->cl_dirty_max, \ + __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \ cfs_atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \ __tmp->cl_lost_grant, __tmp->cl_avail_grant, \ __tmp->cl_reserved_grant, __tmp->cl_w_in_flight, \ @@ -1336,7 +1336,7 @@ static void osc_consume_write_grant(struct client_obd *cli, LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock)); LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT)); cfs_atomic_inc(&obd_dirty_pages); - cli->cl_dirty += PAGE_CACHE_SIZE; + cli->cl_dirty_pages++; pga->flag |= OBD_BRW_FROM_GRANT; CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n", PAGE_CACHE_SIZE, pga, pga->pg); @@ -1358,11 +1358,11 @@ static void osc_release_write_grant(struct client_obd *cli, pga->flag &= ~OBD_BRW_FROM_GRANT; cfs_atomic_dec(&obd_dirty_pages); - cli->cl_dirty -= PAGE_CACHE_SIZE; + cli->cl_dirty_pages--; if (pga->flag & OBD_BRW_NOCACHE) { pga->flag &= ~OBD_BRW_NOCACHE; cfs_atomic_dec(&obd_dirty_transit_pages); - cli->cl_dirty_transit -= PAGE_CACHE_SIZE; + cli->cl_dirty_transit--; } EXIT; } @@ -1431,7 
+1431,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, client_obd_list_lock(&cli->cl_loi_list_lock); cfs_atomic_sub(nr_pages, &obd_dirty_pages); - cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT; + cli->cl_dirty_pages -= nr_pages; cli->cl_lost_grant += lost_grant; if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) { /* borrow some grant from truncate to avoid the case that @@ -1443,7 +1443,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages, client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n", lost_grant, cli->cl_lost_grant, - cli->cl_avail_grant, cli->cl_dirty); + cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT); } /** @@ -1473,11 +1473,11 @@ static int osc_enter_cache_try(struct client_obd *cli, if (rc < 0) return 0; - if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max && + if (cli->cl_dirty_pages < cli->cl_dirty_max_pages && cfs_atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) { osc_consume_write_grant(cli, &oap->oap_brw_page); if (transient) { - cli->cl_dirty_transit += PAGE_CACHE_SIZE; + cli->cl_dirty_transit++; cfs_atomic_inc(&obd_dirty_transit_pages); oap->oap_brw_flags |= OBD_BRW_NOCACHE; } @@ -1523,7 +1523,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, /* force the caller to try sync io. 
this can jump the list * of queued writes and create a discontiguous rpc stream */ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) || - cli->cl_dirty_max < PAGE_CACHE_SIZE || + cli->cl_dirty_max_pages == 0 || cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) GOTO(out, rc = -EDQUOT); @@ -1540,7 +1540,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, init_waitqueue_head(&ocw.ocw_waitq); ocw.ocw_oap = oap; ocw.ocw_grant = bytes; - while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) { + while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) { cfs_list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); ocw.ocw_rc = 0; client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -1606,12 +1606,12 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw->ocw_rc = -EDQUOT; /* we can't dirty more */ - if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) || + if ((cli->cl_dirty_pages >= cli->cl_dirty_max_pages) || (cfs_atomic_read(&obd_dirty_pages) + 1 > obd_max_dirty_pages)) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld " - "osc max %ld, sys max %d\n", cli->cl_dirty, - cli->cl_dirty_max, obd_max_dirty_pages); + "osc max %ld, sys max %d\n", cli->cl_dirty_pages, + cli->cl_dirty_max_pages, obd_max_dirty_pages); goto wakeup; } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 30821395..cc42688 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -830,13 +830,14 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, LASSERT(!(oa->o_valid & bits)); - oa->o_valid |= bits; - client_obd_list_lock(&cli->cl_loi_list_lock); - oa->o_dirty = cli->cl_dirty; - if (unlikely(cli->cl_dirty - cli->cl_dirty_transit > - cli->cl_dirty_max)) { + oa->o_valid |= bits; + client_obd_list_lock(&cli->cl_loi_list_lock); + oa->o_dirty = cli->cl_dirty_pages << PAGE_CACHE_SHIFT; + if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit > + cli->cl_dirty_max_pages)) { CERROR("dirty %lu - %lu > dirty_max %lu\n", - 
cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max); + cli->cl_dirty_pages, cli->cl_dirty_transit, + cli->cl_dirty_max_pages); oa->o_undirty = 0; } else if (unlikely(cfs_atomic_read(&obd_dirty_pages) - cfs_atomic_read(&obd_dirty_transit_pages) > @@ -849,15 +850,17 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, cfs_atomic_read(&obd_dirty_transit_pages), obd_max_dirty_pages); oa->o_undirty = 0; - } else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) { + } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages > + 0x7fffffff)) { CERROR("dirty %lu - dirty_max %lu too big???\n", - cli->cl_dirty, cli->cl_dirty_max); + cli->cl_dirty_pages, cli->cl_dirty_max_pages); oa->o_undirty = 0; } else { long max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT) * (cli->cl_max_rpcs_in_flight + 1); - oa->o_undirty = max(cli->cl_dirty_max, max_in_flight); + oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_CACHE_SHIFT, + max_in_flight); } oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant; oa->o_dropped = cli->cl_lost_grant; @@ -1055,24 +1058,26 @@ static int osc_del_shrink_grant(struct client_obd *client) static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) { - /* - * ocd_grant is the total grant amount we're expect to hold: if we've - * been evicted, it's the new avail_grant amount, cl_dirty will drop - * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty. - * - * race is tolerable here: if we're evicted, but imp_state already - * left EVICTED state, then cl_dirty must be 0 already. 
- */ - client_obd_list_lock(&cli->cl_loi_list_lock); - if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) - cli->cl_avail_grant = ocd->ocd_grant; - else - cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty; + /* + * ocd_grant is the total grant amount we're expect to hold: if we've + * been evicted, it's the new avail_grant amount, cl_dirty_pages will + * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant + + * dirty. + * + * race is tolerable here: if we're evicted, but imp_state already + * left EVICTED state, then cl_dirty_pages must be 0 already. + */ + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) + cli->cl_avail_grant = ocd->ocd_grant; + else + cli->cl_avail_grant = ocd->ocd_grant - + (cli->cl_dirty_pages << PAGE_CACHE_SHIFT); if (cli->cl_avail_grant < 0) { CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n", cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant, - ocd->ocd_grant, cli->cl_dirty); + ocd->ocd_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT); /* workaround for servers which do not have the patch from * LU-2679 */ cli->cl_avail_grant = ocd->ocd_grant; @@ -3311,9 +3316,10 @@ static int osc_reconnect(const struct lu_env *env, if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { long lost_grant; - client_obd_list_lock(&cli->cl_loi_list_lock); - data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?: - 2 * cli_brw_size(obd); + client_obd_list_lock(&cli->cl_loi_list_lock); + data->ocd_grant = (cli->cl_avail_grant + + (cli->cl_dirty_pages << PAGE_CACHE_SHIFT)) ?: + 2 * cli_brw_size(obd); lost_grant = cli->cl_lost_grant; cli->cl_lost_grant = 0; client_obd_list_unlock(&cli->cl_loi_list_lock); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 2d20ec3..e543d16 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1147,9 +1147,10 @@ finish: else imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18; - 
LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) && - (cli->cl_max_pages_per_rpc > 0)); - } + LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) && + (cli->cl_max_pages_per_rpc > 0)); + client_adjust_max_dirty(cli); + } out: imp->imp_connect_tried = 1; -- 1.8.3.1