From 93ef6e7863b4e3aadfb3171f7f4958c5040446ec Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 21 Dec 2017 19:46:28 +0800 Subject: [PATCH] LU-10239 osc: limit chunk number of write submit Don't queue too many pages in an extent for a write RPC; we need to take care of the chunk limit in write submit as well (refer to LU-8135 for more details). Change-Id: Idaca6f365a6f8bb467fea43c811f775b37f49818 Signed-off-by: Bobi Jam Reviewed-on: https://review.whamcloud.com/30627 Reviewed-by: Andreas Dilger Reviewed-by: Jinshan Xiong Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/osc/osc_cache.c | 30 ------------------------------ lustre/osc/osc_internal.h | 29 +++++++++++++++++++++++++++++ lustre/osc/osc_io.c | 27 +++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 32 deletions(-) diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 5394a75..4660212 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -1991,36 +1991,6 @@ static int try_to_add_extent_for_io(struct client_obd *cli, RETURN(1); } -static inline unsigned osc_max_write_chunks(const struct client_obd *cli) -{ - /* - * LU-8135: - * - * The maximum size of a single transaction is about 64MB in ZFS. - * #define DMU_MAX_ACCESS (64 * 1024 * 1024) - * - * Since ZFS is a copy-on-write file system, a single dirty page in - * a chunk will result in the rewrite of the whole chunk, therefore - * an RPC shouldn't be allowed to contain too many chunks otherwise - * it will make transaction size much bigger than 64MB, especially - * with big block size for ZFS. - * - * This piece of code is to make sure that OSC won't send write RPCs - * with too many chunks. The maximum chunk size that an RPC can cover - * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally - * OST should tell the client what the biggest transaction size is, - * but it's good enough for now. 
- * - * This limitation doesn't apply to ldiskfs, which allows as many - * chunks in one RPC as we want. However, it won't have any benefits - * to have too many discontiguous pages in one RPC. - * - * An osc_extent won't cover over a RPC size, so the chunks in an - * osc_extent won't bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits. - */ - return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits; -} - /** * In order to prevent multiple ptlrpcd from breaking contiguous extents, * get_write_extent() takes all appropriate extents in atomic. diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index d1bf9d2..62b5c2b 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -173,6 +173,35 @@ extern unsigned long osc_cache_shrink_count(struct shrinker *sk, struct shrink_control *sc); extern unsigned long osc_cache_shrink_scan(struct shrinker *sk, struct shrink_control *sc); +static inline unsigned int osc_max_write_chunks(const struct client_obd *cli) +{ + /* + * LU-8135: + * + * The maximum size of a single transaction is about 64MB in ZFS. + * #define DMU_MAX_ACCESS (64 * 1024 * 1024) + * + * Since ZFS is a copy-on-write file system, a single dirty page in + * a chunk will result in the rewrite of the whole chunk, therefore + * an RPC shouldn't be allowed to contain too many chunks otherwise + * it will make transaction size much bigger than 64MB, especially + * with big block size for ZFS. + * + * This piece of code is to make sure that OSC won't send write RPCs + * with too many chunks. The maximum chunk size that an RPC can cover + * is set to PTLRPC_MAX_BRW_SIZE, which is defined as 16MB. Ideally + * OST should tell the client what the biggest transaction size is, + * but it's good enough for now. + * + * This limitation doesn't apply to ldiskfs, which allows as many + * chunks in one RPC as we want. However, it won't have any benefits + * to have too many discontiguous pages in one RPC. 
+ * + * An osc_extent won't cover over an RPC size, so the chunks in an + * osc_extent won't be bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits. + */ + return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits; +} static inline void osc_set_io_portal(struct ptlrpc_request *req) { diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 4fc147a..d4ef2d1 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -121,6 +121,9 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, int result = 0; int brw_flags; unsigned int max_pages; + unsigned int ppc_bits; /* pages per chunk bits */ + unsigned int ppc; + bool sync_queue = false; LASSERT(qin->pl_nr > 0); @@ -129,6 +132,8 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, osc = cl2osc(ios->cis_obj); cli = osc_cli(osc); max_pages = cli->cl_max_pages_per_rpc; + ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; + ppc = 1 << ppc_bits; brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0; brw_flags |= crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ; @@ -185,12 +190,30 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, else /* async IO */ cl_page_list_del(env, qin, page); - if (++queued == max_pages) { - queued = 0; + queued++; + if (queued == max_pages) { + sync_queue = true; + } else if (crt == CRT_WRITE) { + unsigned int chunks; + unsigned int next_chunks; + + chunks = (queued + ppc - 1) >> ppc_bits; + /* number of chunks if another page is added */ + next_chunks = (queued + ppc) >> ppc_bits; + + /* next page will exceed write chunk limit */ + if (chunks == osc_max_write_chunks(cli) && + next_chunks > chunks) + sync_queue = true; + } + + if (sync_queue) { result = osc_queue_sync_pages(env, io, osc, &list, brw_flags); if (result < 0) break; + queued = 0; + sync_queue = false; } } -- 1.8.3.1