LU-10239 osc: limit chunk number of write submit

author Bobi Jam <bobijam@whamcloud.com>

Thu, 21 Dec 2017 11:46:28 +0000 (19:46 +0800)

committer Oleg Drokin <green@whamcloud.com>

Fri, 4 Jan 2019 04:44:52 +0000 (04:44 +0000)
author Bobi Jam <bobijam@whamcloud.com>
Thu, 21 Dec 2017 11:46:28 +0000 (19:46 +0800)
committer Oleg Drokin <green@whamcloud.com>
Fri, 4 Jan 2019 04:44:52 +0000 (04:44 +0000)
diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c

index 5394a75..4660212 100644 (file)
--- a/lustre/osc/osc_cache.c
+++ b/lustre/osc/osc_cache.c
@@ -1991,36 +1991,6 @@ static int try_to_add_extent_for_io(struct client_obd *cli,
         RETURN(1);
  }
  
-static inline unsigned osc_max_write_chunks(const struct client_obd *cli)
-{
-       /*
-        * LU-8135:
-        *
-        * The maximum size of a single transaction is about 64MB in ZFS.
-        * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
-        *
-        * Since ZFS is a copy-on-write file system, a single dirty page in
-        * a chunk will result in the rewrite of the whole chunk, therefore
-        * an RPC shouldn't be allowed to contain too many chunks otherwise
-        * it will make transaction size much bigger than 64MB, especially
-        * with big block size for ZFS.
-        *
-        * This piece of code is to make sure that OSC won't send write RPCs
-        * with too many chunks. The maximum chunk size that an RPC can cover
-        * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
-        * OST should tell the client what the biggest transaction size is,
-        * but it's good enough for now.
-        *
-        * This limitation doesn't apply to ldiskfs, which allows as many
-        * chunks in one RPC as we want. However, it won't have any benefits
-        * to have too many discontiguous pages in one RPC.
-        *
-        * An osc_extent won't cover over a RPC size, so the chunks in an
-        * osc_extent won't bigger than PTLRPC_MAX_BRW_SIZE >> chunkbits.
-        */
-       return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
-}
-
  /**
   * In order to prevent multiple ptlrpcd from breaking contiguous extents,
   * get_write_extent() takes all appropriate extents in atomic.
diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h

index d1bf9d2..62b5c2b 100644 (file)
--- a/lustre/osc/osc_internal.h
+++ b/lustre/osc/osc_internal.h
@@ -173,6 +173,35 @@ extern unsigned long osc_cache_shrink_count(struct shrinker *sk,
                                             struct shrink_control *sc);
  extern unsigned long osc_cache_shrink_scan(struct shrinker *sk,
                                            struct shrink_control *sc);
+static inline unsigned int osc_max_write_chunks(const struct client_obd *cli)
+{
+       /*
+        * LU-8135:
+        *
+        * The maximum size of a single transaction is about 64MB in ZFS.
+        * #define DMU_MAX_ACCESS (64 * 1024 * 1024)
+        *
+        * Since ZFS is a copy-on-write file system, a single dirty page in
+        * a chunk will result in the rewrite of the whole chunk, therefore
+        * an RPC shouldn't be allowed to contain too many chunks otherwise
+        * it will make transaction size much bigger than 64MB, especially
+        * with big block size for ZFS.
+        *
+        * This piece of code is to make sure that OSC won't send write RPCs
+        * with too many chunks. The maximum chunk size that an RPC can cover
+        * is set to PTLRPC_MAX_BRW_SIZE, which is defined to 16MB. Ideally
+        * OST should tell the client what the biggest transaction size is,
+        * but it's good enough for now.
+        *
+        * This limitation doesn't apply to ldiskfs, which allows as many
+        * chunks in one RPC as we want. However, it won't have any benefits
+        * to have too many discontiguous pages in one RPC.
+        *
+        * An osc_extent won't cover more than an RPC size, so the chunks in an
+        * osc_extent won't be more than PTLRPC_MAX_BRW_SIZE >> chunkbits.
+        */
+       return PTLRPC_MAX_BRW_SIZE >> cli->cl_chunkbits;
+}
  
  static inline void osc_set_io_portal(struct ptlrpc_request *req)
  {
diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c

index 4fc147a..d4ef2d1 100644 (file)
--- a/lustre/osc/osc_io.c
+++ b/lustre/osc/osc_io.c
@@ -121,6 +121,9 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
         int result = 0;
         int brw_flags;
         unsigned int max_pages;
+       unsigned int ppc_bits; /* pages per chunk bits */
+       unsigned int ppc;
+       bool sync_queue = false;
  
         LASSERT(qin->pl_nr > 0);
  
@@ -129,6 +132,8 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
         osc = cl2osc(ios->cis_obj);
         cli = osc_cli(osc);
         max_pages = cli->cl_max_pages_per_rpc;
+       ppc_bits = cli->cl_chunkbits - PAGE_SHIFT;
+       ppc = 1 << ppc_bits;
  
         brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;
         brw_flags |= crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
@@ -185,12 +190,30 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
                 else /* async IO */
                         cl_page_list_del(env, qin, page);
  
-               if (++queued == max_pages) {
-                       queued = 0;
+               queued++;
+               if (queued == max_pages) {
+                       sync_queue = true;
+               } else if (crt == CRT_WRITE) {
+                       unsigned int chunks;
+                       unsigned int next_chunks;
+
+                       chunks = (queued + ppc - 1) >> ppc_bits;
+                       /* number of chunks if another page is added */
+                       next_chunks = (queued + ppc) >> ppc_bits;
+
+                       /* next page will exceed write chunk limit */
+                       if (chunks == osc_max_write_chunks(cli) &&
+                           next_chunks > chunks)
+                               sync_queue = true;
+               }
+
+               if (sync_queue) {
                         result = osc_queue_sync_pages(env, io, osc, &list,
                                                       brw_flags);
                         if (result < 0)
                                 break;
+                       queued = 0;
+                       sync_queue = false;
                 }
         }
author	Bobi Jam <bobijam@whamcloud.com>
	Thu, 21 Dec 2017 11:46:28 +0000 (19:46 +0800)
committer	Oleg Drokin <green@whamcloud.com>
	Fri, 4 Jan 2019 04:44:52 +0000 (04:44 +0000)
lustre/osc/osc_cache.c		patch \| blob \| history
lustre/osc/osc_internal.h		patch \| blob \| history
lustre/osc/osc_io.c		patch \| blob \| history