Whamcloud - gitweb
LU-4933 osc: Automatically increase the max_dirty_mb 37/10937/2
author Li Xi <lixi@ddn.com>
Wed, 2 Jul 2014 03:07:20 +0000 (11:07 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 2 Oct 2014 04:31:55 +0000 (04:31 +0000)
When the RPC size or the maximum number of RPCs in flight is increased,
the actual limiting factor may become max_dirty_mb. This patch
automatically increases the max_dirty_mb value at connection time and
whenever the related values are tuned manually through the proc file system.

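This patch also changes the unit of "cl_dirty" and "cl_dirty_max"
in client_obd from bytes to pages, renaming them to "cl_dirty_pages"
and "cl_dirty_max_pages"; "cl_dirty_transit" is now counted in pages too.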

Lustre-change: http://review.whamcloud.com/10446/
Lustre-commit: bdc5bb52c55470cf8020933f80e327c397810603

Signed-off-by: Li Xi <lixi@ddn.com>
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Change-Id: I8480122b7370247b17de81d731c2f2b5f67892ce
Reviewed-on: http://review.whamcloud.com/10937
Tested-by: Jenkins
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/osc/lproc_osc.c
lustre/osc/osc_cache.c
lustre/osc/osc_request.c
lustre/ptlrpc/import.c

index d75ca84..428bc25 100644 (file)
@@ -329,12 +329,12 @@ struct client_obd {
         enum lustre_sec_part     cl_sp_to;
         struct sptlrpc_flavor    cl_flvr_mgc;   /* fixed flavor of mgc->mgs */
 
-        /* the grant values are protected by loi_list_lock below */
-        long                     cl_dirty;         /* all _dirty_ in bytes */
-        long                     cl_dirty_max;     /* allowed w/o rpc */
-        long                     cl_dirty_transit; /* dirty synchronous */
-        long                     cl_avail_grant;   /* bytes of credit for ost */
-        long                     cl_lost_grant;    /* lost credits (trunc) */
+       /* the grant values are protected by loi_list_lock below */
+       long                     cl_dirty_pages;      /* all _dirty_ in pages */
+       long                     cl_dirty_max_pages;  /* allowed w/o rpc */
+       long                     cl_dirty_transit;    /* dirty synchronous */
+       long                     cl_avail_grant;   /* bytes of credit for ost */
+       long                     cl_lost_grant;    /* lost credits (trunc) */
 
        /* since we allocate grant by blocks, we don't know how many grant will
         * be used to add a page into cache. As a solution, we reserve maximum
@@ -1611,4 +1611,27 @@ static inline int cli_brw_size(struct obd_device *obd)
        return obd->u.cli.cl_max_pages_per_rpc << PAGE_CACHE_SHIFT;
 }
 
+/* Size the per-client dirty page limit. An unset (<= 0) cl_dirty_max_pages
+ * becomes OSC_MAX_DIRTY_DEFAULT MB in pages; otherwise it is raised (never
+ * lowered) to cl_max_rpcs_in_flight * cl_max_pages_per_rpc so full-sized RPCs
+ * are not fragmented for lack of dirty space. Capped at totalram_pages / 8.
+ */
+static inline void client_adjust_max_dirty(struct client_obd *cli)
+{
+        /* limit not set yet: start from the default */
+       if (cli->cl_dirty_max_pages <= 0)
+               cli->cl_dirty_max_pages = (OSC_MAX_DIRTY_DEFAULT * 1024 * 1024)
+                                                       >> PAGE_CACHE_SHIFT;
+       else {
+               long dirty_max = cli->cl_max_rpcs_in_flight *
+                                               cli->cl_max_pages_per_rpc;
+
+               if (dirty_max > cli->cl_dirty_max_pages)
+                       cli->cl_dirty_max_pages = dirty_max;
+       }
+
+       if (cli->cl_dirty_max_pages > totalram_pages / 8)
+               cli->cl_dirty_max_pages = totalram_pages / 8;
+}
+
 #endif /* __OBD_H */
index c66df04..3d27f17 100644 (file)
@@ -339,12 +339,12 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
                      sizeof(server_uuid)));
 
-        cli->cl_dirty = 0;
-        cli->cl_avail_grant = 0;
-       /* FIXME: Should limit this for the sum of all cl_dirty_max. */
-       cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
-       if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
-               cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
+       cli->cl_dirty_pages = 0;
+       cli->cl_avail_grant = 0;
+       /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
+       /* cl_dirty_max_pages may be changed at connect time in
+        * ptlrpc_connect_interpret(). */
+       client_adjust_max_dirty(cli);
         CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters);
         CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list);
         CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
index b43dd5b..e05d51c 100644 (file)
@@ -109,12 +109,13 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer,
         if (pool && val > cli->cl_max_rpcs_in_flight)
                 pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
 
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        cli->cl_max_rpcs_in_flight = val;
-        client_obd_list_unlock(&cli->cl_loi_list_lock);
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       cli->cl_max_rpcs_in_flight = val;
+       client_adjust_max_dirty(cli);
+       client_obd_list_unlock(&cli->cl_loi_list_lock);
 
-        LPROCFS_CLIMP_EXIT(dev);
-        return count;
+       LPROCFS_CLIMP_EXIT(dev);
+       return count;
 }
 
 static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count,
@@ -125,11 +126,11 @@ static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count,
         long val;
         int mult;
 
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        val = cli->cl_dirty_max;
-        client_obd_list_unlock(&cli->cl_loi_list_lock);
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       val = cli->cl_dirty_max_pages;
+       client_obd_list_unlock(&cli->cl_loi_list_lock);
 
-        mult = 1 << 20;
+       mult = 1 << (20 - PAGE_CACHE_SHIFT);
         return lprocfs_read_frac_helper(page, count, val, mult);
 }
 
@@ -151,7 +152,7 @@ static int osc_wr_max_dirty_mb(struct file *file, const char *buffer,
                return -ERANGE;
 
        client_obd_list_lock(&cli->cl_loi_list_lock);
-       cli->cl_dirty_max = (obd_count)(pages_number << PAGE_CACHE_SHIFT);
+       cli->cl_dirty_max_pages = pages_number;
        osc_wake_cache_waiters(cli);
        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
@@ -208,7 +209,8 @@ static int osc_rd_cur_dirty_bytes(char *page, char **start, off_t off,
         int rc;
 
         client_obd_list_lock(&cli->cl_loi_list_lock);
-        rc = snprintf(page, count, "%lu\n", cli->cl_dirty);
+       rc = snprintf(page, count, "%lu\n",
+                     cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
         client_obd_list_unlock(&cli->cl_loi_list_lock);
         return rc;
 }
@@ -490,6 +492,7 @@ static int lprocfs_osc_wr_max_pages_per_rpc(struct file *file,
        }
        client_obd_list_lock(&cli->cl_loi_list_lock);
        cli->cl_max_pages_per_rpc = val;
+       client_adjust_max_dirty(cli);
        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
        LPROCFS_CLIMP_EXIT(dev);
index c5ee26b..c8d8567 100644 (file)
@@ -1320,7 +1320,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap,
               "dropped: %ld avail: %ld, reserved: %ld, flight: %d } "        \
               "lru {in list: %d, left: %d, waiters: %d }" fmt,               \
               __tmp->cl_import->imp_obd->obd_name,                           \
-              __tmp->cl_dirty, __tmp->cl_dirty_max,                          \
+              __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages,              \
               cfs_atomic_read(&obd_dirty_pages), obd_max_dirty_pages,        \
               __tmp->cl_lost_grant, __tmp->cl_avail_grant,                   \
               __tmp->cl_reserved_grant, __tmp->cl_w_in_flight,               \
@@ -1336,7 +1336,7 @@ static void osc_consume_write_grant(struct client_obd *cli,
        LASSERT(spin_is_locked(&cli->cl_loi_list_lock.lock));
        LASSERT(!(pga->flag & OBD_BRW_FROM_GRANT));
        cfs_atomic_inc(&obd_dirty_pages);
-       cli->cl_dirty += PAGE_CACHE_SIZE;
+       cli->cl_dirty_pages++;
        pga->flag |= OBD_BRW_FROM_GRANT;
        CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n",
               PAGE_CACHE_SIZE, pga, pga->pg);
@@ -1358,11 +1358,11 @@ static void osc_release_write_grant(struct client_obd *cli,
 
        pga->flag &= ~OBD_BRW_FROM_GRANT;
        cfs_atomic_dec(&obd_dirty_pages);
-       cli->cl_dirty -= PAGE_CACHE_SIZE;
+       cli->cl_dirty_pages--;
        if (pga->flag & OBD_BRW_NOCACHE) {
                pga->flag &= ~OBD_BRW_NOCACHE;
                cfs_atomic_dec(&obd_dirty_transit_pages);
-               cli->cl_dirty_transit -= PAGE_CACHE_SIZE;
+               cli->cl_dirty_transit--;
        }
        EXIT;
 }
@@ -1431,7 +1431,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
 
        client_obd_list_lock(&cli->cl_loi_list_lock);
        cfs_atomic_sub(nr_pages, &obd_dirty_pages);
-       cli->cl_dirty -= nr_pages << PAGE_CACHE_SHIFT;
+       cli->cl_dirty_pages -= nr_pages;
        cli->cl_lost_grant += lost_grant;
        if (cli->cl_avail_grant < grant && cli->cl_lost_grant >= grant) {
                /* borrow some grant from truncate to avoid the case that
@@ -1443,7 +1443,7 @@ static void osc_free_grant(struct client_obd *cli, unsigned int nr_pages,
        client_obd_list_unlock(&cli->cl_loi_list_lock);
        CDEBUG(D_CACHE, "lost %u grant: %lu avail: %lu dirty: %lu\n",
               lost_grant, cli->cl_lost_grant,
-              cli->cl_avail_grant, cli->cl_dirty);
+              cli->cl_avail_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
 }
 
 /**
@@ -1473,11 +1473,11 @@ static int osc_enter_cache_try(struct client_obd *cli,
        if (rc < 0)
                return 0;
 
-       if (cli->cl_dirty + PAGE_CACHE_SIZE <= cli->cl_dirty_max &&
+       if (cli->cl_dirty_pages < cli->cl_dirty_max_pages &&
            cfs_atomic_read(&obd_dirty_pages) + 1 <= obd_max_dirty_pages) {
                osc_consume_write_grant(cli, &oap->oap_brw_page);
                if (transient) {
-                       cli->cl_dirty_transit += PAGE_CACHE_SIZE;
+                       cli->cl_dirty_transit++;
                        cfs_atomic_inc(&obd_dirty_transit_pages);
                        oap->oap_brw_flags |= OBD_BRW_NOCACHE;
                }
@@ -1523,7 +1523,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
        /* force the caller to try sync io.  this can jump the list
         * of queued writes and create a discontiguous rpc stream */
        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) ||
-           cli->cl_dirty_max < PAGE_CACHE_SIZE     ||
+           cli->cl_dirty_max_pages == 0 ||
            cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync)
                GOTO(out, rc = -EDQUOT);
 
@@ -1540,7 +1540,7 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
        init_waitqueue_head(&ocw.ocw_waitq);
        ocw.ocw_oap   = oap;
        ocw.ocw_grant = bytes;
-       while (cli->cl_dirty > 0 || cli->cl_w_in_flight > 0) {
+       while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) {
                cfs_list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
                ocw.ocw_rc = 0;
                client_obd_list_unlock(&cli->cl_loi_list_lock);
@@ -1606,12 +1606,12 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
                ocw->ocw_rc = -EDQUOT;
                /* we can't dirty more */
-               if ((cli->cl_dirty + PAGE_CACHE_SIZE > cli->cl_dirty_max) ||
+               if ((cli->cl_dirty_pages >= cli->cl_dirty_max_pages) ||
                    (cfs_atomic_read(&obd_dirty_pages) + 1 >
                     obd_max_dirty_pages)) {
                        CDEBUG(D_CACHE, "no dirty room: dirty: %ld "
-                              "osc max %ld, sys max %d\n", cli->cl_dirty,
-                              cli->cl_dirty_max, obd_max_dirty_pages);
+                              "osc max %ld, sys max %d\n", cli->cl_dirty_pages,
+                              cli->cl_dirty_max_pages, obd_max_dirty_pages);
                        goto wakeup;
                }
 
index 3082139..cc42688 100644 (file)
@@ -830,13 +830,14 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 
         LASSERT(!(oa->o_valid & bits));
 
-        oa->o_valid |= bits;
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        oa->o_dirty = cli->cl_dirty;
-       if (unlikely(cli->cl_dirty - cli->cl_dirty_transit >
-                    cli->cl_dirty_max)) {
+       oa->o_valid |= bits;
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       oa->o_dirty = cli->cl_dirty_pages << PAGE_CACHE_SHIFT;
+       if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit >
+                    cli->cl_dirty_max_pages)) {
                CERROR("dirty %lu - %lu > dirty_max %lu\n",
-                      cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
+                      cli->cl_dirty_pages, cli->cl_dirty_transit,
+                      cli->cl_dirty_max_pages);
                oa->o_undirty = 0;
        } else if (unlikely(cfs_atomic_read(&obd_dirty_pages) -
                            cfs_atomic_read(&obd_dirty_transit_pages) >
@@ -849,15 +850,17 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
                       cfs_atomic_read(&obd_dirty_transit_pages),
                       obd_max_dirty_pages);
                oa->o_undirty = 0;
-       } else if (unlikely(cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff)) {
+       } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages >
+                           0x7fffffff)) {
                CERROR("dirty %lu - dirty_max %lu too big???\n",
-                      cli->cl_dirty, cli->cl_dirty_max);
+                      cli->cl_dirty_pages, cli->cl_dirty_max_pages);
                oa->o_undirty = 0;
        } else {
                long max_in_flight = (cli->cl_max_pages_per_rpc <<
                                      PAGE_CACHE_SHIFT) *
                                     (cli->cl_max_rpcs_in_flight + 1);
-                oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
+               oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_CACHE_SHIFT,
+                                   max_in_flight);
         }
        oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant;
         oa->o_dropped = cli->cl_lost_grant;
@@ -1055,24 +1058,26 @@ static int osc_del_shrink_grant(struct client_obd *client)
 
 static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 {
-        /*
-         * ocd_grant is the total grant amount we're expect to hold: if we've
-         * been evicted, it's the new avail_grant amount, cl_dirty will drop
-         * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty.
-         *
-         * race is tolerable here: if we're evicted, but imp_state already
-         * left EVICTED state, then cl_dirty must be 0 already.
-         */
-        client_obd_list_lock(&cli->cl_loi_list_lock);
-        if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
-                cli->cl_avail_grant = ocd->ocd_grant;
-        else
-                cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty;
+       /*
+        * ocd_grant is the total grant amount we're expect to hold: if we've
+        * been evicted, it's the new avail_grant amount, cl_dirty_pages will
+        * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant +
+        * dirty.
+        *
+        * race is tolerable here: if we're evicted, but imp_state already
+        * left EVICTED state, then cl_dirty_pages must be 0 already.
+        */
+       client_obd_list_lock(&cli->cl_loi_list_lock);
+       if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED)
+               cli->cl_avail_grant = ocd->ocd_grant;
+       else
+               cli->cl_avail_grant = ocd->ocd_grant -
+                                     (cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
 
         if (cli->cl_avail_grant < 0) {
                CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
                      cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
-                     ocd->ocd_grant, cli->cl_dirty);
+                     ocd->ocd_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
                /* workaround for servers which do not have the patch from
                 * LU-2679 */
                cli->cl_avail_grant = ocd->ocd_grant;
@@ -3311,9 +3316,10 @@ static int osc_reconnect(const struct lu_env *env,
         if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
                 long lost_grant;
 
-                client_obd_list_lock(&cli->cl_loi_list_lock);
-                data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?:
-                               2 * cli_brw_size(obd);
+               client_obd_list_lock(&cli->cl_loi_list_lock);
+               data->ocd_grant = (cli->cl_avail_grant +
+                                 (cli->cl_dirty_pages << PAGE_CACHE_SHIFT)) ?:
+                                 2 * cli_brw_size(obd);
                 lost_grant = cli->cl_lost_grant;
                 cli->cl_lost_grant = 0;
                 client_obd_list_unlock(&cli->cl_loi_list_lock);
index 2d20ec3..e543d16 100644 (file)
@@ -1147,9 +1147,10 @@ finish:
                 else
                         imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
 
-                LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
-                        (cli->cl_max_pages_per_rpc > 0));
-        }
+               LASSERT((cli->cl_max_pages_per_rpc <= PTLRPC_MAX_BRW_PAGES) &&
+                       (cli->cl_max_pages_per_rpc > 0));
+               client_adjust_max_dirty(cli);
+       }
 
 out:
        imp->imp_connect_tried = 1;