Whamcloud - gitweb
LU-9679 osc: convert cl_cache_waiters to a wait_queue. 05/37605/8
author NeilBrown <neilb@suse.com>
Fri, 14 Dec 2018 03:48:45 +0000 (14:48 +1100)
committer Oleg Drokin <green@whamcloud.com>
Tue, 24 Mar 2020 05:15:41 +0000 (05:15 +0000)
cli->cl_cache_waiters is a list of tasks that need
to be woken when grant-space becomes available.  This
means it is acting much like a wait queue.
So let's change it to really be a wait queue.

The current implementation adds new waiters to the end of the list,
and calls osc_enter_cache_try() on each in order.  We can provide the
same behaviour by using an exclusive wait, and having each waiter wake
the next task when it succeeds.

If a waiter notices that success has become impossible, it wakes all
other waiters.

If a waiter times out, it doesn't wake the others - it just leaves
them to time out themselves.

Note that the old code handled -EINTR from the wait function.  That is
not a possible return value when wait_event_idle* is used, so that
case is discarded.

As we need wait_event_idle_exclusive_timeout_cmd(), we should fix the
bug in that macro - the "might_sleep()" is wrong, as a spinlock might
be held at that point.

Linux-Commit: 31f45f56ecdf ("lustre: osc_cache: convert
cl_cache_waiters to a wait_queue.")

Signed-off-by: Mr NeilBrown <neilb@suse.com>
Change-Id: Ib7622ea2daea8f6e59bef95d3b6c5a80d209b81e
Reviewed-on: https://review.whamcloud.com/37605
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/include/libcfs/linux/linux-wait.h
lustre/include/lustre_osc.h
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/osc/osc_cache.c
lustre/osc/osc_internal.h
lustre/osc/osc_page.c

index 8fac3db..fd154ba 100644 (file)
@@ -281,7 +281,6 @@ do {                                                                        \
                                              cmd1, cmd2)               \
 ({                                                                     \
        long __ret = timeout;                                           \
                                              cmd1, cmd2)               \
 ({                                                                     \
        long __ret = timeout;                                           \
-       might_sleep();                                                  \
        if (!___wait_cond_timeout1(condition))                          \
                __ret = __wait_event_idle_exclusive_timeout_cmd(        \
                        wq_head, condition, timeout, cmd1, cmd2);       \
        if (!___wait_cond_timeout1(condition))                          \
                __ret = __wait_event_idle_exclusive_timeout_cmd(        \
                        wq_head, condition, timeout, cmd1, cmd2);       \
@@ -400,7 +399,6 @@ do {                                                                        \
                                              cmd1, cmd2)               \
 ({                                                                     \
        long __ret = timeout;                                           \
                                              cmd1, cmd2)               \
 ({                                                                     \
        long __ret = timeout;                                           \
-       might_sleep();                                                  \
        if (!___wait_cond_timeout1(condition))                          \
                __ret = __wait_event_idle_exclusive_timeout_cmd(        \
                        wq_head, condition, timeout, cmd1, cmd2);       \
        if (!___wait_cond_timeout1(condition))                          \
                __ret = __wait_event_idle_exclusive_timeout_cmd(        \
                        wq_head, condition, timeout, cmd1, cmd2);       \
index 4c243ed..9a0fd6a 100644 (file)
@@ -99,14 +99,6 @@ static inline struct osc_async_page *brw_page2oap(struct brw_page *pga)
        return container_of(pga, struct osc_async_page, oap_brw_page);
 }
 
        return container_of(pga, struct osc_async_page, oap_brw_page);
 }
 
-struct osc_cache_waiter {
-       struct list_head        ocw_entry;
-       wait_queue_head_t       ocw_waitq;
-       struct osc_async_page   *ocw_oap;
-       int                     ocw_grant;
-       int                     ocw_rc;
-};
-
 struct osc_device {
        struct cl_device        od_cl;
        struct obd_export       *od_exp;
 struct osc_device {
        struct cl_device        od_cl;
        struct obd_export       *od_exp;
@@ -598,7 +590,10 @@ int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
                         pgoff_t start, pgoff_t end);
 int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
                   struct osc_object *osc, int async);
                         pgoff_t start, pgoff_t end);
 int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
                   struct osc_object *osc, int async);
-void osc_wake_cache_waiters(struct client_obd *cli);
+static inline void osc_wake_cache_waiters(struct client_obd *cli)
+{
+       wake_up(&cli->cl_cache_waiters);
+}
 
 static inline int osc_io_unplug_async(const struct lu_env *env,
                                      struct client_obd *cli,
 
 static inline int osc_io_unplug_async(const struct lu_env *env,
                                      struct client_obd *cli,
index 6a5f2dc..9cbc8f7 100644 (file)
@@ -221,7 +221,7 @@ struct client_obd {
         * grant before trying to dirty a page and unreserve the rest.
         * See osc_{reserve|unreserve}_grant for details. */
        long                    cl_reserved_grant;
         * grant before trying to dirty a page and unreserve the rest.
         * See osc_{reserve|unreserve}_grant for details. */
        long                    cl_reserved_grant;
-       struct list_head        cl_cache_waiters; /* waiting for cache/grant */
+       wait_queue_head_t       cl_cache_waiters; /* waiting for cache/grant */
        time64_t                cl_next_shrink_grant;   /* seconds */
        struct list_head        cl_grant_chain;
        time64_t                cl_grant_shrink_interval; /* seconds */
        time64_t                cl_next_shrink_grant;   /* seconds */
        struct list_head        cl_grant_chain;
        time64_t                cl_grant_shrink_interval; /* seconds */
index 8a0ea40..6d14580 100644 (file)
@@ -378,7 +378,7 @@ int client_obd_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         * ptlrpc_connect_interpret().
         */
        client_adjust_max_dirty(cli);
         * ptlrpc_connect_interpret().
         */
        client_adjust_max_dirty(cli);
-       INIT_LIST_HEAD(&cli->cl_cache_waiters);
+       init_waitqueue_head(&cli->cl_cache_waiters);
        INIT_LIST_HEAD(&cli->cl_loi_ready_list);
        INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
        INIT_LIST_HEAD(&cli->cl_loi_write_list);
        INIT_LIST_HEAD(&cli->cl_loi_ready_list);
        INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
        INIT_LIST_HEAD(&cli->cl_loi_write_list);
index 56fd9e4..1f343e2 100644 (file)
@@ -1533,15 +1533,26 @@ out:
        return rc;
 }
 
        return rc;
 }
 
-static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
+/* Following two inlines exist to pass code fragments
+ * to wait_event_idle_exclusive_timeout_cmd().  Passing
+ * code fragments as macro args can look confusing, so
+ * we provide inlines to encapsulate them.
+ */
+static inline void cli_unlock_and_unplug(const struct lu_env *env,
+                                        struct client_obd *cli,
+                                        struct osc_async_page *oap)
 {
 {
-       int rc;
-       spin_lock(&cli->cl_loi_list_lock);
-       rc = list_empty(&ocw->ocw_entry);
        spin_unlock(&cli->cl_loi_list_lock);
        spin_unlock(&cli->cl_loi_list_lock);
-       return rc;
+       osc_io_unplug_async(env, cli, NULL);
+       CDEBUG(D_CACHE,
+              "%s: sleeping for cache space for %p\n",
+              cli_name(cli), oap);
 }
 
 }
 
+static inline void cli_lock_after_unplug(struct client_obd *cli)
+{
+       spin_lock(&cli->cl_loi_list_lock);
+}
 /**
  * The main entry to reserve dirty page accounting. Usually the grant reserved
  * in this function will be freed in bulk in osc_free_grant() unless it fails
 /**
  * The main entry to reserve dirty page accounting. Usually the grant reserved
  * in this function will be freed in bulk in osc_free_grant() unless it fails
@@ -1554,8 +1565,11 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
 {
        struct osc_object       *osc = oap->oap_obj;
        struct lov_oinfo        *loi = osc->oo_oinfo;
 {
        struct osc_object       *osc = oap->oap_obj;
        struct lov_oinfo        *loi = osc->oo_oinfo;
-       struct osc_cache_waiter  ocw;
        int                      rc = -EDQUOT;
        int                      rc = -EDQUOT;
+       unsigned long timeout = cfs_time_seconds(AT_OFF ? obd_timeout : at_max);
+       int remain;
+       bool entered = false;
+
        ENTRY;
 
        OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
        ENTRY;
 
        OSC_DUMP_GRANT(D_CACHE, cli, "need:%d\n", bytes);
@@ -1571,83 +1585,40 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli,
                GOTO(out, rc = -EDQUOT);
        }
 
                GOTO(out, rc = -EDQUOT);
        }
 
-       /* Hopefully normal case - cache space and write credits available */
-       if (list_empty(&cli->cl_cache_waiters) &&
-           osc_enter_cache_try(cli, oap, bytes)) {
-               OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n");
-               GOTO(out, rc = 0);
-       }
-
-       /* We can get here for two reasons: too many dirty pages in cache, or
+       /*
+        * We can wait here for two reasons: too many dirty pages in cache, or
         * run out of grants. In both cases we should write dirty pages out.
         * Adding a cache waiter will trigger urgent write-out no matter what
         * RPC size will be.
         * run out of grants. In both cases we should write dirty pages out.
         * Adding a cache waiter will trigger urgent write-out no matter what
         * RPC size will be.
-        * The exiting condition is no avail grants and no dirty pages caching,
-        * that really means there is no space on the OST. */
-       init_waitqueue_head(&ocw.ocw_waitq);
-       ocw.ocw_oap   = oap;
-       ocw.ocw_grant = bytes;
-       while (cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0) {
-               list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
-               ocw.ocw_rc = 0;
-               spin_unlock(&cli->cl_loi_list_lock);
-
-               osc_io_unplug_async(env, cli, NULL);
-
-               CDEBUG(D_CACHE, "%s: sleeping for cache space @ %p for %p\n",
-                      cli_name(cli), &ocw, oap);
-
-               rc = wait_event_idle_timeout(ocw.ocw_waitq,
-                                            ocw_granted(cli, &ocw),
-                                            cfs_time_seconds(AT_OFF ?
-                                                             obd_timeout :
-                                                             at_max));
-
-               spin_lock(&cli->cl_loi_list_lock);
-
-               if (rc <= 0) {
-                       /* wait_event_idle_timeout timed out */
-                       list_del_init(&ocw.ocw_entry);
-                       if (rc == 0)
-                               rc = -ETIMEDOUT;
-                       break;
-               }
-               LASSERT(list_empty(&ocw.ocw_entry));
-               rc = ocw.ocw_rc;
-
-               if (rc != -EDQUOT)
-                       break;
-               if (osc_enter_cache_try(cli, oap, bytes)) {
-                       rc = 0;
-                       break;
-               }
-       }
-
-       switch (rc) {
-       case 0:
-               OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space\n");
-               break;
-       case -ETIMEDOUT:
+        * The exiting condition (other than success) is no avail grants
+        * and no dirty pages caching, that really means there is no space
+        * on the OST.
+        */
+       remain = wait_event_idle_exclusive_timeout_cmd(
+               cli->cl_cache_waiters,
+               (entered = osc_enter_cache_try(cli, oap, bytes)) ||
+               (cli->cl_dirty_pages == 0 && cli->cl_w_in_flight == 0),
+               timeout,
+               cli_unlock_and_unplug(env, cli, oap),
+               cli_lock_after_unplug(cli));
+
+       if (entered) {
+               if (remain == timeout)
+                       OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache\n");
+               else
+                       OSC_DUMP_GRANT(D_CACHE, cli,
+                                      "finally got grant space\n");
+               wake_up(&cli->cl_cache_waiters);
+               rc = 0;
+       } else if (remain == 0) {
                OSC_DUMP_GRANT(D_CACHE, cli,
                               "timeout, fall back to sync i/o\n");
                osc_extent_tree_dump(D_CACHE, osc);
                /* fall back to synchronous I/O */
                OSC_DUMP_GRANT(D_CACHE, cli,
                               "timeout, fall back to sync i/o\n");
                osc_extent_tree_dump(D_CACHE, osc);
                /* fall back to synchronous I/O */
-               rc = -EDQUOT;
-               break;
-       case -EINTR:
-               /* Ensures restartability - LU-3581 */
-               OSC_DUMP_GRANT(D_CACHE, cli, "interrupted\n");
-               rc = -ERESTARTSYS;
-               break;
-       case -EDQUOT:
+       } else {
                OSC_DUMP_GRANT(D_CACHE, cli,
                               "no grant space, fall back to sync i/o\n");
                OSC_DUMP_GRANT(D_CACHE, cli,
                               "no grant space, fall back to sync i/o\n");
-               break;
-       default:
-               CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived "
-                      "due to %d, fall back to sync i/o\n",
-                      cli_name(cli), &ocw, rc);
-               break;
+               wake_up_all(&cli->cl_cache_waiters);
        }
        EXIT;
 out:
        }
        EXIT;
 out:
@@ -1655,36 +1626,6 @@ out:
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
-/* caller must hold loi_list_lock */
-void osc_wake_cache_waiters(struct client_obd *cli)
-{
-       struct list_head *l, *tmp;
-       struct osc_cache_waiter *ocw;
-
-       ENTRY;
-       list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
-               ocw = list_entry(l, struct osc_cache_waiter, ocw_entry);
-
-               ocw->ocw_rc = -EDQUOT;
-
-               if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant))
-                       ocw->ocw_rc = 0;
-
-               if (ocw->ocw_rc == 0 ||
-                   !(cli->cl_dirty_pages > 0 || cli->cl_w_in_flight > 0)) {
-                       list_del_init(&ocw->ocw_entry);
-                       CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant "
-                              "%ld, %d\n", ocw, ocw->ocw_oap,
-                              cli->cl_avail_grant, ocw->ocw_rc);
-
-                       wake_up(&ocw->ocw_waitq);
-               }
-       }
-
-       EXIT;
-}
-EXPORT_SYMBOL(osc_wake_cache_waiters);
-
 static int osc_max_rpc_in_flight(struct client_obd *cli, struct osc_object *osc)
 {
        int hprpc = !!list_empty(&osc->oo_hp_exts);
 static int osc_max_rpc_in_flight(struct client_obd *cli, struct osc_object *osc)
 {
        int hprpc = !!list_empty(&osc->oo_hp_exts);
@@ -1724,8 +1665,9 @@ static int osc_makes_rpc(struct client_obd *cli, struct osc_object *osc,
                }
                /* trigger a write rpc stream as long as there are dirtiers
                 * waiting for space.  as they're waiting, they're not going to
                }
                /* trigger a write rpc stream as long as there are dirtiers
                 * waiting for space.  as they're waiting, they're not going to
-                * create more pages to coalesce with what's waiting.. */
-               if (!list_empty(&cli->cl_cache_waiters)) {
+                * create more pages to coalesce with what's waiting..
+                */
+               if (waitqueue_active(&cli->cl_cache_waiters)) {
                        CDEBUG(D_CACHE, "cache waiters forcing RPC\n");
                        RETURN(1);
                }
                        CDEBUG(D_CACHE, "cache waiters forcing RPC\n");
                        RETURN(1);
                }
@@ -2177,8 +2119,9 @@ static struct osc_object *osc_next_obj(struct client_obd *cli)
        /* then if we have cache waiters, return all objects with queued
         * writes.  This is especially important when many small files
         * have filled up the cache and not been fired into rpcs because
        /* then if we have cache waiters, return all objects with queued
         * writes.  This is especially important when many small files
         * have filled up the cache and not been fired into rpcs because
-        * they don't pass the nr_pending/object threshhold */
-       if (!list_empty(&cli->cl_cache_waiters) &&
+        * they don't pass the nr_pending/object threshhold
+        */
+       if (waitqueue_active(&cli->cl_cache_waiters) &&
            !list_empty(&cli->cl_loi_write_list))
                RETURN(list_to_obj(&cli->cl_loi_write_list, write_item));
 
            !list_empty(&cli->cl_loi_write_list))
                RETURN(list_to_obj(&cli->cl_loi_write_list, write_item));
 
index b2bc407..5a65dcd 100644 (file)
@@ -42,7 +42,6 @@ extern atomic_t osc_pool_req_count;
 extern unsigned int osc_reqpool_maxreqcount;
 extern struct ptlrpc_request_pool *osc_rq_pool;
 
 extern unsigned int osc_reqpool_maxreqcount;
 extern struct ptlrpc_request_pool *osc_rq_pool;
 
-void osc_wake_cache_waiters(struct client_obd *cli);
 int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes);
 void osc_schedule_grant_work(void);
 void osc_update_next_shrink(struct client_obd *cli);
 int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes);
 void osc_schedule_grant_work(void);
 void osc_update_next_shrink(struct client_obd *cli);
index d0fd5e2..8f3258f 100644 (file)
@@ -150,7 +150,7 @@ static int osc_page_print(const struct lu_env *env,
                          cli->cl_r_in_flight, cli->cl_w_in_flight,
                          cli->cl_max_rpcs_in_flight,
                          cli->cl_avail_grant,
                          cli->cl_r_in_flight, cli->cl_w_in_flight,
                          cli->cl_max_rpcs_in_flight,
                          cli->cl_avail_grant,
-                         list_empty_marker(&cli->cl_cache_waiters),
+                         waitqueue_active(&cli->cl_cache_waiters) ? '+' : '-',
                          list_empty_marker(&cli->cl_loi_ready_list),
                          list_empty_marker(&cli->cl_loi_hp_ready_list),
                          list_empty_marker(&cli->cl_loi_write_list),
                          list_empty_marker(&cli->cl_loi_ready_list),
                          list_empty_marker(&cli->cl_loi_hp_ready_list),
                          list_empty_marker(&cli->cl_loi_write_list),