Whamcloud - gitweb
LU-12362 ptlrpc: use wait_woken() in ptlrpcd() 69/45069/4
authorMr NeilBrown <neilb@suse.de>
Tue, 28 Sep 2021 05:20:30 +0000 (15:20 +1000)
committerOleg Drokin <green@whamcloud.com>
Sun, 10 Oct 2021 03:30:44 +0000 (03:30 +0000)
Using wait_event() to wait for ptlrpcd_check() to succeed is
problematic.  ptlrpcd_check() is complex and can wait for other
events.  This nested waiting can behave differently to expectation and
generates a warning

   do not call blocking ops when !TASK_RUNNING

This happens because the task state is set to TASK_IDLE before
ptlrpcd_check() is calls.

A better approach (introduce for precisely this use-case) is to use
wait_woken() and woken_wake_function().

When a wake_up is requested on the waitq, woken_wake_function() sets a
flag to record the wakeup.  wait_woken() will wait until this flag is
set.  This way, the task state doesn't need to be set until after
ptlrpcd_check() has completed.

wait_woken() was introduced in Linux 3.19, so libcfs is enhance to
provide the functionality on older kernels.

Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: Iaddf56e2e76c204435bbef3c857e54ce0a6772bc
Reviewed-on: https://review.whamcloud.com/45069
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: xinliang <xinliang.liu@linaro.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/linux/linux-wait.h
libcfs/libcfs/linux/linux-wait.c
lustre/ptlrpc/ptlrpcd.c

index 6407f53..b425654 100644 (file)
@@ -438,6 +438,22 @@ ktime_get_seconds, [
 ]) # LIBCFS_KTIME_GET_SECONDS
 
 #
+# Kernel version 3.19 commit v3.18-rc2-26-g61ada528dea0
+# introduce wait_woken()
+#
+AC_DEFUN([LIBCFS_WAIT_WOKEN],[
+LB_CHECK_COMPILE([does function 'wait_woken' exist],
+wait_woken, [
+       #include <linux/wait.h>
+],[
+       wait_woken(NULL, 0, 0);
+],[
+       AC_DEFINE(HAVE_WAIT_WOKEN, 1,
+               ['wait_woken, is available'])
+])
+]) # LIBCFS_WAIT_WOKEN
+
+#
 # Kernel version 4.0 commit 41fbf3b39d5eca01527338b4d0ee15ee1ae1023c
 # introduced the helper function ktime_ms_delta.
 #
@@ -1699,6 +1715,7 @@ LIBCFS_TIMESPEC64_SUB
 LIBCFS_TIMESPEC64_TO_KTIME
 # 3.19
 LIBCFS_KTIME_GET_SECONDS
+LIBCFS_WAIT_WOKEN
 # 4.0
 LIBCFS_KTIME_MS_DELTA
 # 4.1
index f0f7f69..1043977 100644 (file)
@@ -583,4 +583,11 @@ do {                                                                       \
        __ret;                                                          \
 })
 
+#ifndef HAVE_WAIT_WOKEN
+#define WQ_FLAG_WOKEN          0x02
+long wait_woken(wait_queue_entry_t *wait, unsigned int mode, long timeout);
+int woken_wake_function(wait_queue_entry_t *wait, unsigned int mode,
+                       int sync, void *key);
+#endif /* HAVE_WAIT_WOKEN */
+
 #endif /* __LICBFS_LINUX_WAIT_BIT_H */
index 5843d80..33117c2 100644 (file)
@@ -113,3 +113,62 @@ void __init wait_bit_init(void)
                init_waitqueue_head(bit_wait_table + i);
 }
 #endif /* ! HAVE_WAIT_VAR_EVENT */
+
+#ifndef HAVE_WAIT_WOKEN
+/*
+ * DEFINE_WAIT_FUNC(wait, woken_wake_func);
+ *
+ * add_wait_queue(&wq_head, &wait);
+ * for (;;) {
+ *     if (condition)
+ *         break;
+ *
+ *     // in wait_woken()                      // in woken_wake_function()
+ *
+ *     p->state = mode;                                wq_entry->flags |= WQ_FLAG_WOKEN;
+ *     smp_mb(); // A                          try_to_wake_up():
+ *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))    <full barrier>
+ *         schedule()                             if (p->state & mode)
+ *     p->state = TASK_RUNNING;                              p->state = TASK_RUNNING;
+ *     wq_entry->flags &= ~WQ_FLAG_WOKEN;      ~~~~~~~~~~~~~~~~~~
+ *     smp_mb(); // B                          condition = true;
+ * }                                           smp_mb(); // C
+ * remove_wait_queue(&wq_head, &wait);         wq_entry->flags |= WQ_FLAG_WOKEN;
+ */
+long wait_woken(struct wait_queue_entry *wq_entry, unsigned int mode,
+               long timeout)
+{
+       /*
+        * The below executes an smp_mb(), which matches with the full barrier
+        * executed by the try_to_wake_up() in woken_wake_function() such that
+        * either we see the store to wq_entry->flags in woken_wake_function()
+        * or woken_wake_function() sees our store to current->state.
+        */
+       set_current_state(mode); /* A */
+       if (!(wq_entry->flags & WQ_FLAG_WOKEN))
+               timeout = schedule_timeout(timeout);
+       __set_current_state(TASK_RUNNING);
+
+       /*
+        * The below executes an smp_mb(), which matches with the smp_mb() (C)
+        * in woken_wake_function() such that either we see the wait condition
+        * being true or the store to wq_entry->flags in woken_wake_function()
+        * follows ours in the coherence order.
+        */
+       smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */
+
+       return timeout;
+}
+EXPORT_SYMBOL(wait_woken);
+
+int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode,
+                       int sync, void *key)
+{
+       /* Pairs with the smp_store_mb() in wait_woken(). */
+       smp_mb(); /* C */
+       wq_entry->flags |= WQ_FLAG_WOKEN;
+
+       return default_wake_function(wq_entry, mode, sync, key);
+}
+EXPORT_SYMBOL(woken_wake_function);
+#endif /* HAVE_WAIT_WOKEN */
index f3f04a1..4c9b577 100644 (file)
@@ -477,20 +477,32 @@ static int ptlrpcd(void *arg)
         * new_req_list and ptlrpcd_check() moves them into the set.
         */
        do {
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
                time64_t timeout;
 
                timeout = ptlrpc_set_next_timeout(set);
 
                lu_context_enter(&env.le_ctx);
                lu_context_enter(env.le_ses);
-               if (timeout == 0)
-                       wait_event_idle(set->set_waitq,
-                                       ptlrpcd_check(&env, pc));
-               else if (wait_event_idle_timeout(set->set_waitq,
-                                                ptlrpcd_check(&env, pc),
-                                                cfs_time_seconds(timeout))
-                        == 0)
+
+               add_wait_queue(&set->set_waitq, &wait);
+               while (!ptlrpcd_check(&env, pc)) {
+                       int ret;
+
+                       if (timeout == 0)
+                               ret = wait_woken(&wait, TASK_IDLE,
+                                                MAX_SCHEDULE_TIMEOUT);
+                       else
+                               ret = wait_woken(&wait, TASK_IDLE,
+                                                cfs_time_seconds(timeout));
+                       if (ret != 0)
+                               continue;
+                       /* Timed out */
                        ptlrpc_expired_set(set);
+                       break;
+               }
+               remove_wait_queue(&set->set_waitq, &wait);
+
                lu_context_exit(&env.le_ctx);
                lu_context_exit(env.le_ses);