LU-10467 lustre: add wait_event macros suitable for upstream
author Mr NeilBrown <neilb@suse.de>
Fri, 23 Aug 2019 06:02:22 +0000 (16:02 +1000)
committer Oleg Drokin <green@whamcloud.com>
Mon, 30 Sep 2019 23:12:02 +0000 (23:12 +0000)
This patch adds three sorts of wait_event macros.

1/ wait_event_idle_* which are available upstream, but not
   in older kernels.
   If TASK_NOLOAD is not available, we use TASK_INTERRUPTIBLE
   and block all signals.

   We cannot use ___wait_cond_timeout() as its signature
   changed in 3.13, so we define our own ___wait_cond_timeout1().

2/ wait_event_idle_exclusive_lifo() and
   wait_event_idle_exclusive_lifo_timeout()
   which might be accepted upstream if we can make a strong case.

   prepare_to_wait_event() doesn't support this directly, but
   as it won't relink a wait_entry that is already linked, it
   is sufficient to link to the head of the queue before calling
   prepare_to_wait_event().

3/ l_wait_event_abortable
   l_wait_event_abortable_timeout
   l_wait_event_abortable_exclusive
    which are unlikely to be accepted upstream, but match the
    general approach of the upstream wait_event macros, and are
    useful to Lustre.
    Possibly some or all of these should become wait_event_killable_*.
    LUSTRE_FATAL_SIGS is moved over to linux-wait.h.
    (A usage sketch of all three families follows this list.)
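
As a usage sketch (hypothetical code, not part of this patch: the
struct svc, svc_has_work(), handle_request() and recovery_done names
are invented for illustration), the three families might be used
like this:

	static void svc_loop_example(struct svc *svc)	/* invented */
	{
		long rc;

		/* 1/ idle wait: sleep without inflating the load
		 * average, waking at most once per second to recheck.
		 * Returns 0 on timeout, else 1 or the remaining jiffies.
		 */
		rc = wait_event_idle_timeout(svc->waitq,
					     svc_has_work(svc),
					     cfs_time_seconds(1));
		if (rc > 0)
			handle_request(svc);

		/* 2/ LIFO exclusive wait: each waiter is linked at the
		 * head of the queue, so wake_up() picks the most
		 * recently queued (cache-warm) thread, not the oldest.
		 */
		wait_event_idle_exclusive_lifo(svc->waitq,
					       svc_has_work(svc));

		/* 3/ abortable wait: only LUSTRE_FATAL_SIGS can
		 * interrupt; returns 0 once recovery_done is true, or
		 * -ERESTARTSYS if aborted by a fatal signal.
		 */
		rc = l_wait_event_abortable(svc->waitq,
					    svc->recovery_done);
		if (rc == -ERESTARTSYS)
			return;		/* aborted by a fatal signal */
	}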

___wait_event() and related macros are copied from upstream Linux
and modified slightly to work across all supported kernels.
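
To illustrate the "explicit shadow" of __ret that those macros rely
on, here is a simplified hand-expansion (not literal preprocessor
output) of rc = wait_event_idle_timeout(wq, cond, to); the outer
macro and the inner ___wait_event() each declare their own __ret so
that ___wait_cond_timeout1() can be used at both levels:

	long rc = ({
		long __ret = to;	/* outer __ret: full timeout */
		might_sleep();
		if (!___wait_cond_timeout1(cond)) /* uses outer __ret */
			__ret = ({
				long __ret = to; /* inner shadows outer */
				for (;;) {
					/* prepare_to_wait_event(...); */
					if (___wait_cond_timeout1(cond))
						break; /* may set __ret = 1 */
					__ret = schedule_timeout(__ret);
				}
				/* finish_wait(...); */
				__ret;	/* 0, 1, or remaining jiffies */
			});
		__ret;
	});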

Test-Parameters: trivial
Signed-off-by: Mr NeilBrown <neilb@suse.com>
Change-Id: I2d260fc159dbe5b1a3cc7a26e4aeedf30150d85a
Signed-off-by: Mr NeilBrown <neilb@suse.de>
Reviewed-on: https://review.whamcloud.com/35962
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <stancheff@cray.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/include/libcfs/linux/linux-wait.h
lustre/include/lustre_lib.h

diff --git a/libcfs/include/libcfs/linux/linux-wait.h b/libcfs/include/libcfs/linux/linux-wait.h
index a497dce..ec72376 100644
@@ -2,6 +2,8 @@
 #ifndef __LIBCFS_LINUX_WAIT_BIT_H
 #define __LIBCFS_LINUX_WAIT_BIT_H
 
+/* Make sure we can see if we have TASK_NOLOAD */
+#include <linux/sched.h>
 /*
  * Linux wait-bit related types and methods:
  */
@@ -31,6 +33,18 @@ extern long prepare_to_wait_event(wait_queue_head_t *wq_head,
                                  wait_queue_entry_t *wq_entry, int state);
 #endif
 
+/* ___wait_cond_timeout changed its number of arguments in
+ * v3.12-rc1-78-g35a2af94c7ce, so let's define our own ___wait_cond_timeout1
+ */
+
+#define ___wait_cond_timeout1(condition)                               \
+({                                                                     \
+       bool __cond = (condition);                                      \
+       if (__cond && !__ret)                                           \
+               __ret = 1;                                              \
+       __cond || !__ret;                                               \
+})
+
 #ifndef HAVE_CLEAR_AND_WAKE_UP_BIT
 /**
  * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
@@ -110,7 +124,7 @@ do {                                                                        \
 })
 
 #define __wait_var_event_timeout(var, condition, timeout)              \
-       ___wait_var_event(var, ___wait_cond_timeout(condition),         \
+       ___wait_var_event(var, ___wait_cond_timeout1(condition),        \
                          TASK_UNINTERRUPTIBLE, 0, timeout,             \
                          __ret = schedule_timeout(__ret))
 
@@ -118,10 +132,423 @@ do {                                                                     \
 ({                                                                     \
        long __ret = timeout;                                           \
        might_sleep();                                                  \
-       if (!___wait_cond_timeout(condition))                           \
+       if (!___wait_cond_timeout1(condition))                          \
                __ret = __wait_var_event_timeout(var, condition, timeout); \
        __ret;                                                          \
 })
 #endif /* ! HAVE_WAIT_VAR_EVENT */
 
+/*
+ * prepare_to_wait_event() does not support an exclusive
+ * lifo wait.
+ * However it will not relink the wait_queue_entry if
+ * it is already linked.  So we link to the head of the
+ * queue here, and it will stay there.
+ */
+static inline void prepare_to_wait_exclusive_head(
+       wait_queue_head_t *waitq, wait_queue_entry_t *link)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&(waitq->lock), flags);
+#ifdef HAVE_WAIT_QUEUE_ENTRY_LIST
+       if (list_empty(&link->entry))
+#else
+       if (list_empty(&link->task_list))
+#endif
+               __add_wait_queue_exclusive(waitq, link);
+       spin_unlock_irqrestore(&(waitq->lock), flags);
+}
+
+#ifndef ___wait_event
+/*
+ * The below macro ___wait_event() has an explicit shadow of the __ret
+ * variable when used from the wait_event_*() macros.
+ *
+ * This is so that both can use the ___wait_cond_timeout1() construct
+ * to wrap the condition.
+ *
+ * The type inconsistency of the wait_event_*() __ret variable is also
+ * on purpose; we use long where we can return timeout values and int
+ * otherwise.
+ */
+
+#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)  \
+({                                                                     \
+       __label__ __out;                                                \
+       wait_queue_entry_t __wq_entry;                                  \
+       long __ret = ret;       /* explicit shadow */                   \
+                                                                       \
+       init_wait(&__wq_entry);                                         \
+       if (exclusive)                                                  \
+               __wq_entry.flags = WQ_FLAG_EXCLUSIVE;                   \
+       for (;;) {                                                      \
+               long __int = prepare_to_wait_event(&wq_head,            \
+                                                 &__wq_entry, state);  \
+                                                                       \
+               if (condition)                                          \
+                       break;                                          \
+                                                                       \
+               if (___wait_is_interruptible(state) && __int) {         \
+                       __ret = __int;                                  \
+                       goto __out;                                     \
+               }                                                       \
+                                                                       \
+               cmd;                                                    \
+       }                                                               \
+       finish_wait(&wq_head, &__wq_entry);                             \
+__out: __ret;                                                          \
+})
+#endif
+
+#ifndef TASK_NOLOAD
+
+#define ___wait_event_idle(wq_head, condition, exclusive, ret, cmd)    \
+({                                                                     \
+       wait_queue_entry_t __wq_entry;                                  \
+       long __ret = ret;       /* explicit shadow */                   \
+       sigset_t __blocked;                                             \
+                                                                       \
+       __blocked = cfs_block_sigsinv(0);                               \
+       init_wait(&__wq_entry);                                         \
+       if (exclusive)                                                  \
+               __wq_entry.flags = WQ_FLAG_EXCLUSIVE;                   \
+       for (;;) {                                                      \
+               prepare_to_wait_event(&wq_head,                         \
+                                  &__wq_entry,                         \
+                                  TASK_INTERRUPTIBLE);                 \
+                                                                       \
+               if (condition)                                          \
+                       break;                                          \
+               /* See justification in __l_wait_event */               \
+               if (signal_pending(current))                            \
+                       cfs_clear_sigpending();                         \
+                                                                       \
+               cmd;                                                    \
+       }                                                               \
+       finish_wait(&wq_head, &__wq_entry);                             \
+       cfs_restore_sigs(__blocked);                                    \
+       __ret;                                                          \
+})
+
+#define wait_event_idle(wq_head, condition)                            \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (!(condition))                                               \
+               ___wait_event_idle(wq_head, condition, 0, 0, schedule());\
+} while (0)
+
+#define wait_event_idle_exclusive(wq_head, condition)                  \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (!(condition))                                               \
+               ___wait_event_idle(wq_head, condition, 1, 0, schedule());\
+} while (0)
+
+#define __wait_event_idle_exclusive_timeout(wq_head, condition, timeout)\
+       ___wait_event_idle(wq_head, ___wait_cond_timeout1(condition),   \
+                          1, timeout,                                  \
+                          __ret = schedule_timeout(__ret))
+
+#define wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_exclusive_timeout(            \
+                       wq_head, condition, timeout);                   \
+       __ret;                                                          \
+})
+
+#define __wait_event_idle_exclusive_timeout_cmd(wq_head, condition,    \
+                                               timeout, cmd1, cmd2)    \
+       ___wait_event_idle(wq_head, ___wait_cond_timeout1(condition),   \
+                          1, timeout,                                  \
+                          cmd1; __ret = schedule_timeout(__ret); cmd2)
+
+#define wait_event_idle_exclusive_timeout_cmd(wq_head, condition, timeout,\
+                                             cmd1, cmd2)               \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_exclusive_timeout_cmd(        \
+                       wq_head, condition, timeout, cmd1, cmd2);       \
+       __ret;                                                          \
+})
+
+#define __wait_event_idle_timeout(wq_head, condition, timeout)         \
+       ___wait_event_idle(wq_head, ___wait_cond_timeout1(condition),   \
+                          0, timeout,                                  \
+                          __ret = schedule_timeout(__ret))
+
+#define wait_event_idle_timeout(wq_head, condition, timeout)           \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_timeout(wq_head, condition,   \
+                                                 timeout);             \
+       __ret;                                                          \
+})
+
+#else /* TASK_NOLOAD */
+#ifndef wait_event_idle
+/**
+ * wait_event_idle - wait for a condition without contributing to system load
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_IDLE) until the
+ * @condition evaluates to true.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ */
+#define wait_event_idle(wq_head, condition)                            \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (!(condition))                                               \
+               ___wait_event(wq_head, condition, TASK_IDLE, 0, 0,      \
+                             schedule());                              \
+} while (0)
+#endif
+#ifndef wait_event_idle_exclusive
+/**
+ * wait_event_idle_exclusive - wait for a condition without contributing to
+ *               system load
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_IDLE) until the
+ * @condition evaluates to true.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
+ *
+ * The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes wait on the same list, further
+ * processes are not considered when this one is woken.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ */
+#define wait_event_idle_exclusive(wq_head, condition)                  \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (!(condition))                                               \
+               ___wait_event(wq_head, condition, TASK_IDLE, 1, 0,      \
+                             schedule());                              \
+} while (0)
+#endif
+#ifndef wait_event_idle_exclusive_timeout
+/**
+ * wait_event_idle_exclusive_timeout - sleep without load until a condition
+ *                       becomes true or a timeout elapses
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_IDLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq_head is woken up.
+ *
+ * The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes wait on the same list, further
+ * processes are not considered when this one is woken.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * or the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed.
+ */
+#define wait_event_idle_exclusive_timeout(wq_head, condition, timeout) \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_exclusive_timeout(wq_head,    \
+                                                           condition,  \
+                                                           timeout);   \
+       __ret;                                                          \
+})
+#endif
+#ifndef wait_event_idle_exclusive_timeout_cmd
+#define __wait_event_idle_exclusive_timeout_cmd(wq_head, condition,    \
+                                               timeout, cmd1, cmd2)    \
+       ___wait_event(wq_head, ___wait_cond_timeout1(condition),        \
+                     TASK_IDLE, 1, timeout,                            \
+                     cmd1; __ret = schedule_timeout(__ret); cmd2)
+
+#define wait_event_idle_exclusive_timeout_cmd(wq_head, condition, timeout,\
+                                             cmd1, cmd2)               \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_exclusive_timeout_cmd(        \
+                       wq_head, condition, timeout, cmd1, cmd2);       \
+       __ret;                                                          \
+})
+#endif
+
+#ifndef wait_event_idle_timeout
+
+#define __wait_event_idle_timeout(wq_head, condition, timeout)         \
+       ___wait_event(wq_head, ___wait_cond_timeout1(condition),        \
+                     TASK_IDLE, 0, timeout,                            \
+                     __ret = schedule_timeout(__ret))
+
+/**
+ * wait_event_idle_timeout - sleep without load until a condition becomes
+ *                           true or a timeout elapses
+ * @wq_head: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_IDLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq_head is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * Returns:
+ * 0 if the @condition evaluated to %false after the @timeout elapsed,
+ * 1 if the @condition evaluated to %true after the @timeout elapsed,
+ * or the remaining jiffies (at least 1) if the @condition evaluated
+ * to %true before the @timeout elapsed.
+ */
+#define wait_event_idle_timeout(wq_head, condition, timeout)           \
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_timeout(wq_head, condition,   \
+                                                 timeout);             \
+       __ret;                                                          \
+})
+#endif
+#endif /* TASK_NOLOAD */
+
+/* ___wait_event_lifo is used for lifo exclusive 'idle' waits */
+#ifdef TASK_NOLOAD
+
+#define ___wait_event_lifo(wq_head, condition, ret, cmd)               \
+({                                                                     \
+       wait_queue_entry_t       __wq_entry;                            \
+       long __ret = ret;       /* explicit shadow */                   \
+                                                                       \
+       init_wait(&__wq_entry);                                         \
+       __wq_entry.flags = WQ_FLAG_EXCLUSIVE;                           \
+       for (;;) {                                                      \
+               prepare_to_wait_exclusive_head(&wq_head, &__wq_entry);  \
+               prepare_to_wait_event(&wq_head, &__wq_entry, TASK_IDLE);\
+                                                                       \
+               if (condition)                                          \
+                       break;                                          \
+                                                                       \
+               cmd;                                                    \
+       }                                                               \
+       finish_wait(&wq_head, &__wq_entry);                             \
+       __ret;                                                          \
+})
+#else
+#define ___wait_event_lifo(wq_head, condition, ret, cmd)               \
+({                                                                     \
+       wait_queue_entry_t __wq_entry;                                  \
+       long __ret = ret;       /* explicit shadow */                   \
+       sigset_t __blocked;                                             \
+                                                                       \
+       __blocked = cfs_block_sigsinv(0);                               \
+       init_wait(&__wq_entry);                                         \
+       __wq_entry.flags = WQ_FLAG_EXCLUSIVE;                           \
+       for (;;) {                                                      \
+               prepare_to_wait_exclusive_head(&wq_head, &__wq_entry);  \
+               prepare_to_wait_event(&wq_head, &__wq_entry,            \
+                                     TASK_INTERRUPTIBLE);              \
+                                                                       \
+               if (condition)                                          \
+                       break;                                          \
+               /* See justification in __l_wait_event */               \
+               if (signal_pending(current))                            \
+                       cfs_clear_sigpending();                         \
+                                                                       \
+               cmd;                                                    \
+       }                                                               \
+       cfs_restore_sigs(__blocked);                                    \
+       finish_wait(&wq_head, &__wq_entry);                             \
+       __ret;                                                          \
+})
+#endif
+
+#define wait_event_idle_exclusive_lifo(wq_head, condition)             \
+do {                                                                   \
+       might_sleep();                                                  \
+       if (!(condition))                                               \
+               ___wait_event_lifo(wq_head, condition, 0, schedule());  \
+} while (0)
+
+#define __wait_event_idle_lifo_timeout(wq_head, condition, timeout)    \
+       ___wait_event_lifo(wq_head, ___wait_cond_timeout1(condition),   \
+                          timeout,                                     \
+                          __ret = schedule_timeout(__ret))
+
+#define wait_event_idle_exclusive_lifo_timeout(wq_head, condition, timeout)\
+({                                                                     \
+       long __ret = timeout;                                           \
+       might_sleep();                                                  \
+       if (!___wait_cond_timeout1(condition))                          \
+               __ret = __wait_event_idle_lifo_timeout(wq_head,         \
+                                                      condition,       \
+                                                      timeout);        \
+       __ret;                                                          \
+})
+
+/* l_wait_event_abortable() is a bit like wait_event_killable()
+ * except there is a fixed set of signals which will abort:
+ * LUSTRE_FATAL_SIGS
+ */
+#define LUSTRE_FATAL_SIGS                                       \
+       (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGTERM) | \
+        sigmask(SIGQUIT) | sigmask(SIGALRM))
+
+#define l_wait_event_abortable(wq, condition)                          \
+({                                                                     \
+       sigset_t __new_blocked, __old_blocked;                          \
+       int __ret = 0;                                                  \
+       siginitsetinv(&__new_blocked, LUSTRE_FATAL_SIGS);               \
+       sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked);         \
+       __ret = wait_event_interruptible(wq, condition);                \
+       sigprocmask(SIG_SETMASK, &__old_blocked, NULL);                 \
+       __ret;                                                          \
+})
+
+#define l_wait_event_abortable_timeout(wq, condition, timeout)         \
+({                                                                     \
+       sigset_t __new_blocked, __old_blocked;                          \
+       int __ret = 0;                                                  \
+       siginitsetinv(&__new_blocked, LUSTRE_FATAL_SIGS);               \
+       sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked);         \
+       __ret = wait_event_interruptible_timeout(wq, condition, timeout);\
+       sigprocmask(SIG_SETMASK, &__old_blocked, NULL);                 \
+       __ret;                                                          \
+})
+
+#define l_wait_event_abortable_exclusive(wq, condition)                        \
+({                                                                     \
+       sigset_t __new_blocked, __old_blocked;                          \
+       int __ret = 0;                                                  \
+       siginitsetinv(&__new_blocked, LUSTRE_FATAL_SIGS);               \
+       sigprocmask(SIG_BLOCK, &__new_blocked, &__old_blocked);         \
+       __ret = wait_event_interruptible_exclusive(wq, condition);      \
+       sigprocmask(SIG_SETMASK, &__old_blocked, NULL);                 \
+       __ret;                                                          \
+})
+
 #endif /* __LIBCFS_LINUX_WAIT_BIT_H */
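
For the LIFO variants defined above, a hedged worker-pool sketch (the
struct pool and all pool_* names, plus the 30-second idle policy, are
invented for illustration): the newest waiter wakes first, so under
light load the same few cache-warm threads stay busy while surplus
threads can time out and exit.

	static int pool_worker(void *arg)
	{
		struct pool *p = arg;		/* invented type */

		while (!kthread_should_stop()) {
			long rc;

			/* Woken LIFO; rc == 0 means 30s passed with
			 * no work for this thread.
			 */
			rc = wait_event_idle_exclusive_lifo_timeout(
				p->waitq,
				pool_has_work(p) || kthread_should_stop(),
				cfs_time_seconds(30));
			if (rc == 0 && pool_can_shrink(p))
				break;		/* idle too long; exit */
			if (pool_has_work(p))
				pool_do_work(p);
		}
		return 0;
	}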
diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h
index fb5b317..bf370a3 100644
@@ -213,10 +213,6 @@ struct l_wait_info {
 
 #define LWI_INTR(cb, data)  LWI_TIMEOUT_INTR(0, NULL, cb, data)
 
-#define LUSTRE_FATAL_SIGS                                       \
-       (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGTERM) | \
-        sigmask(SIGQUIT) | sigmask(SIGALRM))
-
 /**
  * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
  * waiting threads, which is not always desirable because all threads will