Whamcloud - gitweb
b=23289 new API: cfs_waitq_add_exclusive_head
author Jian Yu <jian.yu@oracle.com>
Wed, 1 Dec 2010 12:16:21 +0000 (20:16 +0800)
committer Vitaly Fertman <vitaly.fertman@oracle.com>
Fri, 3 Dec 2010 19:22:46 +0000 (22:22 +0300)
With this patch, we can reduce the total number of active threads because
the waitq acts as a LIFO list for exclusive waiters.

o=Liang Zhen
i=andreas.dilger
i=eric.mei

libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/libcfs_prim.h
libcfs/libcfs/linux/linux-prim.c
libcfs/libcfs/user-prim.c
lustre/include/lustre_lib.h
lustre/mdt/mdt_capa.c
lustre/ptlrpc/service.c

index 0aaf7d3..c09767a 100644 (file)
@@ -632,6 +632,29 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 #
+# LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE
+#
+# 2.6.34 adds __add_wait_queue_exclusive
+#
+AC_DEFUN([LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE],
+[AC_MSG_CHECKING([if __add_wait_queue_exclusive exists])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/wait.h>
+],[
+        wait_queue_head_t queue;
+        wait_queue_t      wait;
+
+        __add_wait_queue_exclusive(&queue, &wait);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE___ADD_WAIT_QUEUE_EXCLUSIVE, 1,
+                  [__add_wait_queue_exclusive exists])
+],[
+        AC_MSG_RESULT(no)
+])
+])
+
+#
 # LIBCFS_PROG_LINUX
 #
 # LNet linux kernel checks
@@ -679,6 +702,8 @@ LIBCFS_FUNC_UNSHARE_FS_STRUCT
 LIBCFS_SOCK_MAP_FD_2ARG
 # 2.6.32
 LIBCFS_STACKTRACE_OPS_HAVE_WALK_STACK
+# 2.6.34
+LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE
 ])
 
 #
index 2aa7422..03da8fe 100644 (file)
@@ -65,6 +65,8 @@ void cfs_waitlink_init(cfs_waitlink_t *link);
 void cfs_waitq_add(cfs_waitq_t *waitq, cfs_waitlink_t *link);
 void cfs_waitq_add_exclusive(cfs_waitq_t *waitq,
                              cfs_waitlink_t *link);
+void cfs_waitq_add_exclusive_head(cfs_waitq_t *waitq,
+                                  cfs_waitlink_t *link);
 void cfs_waitq_del(cfs_waitq_t *waitq, cfs_waitlink_t *link);
 int  cfs_waitq_active(cfs_waitq_t *waitq);
 void cfs_waitq_signal(cfs_waitq_t *waitq);
index 79dce3e..03a83f5 100644 (file)
@@ -73,6 +73,17 @@ cfs_waitq_add(cfs_waitq_t *waitq, cfs_waitlink_t *link)
 }
 EXPORT_SYMBOL(cfs_waitq_add);
 
+#ifndef HAVE___ADD_WAIT_QUEUE_EXCLUSIVE
+
+static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
+                                              wait_queue_t *wait)
+{
+        wait->flags |= WQ_FLAG_EXCLUSIVE;
+        __add_wait_queue(q, wait);
+}
+
+#endif /* HAVE___ADD_WAIT_QUEUE_EXCLUSIVE */
+
 void
 cfs_waitq_add_exclusive(cfs_waitq_t *waitq,
                         cfs_waitlink_t *link)
@@ -81,6 +92,30 @@ cfs_waitq_add_exclusive(cfs_waitq_t *waitq,
 }
 EXPORT_SYMBOL(cfs_waitq_add_exclusive);
 
+/**
+ * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively
+ * waiting threads, which is not always desirable because all threads will
+ * be woken up again and again, even if the user only needs a few of them
+ * to be active most of the time. This is bad for performance because the
+ * cache can be polluted by different threads.
+ *
+ * A LIFO list resolves this problem because we always wake up the most
+ * recently active thread by default.
+ *
+ * NB: please don't mix non-exclusive & exclusive waits on the same
+ * waitq if cfs_waitq_add_exclusive_head is used.
+ */
+void
+cfs_waitq_add_exclusive_head(cfs_waitq_t *waitq, cfs_waitlink_t *link)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&LINUX_WAITQ_HEAD(waitq)->lock, flags);
+        __add_wait_queue_exclusive(LINUX_WAITQ_HEAD(waitq), LINUX_WAITQ(link));
+        spin_unlock_irqrestore(&LINUX_WAITQ_HEAD(waitq)->lock, flags);
+}
+EXPORT_SYMBOL(cfs_waitq_add_exclusive_head);
+
 void
 cfs_waitq_del(cfs_waitq_t *waitq, cfs_waitlink_t *link)
 {
index 0d2a400..825aa70 100644 (file)
@@ -80,6 +80,11 @@ void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link)
         (void)link;
 }
 
+void cfs_waitq_add_exclusive_head(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        cfs_waitq_add_exclusive(waitq, link);
+}
+
 void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link)
 {
         LASSERT(waitq != NULL);
index 4beca8e..df7105b 100644 (file)
@@ -667,7 +667,7 @@ struct l_wait_info {
  * wait for @condition to become true, but no longer than timeout, specified
  * by @info.
  */
-#define __l_wait_event(wq, condition, info, ret, excl)                         \
+#define __l_wait_event(wq, condition, info, ret, l_add_wait)                   \
 do {                                                                           \
         cfs_waitlink_t __wait;                                                 \
         cfs_duration_t __timeout = info->lwi_timeout;                          \
@@ -679,10 +679,7 @@ do {                                                                           \
                 break;                                                         \
                                                                                \
         cfs_waitlink_init(&__wait);                                            \
-        if (excl)                                                              \
-                cfs_waitq_add_exclusive(&wq, &__wait);                         \
-        else                                                                   \
-                cfs_waitq_add(&wq, &__wait);                                   \
+        l_add_wait(&wq, &__wait);                                              \
                                                                                \
         /* Block all signals (just the non-fatal ones if no timeout). */       \
         if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr))   \
@@ -754,7 +751,7 @@ do {                                                                           \
 } while (0)
 
 #else /* !__KERNEL__ */
-#define __l_wait_event(wq, condition, info, ret, excl)                  \
+#define __l_wait_event(wq, condition, info, ret, l_add_wait)            \
 do {                                                                    \
         long __timeout = info->lwi_timeout;                             \
         long __now;                                                     \
@@ -806,7 +803,8 @@ do {                                                                    \
         int                 __ret;                              \
         struct l_wait_info *__info = (info);                    \
                                                                 \
-        __l_wait_event(wq, condition, __info, __ret, 0);        \
+        __l_wait_event(wq, condition, __info,                   \
+                       __ret, cfs_waitq_add);                   \
         __ret;                                                  \
 })
 
@@ -815,16 +813,39 @@ do {                                                                    \
         int                 __ret;                              \
         struct l_wait_info *__info = (info);                    \
                                                                 \
-        __l_wait_event(wq, condition, __info, __ret, 1);        \
+        __l_wait_event(wq, condition, __info,                   \
+                       __ret, cfs_waitq_add_exclusive);         \
+        __ret;                                                  \
+})
+
+#define l_wait_event_exclusive_head(wq, condition, info)        \
+({                                                              \
+        int                 __ret;                              \
+        struct l_wait_info *__info = (info);                    \
+                                                                \
+        __l_wait_event(wq, condition, __info,                   \
+                       __ret, cfs_waitq_add_exclusive_head);    \
         __ret;                                                  \
 })
 
-#define l_cfs_wait_event(wq, condition)                         \
+#define l_wait_condition(wq, condition)                         \
 ({                                                              \
         struct l_wait_info lwi = { 0 };                         \
         l_wait_event(wq, condition, &lwi);                      \
 })
 
+#define l_wait_condition_exclusive(wq, condition)               \
+({                                                              \
+        struct l_wait_info lwi = { 0 };                         \
+        l_wait_event_exclusive(wq, condition, &lwi);            \
+})
+
+#define l_wait_condition_exclusive_head(wq, condition)          \
+({                                                              \
+        struct l_wait_info lwi = { 0 };                         \
+        l_wait_event_exclusive_head(wq, condition, &lwi);       \
+})
+
 #ifdef __KERNEL__
 #define LIBLUSTRE_CLIENT (0)
 #else
index 71e8434..bd10eda 100644 (file)
@@ -294,7 +294,7 @@ int mdt_ck_thread_start(struct mdt_device *mdt)
                 return rc;
         }
 
-        l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING);
+        l_wait_condition(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING);
         return 0;
 }
 
@@ -307,5 +307,5 @@ void mdt_ck_thread_stop(struct mdt_device *mdt)
 
         thread->t_flags = SVC_STOPPING;
         cfs_waitq_signal(&thread->t_ctl_waitq);
-        l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
+        l_wait_condition(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
 }
index 79e0e37..3b9faab 100644 (file)
@@ -2177,7 +2177,7 @@ static int ptlrpc_main(void *arg)
 
                 cfs_cond_resched();
 
-                l_wait_event_exclusive(svc->srv_waitq,
+                l_wait_event_exclusive_head(svc->srv_waitq,
                                        ptlrpc_thread_stopping(thread) ||
                                        ptlrpc_server_request_waiting(svc) ||
                                        ptlrpc_server_request_pending(svc, 0) ||
@@ -2305,7 +2305,7 @@ static int ptlrpc_hr_main(void *arg)
 
         while (!cfs_test_bit(HRT_STOPPING, &t->hrt_flags)) {
 
-                l_cfs_wait_event(t->hrt_wait, hrt_dont_sleep(t, &replies));
+                l_wait_condition(t->hrt_wait, hrt_dont_sleep(t, &replies));
                 while (!cfs_list_empty(&replies)) {
                         struct ptlrpc_reply_state *rs;
 
@@ -2340,7 +2340,7 @@ static int ptlrpc_start_hr_thread(struct ptlrpc_hr_service *hr, int n, int cpu)
                 cfs_complete(&t->hrt_completion);
                 GOTO(out, rc);
         }
-        l_cfs_wait_event(t->hrt_wait, cfs_test_bit(HRT_RUNNING, &t->hrt_flags));
+        l_wait_condition(t->hrt_wait, cfs_test_bit(HRT_RUNNING, &t->hrt_flags));
         RETURN(0);
  out:
         return rc;