From 40e312a8275ed9240e63f0ac023d8b7a38136f42 Mon Sep 17 00:00:00 2001 From: Jian Yu Date: Wed, 1 Dec 2010 20:16:21 +0800 Subject: [PATCH] b=23289 new API: cfs_waitq_add_exclusive_head With this patch, we can reduce the total number of active threads because waitq is a LIFO list for exclusive waiting. o=Liang Zhen i=andreas.dilger i=eric.mei --- libcfs/autoconf/lustre-libcfs.m4 | 25 ++++++++++++++++++++++++ libcfs/include/libcfs/libcfs_prim.h | 2 ++ libcfs/libcfs/linux/linux-prim.c | 35 +++++++++++++++++++++++++++++++++ libcfs/libcfs/user-prim.c | 5 +++++ lustre/include/lustre_lib.h | 39 ++++++++++++++++++++++++++++--------- lustre/mdt/mdt_capa.c | 4 ++-- lustre/ptlrpc/service.c | 6 +++--- 7 files changed, 102 insertions(+), 14 deletions(-) diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index 0aaf7d3..c09767a 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -632,6 +632,29 @@ LB_LINUX_TRY_COMPILE([ ]) # +# LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE +# +# 2.6.34 adds __add_wait_queue_exclusive +# +AC_DEFUN([LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE], +[AC_MSG_CHECKING([if __add_wait_queue_exclusive exists]) +LB_LINUX_TRY_COMPILE([ + #include <linux/wait.h> +],[ + wait_queue_head_t queue; + wait_queue_t wait; + + __add_wait_queue_exclusive(&queue, &wait); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE___ADD_WAIT_QUEUE_EXCLUSIVE, 1, + [__add_wait_queue_exclusive exists]) +],[ + AC_MSG_RESULT(no) +]) +]) + +# # LIBCFS_PROG_LINUX # # LNet linux kernel checks @@ -679,6 +702,8 @@ LIBCFS_FUNC_UNSHARE_FS_STRUCT LIBCFS_SOCK_MAP_FD_2ARG # 2.6.32 LIBCFS_STACKTRACE_OPS_HAVE_WALK_STACK +# 2.6.34 +LIBCFS_ADD_WAIT_QUEUE_EXCLUSIVE ]) # diff --git a/libcfs/include/libcfs/libcfs_prim.h b/libcfs/include/libcfs/libcfs_prim.h index 2aa7422..03da8fe 100644 --- a/libcfs/include/libcfs/libcfs_prim.h +++ b/libcfs/include/libcfs/libcfs_prim.h @@ -65,6 +65,8 @@ void cfs_waitlink_init(cfs_waitlink_t *link); void cfs_waitq_add(cfs_waitq_t *waitq, cfs_waitlink_t
*link); void cfs_waitq_add_exclusive(cfs_waitq_t *waitq, cfs_waitlink_t *link); +void cfs_waitq_add_exclusive_head(cfs_waitq_t *waitq, + cfs_waitlink_t *link); void cfs_waitq_del(cfs_waitq_t *waitq, cfs_waitlink_t *link); int cfs_waitq_active(cfs_waitq_t *waitq); void cfs_waitq_signal(cfs_waitq_t *waitq); diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c index 79dce3e..03a83f5 100644 --- a/libcfs/libcfs/linux/linux-prim.c +++ b/libcfs/libcfs/linux/linux-prim.c @@ -73,6 +73,17 @@ cfs_waitq_add(cfs_waitq_t *waitq, cfs_waitlink_t *link) } EXPORT_SYMBOL(cfs_waitq_add); +#ifndef HAVE___ADD_WAIT_QUEUE_EXCLUSIVE + +static inline void __add_wait_queue_exclusive(wait_queue_head_t *q, + wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wait); +} + +#endif /* HAVE___ADD_WAIT_QUEUE_EXCLUSIVE */ + void cfs_waitq_add_exclusive(cfs_waitq_t *waitq, cfs_waitlink_t *link) @@ -81,6 +92,30 @@ cfs_waitq_add_exclusive(cfs_waitq_t *waitq, } EXPORT_SYMBOL(cfs_waitq_add_exclusive); +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads will + * be woken up again and again, even if the user only needs a few of them + * to be active most of the time. This is not good for performance because + * the cache can be polluted by different threads. + * + * A LIFO list can resolve this problem because we always wake up the most + * recently active thread by default. + * + * NB: please don't call non-exclusive & exclusive wait on the same + * waitq if cfs_waitq_add_exclusive_head is used.
+ */ +void +cfs_waitq_add_exclusive_head(cfs_waitq_t *waitq, cfs_waitlink_t *link) +{ + unsigned long flags; + + spin_lock_irqsave(&LINUX_WAITQ_HEAD(waitq)->lock, flags); + __add_wait_queue_exclusive(LINUX_WAITQ_HEAD(waitq), LINUX_WAITQ(link)); + spin_unlock_irqrestore(&LINUX_WAITQ_HEAD(waitq)->lock, flags); +} +EXPORT_SYMBOL(cfs_waitq_add_exclusive_head); + void cfs_waitq_del(cfs_waitq_t *waitq, cfs_waitlink_t *link) { diff --git a/libcfs/libcfs/user-prim.c b/libcfs/libcfs/user-prim.c index 0d2a400..825aa70 100644 --- a/libcfs/libcfs/user-prim.c +++ b/libcfs/libcfs/user-prim.c @@ -80,6 +80,11 @@ void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link) (void)link; } +void cfs_waitq_add_exclusive_head(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + cfs_waitq_add_exclusive(waitq, link); +} + void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link) { LASSERT(waitq != NULL); diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 4beca8ed..df7105b 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -667,7 +667,7 @@ struct l_wait_info { * wait for @condition to become true, but no longer than timeout, specified * by @info. */ -#define __l_wait_event(wq, condition, info, ret, excl) \ +#define __l_wait_event(wq, condition, info, ret, l_add_wait) \ do { \ cfs_waitlink_t __wait; \ cfs_duration_t __timeout = info->lwi_timeout; \ @@ -679,10 +679,7 @@ do { \ break; \ \ cfs_waitlink_init(&__wait); \ - if (excl) \ - cfs_waitq_add_exclusive(&wq, &__wait); \ - else \ - cfs_waitq_add(&wq, &__wait); \ + l_add_wait(&wq, &__wait); \ \ /* Block all signals (just the non-fatal ones if no timeout). 
*/ \ if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr)) \ @@ -754,7 +751,7 @@ do { \ } while (0) #else /* !__KERNEL__ */ -#define __l_wait_event(wq, condition, info, ret, excl) \ +#define __l_wait_event(wq, condition, info, ret, l_add_wait) \ do { \ long __timeout = info->lwi_timeout; \ long __now; \ @@ -806,7 +803,8 @@ do { \ int __ret; \ struct l_wait_info *__info = (info); \ \ - __l_wait_event(wq, condition, __info, __ret, 0); \ + __l_wait_event(wq, condition, __info, \ + __ret, cfs_waitq_add); \ __ret; \ }) @@ -815,16 +813,39 @@ do { \ int __ret; \ struct l_wait_info *__info = (info); \ \ - __l_wait_event(wq, condition, __info, __ret, 1); \ + __l_wait_event(wq, condition, __info, \ + __ret, cfs_waitq_add_exclusive); \ + __ret; \ +}) + +#define l_wait_event_exclusive_head(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, \ + __ret, cfs_waitq_add_exclusive_head); \ __ret; \ }) -#define l_cfs_wait_event(wq, condition) \ +#define l_wait_condition(wq, condition) \ ({ \ struct l_wait_info lwi = { 0 }; \ l_wait_event(wq, condition, &lwi); \ }) +#define l_wait_condition_exclusive(wq, condition) \ +({ \ + struct l_wait_info lwi = { 0 }; \ + l_wait_event_exclusive(wq, condition, &lwi); \ +}) + +#define l_wait_condition_exclusive_head(wq, condition) \ +({ \ + struct l_wait_info lwi = { 0 }; \ + l_wait_event_exclusive_head(wq, condition, &lwi); \ +}) + #ifdef __KERNEL__ #define LIBLUSTRE_CLIENT (0) #else diff --git a/lustre/mdt/mdt_capa.c b/lustre/mdt/mdt_capa.c index 71e8434..bd10eda 100644 --- a/lustre/mdt/mdt_capa.c +++ b/lustre/mdt/mdt_capa.c @@ -294,7 +294,7 @@ int mdt_ck_thread_start(struct mdt_device *mdt) return rc; } - l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING); + l_wait_condition(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING); return 0; } @@ -307,5 +307,5 @@ void mdt_ck_thread_stop(struct mdt_device *mdt) thread->t_flags = SVC_STOPPING; 
cfs_waitq_signal(&thread->t_ctl_waitq); - l_cfs_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); + l_wait_condition(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); } diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 79e0e37..3b9faab 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -2177,7 +2177,7 @@ static int ptlrpc_main(void *arg) cfs_cond_resched(); - l_wait_event_exclusive(svc->srv_waitq, + l_wait_event_exclusive_head(svc->srv_waitq, ptlrpc_thread_stopping(thread) || ptlrpc_server_request_waiting(svc) || ptlrpc_server_request_pending(svc, 0) || @@ -2305,7 +2305,7 @@ static int ptlrpc_hr_main(void *arg) while (!cfs_test_bit(HRT_STOPPING, &t->hrt_flags)) { - l_cfs_wait_event(t->hrt_wait, hrt_dont_sleep(t, &replies)); + l_wait_condition(t->hrt_wait, hrt_dont_sleep(t, &replies)); while (!cfs_list_empty(&replies)) { struct ptlrpc_reply_state *rs; @@ -2340,7 +2340,7 @@ static int ptlrpc_start_hr_thread(struct ptlrpc_hr_service *hr, int n, int cpu) cfs_complete(&t->hrt_completion); GOTO(out, rc); } - l_cfs_wait_event(t->hrt_wait, cfs_test_bit(HRT_RUNNING, &t->hrt_flags)); + l_wait_condition(t->hrt_wait, cfs_test_bit(HRT_RUNNING, &t->hrt_flags)); RETURN(0); out: return rc; -- 1.8.3.1