From e022d16870b40812251aed12876b0be96bcfeb7e Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 8 Jan 2010 12:22:46 +0100 Subject: [PATCH] b=19557 add cond_resched() call in lustre hash to prevent hogging cpu --- lnet/include/libcfs/darwin/darwin-prim.h | 5 +++++ lnet/include/libcfs/darwin/kp30.h | 2 -- lnet/include/libcfs/linux/kp30.h | 9 --------- lnet/include/libcfs/linux/linux-prim.h | 10 ++++++++++ lnet/include/libcfs/user-prim.h | 1 + lnet/include/libcfs/winnt/kp30.h | 6 ------ lnet/include/libcfs/winnt/winnt-prim.h | 8 ++++++++ lnet/klnds/iiblnd/iiblnd_cb.c | 2 +- lnet/klnds/o2iblnd/o2iblnd_cb.c | 2 +- lnet/klnds/openiblnd/openiblnd_cb.c | 2 +- lnet/klnds/ralnd/ralnd_cb.c | 2 +- lnet/klnds/socklnd/socklnd_cb.c | 2 +- lnet/klnds/viblnd/viblnd_cb.c | 2 +- lnet/selftest/workitem.c | 4 ++-- lustre/ChangeLog | 6 ++++++ lustre/obdclass/class_hash.c | 1 + lustre/obdclass/genops.c | 2 +- lustre/ptlrpc/service.c | 2 +- 18 files changed, 41 insertions(+), 27 deletions(-) diff --git a/lnet/include/libcfs/darwin/darwin-prim.h b/lnet/include/libcfs/darwin/darwin-prim.h index 1e5f639..a3719ce 100644 --- a/lnet/include/libcfs/darwin/darwin-prim.h +++ b/lnet/include/libcfs/darwin/darwin-prim.h @@ -281,6 +281,11 @@ static inline int cfs_schedule_timeout(int state, int64_t timeout) #define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK) #define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick) +/* XXX cfs_cond_resched() is sometimes called at each loop iteration + * (e.g. 
lustre_hash_for_each_empty()), so this definition is pretty + * inefficient and can be harmful if we have many elements to process */ +#define cfs_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1) + #define __wait_event(wq, condition) \ do { \ struct cfs_waitlink __wait; \ diff --git a/lnet/include/libcfs/darwin/kp30.h b/lnet/include/libcfs/darwin/kp30.h index 7d022a9..01a9f60 100644 --- a/lnet/include/libcfs/darwin/kp30.h +++ b/lnet/include/libcfs/darwin/kp30.h @@ -57,8 +57,6 @@ #include #include -#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1) - #ifdef CONFIG_SMP #define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ #else diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h index 0431bab..ec5713d 100644 --- a/lnet/include/libcfs/linux/kp30.h +++ b/lnet/include/libcfs/linux/kp30.h @@ -98,11 +98,6 @@ do { \ #define PageUptodate Page_Uptodate #define our_recalc_sigpending(current) recalc_sigpending(current) #define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} #define work_struct_t struct tq_struct #define cfs_get_work_data(type,field,data) (data) #else @@ -130,10 +125,6 @@ do { \ #define wait_on_page wait_on_page_locked #define our_recalc_sigpending(current) recalc_sigpending() #define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - cond_resched(); -} #define work_struct_t struct work_struct #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ diff --git a/lnet/include/libcfs/linux/linux-prim.h b/lnet/include/libcfs/linux/linux-prim.h index 091d703..6dca3e8 100644 --- a/lnet/include/libcfs/linux/linux-prim.h +++ b/lnet/include/libcfs/linux/linux-prim.h @@ -153,6 +153,16 @@ typedef long cfs_task_state_t; #define cfs_schedule_timeout(s, t) schedule_timeout(t) #define cfs_schedule() schedule() +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static inline void cfs_cond_resched(void) +{ + 
if (current->need_resched) + schedule(); +} +#else +#define cfs_cond_resched() cond_resched() +#endif + /* Kernel thread */ typedef int (*cfs_thread_t)(void *); diff --git a/lnet/include/libcfs/user-prim.h b/lnet/include/libcfs/user-prim.h index 889174b..c9250e8 100644 --- a/lnet/include/libcfs/user-prim.h +++ b/lnet/include/libcfs/user-prim.h @@ -99,6 +99,7 @@ int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeou cfs_waitlink_t l; \ cfs_waitq_timedwait(&l, s, t); \ } while (0) +#define cfs_cond_resched() do {} while(0) #define CFS_TASK_INTERRUPTIBLE (0) #define CFS_TASK_UNINT (0) diff --git a/lnet/include/libcfs/winnt/kp30.h b/lnet/include/libcfs/winnt/kp30.h index 336a649..fd12f19 100644 --- a/lnet/include/libcfs/winnt/kp30.h +++ b/lnet/include/libcfs/winnt/kp30.h @@ -51,12 +51,6 @@ #define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ - -static inline void our_cond_resched() -{ - schedule_timeout(1i64); -} - #ifdef CONFIG_SMP #define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ #else diff --git a/lnet/include/libcfs/winnt/winnt-prim.h b/lnet/include/libcfs/winnt/winnt-prim.h index 4b25e87..75ebd8a 100644 --- a/lnet/include/libcfs/winnt/winnt-prim.h +++ b/lnet/include/libcfs/winnt/winnt-prim.h @@ -541,6 +541,14 @@ int wake_up_process(cfs_task_t * task); #define cfs_schedule_timeout(state, time) schedule_timeout(time) void sleep_on(cfs_waitq_t *waitq); +/* XXX cfs_cond_resched() is sometimes called at each loop iteration + * (e.g. 
lustre_hash_for_each_empty()), so this definition is pretty + * inefficient and can be harmful if we have many elements to process */ +static inline void cfs_cond_resched() +{ + schedule_timeout(1i64); +} + #define CFS_DECL_JOURNAL_DATA #define CFS_PUSH_JOURNAL do {;} while(0) #define CFS_POP_JOURNAL do {;} while(0) diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c index fb6ce60..9e7b62c 100644 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ b/lnet/klnds/iiblnd/iiblnd_cb.c @@ -3310,7 +3310,7 @@ kibnal_scheduler(void *arg) spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - our_cond_resched(); + cfs_cond_resched(); busy_loops = 0; spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 70f24a8..0c84891 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3186,7 +3186,7 @@ kiblnd_scheduler(void *arg) spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - our_cond_resched(); + cfs_cond_resched(); busy_loops = 0; spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c index 85f1166..31dade0 100644 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ b/lnet/klnds/openiblnd/openiblnd_cb.c @@ -2614,7 +2614,7 @@ kibnal_scheduler(void *arg) !list_empty(&kibnal_data.kib_sched_rxq) || kibnal_data.kib_shutdown); } else { - our_cond_resched(); + cfs_cond_resched(); } spin_lock_irqsave(&kibnal_data.kib_sched_lock, diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c index 9fa2958..5c2871f 100644 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ b/lnet/klnds/ralnd/ralnd_cb.c @@ -1941,7 +1941,7 @@ kranal_scheduler (void *arg) if (busy_loops++ >= RANAL_RESCHED) { spin_unlock_irqrestore(&dev->rad_lock, flags); - our_cond_resched(); + cfs_cond_resched(); busy_loops = 0; spin_lock_irqsave(&dev->rad_lock, flags); diff --git 
a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index f6e88c0..2ff5fcc 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1520,7 +1520,7 @@ int ksocknal_scheduler (void *arg) !ksocknal_sched_cansleep(sched), rc); LASSERT (rc == 0); } else { - our_cond_resched(); + cfs_cond_resched(); } cfs_spin_lock_bh (&sched->kss_lock); diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c index 0528b0ed..3fc6376 100644 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ b/lnet/klnds/viblnd/viblnd_cb.c @@ -3583,7 +3583,7 @@ kibnal_scheduler(void *arg) spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - our_cond_resched(); + cfs_cond_resched(); busy_loops = 0; spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); diff --git a/lnet/selftest/workitem.c b/lnet/selftest/workitem.c index 5e49b8b..67ff22e 100644 --- a/lnet/selftest/workitem.c +++ b/lnet/selftest/workitem.c @@ -193,7 +193,7 @@ swi_scheduler_main (void *arg) swi_data.wi_waitq, !swi_sched_cansleep(&swi_data.wi_runq)); else - our_cond_resched(); + cfs_cond_resched(); spin_lock(&swi_data.wi_lock); } @@ -247,7 +247,7 @@ swi_serial_scheduler_main (void *arg) swi_data.wi_serial_waitq, !swi_sched_cansleep(&swi_data.wi_serial_runq)); else - our_cond_resched(); + cfs_cond_resched(); spin_lock(&swi_data.wi_lock); } diff --git a/lustre/ChangeLog b/lustre/ChangeLog index cd693d6..a4e6519 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -17,6 +17,12 @@ Severity : normal Bugzilla : 21489 Description: cp -p command does not preserve the dates and timestamp +Severity : normal +Bugzilla : 19557 +Description: BUG: soft lockup - CPU#1 stuck for 10s! [ll_mdt_07:4523] +Details : add cond_resched() calls in lustre_hash_for_each_empty() + to prevent hogging the CPU. + 2009-10-16 Sun Microsystems, Inc. 
* version 1.8.1.1 * Support for kernels: diff --git a/lustre/obdclass/class_hash.c b/lustre/obdclass/class_hash.c index 5370a4e..1c94f0e 100644 --- a/lustre/obdclass/class_hash.c +++ b/lustre/obdclass/class_hash.c @@ -484,6 +484,7 @@ restart: read_unlock(&lh->lh_rwlock); func(obj, data); (void)lh_put(lh, hnode); + cfs_cond_resched(); goto restart; } write_unlock(&lhb->lhb_rwlock); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 8cb6ead..c56db17 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1458,7 +1458,7 @@ void obd_zombie_impexp_cull(void) if (export != NULL) class_export_destroy(export); - + cfs_cond_resched(); } while (import != NULL || export != NULL); } diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 42f09f8a..77a3488 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1670,7 +1670,7 @@ static int ptlrpc_main(void *arg) lc_watchdog_disable(thread->t_watchdog); - cond_resched(); + cfs_cond_resched(); l_wait_event_exclusive (svc->srv_waitq, ((thread->t_flags & SVC_STOPPING) != 0 && -- 1.8.3.1