From db3217248c664e69a942c3a7b8df14e2bd5cedc4 Mon Sep 17 00:00:00 2001 From: adilger Date: Mon, 12 Apr 2004 19:54:43 +0000 Subject: [PATCH] Don't wait forever in sleep_on() if two threads call portals_debug_dumplog() at the same time. b=3107 r=zab --- lnet/libcfs/debug.c | 22 +++++++++++++++------- lustre/ChangeLog | 1 + lustre/portals/libcfs/debug.c | 22 +++++++++++++++------- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index cb6a1b5..538b7a2 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -56,7 +56,7 @@ static char *debug_buf = NULL; static unsigned long debug_size = 0; static atomic_t debug_off_a = ATOMIC_INIT(0); static int debug_wrapped; -wait_queue_head_t debug_ctlwq; +static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); #define DAEMON_SND_SIZE (64 << 10) /* @@ -243,7 +243,7 @@ int portals_do_debug_dumplog(void *arg) PTR_ERR(file)); GOTO(out, PTR_ERR(file)); } else { - printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n", + printk(KERN_ALERT "LustreError: dumping log to %s ...\n", debug_file_name); } @@ -416,18 +416,26 @@ void portals_debug_print(void) void portals_debug_dumplog(void) { int rc; + DECLARE_WAITQUEUE(wait, current); ENTRY; - init_waitqueue_head(&debug_ctlwq); + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); rc = kernel_thread(portals_do_debug_dumplog, NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { + if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); - return; - } - sleep_on(&debug_ctlwq); + else + schedule(); + + /* be sure to teardown if kernel_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); } int portals_debug_daemon_start(char *file, unsigned int size) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 8d1eda0..173fb56 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -12,6 +12,7 @@ tbd Cluster File Systems, Inc. - don't use cancelling-locks' kms values (2947) - use highest lock extent for kms, not last one (2925) - don't dereference ERR_PTR() dentry in error handling path (3107) + - fix thread race in portals_debug_dumplog() (3122) * miscellania - allow default OST striping configuration per directory (1414) diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index cb6a1b5..538b7a2 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -56,7 +56,7 @@ static char *debug_buf = NULL; static unsigned long debug_size = 0; static atomic_t debug_off_a = ATOMIC_INIT(0); static int debug_wrapped; -wait_queue_head_t debug_ctlwq; +static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); #define DAEMON_SND_SIZE (64 << 10) /* @@ -243,7 +243,7 @@ int portals_do_debug_dumplog(void *arg) PTR_ERR(file)); GOTO(out, PTR_ERR(file)); } else { - printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n", + printk(KERN_ALERT "LustreError: dumping log to %s ...\n", debug_file_name); } @@ -416,18 +416,26 @@ void portals_debug_print(void) void portals_debug_dumplog(void) { int rc; + DECLARE_WAITQUEUE(wait, current); ENTRY; - init_waitqueue_head(&debug_ctlwq); + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); rc = kernel_thread(portals_do_debug_dumplog, NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { + if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); - return; - } - sleep_on(&debug_ctlwq); + else + schedule(); + + /* be sure to teardown if kernel_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); } int portals_debug_daemon_start(char *file, unsigned int size) -- 1.8.3.1