Whamcloud - gitweb
Don't wait forever in sleep_on() if two threads call portals_debug_dumplog()
author adilger <adilger>
Mon, 12 Apr 2004 19:54:43 +0000 (19:54 +0000)
committer adilger <adilger>
Mon, 12 Apr 2004 19:54:43 +0000 (19:54 +0000)
at the same time.
b=3107
r=zab

lnet/libcfs/debug.c
lustre/ChangeLog
lustre/portals/libcfs/debug.c

index cb6a1b5..538b7a2 100644 (file)
@@ -56,7 +56,7 @@ static char *debug_buf = NULL;
 static unsigned long debug_size = 0;
 static atomic_t debug_off_a = ATOMIC_INIT(0);
 static int debug_wrapped;
-wait_queue_head_t debug_ctlwq;
+static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq);
 #define DAEMON_SND_SIZE      (64 << 10)
 
 /*
@@ -243,7 +243,7 @@ int portals_do_debug_dumplog(void *arg)
                        PTR_ERR(file));
                 GOTO(out, PTR_ERR(file));
         } else {
-                printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n",
+                printk(KERN_ALERT "LustreError: dumping log to %s ...\n",
                        debug_file_name);
         }
 
@@ -416,18 +416,26 @@ void portals_debug_print(void)
 void portals_debug_dumplog(void)
 {
         int rc;
+        DECLARE_WAITQUEUE(wait, current);
         ENTRY;
 
-        init_waitqueue_head(&debug_ctlwq);
+        /* we're being careful to ensure that the kernel thread is
+         * able to set our state to running as it exits before we
+         * get to schedule() */
+        set_current_state(TASK_INTERRUPTIBLE);
+        add_wait_queue(&debug_ctlwq, &wait);
 
         rc = kernel_thread(portals_do_debug_dumplog,
                            NULL, CLONE_VM | CLONE_FS | CLONE_FILES);
-        if (rc < 0) {
+        if (rc < 0)
                 printk(KERN_ERR "LustreError: cannot start log dump thread: "
                        "%d\n", rc);
-                return;
-        }
-        sleep_on(&debug_ctlwq);
+        else
+                schedule();
+
+        /* be sure to teardown if kernel_thread() failed */
+        remove_wait_queue(&debug_ctlwq, &wait);
+        set_current_state(TASK_RUNNING);
 }
 
 int portals_debug_daemon_start(char *file, unsigned int size)
index 8d1eda0..173fb56 100644 (file)
@@ -12,6 +12,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - don't use cancelling-locks' kms values (2947)
        - use highest lock extent for kms, not last one (2925)
        - don't dereference ERR_PTR() dentry in error handling path (3107)
+       - fix thread race in portals_debug_dumplog() (3122)
        * miscellania
        - allow default OST striping configuration per directory (1414)
 
index cb6a1b5..538b7a2 100644 (file)
@@ -56,7 +56,7 @@ static char *debug_buf = NULL;
 static unsigned long debug_size = 0;
 static atomic_t debug_off_a = ATOMIC_INIT(0);
 static int debug_wrapped;
-wait_queue_head_t debug_ctlwq;
+static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq);
 #define DAEMON_SND_SIZE      (64 << 10)
 
 /*
@@ -243,7 +243,7 @@ int portals_do_debug_dumplog(void *arg)
                        PTR_ERR(file));
                 GOTO(out, PTR_ERR(file));
         } else {
-                printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n",
+                printk(KERN_ALERT "LustreError: dumping log to %s ...\n",
                        debug_file_name);
         }
 
@@ -416,18 +416,26 @@ void portals_debug_print(void)
 void portals_debug_dumplog(void)
 {
         int rc;
+        DECLARE_WAITQUEUE(wait, current);
         ENTRY;
 
-        init_waitqueue_head(&debug_ctlwq);
+        /* we're being careful to ensure that the kernel thread is
+         * able to set our state to running as it exits before we
+         * get to schedule() */
+        set_current_state(TASK_INTERRUPTIBLE);
+        add_wait_queue(&debug_ctlwq, &wait);
 
         rc = kernel_thread(portals_do_debug_dumplog,
                            NULL, CLONE_VM | CLONE_FS | CLONE_FILES);
-        if (rc < 0) {
+        if (rc < 0)
                 printk(KERN_ERR "LustreError: cannot start log dump thread: "
                        "%d\n", rc);
-                return;
-        }
-        sleep_on(&debug_ctlwq);
+        else
+                schedule();
+
+        /* be sure to teardown if kernel_thread() failed */
+        remove_wait_queue(&debug_ctlwq, &wait);
+        set_current_state(TASK_RUNNING);
 }
 
 int portals_debug_daemon_start(char *file, unsigned int size)