Whamcloud - gitweb
b=20592
author deshmukh <deshmukh>
Fri, 6 Nov 2009 13:55:08 +0000 (13:55 +0000)
committer deshmukh <deshmukh>
Fri, 6 Nov 2009 13:55:08 +0000 (13:55 +0000)
i=johann
i=andrew.perepechko (panda)

Use kthread_run() instead of kernel_thread() so that the log dumper won't get
stuck in do_exit(). (Patch ported to HEAD.)

libcfs/include/libcfs/linux/linux-prim.h
libcfs/include/libcfs/user-prim.h
libcfs/libcfs/debug.c
lustre/llite/file.c
lustre/tests/sanity.sh

index b1f09b1..32d528d 100644 (file)
@@ -61,6 +61,7 @@
 #include <linux/timer.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/kthread.h>
 
 #include <linux/miscdevice.h>
 #include <libcfs/linux/portals_compat25.h>
@@ -141,6 +142,8 @@ typedef wait_queue_t                        cfs_waitlink_t;
 typedef wait_queue_head_t              cfs_waitq_t;
 typedef long                            cfs_task_state_t;
 
+#define cfs_kthread_run(fn, data, fmt, arg...) kthread_run(fn, data, fmt, ##arg)
+
 /* Kernel thread */
 typedef int (*cfs_thread_t)(void *);
 
index 7a38d7c..711bb67 100644 (file)
@@ -139,6 +139,7 @@ static inline int cfs_psdev_deregister(cfs_psdev_t *foo)
 #define cfs_sigfillset(l) do {}         while (0)
 #define cfs_recalc_sigpending(l)        do {} while (0)
 #define cfs_kernel_thread(l,m,n)        LBUG()
+#define cfs_kthread_run(fn,d,fmt,...)   LBUG()
 
 #ifdef HAVE_LIBPTHREAD
 typedef int (*cfs_thread_t)(void *);
index 5813e68..8f0dbde 100644 (file)
@@ -347,7 +347,6 @@ void libcfs_debug_dumplog_internal(void *arg)
 
 int libcfs_debug_dumplog_thread(void *arg)
 {
-        cfs_daemonize("");
         libcfs_debug_dumplog_internal(arg);
         cfs_waitq_signal(&debug_ctlwq);
         return 0;
@@ -355,8 +354,8 @@ int libcfs_debug_dumplog_thread(void *arg)
 
 void libcfs_debug_dumplog(void)
 {
-        int            rc;
         cfs_waitlink_t wait;
+        cfs_task_t    *dumper;
         ENTRY;
 
         /* we're being careful to ensure that the kernel thread is
@@ -366,12 +365,12 @@ void libcfs_debug_dumplog(void)
         set_current_state(TASK_INTERRUPTIBLE);
         cfs_waitq_add(&debug_ctlwq, &wait);
 
-        rc = cfs_kernel_thread(libcfs_debug_dumplog_thread,
-                               (void *)(long_ptr_t)cfs_curproc_pid(),
-                               CLONE_VM | CLONE_FS | CLONE_FILES);
-        if (rc < 0)
+        dumper = cfs_kthread_run(libcfs_debug_dumplog_thread,
+                                 (void*)(long)cfs_curproc_pid(),
+                                 "libcfs_debug_dumper");
+        if (IS_ERR(dumper))
                 printk(KERN_ERR "LustreError: cannot start log dump thread: "
-                       "%d\n", rc);
+                       "%ld\n", PTR_ERR(dumper));
         else
                 cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
 
index 708ad77..8fdc7f0 100644 (file)
@@ -331,6 +331,10 @@ int ll_file_release(struct inode *inode, struct file *file)
         lli->lli_async_rc = 0;
 
         rc = ll_md_close(sbi->ll_md_exp, inode, file);
+
+        if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val))
+                libcfs_debug_dumplog();
+
         RETURN(rc);
 }
 
index 08ab1e0..a4a3b90 100644 (file)
@@ -6579,6 +6579,21 @@ test_170() {
 }
 run_test 170 "test lctl df to handle corrupted log ====================="
 
+test_171() { # bug20592
+#define OBD_FAIL_PTLRPC_DUMP_LOG         0x50e
+        $LCTL set_param fail_loc=0x50e
+        $LCTL set_param fail_val=3000
+        multiop_bg_pause $DIR/$tfile Os || true
+        # cause log dump
+        sleep 3
+        if dmesg | grep "recursive fault"; then
+                error "caught a recursive fault"
+        fi
+        $LCTL set_param fail_loc=0
+        true
+}
+run_test 171 "test libcfs_debug_dumplog_thread stuck in do_exit() ======"
+
 # it would be good to share it with obdfilter-survey/libecho code
 setup_obdecho_osc () {
         local rc=0