From: deshmukh Date: Fri, 6 Nov 2009 13:55:08 +0000 (+0000) Subject: b=20592 X-Git-Tag: GIT_EPOCH_B_HD_KDMU~111 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e14a4074dc6234e923a0f39730b2dede41ba8009 b=20592 i=johann i=andrew.perepechko (panda) use kthread_run() instead of kernel_thread() so that the log dumper won't get stuck in do_exit(). (patch ported to head) --- diff --git a/libcfs/include/libcfs/linux/linux-prim.h b/libcfs/include/libcfs/linux/linux-prim.h index b1f09b1..32d528d 100644 --- a/libcfs/include/libcfs/linux/linux-prim.h +++ b/libcfs/include/libcfs/linux/linux-prim.h @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -141,6 +142,8 @@ typedef wait_queue_t cfs_waitlink_t; typedef wait_queue_head_t cfs_waitq_t; typedef long cfs_task_state_t; +#define cfs_kthread_run(fn, data, fmt, arg...) kthread_run(fn, data, fmt, ##arg) + /* Kernel thread */ typedef int (*cfs_thread_t)(void *); diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h index 7a38d7c..711bb67 100644 --- a/libcfs/include/libcfs/user-prim.h +++ b/libcfs/include/libcfs/user-prim.h @@ -139,6 +139,7 @@ static inline int cfs_psdev_deregister(cfs_psdev_t *foo) #define cfs_sigfillset(l) do {} while (0) #define cfs_recalc_sigpending(l) do {} while (0) #define cfs_kernel_thread(l,m,n) LBUG() +#define cfs_kthread_run(fn,d,fmt,...) 
LBUG() #ifdef HAVE_LIBPTHREAD typedef int (*cfs_thread_t)(void *); diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c index 5813e68..8f0dbde 100644 --- a/libcfs/libcfs/debug.c +++ b/libcfs/libcfs/debug.c @@ -347,7 +347,6 @@ void libcfs_debug_dumplog_internal(void *arg) int libcfs_debug_dumplog_thread(void *arg) { - cfs_daemonize(""); libcfs_debug_dumplog_internal(arg); cfs_waitq_signal(&debug_ctlwq); return 0; @@ -355,8 +354,8 @@ int libcfs_debug_dumplog_thread(void *arg) void libcfs_debug_dumplog(void) { - int rc; cfs_waitlink_t wait; + cfs_task_t *dumper; ENTRY; /* we're being careful to ensure that the kernel thread is @@ -366,12 +365,12 @@ void libcfs_debug_dumplog(void) set_current_state(TASK_INTERRUPTIBLE); cfs_waitq_add(&debug_ctlwq, &wait); - rc = cfs_kernel_thread(libcfs_debug_dumplog_thread, - (void *)(long_ptr_t)cfs_curproc_pid(), - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) + dumper = cfs_kthread_run(libcfs_debug_dumplog_thread, + (void*)(long)cfs_curproc_pid(), + "libcfs_debug_dumper"); + if (IS_ERR(dumper)) printk(KERN_ERR "LustreError: cannot start log dump thread: " - "%d\n", rc); + "%ld\n", PTR_ERR(dumper)); else cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 708ad77..8fdc7f0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -331,6 +331,10 @@ int ll_file_release(struct inode *inode, struct file *file) lli->lli_async_rc = 0; rc = ll_md_close(sbi->ll_md_exp, inode, file); + + if (OBD_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_DUMP_LOG, obd_fail_val)) + libcfs_debug_dumplog(); + RETURN(rc); } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 08ab1e0..a4a3b90 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6579,6 +6579,21 @@ test_170() { } run_test 170 "test lctl df to handle corrupted log =====================" +test_171() { # bug20592 +#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e + $LCTL set_param fail_loc=0x50e + $LCTL set_param fail_val=3000 
+ multiop_bg_pause $DIR/$tfile Os || true + # cause log dump + sleep 3 + if dmesg | grep "recursive fault"; then + error "caught a recursive fault" + fi + $LCTL set_param fail_loc=0 + true +} +run_test 171 "test libcfs_debug_dumplog_thread stuck in do_exit() ======" + # it would be good to share it with obdfilter-survey/libecho code setup_obdecho_osc () { local rc=0