X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=libcfs%2Flibcfs%2Fdebug.c;h=412a10c069fd1ed2ef583e57a07df1fdc1eaaff3;hp=3a0c2898963826af7d5495d18bcc65d6ce93034e;hb=f7392c7c4a16bc1127ee448f937ba81c50dcdfd5;hpb=adeb29400a4a6466e45cb8c793500ae6d37ed642 diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c index 3a0c289..412a10c 100644 --- a/libcfs/libcfs/debug.c +++ b/libcfs/libcfs/debug.c @@ -37,23 +37,27 @@ # define DEBUG_SUBSYSTEM S_LNET +#include #include +#include #include -#include +#include +#include +#include #include "tracefile.h" static char debug_file_name[1024]; unsigned int libcfs_subsystem_debug = ~0; +EXPORT_SYMBOL(libcfs_subsystem_debug); module_param(libcfs_subsystem_debug, int, 0644); MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask"); -EXPORT_SYMBOL(libcfs_subsystem_debug); -unsigned int libcfs_debug = (D_CANTMASK | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL | D_LFSCK); +unsigned int libcfs_debug = (D_CANTMASK | D_NETERROR | D_HA | D_CONFIG | + D_IOCTL | D_LFSCK | D_TTY); +EXPORT_SYMBOL(libcfs_debug); module_param(libcfs_debug, int, 0644); MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask"); -EXPORT_SYMBOL(libcfs_debug); static int libcfs_param_debug_mb_set(const char *val, cfs_kernel_param_arg_t *kp) @@ -65,39 +69,22 @@ static int libcfs_param_debug_mb_set(const char *val, if (rc < 0) return rc; -/* - * RHEL6 does not support any kind of locking so we have to provide - * our own - */ -#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK) - kernel_param_lock(THIS_MODULE); -#endif - if (!*((unsigned int *)kp->arg)) { - *((unsigned int *)kp->arg) = num; + num = cfs_trace_set_debug_mb(num); -#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK) - kernel_param_unlock(THIS_MODULE); -#endif - return 0; - } - - rc = cfs_trace_set_debug_mb(num); - - if (!rc) - *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb(); + *((unsigned int *)kp->arg) = num; + num = cfs_trace_get_debug_mb(); + if (num) + /* This value is more precise */ + *((unsigned int *)kp->arg) = num; -#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK) - kernel_param_unlock(THIS_MODULE); -#endif - return rc; + return 0; } -/* - * While debug_mb setting look like unsigned int, in fact +/* While debug_mb setting look like unsigned int, in fact * it needs quite a bunch of extra processing, so we define special * debug_mb parameter type with corresponding methods to handle this case */ -static struct kernel_param_ops param_ops_debug_mb = { +static const struct kernel_param_ops param_ops_debug_mb = { .set = libcfs_param_debug_mb_set, .get = param_get_uint, }; @@ -148,7 +135,9 @@ static int param_get_delay(char *buffer, cfs_kernel_param_arg_t *kp) { unsigned int d = *(unsigned int *)kp->arg; - return sprintf(buffer, "%lu", jiffies_to_msecs(d * 10) / MSEC_PER_SEC); + param_get_byte(buffer, kp); + return sprintf(buffer, "%lu%c", jiffies_to_msecs(d * 10) / MSEC_PER_SEC, + strnchr(buffer, PAGE_SIZE, '\n') ? '\n' : '\0'); } unsigned int libcfs_console_max_delay; @@ -161,7 +150,7 @@ static int param_set_console_max_delay(const char *val, libcfs_console_min_delay, INT_MAX); } -static struct kernel_param_ops param_ops_console_max_delay = { +static const struct kernel_param_ops param_ops_console_max_delay = { .set = param_set_console_max_delay, .get = param_get_delay, }; @@ -184,7 +173,7 @@ static int param_set_console_min_delay(const char *val, 1, libcfs_console_max_delay); } -static struct kernel_param_ops param_ops_console_min_delay = { +static const struct kernel_param_ops param_ops_console_min_delay = { .set = param_set_console_min_delay, .get = param_get_delay, }; @@ -224,7 +213,7 @@ static int param_set_uintpos(const char *val, return param_set_uint_minmax(val, kp, 1, -1); } -static struct kernel_param_ops param_ops_uintpos = { +static const struct kernel_param_ops param_ops_uintpos = { .set = param_set_uintpos, .get = param_get_uint, }; @@ -256,29 +245,25 @@ unsigned int libcfs_panic_on_lbug = 1; module_param(libcfs_panic_on_lbug, uint, 0644); MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG"); -atomic_t libcfs_kmemory = ATOMIC_INIT(0); -EXPORT_SYMBOL(libcfs_kmemory); +atomic64_t libcfs_kmem = ATOMIC64_INIT(0); +EXPORT_SYMBOL(libcfs_kmem); -static wait_queue_head_t debug_ctlwq; - -char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT; -EXPORT_SYMBOL(libcfs_debug_file_path_arr); +static DECLARE_COMPLETION(debug_complete); /* We need to pass a pointer here, but elsewhere this must be a const */ -static char *libcfs_debug_file_path = LIBCFS_DEBUG_FILE_PATH_DEFAULT; +char *libcfs_debug_file_path = LIBCFS_DEBUG_FILE_PATH_DEFAULT; +EXPORT_SYMBOL(libcfs_debug_file_path); module_param(libcfs_debug_file_path, charp, 0644); MODULE_PARM_DESC(libcfs_debug_file_path, "Path for dumping debug logs, set 'NONE' to prevent log dumping"); int libcfs_panic_in_progress; -/* - * libcfs_debug_token2mask() expects the returned - * string in lower-case - */ +/* libcfs_debug_token2mask() expects the returned string in lower-case */ static const char *libcfs_debug_subsys2str(int subsys) { - static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES; + static const char * const libcfs_debug_subsystems[] = + LIBCFS_DEBUG_SUBSYS_NAMES; if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems)) return NULL; @@ -286,13 +271,11 @@ static const char *libcfs_debug_subsys2str(int subsys) return libcfs_debug_subsystems[subsys]; } -/* - * libcfs_debug_token2mask() expects the returned - * string in lower-case - */ +/* libcfs_debug_token2mask() expects the returned string in lower-case */ static const char *libcfs_debug_dbg2str(int debug) { - static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES; + static const char * const libcfs_debug_masks[] = + LIBCFS_DEBUG_MASKS_NAMES; if (debug >= ARRAY_SIZE(libcfs_debug_masks)) return NULL; @@ -315,11 +298,11 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) len = 1; } else { /* space-separated tokens */ for (i = 0; i < 32; i++) { - if ((mask & (1 << i)) == 0) + if ((mask & BIT(i)) == 0) continue; token = fn(i); - if (token == NULL) /* unused bit */ + if (!token) /* unused bit */ continue; if (len > 0) { /* separator? */ @@ -361,14 +344,11 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) if (!isspace(str[n-1])) break; matched = n; - t = sscanf(str, "%i%n", &m, &matched); if (t >= 1 && matched == n) { /* don't print warning for lctl set_param debug=0 or -1 */ if (m != 0 && m != -1) - CWARN("You are trying to use a numerical value for the " - "mask - this will be deprecated in a future " - "release.\n"); + CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n"); *mask = m; return 0; } @@ -377,77 +357,319 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) 0xffffffff); } +char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall"; + +/* Upcall function once a Lustre log has been dumped. + * + * @file path of the dumped log + */ +static void libcfs_run_debug_log_upcall(char *file) +{ + char *argv[3]; + int rc; + static const char * const envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + + ENTRY; + argv[0] = lnet_debug_log_upcall; + + LASSERTF(file, "called on a null filename\n"); + argv[1] = file; /* only need to pass the path of the file */ + + argv[2] = NULL; + + rc = call_usermodehelper(argv[0], argv, (char **)envp, 1); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n", + rc, argv[0], argv[1]); + } else { + CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n", + argv[0], argv[1]); + } +} + /** * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages() */ -void libcfs_debug_dumplog_internal(void *arg) +static void libcfs_debug_dumplog_internal(void *arg) { static time64_t last_dump_time; time64_t current_time; - void *journal_info; - journal_info = current->journal_info; - current->journal_info = NULL; current_time = ktime_get_real_seconds(); - if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 && + if (strncmp(libcfs_debug_file_path, "NONE", 4) != 0 && current_time > last_dump_time) { last_dump_time = current_time; snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%lld.%ld", libcfs_debug_file_path_arr, + "%s.%lld.%ld", libcfs_debug_file_path, (s64)current_time, (uintptr_t)arg); - printk(KERN_ALERT "LustreError: dumping log to %s\n", - debug_file_name); + pr_alert("LustreError: dumping log to %s\n", debug_file_name); cfs_tracefile_dump_all_pages(debug_file_name); libcfs_run_debug_log_upcall(debug_file_name); } - current->journal_info = journal_info; } static int libcfs_debug_dumplog_thread(void *arg) { libcfs_debug_dumplog_internal(arg); - wake_up(&debug_ctlwq); + complete(&debug_complete); return 0; } +static DEFINE_MUTEX(libcfs_debug_dumplog_lock); + void libcfs_debug_dumplog(void) { - wait_queue_entry_t wait; struct task_struct *dumper; ENTRY; - /* - * we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() - */ - init_waitqueue_entry(&wait, current); - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&debug_ctlwq, &wait); + if (mutex_trylock(&libcfs_debug_dumplog_lock) == 0) + return; + /* If a previous call was interrupted, debug_complete->done + * might be elevated, and so we won't actually wait here. + * So we reinit the completion to ensure we wait for + * one thread to complete, though it might not be the one + * we start if there are overlaping thread. + */ + reinit_completion(&debug_complete); dumper = kthread_run(libcfs_debug_dumplog_thread, - (void *)(long)current_pid(), + (void *)(long)current->pid, "libcfs_debug_dumper"); if (IS_ERR(dumper)) - printk(KERN_ERR "LustreError: cannot start log dump thread:" - " %ld\n", PTR_ERR(dumper)); + pr_err("LustreError: cannot start log dump thread: rc = %ld\n", + PTR_ERR(dumper)); else - schedule(); + wait_for_completion_interruptible(&debug_complete); - /* be sure to teardown if cfs_create_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); - set_current_state(TASK_RUNNING); + mutex_unlock(&libcfs_debug_dumplog_lock); } EXPORT_SYMBOL(libcfs_debug_dumplog); +/* coverity[+kill] */ +void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata) +{ + libcfs_catastrophe = 1; + libcfs_debug_msg(msgdata, "LBUG\n"); + + if (in_interrupt()) { + panic("LBUG in interrupt.\n"); + /* not reached */ + } + + libcfs_debug_dumpstack(NULL); + if (libcfs_panic_on_lbug) + panic("LBUG"); + else + libcfs_debug_dumplog(); + set_current_state(TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +EXPORT_SYMBOL(lbug_with_loc); + +#ifdef CONFIG_STACKTRACE + +#ifndef HAVE_SAVE_STACK_TRACE_TSK +#define save_stack_trace_tsk(tsk, trace) \ +do { \ + if (tsk == current) \ + save_stack_trace(trace); \ + else \ + pr_info("No stack, save_stack_trace_tsk() not exported\n"); \ +} while (0) +#endif + +static void cfs_print_stack_trace(unsigned long *entries, unsigned int nr) +{ + unsigned int i; + + /* Prefer %pB for backtraced symbolic names since it was added in: + * Linux v2.6.38-6557-g0f77a8d37825 + * vsprintf: Introduce %pB format specifier + */ + for (i = 0; i < nr; i++) + pr_info("[<0>] %pB\n", (void *)entries[i]); +} + +#define MAX_ST_ENTRIES 100 +static DEFINE_SPINLOCK(st_lock); + +/* Linux v5.1-rc5 214d8ca6ee ("stacktrace: Provide common infrastructure") + * CONFIG_ARCH_STACKWALK indicates that save_stack_trace_tsk symbol is not + * exported. Use symbol_get() to find if save_stack_trace_tsk is available. + */ +#ifdef CONFIG_ARCH_STACKWALK +typedef unsigned int (stack_trace_save_tsk_t)(struct task_struct *task, + unsigned long *store, + unsigned int size, + unsigned int skipnr); +static stack_trace_save_tsk_t *task_dump_stack; +#endif + +void __init cfs_debug_init(void) +{ +#ifdef CONFIG_ARCH_STACKWALK + task_dump_stack = (void *) + kallsyms_lookup_name("stack_trace_save_tsk"); + +#endif +} + +static void libcfs_call_trace(struct task_struct *tsk) +{ + static unsigned long entries[MAX_ST_ENTRIES]; +#ifdef CONFIG_ARCH_STACKWALK + unsigned int nr_entries; + + spin_lock(&st_lock); + pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm, + init_utsname()->release, init_utsname()->version); + pr_info("Call Trace TBD:\n"); + if (task_dump_stack) { + nr_entries = task_dump_stack(tsk, entries, MAX_ST_ENTRIES, 0); + cfs_print_stack_trace(entries, nr_entries); + } + spin_unlock(&st_lock); +#else + struct stack_trace trace; + + trace.nr_entries = 0; + trace.max_entries = MAX_ST_ENTRIES; + trace.entries = entries; + trace.skip = 0; + + spin_lock(&st_lock); + pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm, + init_utsname()->release, init_utsname()->version); + pr_info("Call Trace:\n"); + save_stack_trace_tsk(tsk, &trace); + cfs_print_stack_trace(trace.entries, trace.nr_entries); + spin_unlock(&st_lock); +#endif +} + +#else /* !CONFIG_STACKTRACE */ + +#ifdef CONFIG_X86 +#include +#include + +#ifdef HAVE_STACKTRACE_OPS +static int print_trace_stack(void *data, char *name) +{ + printk(" <%s> ", name); + return 0; +} + +#ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT +static int +#else +static void +#endif +print_trace_address(void *data, unsigned long addr, int reliable) +{ + char fmt[32]; + + touch_nmi_watchdog(); + sprintf(fmt, " [<%016lx>] %s%%s\n", addr, reliable ? "" : "? "); + __print_symbol(fmt, addr); +#ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT + return 0; +#endif +} + +static const struct stacktrace_ops print_trace_ops = { + .stack = print_trace_stack, + .address = print_trace_address, + .walk_stack = print_context_stack, +}; +#endif /* HAVE_STACKTRACE_OPS */ + +static void libcfs_call_trace(struct task_struct *tsk) +{ +#ifdef HAVE_STACKTRACE_OPS + printk("Pid: %d, comm: %.20s\n", tsk->pid, tsk->comm); + printk("\nCall Trace:\n"); + dump_trace(tsk, NULL, NULL, 0, &print_trace_ops, NULL); + printk("\n"); +#else /* !HAVE_STACKTRACE_OPS */ + if (tsk == current) + dump_stack(); + else + CWARN("can't show stack: kernel doesn't export show_task\n"); +#endif /* HAVE_STACKTRACE_OPS */ +} + +#else /* !CONFIG_X86 */ + +static void libcfs_call_trace(struct task_struct *tsk) +{ + if (tsk == current) + dump_stack(); + else + CWARN("can't show stack: kernel doesn't export show_task\n"); +} + +#endif /* CONFIG_X86 */ + +#endif /* CONFIG_STACKTRACE */ + +void libcfs_debug_dumpstack(struct task_struct *tsk) +{ + libcfs_call_trace(tsk ?: current); +} +EXPORT_SYMBOL(libcfs_debug_dumpstack); + +static int panic_notifier(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + if (libcfs_panic_in_progress) + return 0; + + libcfs_panic_in_progress = 1; + mb(); + +#ifdef LNET_DUMP_ON_PANIC + /* This is currently disabled because it spews far too much to the + * console on the rare cases it is ever triggered. */ + + if (in_interrupt()) { + cfs_trace_debug_print(); + } else { + libcfs_debug_dumplog_internal((void *)(long)current->pid); + } +#endif + return 0; +} + +static struct notifier_block libcfs_panic_notifier = { + .notifier_call = panic_notifier, + .next = NULL, + .priority = 10000, +}; + +static void libcfs_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &libcfs_panic_notifier); +} + +static void libcfs_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &libcfs_panic_notifier); +} + int libcfs_debug_init(unsigned long bufsize) { - int rc = 0; unsigned int max = libcfs_debug_mb; - - init_waitqueue_head(&debug_ctlwq); + int rc = 0; if (libcfs_console_max_delay <= 0 || /* not set by user or */ libcfs_console_min_delay <= 0 || /* set to invalid values */ @@ -456,21 +678,14 @@ int libcfs_debug_init(unsigned long bufsize) libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; } - if (libcfs_debug_file_path != NULL) { - strlcpy(libcfs_debug_file_path_arr, - libcfs_debug_file_path, - sizeof(libcfs_debug_file_path_arr)); - } - - /* - * If libcfs_debug_mb is set to an invalid value or uninitialized - * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES + /* If libcfs_debug_mb is uninitialized then just make the + * total buffers smp_num_cpus * TCD_MAX_PAGES */ - if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) { + if (max < num_possible_cpus()) { max = TCD_MAX_PAGES; } else { max = (max / num_possible_cpus()); - max = (max << (20 - PAGE_SHIFT)); + max <<= (20 - PAGE_SHIFT); } rc = cfs_tracefile_init(max); @@ -500,20 +715,20 @@ int libcfs_debug_clear_buffer(void) return 0; } -/* - * Debug markers, although printed by S_LNET - * should not be be marked as such. - */ +/* Debug markers, although printed by S_LNET should not be be marked as such. */ #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_UNDEFINED int libcfs_debug_mark_buffer(const char *text) { - CDEBUG(D_TRACE, "**************************************************\n"); + CDEBUG(D_TRACE, + "**************************************************\n"); LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE, "**************************************************\n"); + CDEBUG(D_TRACE, + "**************************************************\n"); return 0; } + #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_LNET