4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * libcfs/libcfs/debug.c
33 * Author: Phil Schwan <phil@clusterfs.com>
37 # define DEBUG_SUBSYSTEM S_LNET
39 #include <linux/module.h>
40 #include <linux/ctype.h>
41 #include <libcfs/libcfs_string.h>
42 #include <linux/kthread.h>
43 #include <linux/stacktrace.h>
44 #include <linux/utsname.h>
45 #include <linux/kallsyms.h>
46 #ifdef HAVE_PANIC_NOTIFIER_H
47 #include <linux/panic_notifier.h>
49 #include "tracefile.h"
/* Buffer that libcfs_debug_dumplog_internal() formats the dump file name
 * into before the trace pages are written out.
 */
51 static char debug_file_name[1024];
/* Subsystem debug mask, initialised to LIBCFS_S_DEFAULT. Exported to other
 * modules and writable at runtime as a 0644 module parameter.
 * NOTE(review): the variable is unsigned int but the parameter is registered
 * with the "int" type - confirm this is intentional.
 */
53 unsigned int libcfs_subsystem_debug = LIBCFS_S_DEFAULT;
54 EXPORT_SYMBOL(libcfs_subsystem_debug);
55 module_param(libcfs_subsystem_debug, int, 0644);
56 MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
/* Debug message mask, initialised to LIBCFS_D_DEFAULT. Exported and
 * registered the same way as the subsystem mask above (same "int" vs
 * unsigned int mismatch applies).
 */
58 unsigned int libcfs_debug = LIBCFS_D_DEFAULT;
59 EXPORT_SYMBOL(libcfs_debug);
60 module_param(libcfs_debug, int, 0644);
61 MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
/* "set" handler for the debug_mb parameter type.
 *
 * @val is parsed with kstrtouint() (base 0, i.e. auto-detected) and a value
 * is stored through kp->arg. cfs_trace_set_debug_mb() is used to apply a new
 * size and cfs_trace_get_debug_mb() to read back the size actually in use.
 *
 * NOTE(review): the conditions that select between the two stores below, and
 * the return statements, are not visible in this extract - confirm against
 * the complete file.
 */
63 static int libcfs_param_debug_mb_set(const char *val,
64 cfs_kernel_param_arg_t *kp)
69 rc = kstrtouint(val, 0, &num);
73 num = cfs_trace_set_debug_mb(num);
75 *((unsigned int *)kp->arg) = num;
76 num = cfs_trace_get_debug_mb();
78 /* This value is more precise */
79 *((unsigned int *)kp->arg) = num;
84 /* While the debug_mb setting looks like an unsigned int, in fact
85 * it needs quite a bunch of extra processing, so we define a special
86 * debug_mb parameter type with corresponding methods to handle this case
/* Parameter operations for the "debug_mb" type: the custom setter defined
 * in this file paired with the kernel's stock unsigned-int getter.
 */
88 static const struct kernel_param_ops param_ops_debug_mb = {
89 .set = libcfs_param_debug_mb_set,
90 .get = param_get_uint,
/* Compile-time type check for variables registered with the "debug_mb"
 * parameter type: the backing variable must be an unsigned int.
 */
93 #define param_check_debug_mb(name, p) \
94 __param_check(name, p, unsigned int)
/* Backing variable for the "libcfs_debug_mb" module parameter (0644).
 *
 * Two registration forms are present: module_param() with the custom
 * "debug_mb" type when HAVE_KERNEL_PARAM_OPS is defined, and
 * module_param_call() with explicit set/get handlers otherwise.
 * NOTE(review): the #else/#endif lines separating the two forms are not
 * visible in this extract - confirm against the complete file.
 *
 * The last module_param_call() argument is the address of
 * param_ops_debug_mb; the '&' had been mis-encoded as a pilcrow.
 */
96 static unsigned int libcfs_debug_mb;
97 #ifdef HAVE_KERNEL_PARAM_OPS
98 module_param(libcfs_debug_mb, debug_mb, 0644);
100 module_param_call(libcfs_debug_mb, libcfs_param_debug_mb_set, param_get_uint,
101 &param_ops_debug_mb, 0644);
103 MODULE_PARM_DESC(libcfs_debug_mb, "Total debug buffer size.");
/* Console debug mask, initialised to D_CANTMASK; writable 0644 module
 * parameter.
 */
105 unsigned int libcfs_printk = D_CANTMASK;
106 module_param(libcfs_printk, uint, 0644);
107 MODULE_PARM_DESC(libcfs_printk, "Lustre kernel debug console mask");
/* Console rate limiting switch, enabled (1) by default; per the parameter
 * description, 0 disables it.
 */
109 unsigned int libcfs_console_ratelimit = 1;
110 module_param(libcfs_console_ratelimit, uint, 0644);
111 MODULE_PARM_DESC(libcfs_console_ratelimit, "Lustre kernel debug console ratelimit (0 to disable)");
/* Common setter for the console delay parameters.
 *
 * @val is parsed with kstrtoint() (base auto-detected), scaled with
 * cfs_time_seconds(sec) / 100 because the user-visible unit is centiseconds
 * (presumably yielding jiffies - TODO confirm), range-checked against
 * min/max and stored through kp->arg.
 *
 * NOTE(review): the min/max parameter declarations, the error returns and
 * the final return are not visible in this extract.
 */
113 static int param_set_delay_minmax(const char *val,
114 cfs_kernel_param_arg_t *kp,
121 rc = kstrtoint(val, 0, &sec);
125 /* The sysfs setting is in centiseconds */
126 d = cfs_time_seconds(sec) / 100;
127 if (d < min || d > max)
130 *((unsigned int *)kp->arg) = d;
/* "get" handler for the console delay parameters.
 *
 * Formats the stored delay into @buffer as
 * jiffies_to_msecs(d * 10) / MSEC_PER_SEC and returns sprintf()'s result.
 */
135 static int param_get_delay(char *buffer, cfs_kernel_param_arg_t *kp)
137 unsigned int d = *(unsigned int *)kp->arg;
/* param_get_byte() is called only so that its output can be probed for a
 * trailing newline; the sprintf() below overwrites the buffer and appends
 * '\n' only if the stock getter produced one.
 */
139 param_get_byte(buffer, kp);
140 return sprintf(buffer, "%lu%c", jiffies_to_msecs(d * 10) / MSEC_PER_SEC,
141 strnchr(buffer, PAGE_SIZE, '\n') ? '\n' : '\0');
/* Upper and lower bounds of the console delay. Both start out as zero and
 * are replaced by the CDEBUG_DEFAULT_* values in libcfs_debug_init() when
 * left unset or inconsistent.
 */
144 unsigned int libcfs_console_max_delay;
145 unsigned int libcfs_console_min_delay;
/* Setter for libcfs_console_max_delay: the new value must lie between the
 * current libcfs_console_min_delay and INT_MAX.
 */
147 static int param_set_console_max_delay(const char *val,
148 cfs_kernel_param_arg_t *kp)
150 return param_set_delay_minmax(val, kp,
151 libcfs_console_min_delay, INT_MAX);
/* Parameter operations for the "console_max_delay" type. */
154 static const struct kernel_param_ops param_ops_console_max_delay = {
155 .set = param_set_console_max_delay,
156 .get = param_get_delay,
/* Type check for the "console_max_delay" parameter type: the backing
 * variable must be an unsigned int.
 */
159 #define param_check_console_max_delay(name, p) \
160 __param_check(name, p, unsigned int)
/* Register libcfs_console_max_delay as a 0644 module parameter, either via
 * the custom "console_max_delay" type (HAVE_KERNEL_PARAM_OPS) or via
 * module_param_call() with explicit handlers.
 * NOTE(review): the #else/#endif lines separating the two forms are not
 * visible in this extract. The description below says "jiffies" while the
 * setter treats user input as centiseconds - confirm which is intended.
 *
 * The last module_param_call() argument is the address of
 * param_ops_console_max_delay; the '&' had been mis-encoded as a pilcrow.
 */
162 #ifdef HAVE_KERNEL_PARAM_OPS
163 module_param(libcfs_console_max_delay, console_max_delay, 0644);
165 module_param_call(libcfs_console_max_delay, param_set_console_max_delay,
166 param_get_delay, &param_ops_console_max_delay, 0644);
168 MODULE_PARM_DESC(libcfs_console_max_delay, "Lustre kernel debug console max delay (jiffies)");
/* Setter for libcfs_console_min_delay: the new value must lie between 1 and
 * the current libcfs_console_max_delay.
 */
170 static int param_set_console_min_delay(const char *val,
171 cfs_kernel_param_arg_t *kp)
173 return param_set_delay_minmax(val, kp,
174 1, libcfs_console_max_delay);
/* Parameter operations for the "console_min_delay" type. */
177 static const struct kernel_param_ops param_ops_console_min_delay = {
178 .set = param_set_console_min_delay,
179 .get = param_get_delay,
/* Type check for the "console_min_delay" parameter type: the backing
 * variable must be an unsigned int.
 */
182 #define param_check_console_min_delay(name, p) \
183 __param_check(name, p, unsigned int)
/* Register libcfs_console_min_delay as a 0644 module parameter, either via
 * the custom "console_min_delay" type (HAVE_KERNEL_PARAM_OPS) or via
 * module_param_call() with explicit handlers.
 * NOTE(review): the #else/#endif lines separating the two forms are not
 * visible in this extract. The description below says "jiffies" while the
 * setter treats user input as centiseconds - confirm which is intended.
 *
 * The last module_param_call() argument is the address of
 * param_ops_console_min_delay; the '&' had been mis-encoded as a pilcrow.
 */
185 #ifdef HAVE_KERNEL_PARAM_OPS
186 module_param(libcfs_console_min_delay, console_min_delay, 0644);
188 module_param_call(libcfs_console_min_delay, param_set_console_min_delay,
189 param_get_delay, &param_ops_console_min_delay, 0644);
191 MODULE_PARM_DESC(libcfs_console_min_delay, "Lustre kernel debug console min delay (jiffies)");
/* Local fallback, compiled only when HAVE_PARAM_SET_UINT_MINMAX is not
 * defined.
 *
 * Parses @val with kstrtouint() (base auto-detected) and stores the result
 * through kp->arg; a parse failure or a value outside [min, max] takes the
 * rejection branch instead.
 * NOTE(review): the value returned on rejection and on success is not
 * visible in this extract.
 */
193 #ifndef HAVE_PARAM_SET_UINT_MINMAX
194 static int param_set_uint_minmax(const char *val,
195 cfs_kernel_param_arg_t *kp,
196 unsigned int min, unsigned int max)
204 ret = kstrtouint(val, 0, &num);
205 if (ret < 0 || num < min || num > max)
208 *((unsigned int *)kp->arg) = num;
/* Setter for strictly positive unsigned parameters: minimum 1, and -1 as
 * the maximum (with the local fallback's unsigned int "max" parameter this
 * converts to the largest unsigned int).
 */
213 static int param_set_uintpos(const char *val,
214 cfs_kernel_param_arg_t *kp)
216 return param_set_uint_minmax(val, kp, 1, -1);
/* Parameter operations for the "uintpos" type: positive-only setter paired
 * with the kernel's stock unsigned-int getter.
 */
219 static const struct kernel_param_ops param_ops_uintpos = {
220 .set = param_set_uintpos,
221 .get = param_get_uint,
/* Type check for the "uintpos" parameter type: the backing variable must be
 * an unsigned int.
 */
224 #define param_check_uintpos(name, p) \
225 __param_check(name, p, unsigned int)
/* Console backoff factor, initialised to CDEBUG_DEFAULT_BACKOFF and
 * registered as a 0644 module parameter of the positive-only "uintpos"
 * type, or via module_param_call() with explicit handlers.
 * NOTE(review): the #else/#endif lines separating the two forms are not
 * visible in this extract - confirm against the complete file.
 *
 * The last module_param_call() argument is the address of
 * param_ops_uintpos; the '&' had been mis-encoded as a pilcrow.
 */
227 unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
228 #ifdef HAVE_KERNEL_PARAM_OPS
229 module_param(libcfs_console_backoff, uintpos, 0644);
231 module_param_call(libcfs_console_backoff, param_set_uintpos, param_get_uint,
232 &param_ops_uintpos, 0644);
234 MODULE_PARM_DESC(libcfs_console_backoff, "Lustre kernel debug console backoff factor");
/* Defaults to 1; not registered as a module parameter in this file. */
236 unsigned int libcfs_debug_binary = 1;
/* Set to 1 by lbug_with_loc() when an LBUG is hit; exported. */
238 unsigned int libcfs_catastrophe;
239 EXPORT_SYMBOL(libcfs_catastrophe);
/* Exported watchdog rate limit, default 300 (units are not stated here). */
241 unsigned int libcfs_watchdog_ratelimit = 300;
242 EXPORT_SYMBOL(libcfs_watchdog_ratelimit);
/* When non-zero (the default), lbug_with_loc() takes its panic branch. */
244 unsigned int libcfs_panic_on_lbug = 1;
245 module_param(libcfs_panic_on_lbug, uint, 0644);
246 MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
/* Exported 64-bit atomic counter, starting at zero. */
248 atomic64_t libcfs_kmem = ATOMIC64_INIT(0);
249 EXPORT_SYMBOL(libcfs_kmem);
/* Signalled by the dump thread; libcfs_debug_dumplog() waits on it. */
251 static DECLARE_COMPLETION(debug_complete);
253 /* We need to pass a pointer here, but elsewhere this must be a const */
254 char *libcfs_debug_file_path = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
255 EXPORT_SYMBOL(libcfs_debug_file_path);
256 module_param(libcfs_debug_file_path, charp, 0644);
257 MODULE_PARM_DESC(libcfs_debug_file_path,
258 "Path for dumping debug logs, set 'NONE' to prevent log dumping");
/* Set to 1 by panic_notifier() the first time it runs. */
260 int libcfs_panic_in_progress;
/* Map a subsystem bit index to its name from LIBCFS_DEBUG_SUBSYS_NAMES.
 * NOTE(review): the value returned for an out-of-range index is not visible
 * in this extract.
 */
262 /* libcfs_debug_token2mask() expects the returned string in lower-case */
263 static const char *libcfs_debug_subsys2str(int subsys)
265 static const char *const libcfs_debug_subsystems[] =
266 LIBCFS_DEBUG_SUBSYS_NAMES;
/* subsys is converted to an unsigned type for this comparison, so negative
 * indices are rejected together with indices past the end of the table.
 */
268 if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
271 return libcfs_debug_subsystems[subsys];
/* Map a debug-mask bit index to its name from LIBCFS_DEBUG_MASKS_NAMES.
 * NOTE(review): the value returned for an out-of-range index is not visible
 * in this extract.
 */
274 /* libcfs_debug_token2mask() expects the returned string in lower-case */
275 static const char *libcfs_debug_dbg2str(int debug)
277 static const char * const libcfs_debug_masks[] =
278 LIBCFS_DEBUG_MASKS_NAMES;
/* debug is converted to an unsigned type for this comparison, so negative
 * indices are rejected together with indices past the end of the table.
 */
280 if (debug >= ARRAY_SIZE(libcfs_debug_masks))
283 return libcfs_debug_masks[debug];
/* Render @mask into @str (buffer size @size) through cfs_mask2str(), using
 * ' ' as the separator. Bit names come from the subsystem table when
 * @is_subsys is non-zero and from the debug-mask table otherwise. Returns
 * whatever cfs_mask2str() returns.
 */
286 int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
288 const char *(*bit2str)(int bit) = is_subsys ? libcfs_debug_subsys2str :
289 libcfs_debug_dbg2str;
291 return cfs_mask2str(str, size, mask, bit2str, ' ');
/* Parse @str into a debug mask (or a subsystem mask when @is_subsys is
 * non-zero).
 *
 * A purely numeric string is still accepted for backwards compatibility and
 * triggers a deprecation warning unless the value is 0 or -1. Anything else
 * is handed to cfs_str2mask() together with the matching bit-name lookup.
 *
 * NOTE(review): the local declarations, the stores into *mask and the
 * return statements are not visible in this extract.
 */
294 int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
296 const char *(*bit2str)(int bit) = is_subsys ? libcfs_debug_subsys2str :
297 libcfs_debug_dbg2str;
304 /* Allow a number for backwards compatibility */
/* n becomes the length of str with trailing whitespace ignored */
305 for (n = strlen(str); n > 0; n--)
306 if (!isspace(str[n - 1]))
/* "%i" accepts decimal, octal and hex; "%n" records how many characters
 * were consumed so that only a fully numeric string counts as a number.
 */
309 t = sscanf(str, "%i%n", &m, &matched);
310 if (t >= 1 && matched == n) {
311 /* don't print warning for lctl set_param debug=0 or -1 */
312 if (m != 0 && m != -1)
313 CWARN("using a numerical debug mask is deprecated\n");
/* Symbolic form: D_CANTMASK is passed only for debug masks (0 for
 * subsystems), and the last argument is the matching default mask.
 */
318 rc = cfs_str2mask(str, bit2str, &newmask, is_subsys ? 0 : D_CANTMASK,
319 ~0, is_subsys ? LIBCFS_S_DEFAULT : LIBCFS_D_DEFAULT);
/* Path of the user-space helper that libcfs_run_debug_log_upcall() executes
 * after a log dump.
 */
326 char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
328 /* Upcall function once a Lustre log has been dumped.
330 * @file path of the dumped log
/* Execute the helper named by lnet_debug_log_upcall with @file as its only
 * argument, using call_usermodehelper() with a fixed environment.
 * A negative result other than -ENOENT is reported with CERROR(); the
 * CDEBUG() message covers the remaining case.
 */
332 static void libcfs_run_debug_log_upcall(char *file)
336 static const char * const envp[] = {
338 "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
343 argv[0] = lnet_debug_log_upcall;
345 LASSERTF(file, "called on a null filename\n");
346 argv[1] = file; /* only need to pass the path of the file */
/* The const qualifier is cast away only to match the helper's prototype */
350 rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
351 if (rc < 0 && rc != -ENOENT) {
352 CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
353 rc, argv[0], argv[1]);
355 CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
361 * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
/* Write the trace buffer to "<libcfs_debug_file_path>.<seconds>.<arg>" and
 * then run the debug log upcall on that file.
 *
 * @arg is an integer smuggled through a pointer; the callers in this file
 * pass current->pid.
 *
 * Nothing happens when the configured path begins with "NONE", or when the
 * wall-clock second has not advanced since the previous dump.
 */
363 static void libcfs_debug_dumplog_internal(void *arg)
365 static time64_t last_dump_time;
366 time64_t current_time;
368 current_time = ktime_get_real_seconds();
/* NOTE(review): only the first four characters are compared, so any path
 * that merely starts with "NONE" also disables dumping.
 */
370 if (strncmp(libcfs_debug_file_path, "NONE", 4) != 0 &&
371 current_time > last_dump_time) {
372 last_dump_time = current_time;
373 snprintf(debug_file_name, sizeof(debug_file_name) - 1,
374 "%s.%lld.%ld", libcfs_debug_file_path,
375 (s64)current_time, (uintptr_t)arg);
376 pr_alert("LustreError: dumping log to %s\n", debug_file_name);
377 cfs_tracefile_dump_all_pages(debug_file_name);
378 libcfs_run_debug_log_upcall(debug_file_name);
/* Thread entry point started by libcfs_debug_dumplog(): performs the dump
 * and then signals debug_complete so that the waiting caller can continue.
 */
382 static int libcfs_debug_dumplog_thread(void *arg)
384 libcfs_debug_dumplog_internal(arg);
385 complete(&debug_complete);
/* Ensures that only one caller at a time drives a log dump. */
389 static DEFINE_MUTEX(libcfs_debug_dumplog_lock);
/* Dump the debug log from a separate kernel thread named
 * "libcfs_debug_dumper" and wait (interruptibly) for it to finish.
 *
 * The dump lock is taken with mutex_trylock(), so the call never blocks on
 * the lock itself. The calling task's pid is handed to the thread and ends
 * up in the dump file name. A failure to start the thread is logged with
 * pr_err().
 */
391 void libcfs_debug_dumplog(void)
393 struct task_struct *dumper;
397 if (mutex_trylock(&libcfs_debug_dumplog_lock) == 0)
400 /* If a previous call was interrupted, debug_complete->done
401 * might be elevated, and so we won't actually wait here.
402 * So we reinit the completion to ensure we wait for
403 * one thread to complete, though it might not be the one
404 * we start if there are overlaping thread.
406 reinit_completion(&debug_complete);
407 dumper = kthread_run(libcfs_debug_dumplog_thread,
408 (void *)(long)current->pid,
409 "libcfs_debug_dumper");
411 pr_err("LustreError: cannot start log dump thread: rc = %ld\n",
414 wait_for_completion_interruptible(&debug_complete);
416 mutex_unlock(&libcfs_debug_dumplog_lock);
418 EXPORT_SYMBOL(libcfs_debug_dumplog);
/* Handle an LBUG; declared __noreturn.
 *
 * Marks the catastrophe flag and logs "LBUG" through libcfs_debug_msg().
 * In interrupt context it panics immediately. Otherwise it honours
 * libcfs_panic_on_lbug, dumps the debug log and puts the task into
 * TASK_UNINTERRUPTIBLE.
 * NOTE(review): the statements guarded by libcfs_panic_on_lbug and the code
 * following set_current_state() are not visible in this extract.
 */
420 void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
422 libcfs_catastrophe = 1;
423 libcfs_debug_msg(msgdata, "LBUG\n");
425 if (in_interrupt()) {
426 panic("LBUG in interrupt.\n");
431 if (libcfs_panic_on_lbug)
434 libcfs_debug_dumplog();
435 set_current_state(TASK_UNINTERRUPTIBLE);
439 EXPORT_SYMBOL(lbug_with_loc);
441 #ifdef CONFIG_STACKTRACE
/* Fallback used when HAVE_SAVE_STACK_TRACE_TSK is not defined: only the
 * current task can be traced (through save_stack_trace()); for any other
 * task a notice is printed instead.
 */
443 #ifndef HAVE_SAVE_STACK_TRACE_TSK
444 #define save_stack_trace_tsk(tsk, trace) \
446 if (tsk == current) \
447 save_stack_trace(trace); \
449 pr_info("No stack, save_stack_trace_tsk() not exported\n"); \
/* Print @nr saved return addresses from @entries, one pr_info() line each,
 * formatted symbolically with %pB.
 */
453 static void cfs_print_stack_trace(unsigned long *entries, unsigned int nr)
457 /* Prefer %pB for backtraced symbolic names since it was added in:
458 * Linux v2.6.38-6557-g0f77a8d37825
459 * vsprintf: Introduce %pB format specifier
461 for (i = 0; i < nr; i++)
462 pr_info("[<0>] %pB\n", (void *)entries[i]);
/* Capacity of the shared stack-entry buffer used by libcfs_call_trace(). */
465 #define MAX_ST_ENTRIES 100
/* Released after each trace is printed below; the entries buffer is static
 * and therefore shared between callers.
 * NOTE(review): the matching spin_lock() calls are not visible in this
 * extract.
 */
466 static DEFINE_SPINLOCK(st_lock);
/* Print a header line (pid, comm, kernel release and version) followed by
 * the stack trace of @tsk. Two variants exist: one built on
 * cfs_stack_trace_save_tsk() when CONFIG_ARCH_STACKWALK is defined, and one
 * built on struct stack_trace with save_stack_trace_tsk().
 */
468 static void libcfs_call_trace(struct task_struct *tsk)
470 static unsigned long entries[MAX_ST_ENTRIES];
471 #ifdef CONFIG_ARCH_STACKWALK
472 unsigned int nr_entries;
475 pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm,
476 init_utsname()->release, init_utsname()->version);
477 pr_info("Call Trace TBD:\n");
478 nr_entries = cfs_stack_trace_save_tsk(tsk, entries, MAX_ST_ENTRIES, 0);
479 cfs_print_stack_trace(entries, nr_entries);
480 spin_unlock(&st_lock);
482 struct stack_trace trace;
484 trace.nr_entries = 0;
485 trace.max_entries = MAX_ST_ENTRIES;
486 trace.entries = entries;
490 pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm,
491 init_utsname()->release, init_utsname()->version);
492 pr_info("Call Trace:\n");
493 save_stack_trace_tsk(tsk, &trace);
494 cfs_print_stack_trace(trace.entries, trace.nr_entries);
495 spin_unlock(&st_lock);
499 #else /* !CONFIG_STACKTRACE */
502 #include <linux/nmi.h>
503 #include <asm/stacktrace.h>
/* stacktrace_ops ".stack" callback: prints the stack name in angle
 * brackets. @data is not used in the visible code.
 */
505 #ifdef HAVE_STACKTRACE_OPS
506 static int print_trace_stack(void *data, char *name)
508 printk(" <%s> ", name);
/* stacktrace_ops ".address" callback: prints one return address with its
 * symbol. The return type apparently depends on
 * STACKTRACE_OPS_ADDRESS_RETURN_INT (declaration not visible in this
 * extract).
 */
512 #ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT
517 print_trace_address(void *data, unsigned long addr, int reliable)
/* Keep the NMI watchdog quiet while the trace is being printed */
521 touch_nmi_watchdog();
/* Build a format string that still contains one "%s" (written as "%%s")
 * for __print_symbol() to fill in; unreliable addresses get a "? " prefix.
 */
522 sprintf(fmt, " [<%016lx>] %s%%s\n", addr, reliable ? "" : "? ");
523 __print_symbol(fmt, addr);
524 #ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT
/* Callback table passed to dump_trace() by libcfs_call_trace(). */
529 static const struct stacktrace_ops print_trace_ops = {
530 .stack = print_trace_stack,
531 .address = print_trace_address,
532 .walk_stack = print_context_stack,
534 #endif /* HAVE_STACKTRACE_OPS */
/* Variant of libcfs_call_trace() for kernels without CONFIG_STACKTRACE:
 * walks the stack with dump_trace() and print_trace_ops when
 * HAVE_STACKTRACE_OPS is defined, otherwise only warns that the stack
 * cannot be shown.
 */
536 static void libcfs_call_trace(struct task_struct *tsk)
538 #ifdef HAVE_STACKTRACE_OPS
539 printk("Pid: %d, comm: %.20s\n", tsk->pid, tsk->comm);
540 printk("\nCall Trace:\n");
541 dump_trace(tsk, NULL, NULL, 0, &print_trace_ops, NULL);
543 #else /* !HAVE_STACKTRACE_OPS */
547 CWARN("can't show stack: kernel doesn't export show_task\n");
548 #endif /* HAVE_STACKTRACE_OPS */
551 #else /* !CONFIG_X86 */
/* Variant of libcfs_call_trace() for the non-x86 case: no stack walker is
 * used, only a warning is emitted.
 */
553 static void libcfs_call_trace(struct task_struct *tsk)
558 CWARN("can't show stack: kernel doesn't export show_task\n");
561 #endif /* CONFIG_X86 */
563 #endif /* CONFIG_STACKTRACE */
/* Print the stack of @tsk, or of the current task when @tsk is NULL.
 * Exported for use by other modules.
 */
565 void libcfs_debug_dumpstack(struct task_struct *tsk)
567 libcfs_call_trace(tsk ?: current);
569 EXPORT_SYMBOL(libcfs_debug_dumpstack);
/* Panic notifier callback.
 *
 * Records that a panic is in progress; the check on the flag guards against
 * running twice. With LNET_DUMP_ON_PANIC defined it also emits the debug
 * log: directly to the console from interrupt context, otherwise via
 * libcfs_debug_dumplog_internal() tagged with the current pid.
 * NOTE(review): the remaining parameters and the return values are not
 * visible in this extract.
 */
571 static int panic_notifier(struct notifier_block *self, unsigned long unused1,
574 if (libcfs_panic_in_progress)
577 libcfs_panic_in_progress = 1;
580 #ifdef LNET_DUMP_ON_PANIC
581 /* This is currently disabled because it spews far too much to the
582 * console on the rare cases it is ever triggered. */
584 if (in_interrupt()) {
585 cfs_trace_debug_print();
587 libcfs_debug_dumplog_internal((void *)(long)current->pid);
/* Notifier block that hooks panic_notifier() into the kernel's panic
 * notifier chain.
 */
593 static struct notifier_block libcfs_panic_notifier = {
594 .notifier_call = panic_notifier,
/* Add libcfs_panic_notifier to the atomic panic_notifier_list chain. */
599 static void libcfs_register_panic_notifier(void)
601 atomic_notifier_chain_register(&panic_notifier_list,
602 &libcfs_panic_notifier);
/* Remove libcfs_panic_notifier from the atomic panic_notifier_list chain. */
605 static void libcfs_unregister_panic_notifier(void)
607 atomic_notifier_chain_unregister(&panic_notifier_list,
608 &libcfs_panic_notifier);
/* Set by libcfs_debug_init() and cleared by libcfs_debug_cleanup(). */
611 static bool debug_started;
/* Initialise the debug subsystem.
 *
 * Falls back to the CDEBUG_DEFAULT_* console delays when the configured
 * pair is unset or inconsistent, derives the trace buffer size from
 * libcfs_debug_mb (the shift by 20 - PAGE_SHIFT converts megabytes to
 * pages), initialises the trace files, registers the panic notifier and
 * finally writes the effective size back into libcfs_debug_mb under the
 * module's parameter lock.
 *
 * NOTE(review): how @bufsize is used, the handling of a
 * cfs_tracefile_init() failure and the return value are not visible in this
 * extract.
 */
613 int libcfs_debug_init(unsigned long bufsize)
615 unsigned int max = libcfs_debug_mb;
621 debug_started = true;
/* Both delays are unsigned, so "<= 0" is true only for zero */
622 if (libcfs_console_max_delay <= 0 || /* not set by user or */
623 libcfs_console_min_delay <= 0 || /* set to invalid values */
624 libcfs_console_min_delay >= libcfs_console_max_delay) {
625 libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
626 libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
629 /* If libcfs_debug_mb is uninitialized then just make the
630 * total buffers smp_num_cpus * TCD_MAX_PAGES
632 if (max < num_possible_cpus())
635 max <<= (20 - PAGE_SHIFT);
637 rc = cfs_tracefile_init(max);
641 libcfs_register_panic_notifier();
642 kernel_param_lock(THIS_MODULE);
643 libcfs_debug_mb = cfs_trace_get_debug_mb();
644 kernel_param_unlock(THIS_MODULE);
/* Tear down the debug subsystem: unregister the panic notifier, shut down
 * the trace files while holding the module's parameter lock, and clear
 * debug_started.
 * NOTE(review): the return value is not visible in this extract.
 */
648 int libcfs_debug_cleanup(void)
650 libcfs_unregister_panic_notifier();
651 kernel_param_lock(THIS_MODULE);
652 cfs_tracefile_exit();
653 kernel_param_unlock(THIS_MODULE);
654 debug_started = false;
/* Discard the buffered trace data by calling cfs_trace_flush_pages().
 * NOTE(review): the return value is not visible in this extract.
 */
658 int libcfs_debug_clear_buffer(void)
660 cfs_trace_flush_pages();
664 /* Debug markers, although printed by S_LNET, should not be marked as such. */
/* DEBUG_SUBSYSTEM is switched to S_UNDEFINED for the marker function only
 * and restored to S_LNET afterwards.
 */
665 #undef DEBUG_SUBSYSTEM
666 #define DEBUG_SUBSYSTEM S_UNDEFINED
/* Write a "DEBUG MARKER: <text>" line, framed by rows of asterisks, to the
 * console/debug log at D_WARNING level.
 * NOTE(review): the calls that print the asterisk rows and the return value
 * are only partly visible in this extract.
 */
667 int libcfs_debug_mark_buffer(const char *text)
670 "**************************************************\n");
671 LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
673 "**************************************************\n");
678 #undef DEBUG_SUBSYSTEM
679 #define DEBUG_SUBSYSTEM S_LNET