Whamcloud - gitweb
LU-14550 libcfs: fix setting of debug_path
[fs/lustre-release.git] / libcfs / libcfs / debug.c
index 3a0c289..412a10c 100644 (file)
 
 # define DEBUG_SUBSYSTEM S_LNET
 
+#include <linux/module.h>
 #include <linux/ctype.h>
+#include <libcfs/libcfs_string.h>
 #include <linux/kthread.h>
-#include <libcfs/libcfs.h>
+#include <linux/stacktrace.h>
+#include <linux/utsname.h>
+#include <linux/kallsyms.h>
 #include "tracefile.h"
 
 static char debug_file_name[1024];
 
 unsigned int libcfs_subsystem_debug = ~0;
+EXPORT_SYMBOL(libcfs_subsystem_debug);
 module_param(libcfs_subsystem_debug, int, 0644);
 MODULE_PARM_DESC(libcfs_subsystem_debug, "Lustre kernel debug subsystem mask");
-EXPORT_SYMBOL(libcfs_subsystem_debug);
 
-unsigned int libcfs_debug = (D_CANTMASK |
-                            D_NETERROR | D_HA | D_CONFIG | D_IOCTL | D_LFSCK);
+unsigned int libcfs_debug = (D_CANTMASK | D_NETERROR | D_HA | D_CONFIG |
+                            D_IOCTL | D_LFSCK | D_TTY);
+EXPORT_SYMBOL(libcfs_debug);
 module_param(libcfs_debug, int, 0644);
 MODULE_PARM_DESC(libcfs_debug, "Lustre kernel debug mask");
-EXPORT_SYMBOL(libcfs_debug);
 
 static int libcfs_param_debug_mb_set(const char *val,
                                     cfs_kernel_param_arg_t *kp)
@@ -65,39 +69,22 @@ static int libcfs_param_debug_mb_set(const char *val,
        if (rc < 0)
                return rc;
 
-/*
- * RHEL6 does not support any kind of locking so we have to provide
- * our own
- */
-#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK)
-       kernel_param_lock(THIS_MODULE);
-#endif
-       if (!*((unsigned int *)kp->arg)) {
-               *((unsigned int *)kp->arg) = num;
+       num = cfs_trace_set_debug_mb(num);
 
-#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK)
-               kernel_param_unlock(THIS_MODULE);
-#endif
-               return 0;
-       }
-
-       rc = cfs_trace_set_debug_mb(num);
-
-       if (!rc)
-               *((unsigned int *)kp->arg) = cfs_trace_get_debug_mb();
+       *((unsigned int *)kp->arg) = num;
+       num = cfs_trace_get_debug_mb();
+       if (num)
+               /* This value is more precise */
+               *((unsigned int *)kp->arg) = num;
 
-#if !defined(HAVE_MODULE_PARAM_LOCKING) && !defined(HAVE_KERNEL_PARAM_LOCK)
-       kernel_param_unlock(THIS_MODULE);
-#endif
-       return rc;
+       return 0;
 }
 
-/*
- * While debug_mb setting look like unsigned int, in fact
+/* While debug_mb setting look like unsigned int, in fact
  * it needs quite a bunch of extra processing, so we define special
  * debug_mb parameter type with corresponding methods to handle this case
  */
-static struct kernel_param_ops param_ops_debug_mb = {
+static const struct kernel_param_ops param_ops_debug_mb = {
        .set = libcfs_param_debug_mb_set,
        .get = param_get_uint,
 };
@@ -148,7 +135,9 @@ static int param_get_delay(char *buffer, cfs_kernel_param_arg_t *kp)
 {
        unsigned int d = *(unsigned int *)kp->arg;
 
-       return sprintf(buffer, "%lu", jiffies_to_msecs(d * 10) / MSEC_PER_SEC);
+       param_get_byte(buffer, kp);
+       return sprintf(buffer, "%lu%c", jiffies_to_msecs(d * 10) / MSEC_PER_SEC,
+                      strnchr(buffer, PAGE_SIZE, '\n') ? '\n' : '\0');
 }
 
 unsigned int libcfs_console_max_delay;
@@ -161,7 +150,7 @@ static int param_set_console_max_delay(const char *val,
                                      libcfs_console_min_delay, INT_MAX);
 }
 
-static struct kernel_param_ops param_ops_console_max_delay = {
+static const struct kernel_param_ops param_ops_console_max_delay = {
        .set = param_set_console_max_delay,
        .get = param_get_delay,
 };
@@ -184,7 +173,7 @@ static int param_set_console_min_delay(const char *val,
                                      1, libcfs_console_max_delay);
 }
 
-static struct kernel_param_ops param_ops_console_min_delay = {
+static const struct kernel_param_ops param_ops_console_min_delay = {
        .set = param_set_console_min_delay,
        .get = param_get_delay,
 };
@@ -224,7 +213,7 @@ static int param_set_uintpos(const char *val,
        return param_set_uint_minmax(val, kp, 1, -1);
 }
 
-static struct kernel_param_ops param_ops_uintpos = {
+static const struct kernel_param_ops param_ops_uintpos = {
        .set = param_set_uintpos,
        .get = param_get_uint,
 };
@@ -256,29 +245,25 @@ unsigned int libcfs_panic_on_lbug = 1;
 module_param(libcfs_panic_on_lbug, uint, 0644);
 MODULE_PARM_DESC(libcfs_panic_on_lbug, "Lustre kernel panic on LBUG");
 
-atomic_t libcfs_kmemory = ATOMIC_INIT(0);
-EXPORT_SYMBOL(libcfs_kmemory);
+atomic64_t libcfs_kmem = ATOMIC64_INIT(0);
+EXPORT_SYMBOL(libcfs_kmem);
 
-static wait_queue_head_t debug_ctlwq;
-
-char libcfs_debug_file_path_arr[PATH_MAX] = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
-EXPORT_SYMBOL(libcfs_debug_file_path_arr);
+static DECLARE_COMPLETION(debug_complete);
 
 /* We need to pass a pointer here, but elsewhere this must be a const */
-static char *libcfs_debug_file_path = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
+char *libcfs_debug_file_path = LIBCFS_DEBUG_FILE_PATH_DEFAULT;
+EXPORT_SYMBOL(libcfs_debug_file_path);
 module_param(libcfs_debug_file_path, charp, 0644);
 MODULE_PARM_DESC(libcfs_debug_file_path,
                 "Path for dumping debug logs, set 'NONE' to prevent log dumping");
 
 int libcfs_panic_in_progress;
 
-/*
- * libcfs_debug_token2mask() expects the returned
- * string in lower-case
- */
+/* libcfs_debug_token2mask() expects the returned string in lower-case */
 static const char *libcfs_debug_subsys2str(int subsys)
 {
-       static const char *libcfs_debug_subsystems[] = LIBCFS_DEBUG_SUBSYS_NAMES;
+       static const char * const libcfs_debug_subsystems[] =
+               LIBCFS_DEBUG_SUBSYS_NAMES;
 
        if (subsys >= ARRAY_SIZE(libcfs_debug_subsystems))
                return NULL;
@@ -286,13 +271,11 @@ static const char *libcfs_debug_subsys2str(int subsys)
        return libcfs_debug_subsystems[subsys];
 }
 
-/*
- * libcfs_debug_token2mask() expects the returned
- * string in lower-case
- */
+/* libcfs_debug_token2mask() expects the returned string in lower-case */
 static const char *libcfs_debug_dbg2str(int debug)
 {
-       static const char *libcfs_debug_masks[] = LIBCFS_DEBUG_MASKS_NAMES;
+       static const char * const libcfs_debug_masks[] =
+               LIBCFS_DEBUG_MASKS_NAMES;
 
        if (debug >= ARRAY_SIZE(libcfs_debug_masks))
                return NULL;
@@ -315,11 +298,11 @@ libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
                len = 1;
        } else {                                /* space-separated tokens */
                for (i = 0; i < 32; i++) {
-                       if ((mask & (1 << i)) == 0)
+                       if ((mask & BIT(i)) == 0)
                                continue;
 
                        token = fn(i);
-                       if (token == NULL)      /* unused bit */
+                       if (!token)     /* unused bit */
                                continue;
 
                        if (len > 0) {          /* separator? */
@@ -361,14 +344,11 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
                if (!isspace(str[n-1]))
                        break;
        matched = n;
-
        t = sscanf(str, "%i%n", &m, &matched);
        if (t >= 1 && matched == n) {
                /* don't print warning for lctl set_param debug=0 or -1 */
                if (m != 0 && m != -1)
-                       CWARN("You are trying to use a numerical value for the "
-                             "mask - this will be deprecated in a future "
-                             "release.\n");
+                       CWARN("You are trying to use a numerical value for the mask - this will be deprecated in a future release.\n");
                *mask = m;
                return 0;
        }
@@ -377,77 +357,319 @@ libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
                            0xffffffff);
 }
 
+char lnet_debug_log_upcall[1024] = "/usr/lib/lustre/lnet_debug_log_upcall";
+
+/* Upcall function once a Lustre log has been dumped.
+ *
+ * @file       path of the dumped log
+ */
+static void libcfs_run_debug_log_upcall(char *file)
+{
+       char *argv[3];
+       int rc;
+       static const char * const envp[] = {
+               "HOME=/",
+               "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+               NULL
+       };
+
+       ENTRY;
+       argv[0] = lnet_debug_log_upcall;
+
+       LASSERTF(file, "called on a null filename\n");
+       argv[1] = file; /* only need to pass the path of the file */
+
+       argv[2] = NULL;
+
+       rc = call_usermodehelper(argv[0], argv, (char **)envp, 1);
+       if (rc < 0 && rc != -ENOENT) {
+               CERROR("Error %d invoking LNET debug log upcall %s %s; check /sys/kernel/debug/lnet/debug_log_upcall\n",
+                      rc, argv[0], argv[1]);
+       } else {
+               CDEBUG(D_HA, "Invoked LNET debug log upcall %s %s\n",
+                      argv[0], argv[1]);
+       }
+}
+
 /**
  * Dump Lustre log to ::debug_file_path by calling tracefile_dump_all_pages()
  */
-void libcfs_debug_dumplog_internal(void *arg)
+static void libcfs_debug_dumplog_internal(void *arg)
 {
        static time64_t last_dump_time;
        time64_t current_time;
-       void *journal_info;
 
-       journal_info = current->journal_info;
-       current->journal_info = NULL;
        current_time = ktime_get_real_seconds();
 
-       if (strncmp(libcfs_debug_file_path_arr, "NONE", 4) != 0 &&
+       if (strncmp(libcfs_debug_file_path, "NONE", 4) != 0 &&
            current_time > last_dump_time) {
                last_dump_time = current_time;
                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-                        "%s.%lld.%ld", libcfs_debug_file_path_arr,
+                        "%s.%lld.%ld", libcfs_debug_file_path,
                         (s64)current_time, (uintptr_t)arg);
-               printk(KERN_ALERT "LustreError: dumping log to %s\n",
-                      debug_file_name);
+               pr_alert("LustreError: dumping log to %s\n", debug_file_name);
                cfs_tracefile_dump_all_pages(debug_file_name);
                libcfs_run_debug_log_upcall(debug_file_name);
        }
-       current->journal_info = journal_info;
 }
 
 static int libcfs_debug_dumplog_thread(void *arg)
 {
        libcfs_debug_dumplog_internal(arg);
-       wake_up(&debug_ctlwq);
+       complete(&debug_complete);
        return 0;
 }
 
+static DEFINE_MUTEX(libcfs_debug_dumplog_lock);
+
 void libcfs_debug_dumplog(void)
 {
-       wait_queue_entry_t wait;
        struct task_struct *dumper;
 
        ENTRY;
 
-       /*
-        * we're being careful to ensure that the kernel thread is
-        * able to set our state to running as it exits before we
-        * get to schedule()
-        */
-       init_waitqueue_entry(&wait, current);
-       set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&debug_ctlwq, &wait);
+       if (mutex_trylock(&libcfs_debug_dumplog_lock) == 0)
+               return;
 
+       /* If a previous call was interrupted, debug_complete->done
+        * might be elevated, and so we won't actually wait here.
+        * So we reinit the completion to ensure we wait for
+        * one thread to complete, though it might not be the one
+        * we start if there are overlaping thread.
+        */
+       reinit_completion(&debug_complete);
        dumper = kthread_run(libcfs_debug_dumplog_thread,
-                            (void *)(long)current_pid(),
+                            (void *)(long)current->pid,
                             "libcfs_debug_dumper");
        if (IS_ERR(dumper))
-               printk(KERN_ERR "LustreError: cannot start log dump thread:"
-                      " %ld\n", PTR_ERR(dumper));
+               pr_err("LustreError: cannot start log dump thread: rc = %ld\n",
+                      PTR_ERR(dumper));
        else
-               schedule();
+               wait_for_completion_interruptible(&debug_complete);
 
-       /* be sure to teardown if cfs_create_thread() failed */
-       remove_wait_queue(&debug_ctlwq, &wait);
-       set_current_state(TASK_RUNNING);
+       mutex_unlock(&libcfs_debug_dumplog_lock);
 }
 EXPORT_SYMBOL(libcfs_debug_dumplog);
 
+/* coverity[+kill] */
+void __noreturn lbug_with_loc(struct libcfs_debug_msg_data *msgdata)
+{
+       libcfs_catastrophe = 1;
+       libcfs_debug_msg(msgdata, "LBUG\n");
+
+       if (in_interrupt()) {
+               panic("LBUG in interrupt.\n");
+               /* not reached */
+       }
+
+       libcfs_debug_dumpstack(NULL);
+       if (libcfs_panic_on_lbug)
+               panic("LBUG");
+       else
+               libcfs_debug_dumplog();
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       while (1)
+               schedule();
+}
+EXPORT_SYMBOL(lbug_with_loc);
+
+#ifdef CONFIG_STACKTRACE
+
+#ifndef HAVE_SAVE_STACK_TRACE_TSK
+#define save_stack_trace_tsk(tsk, trace)                                      \
+do {                                                                          \
+       if (tsk == current)                                                    \
+               save_stack_trace(trace);                                       \
+       else                                                                   \
+               pr_info("No stack, save_stack_trace_tsk() not exported\n");    \
+} while (0)
+#endif
+
+static void cfs_print_stack_trace(unsigned long *entries, unsigned int nr)
+{
+       unsigned int i;
+
+       /* Prefer %pB for backtraced symbolic names since it was added in:
+        * Linux v2.6.38-6557-g0f77a8d37825
+        * vsprintf: Introduce %pB format specifier
+        */
+       for (i = 0; i < nr; i++)
+               pr_info("[<0>] %pB\n", (void *)entries[i]);
+}
+
+#define MAX_ST_ENTRIES 100
+static DEFINE_SPINLOCK(st_lock);
+
+/* Linux v5.1-rc5 214d8ca6ee ("stacktrace: Provide common infrastructure")
+ * CONFIG_ARCH_STACKWALK indicates that save_stack_trace_tsk symbol is not
+ * exported. Use symbol_get() to find if save_stack_trace_tsk is available.
+ */
+#ifdef CONFIG_ARCH_STACKWALK
+typedef unsigned int (stack_trace_save_tsk_t)(struct task_struct *task,
+                                             unsigned long *store,
+                                             unsigned int size,
+                                             unsigned int skipnr);
+static stack_trace_save_tsk_t *task_dump_stack;
+#endif
+
+void __init cfs_debug_init(void)
+{
+#ifdef CONFIG_ARCH_STACKWALK
+       task_dump_stack = (void *)
+                       kallsyms_lookup_name("stack_trace_save_tsk");
+
+#endif
+}
+
+static void libcfs_call_trace(struct task_struct *tsk)
+{
+       static unsigned long entries[MAX_ST_ENTRIES];
+#ifdef CONFIG_ARCH_STACKWALK
+       unsigned int nr_entries;
+
+       spin_lock(&st_lock);
+       pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm,
+               init_utsname()->release, init_utsname()->version);
+       pr_info("Call Trace TBD:\n");
+       if (task_dump_stack) {
+               nr_entries = task_dump_stack(tsk, entries, MAX_ST_ENTRIES, 0);
+               cfs_print_stack_trace(entries, nr_entries);
+       }
+       spin_unlock(&st_lock);
+#else
+       struct stack_trace trace;
+
+       trace.nr_entries = 0;
+       trace.max_entries = MAX_ST_ENTRIES;
+       trace.entries = entries;
+       trace.skip = 0;
+
+       spin_lock(&st_lock);
+       pr_info("Pid: %d, comm: %.20s %s %s\n", tsk->pid, tsk->comm,
+               init_utsname()->release, init_utsname()->version);
+       pr_info("Call Trace:\n");
+       save_stack_trace_tsk(tsk, &trace);
+       cfs_print_stack_trace(trace.entries, trace.nr_entries);
+       spin_unlock(&st_lock);
+#endif
+}
+
+#else /* !CONFIG_STACKTRACE */
+
+#ifdef CONFIG_X86
+#include <linux/nmi.h>
+#include <asm/stacktrace.h>
+
+#ifdef HAVE_STACKTRACE_OPS
+static int print_trace_stack(void *data, char *name)
+{
+       printk(" <%s> ", name);
+       return 0;
+}
+
+#ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT
+static int
+#else
+static void
+#endif
+print_trace_address(void *data, unsigned long addr, int reliable)
+{
+       char fmt[32];
+
+       touch_nmi_watchdog();
+       sprintf(fmt, " [<%016lx>] %s%%s\n", addr, reliable ? "" : "? ");
+       __print_symbol(fmt, addr);
+#ifdef STACKTRACE_OPS_ADDRESS_RETURN_INT
+       return 0;
+#endif
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+       .stack          = print_trace_stack,
+       .address        = print_trace_address,
+       .walk_stack     = print_context_stack,
+};
+#endif /* HAVE_STACKTRACE_OPS */
+
+static void libcfs_call_trace(struct task_struct *tsk)
+{
+#ifdef HAVE_STACKTRACE_OPS
+       printk("Pid: %d, comm: %.20s\n", tsk->pid, tsk->comm);
+       printk("\nCall Trace:\n");
+       dump_trace(tsk, NULL, NULL, 0, &print_trace_ops, NULL);
+       printk("\n");
+#else /* !HAVE_STACKTRACE_OPS */
+       if (tsk == current)
+               dump_stack();
+       else
+               CWARN("can't show stack: kernel doesn't export show_task\n");
+#endif /* HAVE_STACKTRACE_OPS */
+}
+
+#else /* !CONFIG_X86 */
+
+static void libcfs_call_trace(struct task_struct *tsk)
+{
+       if (tsk == current)
+               dump_stack();
+       else
+               CWARN("can't show stack: kernel doesn't export show_task\n");
+}
+
+#endif /* CONFIG_X86 */
+
+#endif /* CONFIG_STACKTRACE */
+
+void libcfs_debug_dumpstack(struct task_struct *tsk)
+{
+       libcfs_call_trace(tsk ?: current);
+}
+EXPORT_SYMBOL(libcfs_debug_dumpstack);
+
+static int panic_notifier(struct notifier_block *self, unsigned long unused1,
+                         void *unused2)
+{
+       if (libcfs_panic_in_progress)
+               return 0;
+
+       libcfs_panic_in_progress = 1;
+       mb();
+
+#ifdef LNET_DUMP_ON_PANIC
+       /* This is currently disabled because it spews far too much to the
+        * console on the rare cases it is ever triggered. */
+
+       if (in_interrupt()) {
+               cfs_trace_debug_print();
+       } else {
+               libcfs_debug_dumplog_internal((void *)(long)current->pid);
+       }
+#endif
+       return 0;
+}
+
+static struct notifier_block libcfs_panic_notifier = {
+       .notifier_call          = panic_notifier,
+       .next                   = NULL,
+       .priority               = 10000,
+};
+
+static void libcfs_register_panic_notifier(void)
+{
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &libcfs_panic_notifier);
+}
+
+static void libcfs_unregister_panic_notifier(void)
+{
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &libcfs_panic_notifier);
+}
+
 int libcfs_debug_init(unsigned long bufsize)
 {
-       int rc = 0;
        unsigned int max = libcfs_debug_mb;
-
-       init_waitqueue_head(&debug_ctlwq);
+       int rc = 0;
 
        if (libcfs_console_max_delay <= 0 || /* not set by user or */
            libcfs_console_min_delay <= 0 || /* set to invalid values */
@@ -456,21 +678,14 @@ int libcfs_debug_init(unsigned long bufsize)
                libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
        }
 
-       if (libcfs_debug_file_path != NULL) {
-               strlcpy(libcfs_debug_file_path_arr,
-                       libcfs_debug_file_path,
-                       sizeof(libcfs_debug_file_path_arr));
-       }
-
-       /*
-        * If libcfs_debug_mb is set to an invalid value or uninitialized
-        * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES
+       /* If libcfs_debug_mb is uninitialized then just make the
+        * total buffers smp_num_cpus * TCD_MAX_PAGES
         */
-       if (max > cfs_trace_max_debug_mb() || max < num_possible_cpus()) {
+       if (max < num_possible_cpus()) {
                max = TCD_MAX_PAGES;
        } else {
                max = (max / num_possible_cpus());
-               max = (max << (20 - PAGE_SHIFT));
+               max <<= (20 - PAGE_SHIFT);
        }
 
        rc = cfs_tracefile_init(max);
@@ -500,20 +715,20 @@ int libcfs_debug_clear_buffer(void)
        return 0;
 }
 
-/*
- * Debug markers, although printed by S_LNET
- * should not be be marked as such.
- */
+/* Debug markers, although printed by S_LNET should not be be marked as such. */
 #undef DEBUG_SUBSYSTEM
 #define DEBUG_SUBSYSTEM S_UNDEFINED
 int libcfs_debug_mark_buffer(const char *text)
 {
-       CDEBUG(D_TRACE, "**************************************************\n");
+       CDEBUG(D_TRACE,
+              "**************************************************\n");
        LCONSOLE(D_WARNING, "DEBUG MARKER: %s\n", text);
-       CDEBUG(D_TRACE, "**************************************************\n");
+       CDEBUG(D_TRACE,
+              "**************************************************\n");
 
        return 0;
 }
+
 #undef DEBUG_SUBSYSTEM
 #define DEBUG_SUBSYSTEM S_LNET