X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Flibcfs%2Fdebug.c;h=9f0ce91cdcb72ac2f46709abe2daf4ba69135f45;hb=91169ca2cc221888bb05e6f61395521bdbe24707;hp=c56f76ff39f6d8ed24c8e3c78c0aba1b18f5f713;hpb=191061ee668400324f4505cf498f1ee2d57e4962;p=fs%2Flustre-release.git diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index c56f76f..9f0ce91 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -24,47 +24,19 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - # define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include +#include +#include #include "tracefile.h" -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#endif - -unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | - S_GMNAL | S_OPENIBNAL); +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS); EXPORT_SYMBOL(portal_subsystem_debug); unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA | - D_RPCTRACE | D_VFSTRACE); + D_RPCTRACE | D_VFSTRACE | D_CONFIG | D_IOCTL | + D_CONSOLE); EXPORT_SYMBOL(portal_debug); unsigned int portal_printk; @@ -73,51 +45,58 @@ EXPORT_SYMBOL(portal_printk); unsigned int portal_stack; EXPORT_SYMBOL(portal_stack); +unsigned int portals_catastrophe; +EXPORT_SYMBOL(portals_catastrophe); + #ifdef __KERNEL__ atomic_t portal_kmemory = ATOMIC_INIT(0); EXPORT_SYMBOL(portal_kmemory); #endif -static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); +static cfs_waitq_t debug_ctlwq; char debug_file_path[1024] = "/tmp/lustre-log"; static char debug_file_name[1024]; -static int handled_panic; /* to avoid recursive calls to notifiers */ -char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -int portals_do_debug_dumplog(void *arg) +void portals_debug_dumplog_internal(void *arg) { - void *journal_info; - - kportal_daemonize(""); + CFS_DECL_JOURNAL_DATA; - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; + CFS_PUSH_JOURNAL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, - "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg); + "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), (long)arg); + printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); tracefile_dump_all_pages(debug_file_name); - current->journal_info = journal_info; - wake_up(&debug_ctlwq); + CFS_POP_JOURNAL; +} + +int portals_debug_dumplog_thread(void *arg) +{ + kportal_daemonize(""); + reparent_to_init(); + portals_debug_dumplog_internal(arg); + cfs_waitq_signal(&debug_ctlwq); return 0; } void portals_debug_dumplog(void) { - int rc; - DECLARE_WAITQUEUE(wait, current); + int rc; + cfs_waitlink_t wait; ENTRY; /* we're being careful to ensure that the kernel thread is * able to set our state to running as it exits before we * get to schedule() */ + cfs_waitlink_init(&wait); set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_add(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, - CLONE_VM | CLONE_FS | CLONE_FILES); + rc = cfs_kernel_thread(portals_debug_dumplog_thread, + (void *)(long)cfs_curproc_pid(), + CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); @@ -125,13 +104,16 @@ void portals_debug_dumplog(void) schedule(); /* be sure to teardown if kernel_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_del(&debug_ctlwq, &wait); set_current_state(TASK_RUNNING); } +#ifdef PORTALS_DUMP_ON_PANIC static int panic_dumplog(struct notifier_block *self, unsigned long unused1, void *unused2) { + static int handled_panic; /* to avoid recursive calls to notifiers */ + if (handled_panic) return 0; else @@ -153,17 +135,35 @@ static struct notifier_block lustre_panic_notifier = { next : NULL, priority : 10000 }; +#endif + +#ifdef CRAY_PORTALS +extern void *lus_portals_debug; +#endif int portals_debug_init(unsigned long bufsize) { + cfs_waitq_init(&debug_ctlwq); +#ifdef CRAY_PORTALS + lus_portals_debug = &portals_debug_msg; +#endif +#ifdef PORTALS_DUMP_ON_PANIC + /* This is currently disabled because it spews far too much to the + * console on the rare cases it is ever triggered. */ notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); +#endif return tracefile_init(); } int portals_debug_cleanup(void) { tracefile_exit(); +#ifdef PORTALS_DUMP_ON_PANIC notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); +#endif +#ifdef CRAY_PORTALS + lus_portals_debug = NULL; +#endif return 0; } @@ -180,7 +180,7 @@ int portals_debug_clear_buffer(void) int portals_debug_mark_buffer(char *text) { CDEBUG(D_TRACE,"***************************************************\n"); - CWARN("DEBUG MARKER: %s\n", text); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); CDEBUG(D_TRACE,"***************************************************\n"); return 0; @@ -195,213 +195,61 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -void portals_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = portals_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/portals/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } -} - -void portals_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - portals_run_upcall (argv); -} - char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { if (nid == PTL_NID_ANY) { - snprintf(str, PTL_NALFMT_SIZE - 1, "%s", - "PTL_NID_ANY"); + snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); return str; } switch(nal){ /* XXX this could be a nal method of some sort, 'cept it's config * dependent whether (say) socknal NIDs are actually IP addresses... */ -#ifndef CRAY_PORTALS +#if !CRAY_PORTALS case TCPNAL: /* userspace NAL */ + case IIBNAL: + case VIBNAL: case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid)); + case RANAL: + case SOCKNAL: { + /* HIPQUAD requires __u32, but we can't cast in it */ + __u32 nid32 = (__u32)nid; + if ((__u32)(nid >> 32)) { + snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", + (__u32)(nid >> 32), HIPQUAD(nid32)); + } else { + snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", + HIPQUAD(nid32)); + } break; + } case QSWNAL: case GMNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", + case LONAL: + snprintf(str, PTL_NALFMT_SIZE, "%u:%u", (__u32)(nid >> 32), (__u32)nid); break; #endif default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", + snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx", nal, (long long)nid); break; } return str; } -/* bug #4615 */ -char *portals_id2str(int nal, ptl_process_id_t id, char *str) -{ - switch(nal){ -#ifndef CRAY_PORTALS - case TCPNAL: - /* userspace NAL */ - case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", - (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); - break; - case QSWNAL: - case GMNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", - (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); - break; -#endif - default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", - nal, (long long)id.nid, (long)id.pid ); - break; - } - return str; -} - - -#ifdef __KERNEL__ -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; - -#if defined(__arch_um__) - -char *portals_debug_dumpstack(void) -{ - asm("int $3"); - return "dump stack\n"; -} - -#elif defined(__i386__) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -extern int lookup_symbol(unsigned long address, char *buf, int buflen); -const char *kallsyms_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, char *namebuf) -{ - int rc = lookup_symbol(addr, namebuf, 128); - if (rc == -ENOSYS) - return NULL; - return namebuf; -} -#endif -char *portals_debug_dumpstack(void) -{ - unsigned long esp = current->thread.esp, addr; - unsigned long *stack = (unsigned long *)&esp; - char *buf = stack_backtrace, *pbuf = buf; - int size; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE - 1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - const char *sym_name; - char *modname, buffer[128]; - unsigned long junk, offset; - - sym_name = kallsyms_lookup(addr, &junk, &offset, - &modname, buffer); - if (sym_name == NULL) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack - 1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ - -char *portals_debug_dumpstack(void) +char *portals_id2str(int nal, ptl_process_id_t id, char *str) { - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; -} + int len; -#endif /* __arch_um__ */ -struct task_struct *portals_current(void) -{ - CWARN("current task struct is %p\n", current); - return current; + portals_nid2str(nal, id.nid, str); + len = strlen(str); + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); + return str; } -EXPORT_SYMBOL(stack_backtrace_lock); -EXPORT_SYMBOL(portals_debug_dumpstack); -EXPORT_SYMBOL(portals_current); -#endif /* __KERNEL__ */ - EXPORT_SYMBOL(portals_debug_dumplog); EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_upcall); -EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); EXPORT_SYMBOL(portals_id2str);