X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Flibcfs%2Fdebug.c;h=b7fd218df360d9d0a5ae2955eca8c4b5c2b13c21;hb=7e80985d02e71f02b9a99f91d6eb0e154ba56c85;hp=f571958c33fec9834c7cc14d627caeb4cac68c6b;hpb=fb1d86804d2e9f82045c5198b2a9850321c64fb9;p=fs%2Flustre-release.git diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index f571958..b7fd218 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -24,43 +24,14 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - # define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include +#include +#include #include "tracefile.h" -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#endif - -unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | - S_GMNAL | S_IBNAL); +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); EXPORT_SYMBOL(portal_subsystem_debug); unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA | @@ -78,47 +49,50 @@ atomic_t portal_kmemory = ATOMIC_INIT(0); EXPORT_SYMBOL(portal_kmemory); #endif -static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); +static cfs_waitq_t debug_ctlwq; char debug_file_path[1024] = "/tmp/lustre-log"; static char debug_file_name[1024]; -static int handled_panic; /* to avoid recursive calls to notifiers */ -char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; -int portals_do_debug_dumplog(void *arg) +void portals_debug_dumplog_internal(void *arg) { - void *journal_info; - - kportal_daemonize(""); + CFS_DECL_JOURNAL_DATA; - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; + CFS_PUSH_JOURNAL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, - "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg); + "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), (long)arg); printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); tracefile_dump_all_pages(debug_file_name); - current->journal_info = journal_info; - wake_up(&debug_ctlwq); + CFS_POP_JOURNAL; +} + +int portals_debug_dumplog_thread(void *arg) +{ + kportal_daemonize(""); + reparent_to_init(); + portals_debug_dumplog_internal(arg); + cfs_waitq_signal(&debug_ctlwq); return 0; } void portals_debug_dumplog(void) { - int rc; - DECLARE_WAITQUEUE(wait, current); + int rc; + cfs_waitlink_t wait; ENTRY; /* we're being careful to ensure that the kernel thread is * able to set our state to running as it exits before we * get to schedule() */ + cfs_waitlink_init(&wait); set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_add(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, - CLONE_VM | CLONE_FS | CLONE_FILES); + rc = cfs_kernel_thread(portals_debug_dumplog_thread, + (void *)(long)cfs_curproc_pid(), + CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); @@ -126,13 +100,16 @@ void portals_debug_dumplog(void) schedule(); /* be sure to teardown if kernel_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_del(&debug_ctlwq, &wait); set_current_state(TASK_RUNNING); } +#ifdef PORTALS_DUMP_ON_PANIC static int panic_dumplog(struct notifier_block *self, unsigned long unused1, void *unused2) { + static int handled_panic; /* to avoid recursive calls to notifiers */ + if (handled_panic) return 0; else @@ -154,17 +131,35 @@ static struct notifier_block lustre_panic_notifier = { next : NULL, priority : 10000 }; +#endif + +#ifdef CRAY_PORTALS +extern void *lus_portals_debug; +#endif int portals_debug_init(unsigned long bufsize) { + cfs_waitq_init(&debug_ctlwq); +#ifdef CRAY_PORTALS + lus_portals_debug = &portals_debug_msg; +#endif +#ifdef PORTALS_DUMP_ON_PANIC + /* This is currently disabled because it spews far too much to the + * console on the rare cases it is ever triggered. */ notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); +#endif return tracefile_init(); } int portals_debug_cleanup(void) { tracefile_exit(); +#ifdef PORTALS_DUMP_ON_PANIC notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); +#endif +#ifdef CRAY_PORTALS + lus_portals_debug = NULL; +#endif return 0; } @@ -196,59 +191,6 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -void portals_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = portals_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/portals/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } -} - -void portals_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - portals_run_upcall (argv); -} - char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { if (nid == PTL_NID_ANY) { @@ -259,17 +201,28 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) switch(nal){ /* XXX this could be a nal method of some sort, 'cept it's config * dependent whether (say) socknal NIDs are actually IP addresses... */ -#if !CRAY_PORTALS +#if !CRAY_PORTALS case TCPNAL: /* userspace NAL */ case IIBNAL: + case VIBNAL: case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid)); + case RANAL: + case SOCKNAL: { + /* HIPQUAD requires __u32, but we can't cast in it */ + __u32 nid32 = (__u32)nid; + if ((__u32)(nid >> 32)) { + snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", + (__u32)(nid >> 32), HIPQUAD(nid32)); + } else { + snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", + HIPQUAD(nid32)); + } break; + } case QSWNAL: case GMNAL: + case LONAL: snprintf(str, PTL_NALFMT_SIZE, "%u:%u", (__u32)(nid >> 32), (__u32)nid); break; @@ -285,108 +238,14 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) char *portals_id2str(int nal, ptl_process_id_t id, char *str) { int len; - + portals_nid2str(nal, id.nid, str); len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE, "-%u", id.pid); + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); return str; } -#ifdef __KERNEL__ -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; - -#if defined(__arch_um__) - -char *portals_debug_dumpstack(void) -{ - asm("int $3"); - return "dump stack\n"; -} - -#elif defined(__i386__) - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -extern int lookup_symbol(unsigned long address, char *buf, int buflen); -const char *kallsyms_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, char *namebuf) -{ - int rc = lookup_symbol(addr, namebuf, 128); - if (rc == -ENOSYS) - return NULL; - return namebuf; -} -#endif - -char *portals_debug_dumpstack(void) -{ - unsigned long esp = current->thread.esp, addr; - unsigned long *stack = (unsigned long *)&esp; - char *buf = stack_backtrace, *pbuf = buf; - int size; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE - 1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - const char *sym_name; - char *modname, buffer[128]; - unsigned long junk, offset; - - sym_name = kallsyms_lookup(addr, &junk, &offset, - &modname, buffer); - if (sym_name == NULL) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack - 1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ - -char *portals_debug_dumpstack(void) -{ - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; -} - -#endif /* __arch_um__ */ -struct task_struct *portals_current(void) -{ - CWARN("current task struct is %p\n", current); - return current; -} - -EXPORT_SYMBOL(stack_backtrace_lock); -EXPORT_SYMBOL(portals_debug_dumpstack); -EXPORT_SYMBOL(portals_current); -#endif /* __KERNEL__ */ - EXPORT_SYMBOL(portals_debug_dumplog); EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_upcall); -EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); EXPORT_SYMBOL(portals_id2str);