X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fportals%2Flibcfs%2Fdebug.c;h=c56f76ff39f6d8ed24c8e3c78c0aba1b18f5f713;hp=914b78f9b385441f453ae3a12b6938c25a5caade;hb=191061ee668400324f4505cf498f1ee2d57e4962;hpb=3de901fceee79de12a31428bcc6ba3a00f10d1fe diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 914b78f9..c56f76f 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -45,6 +45,7 @@ #include #include #include +#include # define DEBUG_SUBSYSTEM S_PORTALS @@ -52,17 +53,20 @@ #include #include +#include "tracefile.h" + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +#include +#endif + unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | - S_GMNAL | S_IBNAL); + S_GMNAL | S_OPENIBNAL); EXPORT_SYMBOL(portal_subsystem_debug); unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA | D_RPCTRACE | D_VFSTRACE); EXPORT_SYMBOL(portal_debug); -unsigned int portal_cerror = 1; -EXPORT_SYMBOL(portal_cerror); - unsigned int portal_printk; EXPORT_SYMBOL(portal_printk); @@ -74,485 +78,55 @@ atomic_t portal_kmemory = ATOMIC_INIT(0); EXPORT_SYMBOL(portal_kmemory); #endif -#define DEBUG_OVERFLOW 1024 -static char *debug_buf = NULL; -static unsigned long debug_size = 0; -static atomic_t debug_off_a = ATOMIC_INIT(0); -static int debug_wrapped; -wait_queue_head_t debug_ctlwq; -#define DAEMON_SND_SIZE (64 << 10) - -/* - * used by the daemon to keep track the offset into debug_buffer for the next - * write to the file. Usually, the daemon is to write out buffer - * from debug_daemon_next_write upto debug_off - * variable usage - * Reader - portals_debug_msg() - * Writer - portals_debug_daemon() - * portals_debug_daemon_start() during daemon init time - * portals_debug_daemon_continue() to reset to debug_off - * portals_debug_clear_buffer() reset to debug_off for clear - * Note that *_start(), *_continue() & *clear_buffer() should serialized; - */ -static atomic_t debug_daemon_next_write; - -/* - * A debug_daemon can be in following states - * stopped - stopped state means there is no debug_daemon running. - * accordingly, it must be in paused state - * a daemon is in !stopped && !paused state after - * "lctl debug_daemon start" creates debug_daemon successfully - * Variable Usage - * Reader - portals_debug_daemon() - * portals_debug_set_daemon() routines - * Writer - portals_debug_set_daemon() routines - * portals_debug_daemon() on IO error - * paused - a debug_daemon state is changed from !paused into paused - * when "lctl debug_daemon paused" is issued - * "lctl debug_daemon continue" gets a daemon into !paused mode - * Reader - portals_debug_set_daemon() routines - * portals_debug_msg() - * Writer - portals_debug_set_daemon() on init - * portals_debug_daemon() - * - * Daemon state diagram. - * (stopped, paused) - * | <-- debug_daemon start - * V - * (!stopped, !paused) - * | <-- debug_daemon pause - * V - * (!stopped, paused) - * | <-- debug_daemon continue - * V - * (!stopped, !paused) - * | <-- debug_daemon stop - * V - * (stopped, paused) - * Overlapped - this is a state when CDEBUG is too fast for the daemon to - * write out the debug_bufferr. That is, debug_off is to - * overlap debug_daemon_next_write; - * Reader - portals_debug_msg() - * Writer - portals_debug_msg() - */ - -/* - * Description on Trace Daemon Synchronization - * - * Three categories of code are synchronizing between each other - * 1. lctl, portals_debug_set_daemon(), the user debug control code, - * as well as portals_debug_clear_buffer() - * 2. CDEBUG, portals_debug_msg(), the debug put messages routine - * 3. Daemon, portals_debug_daemon(), to write out debug log file - * - * - * Three different controls for synchronizations - * - * 1. debug_daemon_semaphore - * The usage of this semaphore is to serialize multiple lctl controls - * in manipulating debug daemon state. The semaphore serves as the - * gatekeeper to allow only one user control thread, at any giving time, - * to access debug daemon state and keeps the other user control requests - * in wait state until the current control request is serviced. - * - * 2. wait_queue_head_t lctl (paired with lctl_event flag) - * Lctl event is the event between portals_debug_set_daemon() and - * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon() - * to flush data out to file. portals_debug_daemon() is to use lctl event - * as signal channel to wakeup portals_debug_set_daemon() upon flush - * operation is done. - * - * Producer : - * portals_debug_daemon() uses to wake up - * portals_debug_set_daemon(), pause and stop, routines - * Consumer : - * portals_debug_set_daemon(), stop and pause operations, - * wait and sleep on the event - * - * 3. wait_queue_head_t daemon (paired with daemon_event flag) - * This is an event channel to wakeup portals_debug_daemon. Daemon - * wakes up to run whenever there is an event posted. Daemon handles - * 2 types of operations . 1. Writes data out to debug file, 2. Flushes - * file and terminates base on lctl event. - * File operation - - * Daemon is normally in a sleep state. - * Daemon is woken up through daemon event whenever CDEBUG is - * putting data over any 64K boundary. - * File flush and termination - - * On portals_debug_daemon_stop/pause() operations, lctl control - * is to wake up daemon through daemon event. - * - * We can't use sleep_on() and wake_up() to replace daemon event because - * portals_debug_daemon() must catch the wakeup operation posted by - * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may - * stuck in lctl wait event. - * - * Producer : - * a. portals_debug_daemon_pause() and portals_debug_daemon_stop() - * uses the event to wake up portals_debug_daemon() - * b. portals_debug_msg() uses the event to wake up - * portals_debug_daemon() whenever the data output is acrossing - * a 64K bytes boundary. - * Consumer : - * portals_debug_daemon() wakes up upon daemon event. - * - * Sequence for portals_debug_daemon_stop() operation - * - * _Portals_debug_daemon_stop()_ _Daemon_ - * Wait_event(daemon) or running - * Paused = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Set force_flush flag if lctlevnt - * Flush data - * Wakeup_event (lctl) - * Wait_event(daemon) - * Stopped = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Exit daemon loop if (Stopped) - * Wakeup_event (lctl) - * Exit - * Return to user application - * - * - * _Portals_debug_msg()_ _Daemon_ - * Wait_event(daemon) or running - * If (WriteStart<64Kjournal_info; current->journal_info = NULL; - sprintf(debug_file_name, "%s.%ld", debug_file_path, CURRENT_SECONDS); - file = filp_open(debug_file_name, O_CREAT|O_EXCL|O_RDWR, 0644); - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for dumping: %ld\n", debug_file_name, - PTR_ERR(file)); - GOTO(out, PTR_ERR(file)); - } else { - printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n", - debug_file_name); - } - - debug_off = atomic_read(&debug_off_a); - oldfs = get_fs(); - set_fs(get_ds()); - if (debug_wrapped) { - rc = file->f_op->write(file, debug_buf + debug_off + 1, - debug_size-debug_off-1, &file->f_pos); - rc += file->f_op->write(file, debug_buf, debug_off + 1, - &file->f_pos); - } else { - rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos); - } - printk("LustreError: wrote %d bytes\n", rc); - set_fs(oldfs); + snprintf(debug_file_name, sizeof(debug_file_path) - 1, + "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg); + tracefile_dump_all_pages(debug_file_name); - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc) - CERROR("sync returns %d\n", rc); - filp_close(file, 0); -out: current->journal_info = journal_info; wake_up(&debug_ctlwq); return 0; } -int portals_debug_daemon(void *arg) -{ - struct file *file; - void *journal_info; - mm_segment_t oldfs; - unsigned long force_flush = 0; - unsigned long size, off, flags; - int rc; - - kportal_daemonize("ldebug_daemon"); - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; - - file = filp_open(debug_daemon_file_path, - O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for logging", debug_daemon_file_path); - GOTO(out1, PTR_ERR(file)); - } - printk(KERN_INFO "daemon dumping log to %s\n", debug_daemon_file_path); - - debug_daemon_state.overlapped = 0; - debug_daemon_state.stopped = 0; - - spin_lock_irqsave(&portals_debug_lock, flags); - off = atomic_read(&debug_off_a) + 1; - if (debug_wrapped) - off = (off >= debug_size)? 0 : off; - else - off = 0; - atomic_set(&debug_daemon_next_write, off); - atomic_set(&debug_daemon_state.paused, 0); - spin_unlock_irqrestore(&portals_debug_lock, flags); - - oldfs = get_fs(); - set_fs(KERNEL_DS); - while (1) { - unsigned long ending; - unsigned long start, tail; - long delta; - - debug_daemon_state.daemon_event = 0; - - ending = atomic_read(&debug_off_a); - start = atomic_read(&debug_daemon_next_write); - - /* check if paused is imposed by lctl ? */ - force_flush = !debug_daemon_state.lctl_event; - - delta = ending - start; - tail = debug_size - start; - size = (delta >= 0) ? delta : tail; - while (size && (force_flush || (delta < 0) || - (size >= DAEMON_SND_SIZE))) { - if (daemon_file_size_limit) { - int ssize = daemon_file_size_limit - file->f_pos; - if (size > ssize) - size = ssize; - } - - rc = file->f_op->write(file, debug_buf+start, - size, &file->f_pos); - if (rc < 0) { - printk(KERN_ALERT "LustreError: Debug_daemon " - "write error %d\n", rc); - goto out; - } - start += rc; - delta = ending - start; - tail = debug_size - start; - if (tail == 0) - start = 0; - if (delta >= 0) - size = delta; - else - size = (tail == 0) ? ending : tail; - if (daemon_file_size_limit == file->f_pos) { - // file wrapped around - file->f_pos = 0; - } - } - atomic_set(&debug_daemon_next_write, start); - if (force_flush) { - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc < 0) { - printk(KERN_ALERT "LustreError: Debug_daemon " - "sync error %d\n", rc); - goto out; - } - if (debug_daemon_state.stopped) - break; - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - } - wait_event(debug_daemon_state.daemon, - debug_daemon_state.daemon_event); - } -out: - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - set_fs(oldfs); - filp_close(file, 0); - current->journal_info = journal_info; -out1: - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - return 0; -} - -void portals_debug_print(void) -{ - unsigned long dumplen = 64 * 1024; - char *start1, *start2; - char *end1, *end2; - unsigned long debug_off = atomic_read(&debug_off_a); - - start1 = debug_buf + debug_off - dumplen; - if (start1 < debug_buf) { - start1 += debug_size; - end1 = debug_buf + debug_size - 1; - start2 = debug_buf; - end2 = debug_buf + debug_off; - } else { - end1 = debug_buf + debug_off; - start2 = debug_buf + debug_off; - end2 = debug_buf + debug_off; - } - - while (start1 < end1) { - int count = MIN(1024, end1 - start1); - printk("LustreError: %*s", count, start1); - start1 += 1024; - } - while (start2 < end2) { - int count = MIN(1024, end2 - start2); - printk("LustreError: %*s", count, start2); - start2 += 1024; - } -} - void portals_debug_dumplog(void) { int rc; + DECLARE_WAITQUEUE(wait, current); ENTRY; - init_waitqueue_head(&debug_ctlwq); + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&debug_ctlwq, &wait); - rc = kernel_thread(portals_do_debug_dumplog, - NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { + rc = kernel_thread(portals_do_debug_dumplog, (void *)(long)current->pid, + CLONE_VM | CLONE_FS | CLONE_FILES); + if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); - return; - } - sleep_on(&debug_ctlwq); -} - -int portals_debug_daemon_start(char *file, unsigned int size) -{ - int rc; - - if (!debug_daemon_state.stopped) - return -EALREADY; - - if (file != NULL) - strncpy(debug_daemon_file_path, file, 1024); - - init_waitqueue_head(&debug_daemon_state.lctl); - init_waitqueue_head(&debug_daemon_state.daemon); - - daemon_file_size_limit = size << 20; - - debug_daemon_state.lctl_event = 0; - rc = kernel_thread(portals_debug_daemon, NULL, 0); - if (rc < 0) { - printk(KERN_ERR "LustreError: cannot start debug daemon thread\n"); - strncpy(debug_daemon_file_path, "\0", 1); - return rc; - } - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_pause(void) -{ - if (atomic_read(&debug_daemon_state.paused)) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.lctl_event = 0; - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_continue(void) -{ - if (!atomic_read(&debug_daemon_state.paused)) - return -EINVAL; - if (debug_daemon_state.stopped) - return -EINVAL; - - debug_daemon_state.overlapped = 0; - atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a)); - atomic_set(&debug_daemon_state.paused, 0); - return 0; -} - -int portals_debug_daemon_stop(void) -{ - if (debug_daemon_state.stopped) - return -EALREADY; - - if (!atomic_read(&debug_daemon_state.paused)) - portals_debug_daemon_pause(); - - debug_daemon_state.lctl_event = 0; - debug_daemon_state.stopped = 1; - - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - - debug_daemon_file_path[0] = '\0'; - return 0; -} - -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *filename, unsigned int size) -{ - int rc = -EINVAL; + else + schedule(); - down(&debug_daemon_semaphore); - switch (cmd) { - case DEBUG_DAEMON_START: - if (length && (filename[length -1] != '\0')) { - CERROR("Invalid filename for debug_daemon\n"); - rc = -EINVAL; - break; - } - rc = portals_debug_daemon_start(filename, size); - break; - case DEBUG_DAEMON_STOP: - rc = portals_debug_daemon_stop(); - break; - case DEBUG_DAEMON_PAUSE: - rc = portals_debug_daemon_pause(); - break; - case DEBUG_DAEMON_CONTINUE: - rc = portals_debug_daemon_continue(); - break; - default: - CERROR("unknown set_daemon cmd\n"); - } - up(&debug_daemon_semaphore); - return rc; + /* be sure to teardown if kernel_thread() failed */ + remove_wait_queue(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); } static int panic_dumplog(struct notifier_block *self, unsigned long unused1, @@ -564,7 +138,7 @@ static int panic_dumplog(struct notifier_block *self, unsigned long unused1, handled_panic = 1; if (in_interrupt()) { - portals_debug_print(); + trace_debug_print(); return 0; } @@ -582,80 +156,29 @@ static struct notifier_block lustre_panic_notifier = { int portals_debug_init(unsigned long bufsize) { - unsigned long debug_off = atomic_read(&debug_off_a); - if (debug_buf != NULL) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - - debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW); - if (debug_buf == NULL) - return -ENOMEM; - memset(debug_buf, 0, bufsize + DEBUG_OVERFLOW); - debug_wrapped = 0; - - //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - //bufsize, debug_buf); - atomic_set(&debug_off_a, debug_off); notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); - debug_size = bufsize; - - return 0; + return tracefile_init(); } int portals_debug_cleanup(void) { + tracefile_exit(); notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - portals_debug_daemon_stop(); - - vfree(debug_buf); - atomic_set(&debug_off_a, 0); - up(&debug_daemon_semaphore); - return 0; } int portals_debug_clear_buffer(void) { - unsigned long flags; - unsigned long state; - - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - state = atomic_read(&debug_daemon_state.paused); - if (!state) - portals_debug_daemon_pause(); - spin_lock_irqsave(&portals_debug_lock, flags); - atomic_set(&debug_off_a, 0); - debug_wrapped = 0; - atomic_set(&debug_daemon_next_write, 0); - debug_daemon_state.overlapped = 0; - spin_unlock_irqrestore(&portals_debug_lock, flags); - - if (!state) - atomic_set(&debug_daemon_state.paused, 0); - up(&debug_daemon_semaphore); - + trace_flush_pages(); return 0; } /* Debug markers, although printed by S_PORTALS - * should not be be marked as such. - */ + * should not be be marked as such. */ #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_UNDEFINED int portals_debug_mark_buffer(char *text) { - if (debug_buf == NULL) - return -EINVAL; - CDEBUG(D_TRACE,"***************************************************\n"); CWARN("DEBUG MARKER: %s\n", text); CDEBUG(D_TRACE,"***************************************************\n"); @@ -665,231 +188,10 @@ int portals_debug_mark_buffer(char *text) #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_PORTALS -/* this copies a snapshot of the debug buffer into an array of pages - * before doing the potentially blocking copy into userspace. it could - * be warning userspace if things wrap heavily while its off copying. */ -__s32 portals_debug_copy_to_user(char *buf, unsigned long len) -{ - int rc; - unsigned long total, debug_off, i, off, copied; - unsigned long flags; - struct page *page; - LIST_HEAD(my_pages); - struct list_head *pos, *n; - - if (len < debug_size) - return -ENOSPC; - - for (i = 0 ; i < debug_size; i += PAGE_SIZE) { - page = alloc_page(GFP_NOFS); - if (page == NULL) { - rc = -ENOMEM; - goto cleanup; - } - list_add(&PAGE_LIST(page), &my_pages); - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - - /* Sigh. If the buffer is empty, then skip to the end. */ - if (debug_off == 0 && !debug_wrapped) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - rc = 0; - goto cleanup; - } - - if (debug_wrapped) { - off = debug_off + 1; - total = debug_size; - } else { - off = 0; - total = debug_off; - } - copied = 0; - list_for_each(pos, &my_pages) { - unsigned long to_copy; - void *addr; - - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - to_copy = min(total - off, PAGE_SIZE); - if (to_copy == 0) { - off = 0; - to_copy = min(debug_size - off, PAGE_SIZE); - } -finish_partial: - addr = kmap_atomic(page, KM_USER0); - memcpy(addr, debug_buf + off, to_copy); - kunmap_atomic(addr, KM_USER0); - copied += to_copy; - if (copied >= total) - break; - - off += to_copy; - if (off >= debug_size) { - off = 0; - if (to_copy != PAGE_SIZE) { - to_copy = PAGE_SIZE - to_copy; - goto finish_partial; - } - } - } - - spin_unlock_irqrestore(&portals_debug_lock, flags); - - off = 0; - list_for_each(pos, &my_pages) { - unsigned long to_copy; - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - - to_copy = min(copied - off, PAGE_SIZE); - rc = copy_to_user(buf + off, kmap(page), to_copy); - kunmap(page); - if (rc) { - rc = -EFAULT; - goto cleanup; - } - off += to_copy; - if (off >= copied) - break; - } - rc = copied; - -cleanup: - list_for_each_safe(pos, n, &my_pages) { - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - list_del(&PAGE_LIST(page)); - __free_page(page); - } - return rc; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, char *format, ...) -{ - va_list ap; - unsigned long flags; - int max_nob; - int prefix_nob; - int msg_nob; - struct timeval tv; - unsigned long base_offset; - unsigned long debug_off; - - if (debug_buf == NULL) { - printk("LustreError: portals_debug_msg: debug_buf is NULL!\n"); - return; - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - if (!atomic_read(&debug_daemon_state.paused)) { - unsigned long available; - long delta; - long v = atomic_read(&debug_daemon_next_write); - - delta = debug_off - v; - available = (delta>=0) ? debug_size-delta : -delta; - // Check if we still have enough debug buffer for CDEBUG - if (available < DAEMON_SND_SIZE) { - /* Drop CDEBUG packets until enough debug_buffer is - * available */ - if (debug_daemon_state.overlapped) - goto out; - /* If this is the first time, leave a marker in the - * output */ - debug_daemon_state.overlapped = 1; - format = "DEBUG MARKER: Debug buffer overlapped\n"; - printk(KERN_ERR "LustreError: debug daemon buffer " - "overlapped\n"); - } else /* More space just became available */ - debug_daemon_state.overlapped = 0; - } - - max_nob = debug_size - debug_off + DEBUG_OVERFLOW; - if (max_nob <= 0) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - printk("LustreError: logic error in portals_debug_msg: " - "< 0 bytes to write\n"); - return; - } - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. - */ - - do_gettimeofday(&tv); - - prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%06x:%06x:%d:%lu.%06lu:%lu:%d:", - subsys, mask, smp_processor_id(), - tv.tv_sec, tv.tv_usec, stack, current->pid); - max_nob -= prefix_nob; - - if(*(format + strlen(format) - 1) != '\n') - printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", - file, line, fn); - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->thread.extern_pid, file, line, fn); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->thread.mode.tt.extern_pid, file, line, fn); -#else - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->pid, file, line, fn); -#endif - - va_start(ap, format); - msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); - max_nob -= msg_nob; - va_end(ap); - - /* Print to console, while msg is contiguous in debug_buf */ - /* NB safely terminated see above */ - if ((mask & D_EMERG) != 0) - printk(KERN_EMERG "LustreError: %s", - debug_buf + debug_off + prefix_nob); - else if ((mask & D_ERROR) != 0) - printk(KERN_ERR "LustreError: %s", - debug_buf + debug_off + prefix_nob); - else if ((mask & D_WARNING) != 0) - printk(KERN_WARNING "Lustre: %s", - debug_buf + debug_off + prefix_nob); - else if (portal_printk) - printk("<%d>Lustre: %s", portal_printk, - debug_buf+debug_off+prefix_nob); - base_offset = debug_off & 0xFFFF; - - debug_off += prefix_nob + msg_nob; - if (debug_off > debug_size) { - memcpy(debug_buf, debug_buf + debug_size, - debug_off - debug_size + 1); - debug_off -= debug_size; - debug_wrapped = 1; - } - - atomic_set(&debug_off_a, debug_off); - if (!atomic_read(&debug_daemon_state.paused) && - ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) { - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - } -out: - spin_unlock_irqrestore(&portals_debug_lock, flags); -} - void portals_debug_set_level(unsigned int debug_level) { - printk("Lustre: Setting portals debug level to %08x\n", debug_level); + printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", + debug_level); portal_debug = debug_level; } @@ -948,36 +250,69 @@ void portals_run_lbug_upcall(char *file, const char *fn, const int line) char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { + if (nid == PTL_NID_ANY) { + snprintf(str, PTL_NALFMT_SIZE - 1, "%s", + "PTL_NID_ANY"); + return str; + } + switch(nal){ -/* XXX this should be a nal method of some sort */ +/* XXX this could be a nal method of some sort, 'cept it's config + * dependent whether (say) socknal NIDs are actually IP addresses... */ #ifndef CRAY_PORTALS case TCPNAL: /* userspace NAL */ + case OPENIBNAL: case SOCKNAL: - sprintf(str, "%u:%d.%d.%d.%d", (__u32)(nid >> 32), - HIPQUAD(nid)); + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", + (__u32)(nid >> 32), HIPQUAD(nid)); break; case QSWNAL: case GMNAL: - case IBNAL: - case SCIMACNAL: - sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid); + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", + (__u32)(nid >> 32), (__u32)nid); break; #endif default: - snprintf(str, PTL_NALFMT_SIZE-1, "(?%llx)", (long long)nid); + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", + nal, (long long)nid); + break; + } + return str; +} +/* bug #4615 */ +char *portals_id2str(int nal, ptl_process_id_t id, char *str) +{ + switch(nal){ +#ifndef CRAY_PORTALS + case TCPNAL: + /* userspace NAL */ + case OPENIBNAL: + case SOCKNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", + (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); + break; + case QSWNAL: + case GMNAL: + snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", + (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); + break; +#endif + default: + snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", + nal, (long long)id.nid, (long)id.pid ); + break; } return str; } + #ifdef __KERNEL__ char stack_backtrace[LUSTRE_TRACE_SIZE]; spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; #if defined(__arch_um__) -extern int is_kernel_text_address(unsigned long addr); - char *portals_debug_dumpstack(void) { asm("int $3"); @@ -986,33 +321,45 @@ char *portals_debug_dumpstack(void) #elif defined(__i386__) -extern int is_kernel_text_address(unsigned long addr); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) extern int lookup_symbol(unsigned long address, char *buf, int buflen); +const char *kallsyms_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, char *namebuf) +{ + int rc = lookup_symbol(addr, namebuf, 128); + if (rc == -ENOSYS) + return NULL; + return namebuf; +} +#endif char *portals_debug_dumpstack(void) { - unsigned long esp = current->thread.esp; + unsigned long esp = current->thread.esp, addr; unsigned long *stack = (unsigned long *)&esp; + char *buf = stack_backtrace, *pbuf = buf; int size; - unsigned long addr; - char *buf = stack_backtrace; - char *pbuf = buf; - static char buffer[512]; - int rc = 0; /* User space on another CPU? */ - if ((esp ^ (unsigned long)current) & (PAGE_MASK<<1)){ + if ((esp ^ (unsigned long)current) & (PAGE_MASK << 1)){ buf[0] = '\0'; goto out; } size = sprintf(pbuf, " Call Trace: "); pbuf += size; - while (((long) stack & (THREAD_SIZE-1)) != 0) { + while (((long) stack & (THREAD_SIZE - 1)) != 0) { addr = *stack++; - if (is_kernel_text_address(addr)) { - rc = lookup_symbol(addr, buffer, 512); - if (rc == -ENOSYS) { + if (kernel_text_address(addr)) { + const char *sym_name; + char *modname, buffer[128]; + unsigned long junk, offset; + + sym_name = kallsyms_lookup(addr, &junk, &offset, + &modname, buffer); + if (sym_name == NULL) { if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) break; size = sprintf(pbuf, "[<%08lx>] ", addr); @@ -1022,7 +369,7 @@ char *portals_debug_dumpstack(void) <= pbuf + strlen(buffer) + 28 + 1) break; size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack-1); + addr, buffer, stack - 1); } pbuf += size; } @@ -1041,13 +388,20 @@ char *portals_debug_dumpstack(void) } #endif /* __arch_um__ */ +struct task_struct *portals_current(void) +{ + CWARN("current task struct is %p\n", current); + return current; +} + EXPORT_SYMBOL(stack_backtrace_lock); EXPORT_SYMBOL(portals_debug_dumpstack); +EXPORT_SYMBOL(portals_current); #endif /* __KERNEL__ */ EXPORT_SYMBOL(portals_debug_dumplog); -EXPORT_SYMBOL(portals_debug_msg); EXPORT_SYMBOL(portals_debug_set_level); EXPORT_SYMBOL(portals_run_upcall); EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str);