X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flibcfs%2Fdebug.c;h=b7fd218df360d9d0a5ae2955eca8c4b5c2b13c21;hp=7ad93277d73d317530925cc58b97da4a23bbc954;hb=7e80985d02e71f02b9a99f91d6eb0e154ba56c85;hpb=30c3a18963d1d6d70175fbbbdd9554e1eb2fa40d diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 7ad9327..b7fd218 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -24,524 +24,99 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +# define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include -#include -#include +#include +#include -# define DEBUG_SUBSYSTEM S_PORTALS +#include "tracefile.h" -#include -#include +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); +EXPORT_SYMBOL(portal_subsystem_debug); -#define DEBUG_OVERFLOW 1024 -static char *debug_buf = NULL; -static unsigned long debug_size = 0; -static atomic_t debug_off_a = ATOMIC_INIT(0); -static int debug_wrapped; -wait_queue_head_t debug_ctlwq; -#define DAEMON_SND_SIZE (64 << 10) +unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA | + D_RPCTRACE | D_VFSTRACE); +EXPORT_SYMBOL(portal_debug); -/* - * used by the daemon to keep track the offset into debug_buffer for the next - * write to the file. Usually, the daemon is to write out buffer - * from debug_daemon_next_write upto debug_off - * variable usage - * Reader - portals_debug_msg() - * Writer - portals_debug_daemon() - * portals_debug_daemon_start() during daemon init time - * portals_debug_daemon_continue() to reset to debug_off - * portals_debug_clear_buffer() reset to debug_off for clear - * Note that *_start(), *_continue() & *clear_buffer() should serialized; - */ -static atomic_t debug_daemon_next_write; +unsigned int portal_printk; +EXPORT_SYMBOL(portal_printk); -/* - * A debug_daemon can be in following states - * stopped - stopped state means there is no debug_daemon running. - * accordingly, it must be in paused state - * a daemon is in !stopped && !paused state after - * "lctl debug_daemon start" creates debug_daemon successfully - * Variable Usage - * Reader - portals_debug_daemon() - * portals_debug_set_daemon() routines - * Writer - portals_debug_set_daemon() routines - * portals_debug_daemon() on IO error - * paused - a debug_daemon state is changed from !paused into paused - * when "lctl debug_daemon paused" is issued - * "lctl debug_daemon continue" gets a daemon into !paused mode - * Reader - portals_debug_set_daemon() routines - * portals_debug_msg() - * Writer - portals_debug_set_daemon() on init - * portals_debug_daemon() - * - * Daemon state diagram. - * (stopped, paused) - * | <-- debug_daemon start - * V - * (!stopped, !paused) - * | <-- debug_daemon pause - * V - * (!stopped, paused) - * | <-- debug_daemon continue - * V - * (!stopped, !paused) - * | <-- debug_daemon stop - * V - * (stopped, paused) - * Overlapped - this is a state when CDEBUG is too fast for the daemon to - * write out the debug_bufferr. That is, debug_off is to - * overlap debug_daemon_next_write; - * Reader - portals_debug_msg() - * Writer - portals_debug_msg() - */ +unsigned int portal_stack; +EXPORT_SYMBOL(portal_stack); -/* - * Description on Trace Daemon Synchronization - * - * Three categories of code are synchronizing between each other - * 1. lctl, portals_debug_set_daemon(), the user debug control code, - * as well as portals_debug_clear_buffer() - * 2. CDEBUG, portals_debug_msg(), the debug put messages routine - * 3. Daemon, portals_debug_daemon(), to write out debug log file - * - * - * Three different controls for synchronizations - * - * 1. debug_daemon_semaphore - * The usage of this semaphore is to serialize multiple lctl controls - * in manipulating debug daemon state. The semaphore serves as the - * gatekeeper to allow only one user control thread, at any giving time, - * to access debug daemon state and keeps the other user control requests - * in wait state until the current control request is serviced. - * - * 2. wait_queue_head_t lctl (paired with lctl_event flag) - * Lctl event is the event between portals_debug_set_daemon() and - * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon() - * to flush data out to file. portals_debug_daemon() is to use lctl event - * as signal channel to wakeup portals_debug_set_daemon() upon flush - * operation is done. - * - * Producer : - * portals_debug_daemon() uses to wake up - * portals_debug_set_daemon(), pause and stop, routines - * Consumer : - * portals_debug_set_daemon(), stop and pause operations, - * wait and sleep on the event - * - * 3. wait_queue_head_t daemon (paired with daemon_event flag) - * This is an event channel to wakeup portals_debug_daemon. Daemon - * wakes up to run whenever there is an event posted. Daemon handles - * 2 types of operations . 1. Writes data out to debug file, 2. Flushes - * file and terminates base on lctl event. - * File operation - - * Daemon is normally in a sleep state. - * Daemon is woken up through daemon event whenever CDEBUG is - * putting data over any 64K boundary. - * File flush and termination - - * On portals_debug_daemon_stop/pause() operations, lctl control - * is to wake up daemon through daemon event. - * - * We can't use sleep_on() and wake_up() to replace daemon event because - * portals_debug_daemon() must catch the wakeup operation posted by - * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may - * stuck in lctl wait event. - * - * Producer : - * a. portals_debug_daemon_pause() and portals_debug_daemon_stop() - * uses the event to wake up portals_debug_daemon() - * b. portals_debug_msg() uses the event to wake up - * portals_debug_daemon() whenever the data output is acrossing - * a 64K bytes boundary. - * Consumer : - * portals_debug_daemon() wakes up upon daemon event. - * - * Sequence for portals_debug_daemon_stop() operation - * - * _Portals_debug_daemon_stop()_ _Daemon_ - * Wait_event(daemon) or running - * Paused = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Set force_flush flag if lctlevnt - * Flush data - * Wakeup_event (lctl) - * Wait_event(daemon) - * Stopped = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Exit daemon loop if (Stopped) - * Wakeup_event (lctl) - * Exit - * Return to user application - * - * - * _Portals_debug_msg()_ _Daemon_ - * Wait_event(daemon) or running - * If (WriteStart<64Kjournal_info; - current->journal_info = NULL; - sprintf(debug_file_name, "%s.%ld", debug_file_path, CURRENT_SECONDS); - file = filp_open(debug_file_name, O_CREAT|O_EXCL|O_RDWR, 0644); + snprintf(debug_file_name, sizeof(debug_file_path) - 1, + "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), (long)arg); + printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); + tracefile_dump_all_pages(debug_file_name); - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for dumping: %ld\n", debug_file_name, - PTR_ERR(file)); - GOTO(out, PTR_ERR(file)); - } else { - printk(KERN_ALERT "LustreError: dumping log to %s ... writing ...\n", - debug_file_name); - } - - debug_off = atomic_read(&debug_off_a); - oldfs = get_fs(); - set_fs(get_ds()); - if (debug_wrapped) { - rc = file->f_op->write(file, debug_buf + debug_off + 1, - debug_size-debug_off-1, &file->f_pos); - rc += file->f_op->write(file, debug_buf, debug_off + 1, - &file->f_pos); - } else { - rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos); - } - printk("LustreError: wrote %d bytes\n", rc); - set_fs(oldfs); - - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc) - CERROR("sync returns %d\n", rc); - filp_close(file, 0); -out: - current->journal_info = journal_info; - wake_up(&debug_ctlwq); - return 0; + CFS_POP_JOURNAL; } -int portals_debug_daemon(void *arg) +int portals_debug_dumplog_thread(void *arg) { - struct file *file; - void *journal_info; - mm_segment_t oldfs; - unsigned long force_flush = 0; - unsigned long size, off, flags; - int rc; - - kportal_daemonize("ldebug_daemon"); + kportal_daemonize(""); reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; - - file = filp_open(debug_daemon_file_path, - O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for logging", debug_daemon_file_path); - GOTO(out1, PTR_ERR(file)); - } - printk(KERN_INFO "daemon dumping log to %s\n", debug_daemon_file_path); - - debug_daemon_state.overlapped = 0; - debug_daemon_state.stopped = 0; - - spin_lock_irqsave(&portals_debug_lock, flags); - off = atomic_read(&debug_off_a) + 1; - if (debug_wrapped) - off = (off >= debug_size)? 0 : off; - else - off = 0; - atomic_set(&debug_daemon_next_write, off); - atomic_set(&debug_daemon_state.paused, 0); - spin_unlock_irqrestore(&portals_debug_lock, flags); - - oldfs = get_fs(); - set_fs(KERNEL_DS); - while (1) { - unsigned long ending; - unsigned long start, tail; - long delta; - - debug_daemon_state.daemon_event = 0; - - ending = atomic_read(&debug_off_a); - start = atomic_read(&debug_daemon_next_write); - - /* check if paused is imposed by lctl ? */ - force_flush = !debug_daemon_state.lctl_event; - - delta = ending - start; - tail = debug_size - start; - size = (delta >= 0) ? delta : tail; - while (size && (force_flush || (delta < 0) || - (size >= DAEMON_SND_SIZE))) { - if (daemon_file_size_limit) { - int ssize = daemon_file_size_limit - file->f_pos; - if (size > ssize) - size = ssize; - } - - rc = file->f_op->write(file, debug_buf+start, - size, &file->f_pos); - if (rc < 0) { - printk(KERN_ALERT "LustreError: Debug_daemon " - "write error %d\n", rc); - goto out; - } - start += rc; - delta = ending - start; - tail = debug_size - start; - if (tail == 0) - start = 0; - if (delta >= 0) - size = delta; - else - size = (tail == 0) ? ending : tail; - if (daemon_file_size_limit == file->f_pos) { - // file wrapped around - file->f_pos = 0; - } - } - atomic_set(&debug_daemon_next_write, start); - if (force_flush) { - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc < 0) { - printk(KERN_ALERT "LustreError: Debug_daemon " - "sync error %d\n", rc); - goto out; - } - if (debug_daemon_state.stopped) - break; - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - } - wait_event(debug_daemon_state.daemon, - debug_daemon_state.daemon_event); - } -out: - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - set_fs(oldfs); - filp_close(file, 0); - current->journal_info = journal_info; -out1: - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); + portals_debug_dumplog_internal(arg); + cfs_waitq_signal(&debug_ctlwq); return 0; } -void portals_debug_print(void) -{ - unsigned long dumplen = 64 * 1024; - char *start1, *start2; - char *end1, *end2; - unsigned long debug_off = atomic_read(&debug_off_a); - - start1 = debug_buf + debug_off - dumplen; - if (start1 < debug_buf) { - start1 += debug_size; - end1 = debug_buf + debug_size - 1; - start2 = debug_buf; - end2 = debug_buf + debug_off; - } else { - end1 = debug_buf + debug_off; - start2 = debug_buf + debug_off; - end2 = debug_buf + debug_off; - } - - while (start1 < end1) { - int count = MIN(1024, end1 - start1); - printk("LustreError: %*s", count, start1); - start1 += 1024; - } - while (start2 < end2) { - int count = MIN(1024, end2 - start2); - printk("LustreError: %*s", count, start2); - start2 += 1024; - } -} - void portals_debug_dumplog(void) { - int rc; + int rc; + cfs_waitlink_t wait; ENTRY; - init_waitqueue_head(&debug_ctlwq); - - rc = kernel_thread(portals_do_debug_dumplog, - NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + cfs_waitlink_init(&wait); + set_current_state(TASK_INTERRUPTIBLE); + cfs_waitq_add(&debug_ctlwq, &wait); + + rc = cfs_kernel_thread(portals_debug_dumplog_thread, + (void *)(long)cfs_curproc_pid(), + CLONE_VM | CLONE_FS | CLONE_FILES); + if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); - return; - } - sleep_on(&debug_ctlwq); -} - -int portals_debug_daemon_start(char *file, unsigned int size) -{ - int rc; - - if (!debug_daemon_state.stopped) - return -EALREADY; - - if (file != NULL) - strncpy(debug_daemon_file_path, file, 1024); - - init_waitqueue_head(&debug_daemon_state.lctl); - init_waitqueue_head(&debug_daemon_state.daemon); - - daemon_file_size_limit = size << 20; - - debug_daemon_state.lctl_event = 0; - rc = kernel_thread(portals_debug_daemon, NULL, 0); - if (rc < 0) { - printk(KERN_ERR "LustreError: cannot start debug daemon thread\n"); - strncpy(debug_daemon_file_path, "\0", 1); - return rc; - } - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_pause(void) -{ - if (atomic_read(&debug_daemon_state.paused)) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.lctl_event = 0; - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_continue(void) -{ - if (!atomic_read(&debug_daemon_state.paused)) - return -EINVAL; - if (debug_daemon_state.stopped) - return -EINVAL; - - debug_daemon_state.overlapped = 0; - atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a)); - atomic_set(&debug_daemon_state.paused, 0); - return 0; -} - -int portals_debug_daemon_stop(void) -{ - if (debug_daemon_state.stopped) - return -EALREADY; - - if (!atomic_read(&debug_daemon_state.paused)) - portals_debug_daemon_pause(); - - debug_daemon_state.lctl_event = 0; - debug_daemon_state.stopped = 1; - - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - - debug_daemon_file_path[0] = '\0'; - return 0; -} - -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *filename, unsigned int size) -{ - int rc = -EINVAL; + else + schedule(); - down(&debug_daemon_semaphore); - switch (cmd) { - case DEBUG_DAEMON_START: - if (length && (filename[length -1] != '\0')) { - CERROR("Invalid filename for debug_daemon\n"); - rc = -EINVAL; - break; - } - rc = portals_debug_daemon_start(filename, size); - break; - case DEBUG_DAEMON_STOP: - rc = portals_debug_daemon_stop(); - break; - case DEBUG_DAEMON_PAUSE: - rc = portals_debug_daemon_pause(); - break; - case DEBUG_DAEMON_CONTINUE: - rc = portals_debug_daemon_continue(); - break; - default: - CERROR("unknown set_daemon cmd\n"); - } - up(&debug_daemon_semaphore); - return rc; + /* be sure to teardown if kernel_thread() failed */ + cfs_waitq_del(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); } +#ifdef PORTALS_DUMP_ON_PANIC static int panic_dumplog(struct notifier_block *self, unsigned long unused1, void *unused2) { + static int handled_panic; /* to avoid recursive calls to notifiers */ + if (handled_panic) return 0; else handled_panic = 1; if (in_interrupt()) { - portals_debug_print(); + trace_debug_print(); return 0; } @@ -556,85 +131,52 @@ static struct notifier_block lustre_panic_notifier = { next : NULL, priority : 10000 }; +#endif + +#ifdef CRAY_PORTALS +extern void *lus_portals_debug; +#endif int portals_debug_init(unsigned long bufsize) { - unsigned long debug_off = atomic_read(&debug_off_a); - if (debug_buf != NULL) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - - debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW); - if (debug_buf == NULL) - return -ENOMEM; - memset(debug_buf, 0, debug_size); - debug_wrapped = 0; - - //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - //bufsize, debug_buf); - atomic_set(&debug_off_a, debug_off); + cfs_waitq_init(&debug_ctlwq); +#ifdef CRAY_PORTALS + lus_portals_debug = &portals_debug_msg; +#endif +#ifdef PORTALS_DUMP_ON_PANIC + /* This is currently disabled because it spews far too much to the + * console on the rare cases it is ever triggered. */ notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); - debug_size = bufsize; - - return 0; +#endif + return tracefile_init(); } int portals_debug_cleanup(void) { + tracefile_exit(); +#ifdef PORTALS_DUMP_ON_PANIC notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - portals_debug_daemon_stop(); - - vfree(debug_buf); - atomic_set(&debug_off_a, 0); - up(&debug_daemon_semaphore); - +#endif +#ifdef CRAY_PORTALS + lus_portals_debug = NULL; +#endif return 0; } int portals_debug_clear_buffer(void) { - unsigned long flags; - unsigned long state; - - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - state = atomic_read(&debug_daemon_state.paused); - if (!state) - portals_debug_daemon_pause(); - spin_lock_irqsave(&portals_debug_lock, flags); - atomic_set(&debug_off_a, 0); - debug_wrapped = 0; - atomic_set(&debug_daemon_next_write, 0); - debug_daemon_state.overlapped = 0; - spin_unlock_irqrestore(&portals_debug_lock, flags); - - if (!state) - atomic_set(&debug_daemon_state.paused, 0); - up(&debug_daemon_semaphore); - + trace_flush_pages(); return 0; } /* Debug markers, although printed by S_PORTALS - * should not be be marked as such. - */ + * should not be be marked as such. */ #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_UNDEFINED int portals_debug_mark_buffer(char *text) { - if (debug_buf == NULL) - return -EINVAL; - CDEBUG(D_TRACE,"***************************************************\n"); - CWARN("DEBUG MARKER: %s\n", text); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); CDEBUG(D_TRACE,"***************************************************\n"); return 0; @@ -642,390 +184,68 @@ int portals_debug_mark_buffer(char *text) #undef DEBUG_SUBSYSTEM #define DEBUG_SUBSYSTEM S_PORTALS -/* this copies a snapshot of the debug buffer into an array of pages - * before doing the potentially blocking copy into userspace. it could - * be warning userspace if things wrap heavily while its off copying. */ -__s32 portals_debug_copy_to_user(char *buf, unsigned long len) -{ - int rc; - unsigned long total, debug_off, i, off, copied; - unsigned long flags; - struct page *page; - LIST_HEAD(my_pages); - struct list_head *pos, *n; - - if (len < debug_size) - return -ENOSPC; - - for (i = 0 ; i < debug_size; i += PAGE_SIZE) { - page = alloc_page(GFP_NOFS); - if (page == NULL) { - rc = -ENOMEM; - goto cleanup; - } - list_add(&page->list, &my_pages); - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - - /* Sigh. If the buffer is empty, then skip to the end. */ - if (debug_off == 0 && !debug_wrapped) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - rc = 0; - goto cleanup; - } - - if (debug_wrapped) { - off = debug_off + 1; - total = debug_size; - } else { - off = 0; - total = debug_off; - } - copied = 0; - list_for_each(pos, &my_pages) { - unsigned long to_copy; - void *addr; - - page = list_entry(pos, struct page, list); - to_copy = min(total - off, PAGE_SIZE); - if (to_copy == 0) { - off = 0; - to_copy = min(debug_size - off, PAGE_SIZE); - } -finish_partial: - addr = kmap_atomic(page, KM_USER0); - memcpy(addr, debug_buf + off, to_copy); - kunmap_atomic(addr, KM_USER0); - copied += to_copy; - if (copied >= total) - break; - - off += to_copy; - if (off >= debug_size) { - off = 0; - if (to_copy != PAGE_SIZE) { - to_copy = PAGE_SIZE - to_copy; - goto finish_partial; - } - } - } - - spin_unlock_irqrestore(&portals_debug_lock, flags); - - off = 0; - list_for_each(pos, &my_pages) { - unsigned long to_copy; - page = list_entry(pos, struct page, list); - - to_copy = min(copied - off, PAGE_SIZE); - rc = copy_to_user(buf + off, kmap(page), to_copy); - kunmap(page); - if (rc) { - rc = -EFAULT; - goto cleanup; - } - off += to_copy; - if (off >= copied) - break; - } - rc = copied; - -cleanup: - list_for_each_safe(pos, n, &my_pages) { - page = list_entry(pos, struct page, list); - list_del(&page->list); - __free_page(page); - } - return rc; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, char *format, ...) -{ - va_list ap; - unsigned long flags; - int max_nob; - int prefix_nob; - int msg_nob; - struct timeval tv; - unsigned long base_offset; - unsigned long debug_off; - - if (debug_buf == NULL) { - printk("LustreError: portals_debug_msg: debug_buf is NULL!\n"); - return; - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - if (!atomic_read(&debug_daemon_state.paused)) { - unsigned long available; - long delta; - long v = atomic_read(&debug_daemon_next_write); - - delta = debug_off - v; - available = (delta>=0) ? debug_size-delta : -delta; - // Check if we still have enough debug buffer for CDEBUG - if (available < DAEMON_SND_SIZE) { - /* Drop CDEBUG packets until enough debug_buffer is - * available */ - if (debug_daemon_state.overlapped) - goto out; - /* If this is the first time, leave a marker in the - * output */ - debug_daemon_state.overlapped = 1; - format = "DEBUG MARKER: Debug buffer overlapped\n"; - printk(KERN_ERR "LustreError: debug daemon buffer " - "overlapped\n"); - } else /* More space just became available */ - debug_daemon_state.overlapped = 0; - } - - max_nob = debug_size - debug_off + DEBUG_OVERFLOW; - if (max_nob <= 0) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - printk("LustreError: logic error in portals_debug_msg: " - "< 0 bytes to write\n"); - return; - } - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. - */ - - do_gettimeofday(&tv); - - prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%06x:%06x:%d:%lu.%06lu:%lu:%d:", - subsys, mask, smp_processor_id(), - tv.tv_sec, tv.tv_usec, stack, current->pid); - max_nob -= prefix_nob; - - if(*(format + strlen(format) - 1) != '\n') - printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", - file, line, fn); - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->thread.extern_pid, file, line, fn); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->thread.mode.tt.extern_pid, file, line, fn); -#else - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "%d:(%s:%d:%s()) ", - current->pid, file, line, fn); -#endif - - va_start(ap, format); - msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); - max_nob -= msg_nob; - va_end(ap); - - /* Print to console, while msg is contiguous in debug_buf */ - /* NB safely terminated see above */ - if ((mask & D_EMERG) != 0) - printk(KERN_EMERG "LustreError: %s", - debug_buf + debug_off + prefix_nob); - else if ((mask & D_ERROR) != 0) - printk(KERN_ERR "LustreError: %s", - debug_buf + debug_off + prefix_nob); - else if ((mask & D_WARNING) != 0) - printk(KERN_WARNING "Lustre: %s", - debug_buf + debug_off + prefix_nob); - else if (portal_printk) - printk("<%d>Lustre: %s", portal_printk, - debug_buf+debug_off+prefix_nob); - base_offset = debug_off & 0xFFFF; - - debug_off += prefix_nob + msg_nob; - if (debug_off > debug_size) { - memcpy(debug_buf, debug_buf + debug_size, - debug_off - debug_size + 1); - debug_off -= debug_size; - debug_wrapped = 1; - } - - atomic_set(&debug_off_a, debug_off); - if (!atomic_read(&debug_daemon_state.paused) && - ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) { - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - } -out: - spin_unlock_irqrestore(&portals_debug_lock, flags); -} - void portals_debug_set_level(unsigned int debug_level) { - printk("Lustre: Setting portals debug level to %08x\n", debug_level); + printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", + debug_level); portal_debug = debug_level; } -void portals_run_upcall(char **argv) +char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = portals_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/portals/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); + if (nid == PTL_NID_ANY) { + snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); + return str; } -} - -void portals_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - portals_run_upcall (argv); -} - -char *portals_nid2str(int nal, ptl_nid_t nid, char *str) -{ switch(nal){ +/* XXX this could be a nal method of some sort, 'cept it's config + * dependent whether (say) socknal NIDs are actually IP addresses... */ +#if !CRAY_PORTALS case TCPNAL: /* userspace NAL */ - case SOCKNAL: - sprintf(str, "%u:%d.%d.%d.%d", (__u32)(nid >> 32), - HIPQUAD(nid)); + case IIBNAL: + case VIBNAL: + case OPENIBNAL: + case RANAL: + case SOCKNAL: { + /* HIPQUAD requires __u32, but we can't cast in it */ + __u32 nid32 = (__u32)nid; + if ((__u32)(nid >> 32)) { + snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", + (__u32)(nid >> 32), HIPQUAD(nid32)); + } else { + snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", + HIPQUAD(nid32)); + } break; + } case QSWNAL: case GMNAL: - case IBNAL: - case SCIMACNAL: - sprintf(str, "%u:%u", (__u32)(nid >> 32), (__u32)nid); + case LONAL: + snprintf(str, PTL_NALFMT_SIZE, "%u:%u", + (__u32)(nid >> 32), (__u32)nid); break; +#endif default: - return NULL; + snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx", + nal, (long long)nid); + break; } return str; } -#ifdef __KERNEL__ -#include -#if (LUSTRE_KERNEL_VERSION >= 30) -#warning "FIXME: remove workaround when l30 is widely used" -char stack_backtrace[LUSTRE_TRACE_SIZE]; -spinlock_t stack_backtrace_lock = SPIN_LOCK_UNLOCKED; - -#if defined(__arch_um__) - -extern int is_kernel_text_address(unsigned long addr); - -char *portals_debug_dumpstack(void) -{ - asm("int $3"); - return "dump stack"; -} - -#elif defined(__i386__) - -extern int is_kernel_text_address(unsigned long addr); -extern int lookup_symbol(unsigned long address, char *buf, int buflen); - -char *portals_debug_dumpstack(void) +char *portals_id2str(int nal, ptl_process_id_t id, char *str) { - unsigned long esp = current->thread.esp; - unsigned long *stack = (unsigned long *)&esp; - int size; - unsigned long addr; - char *buf = stack_backtrace; - char *pbuf = buf; - static char buffer[512]; - int rc = 0; - - /* User space on another CPU? */ - if ((esp ^ (unsigned long)current) & (PAGE_MASK<<1)){ - buf[0] = '\0'; - goto out; - } - - size = sprintf(pbuf, " Call Trace: "); - pbuf += size; - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack++; - if (is_kernel_text_address(addr)) { - rc = lookup_symbol(addr, buffer, 512); - if (rc == -ENOSYS) { - if (buf + LUSTRE_TRACE_SIZE <= pbuf + 12) - break; - size = sprintf(pbuf, "[<%08lx>] ", addr); - } else { - if (buf + LUSTRE_TRACE_SIZE - /* fix length + sizeof('\0') */ - <= pbuf + strlen(buffer) + 28 + 1) - break; - size = sprintf(pbuf, "([<%08lx>] %s (0x%p)) ", - addr, buffer, stack-1); - } - pbuf += size; - } - } -out: - return buf; -} - -#else /* !__arch_um__ && !__i386__ */ + int len; -char *portals_debug_dumpstack(void) -{ - char *buf = stack_backtrace; - buf[0] = '\0'; - return buf; + portals_nid2str(nal, id.nid, str); + len = strlen(str); + snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); + return str; } -#endif /* __arch_um__ */ -EXPORT_SYMBOL(stack_backtrace_lock); -EXPORT_SYMBOL(portals_debug_dumpstack); -#endif /* LUSTRE_KERNEL_VERSION < 30 */ -#endif /* __KERNEL__ */ - EXPORT_SYMBOL(portals_debug_dumplog); -EXPORT_SYMBOL(portals_debug_msg); EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_upcall); -EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); +EXPORT_SYMBOL(portals_id2str);