1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/rwsem.h>
28 #include <linux/proc_fs.h>
29 #include <linux/file.h>
30 #include <linux/smp.h>
31 #include <linux/ctype.h>
32 #include <asm/uaccess.h>
34 #include <linux/mm_inline.h>
37 #define DEBUG_SUBSYSTEM S_PORTALS
39 #include <linux/kp30.h>
40 #include <linux/portals_compat25.h>
41 #include <linux/libcfs.h>
43 #define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
45 /* XXX move things up to the top, comment */
/* Per-CPU trace-buffer state; one slot per possible CPU, padded out to a
 * cache line to prevent false sharing between CPUs.
 * NOTE(review): this listing has elided lines -- the field set may be
 * incomplete relative to the real source. */
48 struct trace_cpu_data {
/* ring of pages currently accumulating debug messages on this CPU */
49 struct list_head tcd_pages;
50 unsigned long tcd_cur_pages;
/* pages parked for the debug daemon so an LBUG dump always has data */
52 struct list_head tcd_daemon_pages;
53 unsigned long tcd_cur_daemon_pages;
/* cap on tcd_pages length; settable via the debug_mb proc file */
55 unsigned long tcd_max_pages;
/* set during cleanup so portals_debug_msg stops writing (see tcd_shutting_down checks below) */
56 int tcd_shutting_down;
58 char __pad[SMP_CACHE_BYTES];
59 } trace_data[NR_CPUS] __cacheline_aligned;
/* A cross-CPU collection of trace pages gathered by smp_call_function
 * helpers (collect_pages_on_cpu et al.).
 * NOTE(review): listing is truncated here -- a pc_lock spinlock field is
 * clearly part of this struct (spin_lock_init(&pc.pc_lock) is used
 * throughout) but is not visible in this view. */
61 struct page_collection {
62 struct list_head pc_pages;
/* when set, collectors also drain the per-CPU daemon page lists */
64 int pc_want_daemon_pages;
/* Control block for the ktracefiled daemon thread: start/stop handshakes,
 * its wait queue, and the shutdown flag.
 * NOTE(review): closing brace of this struct is elided from this view. */
67 struct tracefiled_ctl {
68 struct completion tctl_start;
69 struct completion tctl_stop;
70 wait_queue_head_t tctl_waitq;
/* set to 1 by trace_stop_thread to ask the daemon to exit */
72 atomic_t tctl_shutdown;
/* Default maximum trace file size: 500 MB. */
75 #define TRACEFILE_SIZE (500 << 20)
/* protects tracefile / tracefile_size against concurrent proc writes */
76 static DECLARE_RWSEM(tracefile_sem);
/* path the debug daemon writes to; NULL means no daemon output configured */
77 static char *tracefile = NULL;
78 static long long tracefile_size = TRACEFILE_SIZE;
79 static struct tracefiled_ctl trace_tctl;
/* serializes trace_start_thread/trace_stop_thread and thread_running */
80 static DECLARE_MUTEX(trace_thread_sem);
81 static int thread_running = 0;
/* NOTE(review): redefining get_cpu/put_cpu as non-preempt-disabling stubs
 * is presumably a pre-preempt-kernel compatibility shim -- confirm the
 * surrounding (elided) #ifdef before relying on this. */
84 #define get_cpu() smp_processor_id()
85 #define put_cpu() do { } while (0)
/* Pin the current CPU's trace_cpu_data and disable local interrupts;
 * FLAGS receives the saved irq state.  Must be paired with
 * trace_put_tcd.  (Comments kept outside the macros: a non-continued
 * line inside a backslash-continued #define would terminate it.)
 * NOTE(review): trailing lines of both macros are elided in this view. */
88 #define trace_get_tcd(FLAGS) ({ \
89 struct trace_cpu_data *__ret; \
90 int __cpu = get_cpu(); \
91 local_irq_save(FLAGS); \
92 __ret = &trace_data[__cpu].tcd; \
/* Release side: restore interrupts (and, in the full source, put_cpu). */
96 #define trace_put_tcd(TCD, FLAGS) do { \
97 local_irq_restore(FLAGS); \
/* forward declaration: trace_get_page's overflow path spills to the daemon list */
101 static void put_pages_on_daemon_list_on_cpu(void *info);
103 /* return a page that has 'len' bytes left at the end */
/* Find (or allocate) a trace page on this CPU's ring with room for 'len'
 * bytes at page->index.  Called with irqs off via trace_get_tcd, hence
 * GFP_ATOMIC.  When the ring is full it either discards 10% of pages to
 * the daemon list (daemon running) or recycles the oldest page.
 * NOTE(review): many lines of this function are elided from this view;
 * comments below describe only the visible control flow. */
104 static struct page *trace_get_page(struct trace_cpu_data *tcd,
107 struct page *page = NULL;
/* a single trace record can never span pages */
109 if (len > PAGE_SIZE) {
110 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
/* fast path: does the newest (tail) page still have room? */
115 if (!list_empty(&tcd->tcd_pages)) {
116 page = list_entry(tcd->tcd_pages.prev, struct page,
118 if (page->index + len <= PAGE_SIZE)
/* grow the ring while under the per-CPU page budget */
122 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
123 page = alloc_page(GFP_ATOMIC);
125 /* the kernel should print a message for us. fall back
126 * to using the last page in the ring buffer. */
/* stash the owning CPU id in page->mapping so the smp helpers can
 * filter pages back to the right CPU (see put_pages_back_on_cpu) */
130 page->mapping = (void *)(long)smp_processor_id();
131 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
132 tcd->tcd_cur_pages++;
/* enough buffered data to be worth waking the flush daemon */
134 if (tcd->tcd_cur_pages > 8 && thread_running) {
135 struct tracefiled_ctl *tctl = &trace_tctl;
136 wake_up(&tctl->tctl_waitq);
/* ring is full: with a daemon running, hand the oldest 10% of pages to
 * the daemon list rather than losing them outright */
142 if (thread_running) {
143 int pgcount = tcd->tcd_cur_pages / 10;
144 struct page_collection pc;
145 struct list_head *pos, *tmp;
146 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
147 " 10%% of pages (%d)\n", pgcount + 1);
149 INIT_LIST_HEAD(&pc.pc_pages);
150 spin_lock_init(&pc.pc_lock);
152 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
158 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
159 list_del(&PAGE_LIST(page));
160 list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
161 tcd->tcd_cur_pages--;
163 put_pages_on_daemon_list_on_cpu(&pc);
165 LASSERT(!list_empty(&tcd->tcd_pages));
/* no daemon: recycle the oldest (head) page and move it to the tail */
167 page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
170 list_del(&PAGE_LIST(page));
171 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
/* Echo one trace record to the kernel console, choosing the printk level
 * and "LustreError" prefix from the record's debug mask.
 * NOTE(review): elided lines here presumably set ptype for the
 * D_EMERG/D_ERROR branches and the default prefix -- confirm against the
 * full source. */
175 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
176 int len, char *file, const char *fn)
178 char *prefix = NULL, *ptype = NULL;
180 if ((mask & D_EMERG) != 0) {
181 prefix = "LustreError";
183 } else if ((mask & D_ERROR) != 0) {
184 prefix = "LustreError";
186 } else if ((mask & D_WARNING) != 0) {
188 ptype = KERN_WARNING;
/* portal_printk forces console output even for non-error masks */
189 } else if (portal_printk) {
/* %.*s bounds the message body by len: buf is not NUL-terminated here */
194 printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
195 hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
/* Core debug-message entry point (the target of CDEBUG and friends).
 * Formats a ptldebug_header + file + function + vsnprintf'd message into
 * the current CPU's trace page ring, and mirrors error/warning messages
 * to the console.  Runs with local irqs disabled between trace_get_tcd
 * and trace_put_tcd.
 * NOTE(review): this listing is elided in places (e.g. the retry on
 * vsnprintf overflow, 'ap'/'flags'/'page' declarations); comments cover
 * only the visible lines. */
198 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
199 const int line, unsigned long stack, char *format, ...)
201 struct trace_cpu_data *tcd;
202 struct ptldebug_header header;
204 char *debug_buf = format;
/* start with an average-size guess; corrected after vsnprintf reports
 * the real length */
205 int known_size, needed = 85 /* average message length */, max_nob;
/* drop plain D_PORTALS chatter unless that mask is enabled */
211 if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
/* strip the path, keep just the basename of the source file */
214 if (strchr(file, '/'))
215 file = strrchr(file, '/') + 1;
217 if (*(format + strlen(format) - 1) != '\n')
218 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
221 tcd = trace_get_tcd(flags);
/* tracing is being torn down; do not touch the page lists */
222 if (tcd->tcd_shutting_down)
225 do_gettimeofday(&tv);
/* fill in the binary record header that precedes the text payload */
227 header.ph_subsys = subsys;
228 header.ph_mask = mask;
229 header.ph_cpu_id = smp_processor_id();
230 header.ph_sec = (__u32)tv.tv_sec;
231 header.ph_usec = tv.tv_usec;
232 header.ph_stack = stack;
233 header.ph_pid = current->pid;
234 header.ph_line_num = line;
/* UML-specific: record the host pid of the user-mode-linux thread */
236 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
237 header.ph_extern_pid = current->thread.extern_pid;
238 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
239 header.ph_extern_pid = current->thread.mode.tt.extern_pid;
241 header.ph_extern_pid = 0;
244 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
247 page = trace_get_page(tcd, needed + known_size);
250 if (needed + known_size > PAGE_SIZE)
252 needed = strlen(format);
/* format the message text first, leaving room at page->index for the
 * header/file/fn that are copied in below */
256 debug_buf = page_address(page) + page->index + known_size;
258 max_nob = PAGE_SIZE - page->index - known_size;
259 LASSERT(max_nob > 0);
260 va_start(ap, format);
261 needed = vsnprintf(debug_buf, max_nob, format, ap);
/* vsnprintf returns the would-be length, which can exceed max_nob */
264 if (needed > max_nob) /* overflow. oh poop. */
267 header.ph_len = known_size + needed;
/* now lay down header, file name, and function name ahead of the text */
268 debug_buf = page_address(page) + page->index;
270 memcpy(debug_buf, &header, sizeof(header));
271 page->index += sizeof(header);
272 debug_buf += sizeof(header);
274 strcpy(debug_buf, file);
275 page->index += strlen(file) + 1;
276 debug_buf += strlen(file) + 1;
278 strcpy(debug_buf, fn);
279 page->index += strlen(fn) + 1;
280 debug_buf += strlen(fn) + 1;
282 page->index += needed;
283 if (page->index > PAGE_SIZE)
284 printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
/* errors and warnings are mirrored to the console immediately */
288 if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
289 print_to_console(&header, mask, debug_buf, needed, file, fn);
291 trace_put_tcd(tcd, flags);
293 EXPORT_SYMBOL(portals_debug_msg);
/* smp_call_function helper: splice this CPU's trace pages (and, when
 * pc_want_daemon_pages is set, its daemon pages too) onto the shared
 * page_collection, leaving the per-CPU lists empty. */
295 static void collect_pages_on_cpu(void *info)
297 struct trace_cpu_data *tcd;
299 struct page_collection *pc = info;
301 tcd = trace_get_tcd(flags);
/* pc_lock guards pc_pages against the other CPUs doing the same splice */
303 spin_lock(&pc->pc_lock);
304 list_splice(&tcd->tcd_pages, &pc->pc_pages);
/* list_splice leaves the source list in an undefined state; re-init */
305 INIT_LIST_HEAD(&tcd->tcd_pages);
306 tcd->tcd_cur_pages = 0;
307 if (pc->pc_want_daemon_pages) {
308 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
309 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
310 tcd->tcd_cur_daemon_pages = 0;
312 spin_unlock(&pc->pc_lock);
314 trace_put_tcd(tcd, flags);
/* Gather trace pages from every CPU into pc->pc_pages: run the collector
 * locally, then on all other CPUs via smp_call_function (wait=1). */
317 static void collect_pages(struct page_collection *pc)
319 /* needs to be fixed up for preempt */
320 INIT_LIST_HEAD(&pc->pc_pages);
321 collect_pages_on_cpu(pc);
322 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/* smp_call_function helper undoing collect_pages: each CPU reclaims the
 * pages it owns (identified by the CPU id stashed in page->mapping) and
 * reinserts them at the head of its tcd_pages list, preserving order.
 * NOTE(review): the 'continue' for foreign pages is in an elided line --
 * the visible mapping check implies it. */
325 static void put_pages_back_on_cpu(void *info)
327 struct page_collection *pc = info;
328 struct trace_cpu_data *tcd;
329 struct list_head *pos, *tmp, *cur_head;
332 tcd = trace_get_tcd(flags);
/* remember the old head so reclaimed pages go back in front of it */
334 cur_head = tcd->tcd_pages.next;
336 spin_lock(&pc->pc_lock);
337 list_for_each_safe(pos, tmp, &pc->pc_pages) {
340 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
341 LASSERT(page->index <= PAGE_SIZE);
342 LASSERT(page_count(page) > 0);
/* skip pages belonging to other CPUs */
344 if ((unsigned long)page->mapping != smp_processor_id())
347 list_del(&PAGE_LIST(page));
348 list_add_tail(&PAGE_LIST(page), cur_head);
349 tcd->tcd_cur_pages++;
351 spin_unlock(&pc->pc_lock);
353 trace_put_tcd(tcd, flags);
/* Return a collected page set to its owning CPUs: locally first, then on
 * every other CPU via smp_call_function (wait=1). */
356 static void put_pages_back(struct page_collection *pc)
358 /* needs to be fixed up for preempt */
359 put_pages_back_on_cpu(pc);
360 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
363 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
364 * we have a good amount of data at all times for dumping during an LBUG, even
365 * if we have been steadily writing (and otherwise discarding) pages via the
/* (continuation of the original comment is elided in this view) */
367 static void put_pages_on_daemon_list_on_cpu(void *info)
369 struct page_collection *pc = info;
370 struct trace_cpu_data *tcd;
371 struct list_head *pos, *tmp;
374 tcd = trace_get_tcd(flags);
376 spin_lock(&pc->pc_lock);
377 list_for_each_safe(pos, tmp, &pc->pc_pages) {
380 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
381 LASSERT(page->index <= PAGE_SIZE);
382 LASSERT(page_count(page) > 0);
/* only adopt pages that belong to this CPU (owner id in page->mapping) */
383 if ((unsigned long)page->mapping != smp_processor_id())
386 list_del(&PAGE_LIST(page));
387 list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
388 tcd->tcd_cur_daemon_pages++;
/* daemon ring over budget: drop the oldest daemon page (freeing is
 * presumably in an elided line -- confirm against full source) */
390 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
391 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
392 page = list_entry(tcd->tcd_daemon_pages.next,
393 struct page, PAGE_LIST_ENTRY);
395 LASSERT(page->index <= PAGE_SIZE);
396 LASSERT(page_count(page) > 0);
399 list_del(&PAGE_LIST(page));
400 page->mapping = NULL;
402 tcd->tcd_cur_daemon_pages--;
405 spin_unlock(&pc->pc_lock);
407 trace_put_tcd(tcd, flags);
/* Move a collected page set onto every CPU's daemon ring: locally first,
 * then on all other CPUs via smp_call_function (wait=1). */
410 static void put_pages_on_daemon_list(struct page_collection *pc)
412 put_pages_on_daemon_list_on_cpu(pc);
413 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/* Dump every buffered trace record to the console at D_EMERG priority,
 * freeing the pages as they are printed (used from LBUG paths).
 * NOTE(review): record-walk details (hdr assignment, file/fn extraction,
 * p advancement, page free) fall in elided lines; comments describe only
 * what is visible. */
416 void trace_debug_print(void)
418 struct page_collection pc;
419 struct list_head *pos, *tmp;
421 spin_lock_init(&pc.pc_lock);
/* (collect_pages(&pc) presumably happens in an elided line here) */
424 list_for_each_safe(pos, tmp, &pc.pc_pages) {
428 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
429 LASSERT(page->index <= PAGE_SIZE);
430 LASSERT(page_count(page) > 0);
/* walk the variable-length records packed into this page */
432 p = page_address(page);
433 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
434 struct ptldebug_header *hdr;
439 p += strlen(file) + 1;
/* remaining bytes of this record are the message text */
442 len = hdr->ph_len - (p - (char *)hdr);
444 print_to_console(hdr, D_EMERG, p, len, file, fn);
447 list_del(&PAGE_LIST(page));
448 page->mapping = NULL;
/* Dump every buffered trace page (including the daemon ring) to a fresh
 * file at 'filename'.  O_EXCL means an existing file is an error.
 * Returns 0 or a negative errno (error paths are in elided lines).
 * NOTE(review): collect_pages(&pc), the filp error check, set_fs
 * handling, page freeing, and filp_close are not visible in this view. */
453 int tracefile_dump_all_pages(char *filename)
455 struct page_collection pc;
457 struct list_head *pos, *tmp;
/* hold the write side so the daemon cannot race us on the file */
461 down_write(&tracefile_sem);
463 filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
466 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
471 spin_lock_init(&pc.pc_lock);
/* drain the daemon rings too: a dump should contain everything */
472 pc.pc_want_daemon_pages = 1;
474 if (list_empty(&pc.pc_pages)) {
479 /* ok, for now, just write the pages. in the future we'll be building
480 * iobufs with the pages and calling generic_direct_IO */
483 list_for_each_safe(pos, tmp, &pc.pc_pages) {
486 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
487 LASSERT(page->index <= PAGE_SIZE);
488 LASSERT(page_count(page) > 0);
/* page->index is the number of valid bytes in the page */
490 rc = filp->f_op->write(filp, page_address(page), page->index,
492 if (rc != page->index) {
493 printk(KERN_WARNING "wanted to write %lu but wrote "
494 "%d\n", page->index, rc);
498 list_del(&PAGE_LIST(page));
499 page->mapping = NULL;
/* force the dump to disk before returning */
503 rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
505 printk(KERN_ERR "sync returns %d\n", rc);
509 up_write(&tracefile_sem);
/* Discard all buffered trace pages on every CPU without writing them
 * anywhere.
 * NOTE(review): collect_pages(&pc) and the page free call fall in elided
 * lines of this view. */
513 void trace_flush_pages(void)
515 struct page_collection pc;
516 struct list_head *pos, *tmp;
518 spin_lock_init(&pc.pc_lock);
521 list_for_each_safe(pos, tmp, &pc.pc_pages) {
524 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
525 LASSERT(page->index <= PAGE_SIZE);
526 LASSERT(page_count(page) > 0);
528 list_del(&PAGE_LIST(page));
529 page->mapping = NULL;
/* /proc write handler: copy a user-supplied absolute path and dump all
 * trace pages to it via tracefile_dump_all_pages.
 * NOTE(review): the error paths after copy_from_user / the '/' check,
 * the NUL write after the whitespace strip, and kfree(name) are in
 * elided lines.  The 'off > 2' bound (rather than > 0) mirrors the same
 * pattern in trace_write_daemon_file -- confirm intent in full source. */
534 int trace_dk(struct file *file, const char *buffer, unsigned long count,
541 name = kmalloc(count + 1, GFP_KERNEL);
545 if (copy_from_user(name, buffer, count)) {
/* dump files must be absolute paths */
550 if (name[0] != '/') {
555 /* be nice and strip out trailing '\n' */
556 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
560 rc = tracefile_dump_all_pages(name);
566 EXPORT_SYMBOL(trace_dk);
/* The ktracefiled kernel thread: wakes up at least once a second (or when
 * a producer signals tctl_waitq), collects buffered trace pages from all
 * CPUs, appends them to 'tracefile' if one is configured, and parks the
 * pages on the per-CPU daemon rings afterwards.  Exits when
 * tctl_shutdown is set, completing tctl_stop.
 * NOTE(review): the main loop brace, set_fs handling, filp_close, and
 * several error paths are in elided lines of this view. */
568 static int tracefiled(void *arg)
570 struct page_collection pc;
571 struct tracefiled_ctl *tctl = arg;
572 struct list_head *pos, *tmp;
573 struct ptldebug_header *hdr;
579 /* we're started late enough that we pick up init's fs context */
580 /* this is so broken in uml? what on earth is going on? */
581 kportal_daemonize("ktracefiled");
584 spin_lock_init(&pc.pc_lock);
/* handshake: trace_start_thread waits on this before returning */
585 complete(&tctl->tctl_start);
/* open-coded interruptible sleep with a 1s timeout */
590 init_waitqueue_entry(&__wait, current);
591 add_wait_queue(&tctl->tctl_waitq, &__wait);
592 set_current_state(TASK_INTERRUPTIBLE);
593 schedule_timeout(HZ);
594 remove_wait_queue(&tctl->tctl_waitq, &__wait);
596 if (atomic_read(&tctl->tctl_shutdown))
/* only drain the live rings; daemon pages stay put for LBUG dumps */
599 pc.pc_want_daemon_pages = 0;
601 if (list_empty(&pc.pc_pages))
605 down_read(&tracefile_sem);
606 if (tracefile != NULL) {
607 filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
610 printk("couldn't open %s: %ld\n", tracefile,
615 up_read(&tracefile_sem);
/* no output file: just park the pages on the daemon rings */
617 put_pages_on_daemon_list(&pc);
624 /* mark the first header, so we can sort in chunks */
625 page = list_entry(pc.pc_pages.next, struct page,
627 LASSERT(page->index <= PAGE_SIZE);
628 LASSERT(page_count(page) > 0);
630 hdr = page_address(page);
631 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
633 list_for_each_safe(pos, tmp, &pc.pc_pages) {
635 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
636 LASSERT(page->index <= PAGE_SIZE);
637 LASSERT(page_count(page) > 0);
/* wrap to the start once the configured size cap is reached; never
 * seek past current EOF */
639 if (f_pos >= tracefile_size)
641 else if (f_pos > filp->f_dentry->d_inode->i_size)
642 f_pos = filp->f_dentry->d_inode->i_size;
644 rc = filp->f_op->write(filp, page_address(page),
645 page->index, &f_pos);
646 if (rc != page->index) {
647 printk(KERN_WARNING "wanted to write %lu but "
648 "wrote %d\n", page->index, rc);
/* written pages are retired to the daemon rings */
655 put_pages_on_daemon_list(&pc);
/* handshake: trace_stop_thread waits on this */
657 complete(&tctl->tctl_stop);
/* Start the ktracefiled daemon if it is not already running; waits for
 * the thread's start handshake before returning.
 * NOTE(review): the thread_running check/set, the kernel_thread error
 * path, and the return value are in elided lines of this view. */
661 int trace_start_thread(void)
663 struct tracefiled_ctl *tctl = &trace_tctl;
/* serialize against concurrent start/stop attempts */
666 down(&trace_thread_sem);
670 init_completion(&tctl->tctl_start);
671 init_completion(&tctl->tctl_stop);
672 init_waitqueue_head(&tctl->tctl_waitq);
673 atomic_set(&tctl->tctl_shutdown, 0);
675 if (kernel_thread(tracefiled, tctl, 0) < 0) {
/* wait until tracefiled has daemonized and signalled tctl_start */
680 wait_for_completion(&tctl->tctl_start);
683 up(&trace_thread_sem);
/* Ask the ktracefiled daemon to exit and wait for it to complete
 * tctl_stop.  No-op if the thread is not running.
 * NOTE(review): clearing thread_running presumably happens in an elided
 * line inside the if -- confirm against full source. */
687 void trace_stop_thread(void)
689 struct tracefiled_ctl *tctl = &trace_tctl;
691 down(&trace_thread_sem);
692 if (thread_running) {
693 printk(KERN_INFO "Shutting down debug daemon thread...\n");
/* the daemon polls this flag once a second in its main loop */
694 atomic_set(&tctl->tctl_shutdown, 1);
695 wait_for_completion(&tctl->tctl_stop);
698 up(&trace_thread_sem);
/* /proc write handler controlling the debug daemon.  Accepts:
 *   "stop"      -- stop the daemon (handled in elided lines),
 *   "size=<MB>" -- set the trace file size cap (clamped to 10..20480 MB),
 *   "/path"     -- set the output file and start the daemon.
 * NOTE(review): NUL-termination of 'name', the stop branch body, kfree of
 * the previous tracefile, and error paths are in elided lines. */
701 int trace_write_daemon_file(struct file *file, const char *buffer,
702 unsigned long count, void *data)
708 name = kmalloc(count + 1, GFP_KERNEL);
712 if (copy_from_user(name, buffer, count)) {
717 /* be nice and strip out trailing '\n' */
718 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
/* write side: we may replace tracefile / tracefile_size */
723 down_write(&tracefile_sem);
724 if (strcmp(name, "stop") == 0) {
728 } else if (strncmp(name, "size=", 5) == 0) {
729 tracefile_size = simple_strtoul(name + 5, NULL, 0);
/* out-of-range sizes fall back to the 500MB default */
730 if (tracefile_size < 10 || tracefile_size > 20480)
731 tracefile_size = TRACEFILE_SIZE;
/* convert MB to bytes */
733 tracefile_size <<= 20;
/* otherwise the input must be an absolute output path */
737 if (name[0] != '/') {
742 if (tracefile != NULL)
748 printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
749 "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
751 trace_start_thread();
754 up_write(&tracefile_sem);
/* /proc read handler: report the current daemon output path.
 * NOTE(review): if no daemon file is configured, 'tracefile' is NULL and
 * "%s" on a NULL pointer is undefined behavior on many snprintf
 * implementations -- check whether an elided line guards this before
 * assuming it is safe. */
761 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
762 int *eof, void *data)
766 down_read(&tracefile_sem);
767 rc = snprintf(page, count, "%s", tracefile);
768 up_read(&tracefile_sem);
/* /proc write handler: set the per-CPU trace buffer budget from a value
 * in megabytes, rejecting values over 512MB or over ~1/5 of physical RAM
 * (num_physpages >> (20 - 2 - PAGE_SHIFT) is presumably RAM in MB scaled
 * by 4 -- the "80%" in the message; confirm the arithmetic against the
 * full source).
 * NOTE(review): NUL-terminating 'string', the lower-bound check on max,
 * and the return are in elided lines of this view. */
773 int trace_write_debug_mb(struct file *file, const char *buffer,
774 unsigned long count, void *data)
780 if (count >= sizeof(string)) {
781 printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
786 if (copy_from_user(string, buffer, count))
789 max = simple_strtoul(string, NULL, 0);
793 if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
794 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
795 "%dMB, which is more than 80%% of available RAM (%lu)\n",
796 max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
/* apply the new budget to every CPU's ring (MB -> pages) */
802 for (i = 0; i < NR_CPUS; i++) {
803 struct trace_cpu_data *tcd;
804 tcd = &trace_data[i].tcd;
805 tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
/* /proc read handler: report the total debug buffer budget in MB
 * (per-CPU page budget converted back to MB, times the CPU count). */
810 int trace_read_debug_mb(char *page, char **start, off_t off, int count,
811 int *eof, void *data)
813 struct trace_cpu_data *tcd;
817 tcd = trace_get_tcd(flags);
818 rc = snprintf(page, count, "%lu\n",
819 (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
820 trace_put_tcd(tcd, flags);
/* Module init: reset every CPU's trace_cpu_data to an empty state with
 * the default page budget.  Returns 0 (return is in an elided line). */
825 int tracefile_init(void)
827 struct trace_cpu_data *tcd;
830 for (i = 0; i < NR_CPUS; i++) {
831 tcd = &trace_data[i].tcd;
832 INIT_LIST_HEAD(&tcd->tcd_pages);
833 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
834 tcd->tcd_cur_pages = 0;
835 tcd->tcd_cur_daemon_pages = 0;
836 tcd->tcd_max_pages = TCD_MAX_PAGES;
837 tcd->tcd_shutting_down = 0;
/* smp_call_function helper for shutdown: mark this CPU's tracing as
 * shutting down (so portals_debug_msg stops writing) and free all of its
 * buffered pages.
 * NOTE(review): the __free_page call falls in an elided line of this
 * view; clearing page->mapping before it is consistent with the other
 * free paths in this file. */
842 static void trace_cleanup_on_cpu(void *info)
844 struct trace_cpu_data *tcd;
845 struct list_head *pos, *tmp;
848 tcd = trace_get_tcd(flags);
/* flips the flag checked at the top of portals_debug_msg */
850 tcd->tcd_shutting_down = 1;
852 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
855 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
856 LASSERT(page->index <= PAGE_SIZE);
857 LASSERT(page_count(page) > 0);
859 list_del(&PAGE_LIST(page));
860 page->mapping = NULL;
863 tcd->tcd_cur_pages = 0;
865 trace_put_tcd(tcd, flags);
/* Tear down tracing on every CPU: run the cleanup helper locally, then
 * on all other CPUs via smp_call_function (wait=1).  The page_collection
 * is passed only to satisfy the helper's void* signature; the helper
 * frees pages directly from its own CPU's lists. */
868 static void trace_cleanup(void)
870 struct page_collection pc;
872 INIT_LIST_HEAD(&pc.pc_pages);
873 spin_lock_init(&pc.pc_lock);
875 trace_cleanup_on_cpu(&pc);
876 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
879 void tracefile_exit(void)