1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/rwsem.h>
28 #include <linux/proc_fs.h>
29 #include <linux/file.h>
30 #include <linux/smp.h>
31 #include <linux/ctype.h>
32 #include <asm/uaccess.h>
34 #include <linux/mm_inline.h>
37 #define DEBUG_SUBSYSTEM S_PORTALS
39 #include <linux/kp30.h>
40 #include <linux/portals_compat25.h>
41 #include <linux/libcfs.h>
/* Default cap on trace pages per CPU: 5MB worth of pages. */
43 #define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
45 /* XXX move things up to the top, comment */
/* Per-CPU trace state.  Accessed with interrupts disabled via
 * trace_get_tcd()/trace_put_tcd(), so no spinlock is needed on the
 * tcd_* fields themselves. */
48 struct trace_cpu_data {
/* ring of pages currently receiving debug messages */
49 struct list_head tcd_pages;
50 unsigned long tcd_cur_pages;
/* pages parked for the debug daemon so an LBUG dump always has data */
52 struct list_head tcd_daemon_pages;
53 unsigned long tcd_cur_daemon_pages;
/* upper bound on tcd_cur_pages; settable via trace_write_debug_mb() */
55 unsigned long tcd_max_pages;
/* set by trace_cleanup_on_cpu() to stop new messages being buffered */
56 int tcd_shutting_down;
/* pad to a cache line to avoid false sharing between CPUs */
58 char __pad[SMP_CACHE_BYTES];
59 } trace_data[NR_CPUS] __cacheline_aligned;
/* A cross-CPU harvest of trace pages, built by the collect_pages()
 * family and consumed by dump/flush/daemon code.
 * NOTE(review): a pc_lock spinlock field is referenced by the users
 * below (spin_lock_init(&pc.pc_lock)) but is not visible in this
 * excerpt — confirm it is declared in the full struct. */
61 struct page_collection {
62 struct list_head pc_pages;
/* when set, collect_pages_on_cpu() also drains tcd_daemon_pages */
64 int pc_want_daemon_pages;
/* Control block for the ktracefiled daemon thread: start/stop
 * handshakes, its wait queue, and the shutdown flag. */
67 struct tracefiled_ctl {
/* completed by tracefiled() once it is up; waited on in trace_start_thread() */
68 struct completion tctl_start;
/* completed by tracefiled() on exit; waited on in trace_stop_thread() */
69 struct completion tctl_stop;
/* woken by trace_get_page() when a CPU accumulates enough pages */
70 wait_queue_head_t tctl_waitq;
/* nonzero requests the daemon to exit its main loop */
72 atomic_t tctl_shutdown;
/* Default maximum size of the daemon's output file: 500MB. */
75 #define TRACEFILE_SIZE (500 << 20)
/* protects tracefile / tracefile_size; writers hold it exclusively */
76 static DECLARE_RWSEM(tracefile_sem);
/* path of the daemon's output file; NULL means the daemon is idle */
77 static char *tracefile = NULL;
78 static long long tracefile_size = TRACEFILE_SIZE;
79 static struct tracefiled_ctl trace_tctl;
/* serializes trace_start_thread()/trace_stop_thread() */
80 static DECLARE_MUTEX(trace_thread_sem);
/* nonzero while the ktracefiled thread is alive */
81 static int thread_running = 0;
/* Compatibility shims for kernels without get_cpu()/put_cpu().
 * NOTE(review): presumably guarded by a version #ifdef elided from
 * this excerpt — confirm before relying on them unconditionally. */
84 #define get_cpu() smp_processor_id()
85 #define put_cpu() do { } while (0)
/* Pin the current CPU's trace_cpu_data: disables local interrupts
 * (saving state into FLAGS) and yields a pointer to this CPU's tcd.
 * Pair every use with trace_put_tcd().
 * NOTE(review): trace_data[__cpu].tcd implies trace_data elements wrap
 * the struct (likely a union elided from this excerpt) — confirm. */
88 #define trace_get_tcd(FLAGS) ({ \
89 struct trace_cpu_data *__ret; \
90 int __cpu = get_cpu(); \
91 local_irq_save(FLAGS); \
92 __ret = &trace_data[__cpu].tcd; \
/* Release the tcd taken by trace_get_tcd(): restore interrupt state. */
96 #define trace_put_tcd(TCD, FLAGS) do { \
97 local_irq_restore(FLAGS); \
/* Forward declaration: needed by trace_get_page()'s overflow path. */
101 static void put_pages_on_daemon_list_on_cpu(void *info);
103 /* return a page that has 'len' bytes left at the end */
/* Find (or allocate) a trace page with at least 'len' free bytes.
 * Called with the tcd pinned (interrupts off), hence GFP_ATOMIC.
 * page->index is (ab)used as the write offset within the page and
 * page->mapping as the owning CPU id.  When the ring is full, either
 * 10% of pages are pushed to the daemon list (daemon running) or the
 * oldest page is recycled. */
104 static struct page *trace_get_page(struct trace_cpu_data *tcd,
107 struct page *page = NULL;
/* a single message can never span pages; refuse oversized writes */
109 if (len > PAGE_SIZE) {
110 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
/* fast path: the most recently used page still has room */
115 if (!list_empty(&tcd->tcd_pages)) {
116 page = list_entry(tcd->tcd_pages.prev, struct page,
118 if (page->index + len <= PAGE_SIZE)
/* grow the ring while under the per-CPU cap */
122 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
123 page = alloc_page(GFP_ATOMIC);
125 /* the kernel should print a message for us. fall back
126 * to using the last page in the ring buffer. */
/* stash the owning CPU id so the smp_call_function() helpers can
 * tell which pages belong to them */
130 page->mapping = (void *)(long)smp_processor_id();
131 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
132 tcd->tcd_cur_pages++;
/* enough backlog accumulated: poke the daemon to write it out */
134 if (tcd->tcd_cur_pages > 8 && thread_running) {
135 struct tracefiled_ctl *tctl = &trace_tctl;
136 wake_up(&tctl->tctl_waitq);
/* ring is full; with a daemon running, move the oldest 10% of
 * pages onto the daemon list instead of losing them outright */
142 if (thread_running) {
143 int pgcount = tcd->tcd_cur_pages / 10;
144 struct page_collection pc;
145 struct list_head *pos, *tmp;
146 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
147 " 10%% of pages (%d)\n", pgcount + 1);
149 INIT_LIST_HEAD(&pc.pc_pages);
150 spin_lock_init(&pc.pc_lock);
152 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
158 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
159 list_del(&PAGE_LIST(page));
160 list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
161 tcd->tcd_cur_pages--;
163 put_pages_on_daemon_list_on_cpu(&pc);
165 LASSERT(!list_empty(&tcd->tcd_pages));
/* no daemon: recycle the oldest page, overwriting its contents */
167 page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
/* move the recycled page to the tail so it becomes most-recent */
170 list_del(&PAGE_LIST(page));
171 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
/* Echo one trace record to the kernel log, choosing printk level and
 * the "LustreError"/"Lustre" prefix from the debug mask.  'len'/'buf'
 * are the formatted message body; 'file'/'fn' identify the caller. */
175 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
176 int len, char *file, const char *fn)
178 char *prefix = NULL, *ptype = NULL;
180 if ((mask & D_EMERG) != 0) {
181 prefix = "LustreError";
183 } else if ((mask & D_ERROR) != 0) {
184 prefix = "LustreError";
186 } else if ((mask & D_WARNING) != 0) {
188 ptype = KERN_WARNING;
/* portal_printk set: echo even ordinary debug messages */
189 } else if (portal_printk) {
194 printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
195 hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
/* Core CDEBUG entry point: format one debug message and append it to
 * the current CPU's trace page ring as [header][file\0][fn\0][text],
 * advancing page->index past the record.  Serious messages (EMERG/
 * ERROR/WARNING, or any when portal_printk is set) are also echoed to
 * the console.  Runs with local interrupts disabled via trace_get_tcd. */
198 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
199 const int line, unsigned long stack, char *format, ...)
201 struct trace_cpu_data *tcd;
202 struct ptldebug_header header;
204 char *debug_buf = format;
/* start with an average-size reservation; retried with the exact
 * size if the first vsnprintf() overflows */
205 int known_size, needed = 85 /* average message length */, max_nob;
/* strip the path from __FILE__ so records stay compact */
210 if (strchr(file, '/'))
211 file = strrchr(file, '/') + 1;
213 if (*(format + strlen(format) - 1) != '\n')
214 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
217 tcd = trace_get_tcd(flags);
/* tracing is being torn down; drop the message */
218 if (tcd->tcd_shutting_down)
221 do_gettimeofday(&tv);
223 header.ph_subsys = subsys;
224 header.ph_mask = mask;
225 header.ph_cpu_id = smp_processor_id();
226 header.ph_sec = (__u32)tv.tv_sec;
227 header.ph_usec = tv.tv_usec;
228 header.ph_stack = stack;
229 header.ph_pid = current->pid;
230 header.ph_line_num = line;
/* UML keeps the host pid in different places depending on version */
232 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
233 header.ph_extern_pid = current->thread.extern_pid;
234 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
235 header.ph_extern_pid = current->thread.mode.tt.extern_pid;
237 header.ph_extern_pid = 0;
240 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
243 page = trace_get_page(tcd, needed + known_size);
246 if (needed + known_size > PAGE_SIZE)
248 needed = strlen(format);
/* format the message body directly into the page, after the space
 * reserved for header + file + fn */
252 debug_buf = page_address(page) + page->index + known_size;
254 max_nob = PAGE_SIZE - page->index - known_size;
255 LASSERT(max_nob > 0);
256 va_start(ap, format);
257 needed = vsnprintf(debug_buf, max_nob, format, ap);
260 if (needed > max_nob) /* overflow. oh poop. */
263 header.ph_len = known_size + needed;
/* now lay down header, file name and function name before the body */
264 debug_buf = page_address(page) + page->index;
266 memcpy(debug_buf, &header, sizeof(header));
267 page->index += sizeof(header);
268 debug_buf += sizeof(header);
270 strcpy(debug_buf, file);
271 page->index += strlen(file) + 1;
272 debug_buf += strlen(file) + 1;
274 strcpy(debug_buf, fn);
275 page->index += strlen(fn) + 1;
276 debug_buf += strlen(fn) + 1;
278 page->index += needed;
/* should be impossible given the size checks above */
279 if (page->index > PAGE_SIZE)
280 printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
284 if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
285 print_to_console(&header, mask, debug_buf, needed, file, fn);
287 trace_put_tcd(tcd, flags);
289 EXPORT_SYMBOL(portals_debug_msg);
/* Per-CPU half of collect_pages(): splice this CPU's trace pages (and
 * optionally its daemon pages) onto the shared collection under
 * pc_lock, leaving the per-CPU lists empty.  'info' is the
 * struct page_collection being filled.  Runs via smp_call_function. */
291 static void collect_pages_on_cpu(void *info)
293 struct trace_cpu_data *tcd;
295 struct page_collection *pc = info;
297 tcd = trace_get_tcd(flags);
299 spin_lock(&pc->pc_lock);
300 list_splice(&tcd->tcd_pages, &pc->pc_pages);
301 INIT_LIST_HEAD(&tcd->tcd_pages);
302 tcd->tcd_cur_pages = 0;
303 if (pc->pc_want_daemon_pages) {
304 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
305 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
306 tcd->tcd_cur_daemon_pages = 0;
308 spin_unlock(&pc->pc_lock);
310 trace_put_tcd(tcd, flags);
/* Gather trace pages from every CPU into pc->pc_pages: run the
 * per-CPU collector locally, then on all other CPUs via IPI. */
313 static void collect_pages(struct page_collection *pc)
315 /* needs to be fixed up for preempt */
316 INIT_LIST_HEAD(&pc->pc_pages);
317 collect_pages_on_cpu(pc);
/* (func, info, nonatomic=0, wait=1): block until all CPUs finish */
318 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/* Per-CPU half of put_pages_back(): reclaim from the collection the
 * pages this CPU owns (matched via page->mapping, which holds the CPU
 * id) and re-insert them at the head of its tcd_pages ring so
 * ordering is preserved.  Runs via smp_call_function. */
321 static void put_pages_back_on_cpu(void *info)
323 struct page_collection *pc = info;
324 struct trace_cpu_data *tcd;
325 struct list_head *pos, *tmp, *cur_head;
328 tcd = trace_get_tcd(flags);
/* remember the current head; returned pages go before it */
330 cur_head = tcd->tcd_pages.next;
332 spin_lock(&pc->pc_lock);
333 list_for_each_safe(pos, tmp, &pc->pc_pages) {
336 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
337 LASSERT(page->index <= PAGE_SIZE);
338 LASSERT(page_count(page) > 0);
/* skip pages belonging to other CPUs */
340 if ((unsigned long)page->mapping != smp_processor_id())
343 list_del(&PAGE_LIST(page));
344 list_add_tail(&PAGE_LIST(page), cur_head);
345 tcd->tcd_cur_pages++;
347 spin_unlock(&pc->pc_lock);
349 trace_put_tcd(tcd, flags);
/* Return every page in the collection to its owning CPU's ring:
 * locally first, then on all other CPUs via IPI. */
352 static void put_pages_back(struct page_collection *pc)
354 /* needs to be fixed up for preempt */
355 put_pages_back_on_cpu(pc);
356 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
359 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
360 * we have a good amount of data at all times for dumping during an LBUG, even
361 * if we have been steadily writing (and otherwise discarding) pages via the
/* Per-CPU worker: move this CPU's pages from the collection onto its
 * tcd_daemon_pages ring, then trim the ring back under tcd_max_pages
 * by freeing the oldest entries.  Runs via smp_call_function. */
363 static void put_pages_on_daemon_list_on_cpu(void *info)
365 struct page_collection *pc = info;
366 struct trace_cpu_data *tcd;
367 struct list_head *pos, *tmp;
370 tcd = trace_get_tcd(flags);
372 spin_lock(&pc->pc_lock);
373 list_for_each_safe(pos, tmp, &pc->pc_pages) {
376 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
377 LASSERT(page->index <= PAGE_SIZE);
378 LASSERT(page_count(page) > 0);
/* page->mapping holds the owning CPU id; skip foreign pages */
379 if ((unsigned long)page->mapping != smp_processor_id())
382 list_del(&PAGE_LIST(page));
383 list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
384 tcd->tcd_cur_daemon_pages++;
/* over the cap: drop the oldest daemon page */
386 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
387 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
388 page = list_entry(tcd->tcd_daemon_pages.next,
389 struct page, PAGE_LIST_ENTRY);
391 LASSERT(page->index <= PAGE_SIZE);
392 LASSERT(page_count(page) > 0);
395 list_del(&PAGE_LIST(page));
/* clear the CPU-id cookie before the page is released */
396 page->mapping = NULL;
398 tcd->tcd_cur_daemon_pages--;
401 spin_unlock(&pc->pc_lock);
403 trace_put_tcd(tcd, flags);
/* Distribute the collection onto every CPU's daemon ring: locally
 * first, then on all other CPUs via IPI. */
406 static void put_pages_on_daemon_list(struct page_collection *pc)
408 put_pages_on_daemon_list_on_cpu(pc);
409 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/* Dump all buffered trace records straight to the console (used at
 * LBUG/panic time).  Walks every collected page, decoding each
 * [header][file\0][fn\0][text] record in turn, then frees the pages. */
412 void trace_debug_print(void)
414 struct page_collection pc;
415 struct list_head *pos, *tmp;
417 spin_lock_init(&pc.pc_lock);
420 list_for_each_safe(pos, tmp, &pc.pc_pages) {
424 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
425 LASSERT(page->index <= PAGE_SIZE);
426 LASSERT(page_count(page) > 0);
428 p = page_address(page);
/* iterate over the records packed into this page */
429 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
430 struct ptldebug_header *hdr;
435 p += strlen(file) + 1;
/* body length = total record length minus header/file/fn */
438 len = hdr->ph_len - (p - (char *)hdr);
/* D_EMERG forces print_to_console() to emit every record */
440 print_to_console(hdr, D_EMERG, p, len, file, fn);
443 list_del(&PAGE_LIST(page));
444 page->mapping = NULL;
/* Dump every buffered trace page (including the daemon rings) to a
 * new file at 'filename'.  O_EXCL: refuses to overwrite an existing
 * file.  Pages are freed as they are written.  Holds tracefile_sem
 * exclusively for the duration.  Returns 0 or a negative errno.
 * NOTE(review): return-value plumbing is elided in this excerpt. */
449 int tracefile_dump_all_pages(char *filename)
451 struct page_collection pc;
453 struct list_head *pos, *tmp;
457 down_write(&tracefile_sem);
459 filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
462 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
467 spin_lock_init(&pc.pc_lock);
/* drain the daemon rings too so the dump is complete */
468 pc.pc_want_daemon_pages = 1;
470 if (list_empty(&pc.pc_pages)) {
475 /* ok, for now, just write the pages. in the future we'll be building
476 * iobufs with the pages and calling generic_direct_IO */
479 list_for_each_safe(pos, tmp, &pc.pc_pages) {
482 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
483 LASSERT(page->index <= PAGE_SIZE);
484 LASSERT(page_count(page) > 0);
/* page->index is the number of valid bytes in the page */
486 rc = filp->f_op->write(filp, page_address(page), page->index,
488 if (rc != page->index) {
489 printk(KERN_WARNING "wanted to write %lu but wrote "
490 "%d\n", page->index, rc);
494 list_del(&PAGE_LIST(page));
495 page->mapping = NULL;
/* force the dump to disk before returning */
499 rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
501 printk(KERN_ERR "sync returns %d\n", rc);
505 up_write(&tracefile_sem);
/* Discard all buffered trace pages on every CPU, freeing them without
 * writing anywhere. */
509 void trace_flush_pages(void)
511 struct page_collection pc;
512 struct list_head *pos, *tmp;
514 spin_lock_init(&pc.pc_lock);
517 list_for_each_safe(pos, tmp, &pc.pc_pages) {
520 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
521 LASSERT(page->index <= PAGE_SIZE);
522 LASSERT(page_count(page) > 0);
524 list_del(&PAGE_LIST(page));
/* clear the CPU-id cookie before the page is released */
525 page->mapping = NULL;
/* /proc write handler: copy an absolute file path from userspace and
 * dump all trace pages to it via tracefile_dump_all_pages(). */
530 int trace_dk(struct file *file, const char *buffer, unsigned long count,
537 name = kmalloc(count + 1, GFP_KERNEL);
541 if (copy_from_user(name, buffer, count)) {
/* only absolute paths are accepted */
546 if (name[0] != '/') {
551 /* be nice and strip out trailing '\n' */
552 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
556 rc = tracefile_dump_all_pages(name);
562 EXPORT_SYMBOL(trace_dk);
/* The ktracefiled daemon: wakes up periodically (or when poked by
 * trace_get_page()), collects buffered pages from all CPUs, appends
 * them to 'tracefile' (wrapping at tracefile_size), and parks the
 * written pages on the per-CPU daemon rings for emergency dumps.
 * Exits when tctl_shutdown is set, completing tctl_stop. */
564 static int tracefiled(void *arg)
566 struct page_collection pc;
567 struct tracefiled_ctl *tctl = arg;
568 struct list_head *pos, *tmp;
569 struct ptldebug_header *hdr;
575 /* we're started late enough that we pick up init's fs context */
576 /* this is so broken in uml? what on earth is going on? */
577 kportal_daemonize("ktracefiled");
580 spin_lock_init(&pc.pc_lock);
/* handshake with trace_start_thread() */
581 complete(&tctl->tctl_start);
/* sleep up to one second or until woken by a producer */
586 init_waitqueue_entry(&__wait, current);
587 add_wait_queue(&tctl->tctl_waitq, &__wait);
588 set_current_state(TASK_INTERRUPTIBLE);
589 schedule_timeout(HZ);
590 remove_wait_queue(&tctl->tctl_waitq, &__wait);
592 if (atomic_read(&tctl->tctl_shutdown))
/* daemon pages were already written once; don't re-collect them */
595 pc.pc_want_daemon_pages = 0;
597 if (list_empty(&pc.pc_pages))
601 down_read(&tracefile_sem);
602 if (tracefile != NULL) {
603 filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
606 printk("couldn't open %s: %ld\n", tracefile,
611 up_read(&tracefile_sem);
/* no output file: just park the pages for LBUG dumps */
613 put_pages_on_daemon_list(&pc);
620 /* mark the first header, so we can sort in chunks */
621 page = list_entry(pc.pc_pages.next, struct page,
623 LASSERT(page->index <= PAGE_SIZE);
624 LASSERT(page_count(page) > 0);
626 hdr = page_address(page);
627 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
629 list_for_each_safe(pos, tmp, &pc.pc_pages) {
631 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
632 LASSERT(page->index <= PAGE_SIZE);
633 LASSERT(page_count(page) > 0);
/* wrap to the start when the size cap is reached; never seek
 * past the current end of file */
635 if (f_pos >= tracefile_size)
637 else if (f_pos > filp->f_dentry->d_inode->i_size)
638 f_pos = filp->f_dentry->d_inode->i_size;
640 rc = filp->f_op->write(filp, page_address(page),
641 page->index, &f_pos);
642 if (rc != page->index) {
643 printk(KERN_WARNING "wanted to write %lu but "
644 "wrote %d\n", page->index, rc);
/* keep written pages around on the daemon rings */
651 put_pages_on_daemon_list(&pc);
/* handshake with trace_stop_thread() */
653 complete(&tctl->tctl_stop);
/* Start the ktracefiled daemon if it isn't already running and wait
 * for it to signal startup.  Serialized by trace_thread_sem. */
657 int trace_start_thread(void)
659 struct tracefiled_ctl *tctl = &trace_tctl;
662 down(&trace_thread_sem);
666 init_completion(&tctl->tctl_start);
667 init_completion(&tctl->tctl_stop);
668 init_waitqueue_head(&tctl->tctl_waitq);
669 atomic_set(&tctl->tctl_shutdown, 0);
671 if (kernel_thread(tracefiled, tctl, 0) < 0) {
/* wait until tracefiled() has completed its setup */
676 wait_for_completion(&tctl->tctl_start);
679 up(&trace_thread_sem);
/* Ask the ktracefiled daemon to exit and wait for it to finish.
 * Serialized by trace_thread_sem; a no-op if it isn't running. */
683 void trace_stop_thread(void)
685 struct tracefiled_ctl *tctl = &trace_tctl;
687 down(&trace_thread_sem);
688 if (thread_running) {
689 printk(KERN_INFO "Shutting down debug daemon thread...\n");
690 atomic_set(&tctl->tctl_shutdown, 1);
/* tracefiled() completes tctl_stop on its way out */
691 wait_for_completion(&tctl->tctl_stop);
694 up(&trace_thread_sem);
/* /proc write handler controlling the debug daemon.  Accepts:
 *   "stop"      - shut the daemon down,
 *   "size=<MB>" - set the output file size cap (10..20480 MB,
 *                 out-of-range values revert to the default),
 *   "/path"     - absolute path to start writing trace data to.
 * Updates happen under tracefile_sem held for write. */
697 int trace_write_daemon_file(struct file *file, const char *buffer,
698 unsigned long count, void *data)
704 name = kmalloc(count + 1, GFP_KERNEL);
708 if (copy_from_user(name, buffer, count)) {
713 /* be nice and strip out trailing '\n' */
714 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
719 down_write(&tracefile_sem);
720 if (strcmp(name, "stop") == 0) {
724 } else if (strncmp(name, "size=", 5) == 0) {
725 tracefile_size = simple_strtoul(name + 5, NULL, 0);
726 if (tracefile_size < 10 || tracefile_size > 20480)
727 tracefile_size = TRACEFILE_SIZE;
/* user value was in MB; convert to bytes */
729 tracefile_size <<= 20;
/* otherwise the string is a new output path; must be absolute */
733 if (name[0] != '/') {
/* replace any previously-configured path */
738 if (tracefile != NULL)
744 printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
745 "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
747 trace_start_thread();
750 up_write(&tracefile_sem);
/* /proc read handler: report the current daemon output path.
 * NOTE(review): snprintf on a NULL tracefile (daemon stopped) relies
 * on the kernel's "%s"-of-NULL handling — confirm guarded upstream. */
757 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
758 int *eof, void *data)
762 down_read(&tracefile_sem);
763 rc = snprintf(page, count, "%s", tracefile);
764 up_read(&tracefile_sem);
/* /proc write handler: set the per-CPU trace buffer cap in MB.
 * Rejects values above 512MB or above ~80% of available RAM, then
 * converts MB to pages and applies to every CPU's tcd_max_pages. */
769 int trace_write_debug_mb(struct file *file, const char *buffer,
770 unsigned long count, void *data)
776 if (count >= sizeof(string)) {
777 printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
782 if (copy_from_user(string, buffer, count))
785 max = simple_strtoul(string, NULL, 0);
/* (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 == 80% of RAM in MB */
789 if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
790 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
791 "%dMB, which is more than 80%% of available RAM (%lu)\n",
792 max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
798 for (i = 0; i < NR_CPUS; i++) {
799 struct trace_cpu_data *tcd;
800 tcd = &trace_data[i].tcd;
801 tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
/* /proc read handler: report the total trace buffer size in MB
 * (per-CPU cap converted back from pages, times the CPU count). */
806 int trace_read_debug_mb(char *page, char **start, off_t off, int count,
807 int *eof, void *data)
809 struct trace_cpu_data *tcd;
813 tcd = trace_get_tcd(flags);
814 rc = snprintf(page, count, "%lu\n",
815 (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
816 trace_put_tcd(tcd, flags);
/* Module init: reset every CPU's trace state to empty lists, zero
 * counters, the default page cap, and not-shutting-down. */
821 int tracefile_init(void)
823 struct trace_cpu_data *tcd;
826 for (i = 0; i < NR_CPUS; i++) {
827 tcd = &trace_data[i].tcd;
828 INIT_LIST_HEAD(&tcd->tcd_pages);
829 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
830 tcd->tcd_cur_pages = 0;
831 tcd->tcd_cur_daemon_pages = 0;
832 tcd->tcd_max_pages = TCD_MAX_PAGES;
833 tcd->tcd_shutting_down = 0;
/* Per-CPU teardown: mark the tcd shutting down (so new messages are
 * dropped) and free every page on its trace ring.  Runs on each CPU
 * via smp_call_function from trace_cleanup(). */
838 static void trace_cleanup_on_cpu(void *info)
840 struct trace_cpu_data *tcd;
841 struct list_head *pos, *tmp;
844 tcd = trace_get_tcd(flags);
846 tcd->tcd_shutting_down = 1;
848 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
851 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
852 LASSERT(page->index <= PAGE_SIZE);
853 LASSERT(page_count(page) > 0);
855 list_del(&PAGE_LIST(page));
/* clear the CPU-id cookie before the page is released */
856 page->mapping = NULL;
859 tcd->tcd_cur_pages = 0;
861 trace_put_tcd(tcd, flags);
/* Tear down tracing on every CPU: run the per-CPU cleanup locally,
 * then on all other CPUs via IPI. */
864 static void trace_cleanup(void)
866 struct page_collection pc;
868 INIT_LIST_HEAD(&pc.pc_pages);
869 spin_lock_init(&pc.pc_lock);
871 trace_cleanup_on_cpu(&pc);
872 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
875 void tracefile_exit(void)