1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/rwsem.h>
28 #include <linux/proc_fs.h>
29 #include <linux/file.h>
30 #include <linux/smp.h>
31 #include <linux/ctype.h>
32 #include <asm/uaccess.h>
34 #include <linux/mm_inline.h>
37 #define DEBUG_SUBSYSTEM S_PORTALS
39 #include <linux/kp30.h>
40 #include <linux/portals_compat25.h>
41 #include <linux/lustre_compat25.h>
42 #include <linux/libcfs.h>
44 #define TCD_MAX_PAGES 1280
46 /* XXX move things up to the top, comment */
/* Per-CPU trace buffer state.  trace_data[] has one slot per possible CPU
 * (accessed elsewhere as trace_data[cpu].tcd, so this struct is presumably
 * nested in a wrapper not visible in this chunk -- TODO confirm) and the
 * trailing pad keeps each slot on its own cache line to avoid false sharing. */
49 struct trace_cpu_data {
        /* ring of pages currently receiving new trace records */
50         struct list_head tcd_pages;
51         unsigned long tcd_cur_pages;
        /* pages already written out by the daemon but retained so an LBUG
         * dump still has recent history (see put_pages_on_daemon_list) */
53         struct list_head tcd_daemon_pages;
54         unsigned long tcd_cur_daemon_pages;
        /* upper bound on tcd_cur_pages; default TCD_MAX_PAGES, settable
         * via trace_write_debug_size() */
56         unsigned long tcd_max_pages;
        /* set by trace_cleanup_on_cpu() to stop new messages being logged */
57         int tcd_shutting_down;
        /* pad to a cache line so per-CPU slots do not share lines */
59         char __pad[SMP_CACHE_BYTES];
60 } trace_data[NR_CPUS] __cacheline_aligned;
/* A temporary collection of trace pages gathered from all CPUs, used when
 * collecting, dumping, or redistributing pages.  Other code also takes a
 * pc_lock spinlock on this struct; that member is not visible in this
 * chunk -- TODO confirm against the full file. */
62 struct page_collection {
        /* pages gathered from the per-CPU lists */
63         struct list_head pc_pages;
        /* when set, collect_pages_on_cpu() also drains tcd_daemon_pages */
65         int pc_want_daemon_pages;
/* Control block for the background trace-writing thread ("ktracefiled"):
 * start/stop handshakes, its wait queue, and the shutdown flag. */
68 struct tracefiled_ctl {
        /* completed by the thread once it is up (trace_start_thread waits) */
69         struct completion tctl_start;
        /* completed by the thread on exit (trace_stop_thread waits) */
70         struct completion tctl_stop;
        /* the thread sleeps here; woken when pages accumulate */
71         wait_queue_head_t tctl_waitq;
        /* set to 1 to ask the thread to exit */
73         atomic_t tctl_shutdown;
/* protects 'tracefile' (the output path) and serializes dump vs. daemon IO */
76 static DECLARE_RWSEM(tracefile_sem);
/* path of the file the daemon appends trace pages to; NULL = daemon idle */
77 static char *tracefile = NULL;
78 static struct tracefiled_ctl trace_tctl;
/* serializes trace_start_thread()/trace_stop_thread() */
79 static DECLARE_MUTEX(trace_thread_sem);
/* nonzero while the tracefiled thread is alive; guarded by trace_thread_sem */
80 static int thread_running = 0;
/* Fallbacks for kernels without get_cpu()/put_cpu(); these do not disable
 * preemption -- presumably acceptable because trace_get_tcd() disables
 * interrupts immediately after (TODO confirm on preemptible kernels). */
83 #define get_cpu() smp_processor_id()
84 #define put_cpu() do { } while (0)
/* Pin the caller to its per-CPU trace buffer: pick the current CPU and
 * disable local interrupts so trace writes are not reentered.  Paired
 * with trace_put_tcd().  (The macro tail is outside this view.) */
87 #define trace_get_tcd(FLAGS) ({ \
88 struct trace_cpu_data *__ret; \
89 int __cpu = get_cpu(); \
90 local_irq_save(FLAGS); \
91 __ret = &trace_data[__cpu].tcd; \
/* Release the per-CPU buffer taken by trace_get_tcd(): restore interrupt
 * state (and, on kernels with a real put_cpu(), re-enable preemption). */
95 #define trace_put_tcd(TCD, FLAGS) do { \
96 local_irq_restore(FLAGS); \
100 static void put_pages_on_daemon_list_on_cpu(void *info);
102 /* return a page that has 'len' bytes left at the end */
/* Finds (or allocates) the page new trace data should be appended to.
 * page->index is reused as the "bytes used" offset within the page, and
 * page->mapping is abused to remember the owning CPU.  When the buffer is
 * full, either 10% of pages are pushed to the daemon list (daemon running)
 * or the oldest page is recycled.  Called with the tcd held, interrupts
 * off -- hence GFP_ATOMIC below. */
103 static struct page *trace_get_page(struct trace_cpu_data *tcd,
106 struct page *page = NULL;
        /* a single trace record can never exceed one page */
108 if (len > PAGE_SIZE) {
109 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
        /* fast path: last page in the ring still has room for 'len' bytes */
114 if (!list_empty(&tcd->tcd_pages)) {
115 page = list_entry(tcd->tcd_pages.prev, struct page,
117 if (page->index + len <= PAGE_SIZE)
        /* grow the ring if we are still under the per-CPU page budget */
121 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
122 page = alloc_page(GFP_ATOMIC);
124 /* the kernel should print a message for us. fall back
125 * to using the last page in the ring buffer. */
        /* stash the owning CPU in ->mapping so cross-CPU redistribution
         * (put_pages_back_on_cpu etc.) can tell whose page this is */
130 page->mapping = (void *)(long)smp_processor_id();
131 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
132 tcd->tcd_cur_pages++;
        /* enough backlog accumulated: poke the daemon to write pages out */
134 if (tcd->tcd_cur_pages > 8 && thread_running) {
135 struct tracefiled_ctl *tctl = &trace_tctl;
136 wake_up(&tctl->tctl_waitq);
        /* budget exhausted while the daemon runs: hand the oldest 10% of
         * pages to the daemon list and discard them from this ring */
142 if (thread_running) {
143 int pgcount = tcd->tcd_cur_pages / 10;
144 struct page_collection pc;
145 struct list_head *pos, *tmp;
146 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
147 " 10%% of pages (%d)\n", pgcount + 1);
149 INIT_LIST_HEAD(&pc.pc_pages);
150 spin_lock_init(&pc.pc_lock);
152 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
158 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
159 list_del(&PAGE_LIST(page));
160 list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
161 tcd->tcd_cur_pages--;
163 put_pages_on_daemon_list_on_cpu(&pc);
165 LASSERT(!list_empty(&tcd->tcd_pages));
        /* last resort: recycle the oldest page in the ring (its old
         * contents are overwritten) and move it to the ring's tail */
167 page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
170 list_del(&PAGE_LIST(page));
171 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
/* Echo one trace record to the kernel log.  The printk level and the
 * "LustreError"/"Lustre" prefix are chosen from the message's debug mask;
 * D_EMERG/D_ERROR map to LustreError (the missing lines presumably set
 * ptype to KERN_EMERG/KERN_ERR respectively -- TODO confirm). */
175 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
176 int len, char *file, const char *fn)
178 char *prefix = NULL, *ptype = NULL;
180 if ((mask & D_EMERG) != 0) {
181 prefix = "LustreError";
183 } else if ((mask & D_ERROR) != 0) {
184 prefix = "LustreError";
186 } else if ((mask & D_WARNING) != 0) {
188 ptype = KERN_WARNING;
        /* portal_printk set: echo everything, not just errors/warnings */
189 } else if (portal_printk) {
        /* %.*s bounds the output to 'len' bytes: buf is not NUL-terminated */
194 printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
195 hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
/* Core trace entry point (behind the CDEBUG macros): formats one record --
 * binary header, file name, function name, then the vsnprintf'd message --
 * into the current CPU's trace page ring.  Errors/warnings are also echoed
 * to the console.  Runs with local interrupts disabled (trace_get_tcd). */
198 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
199 const int line, unsigned long stack, char *format, ...)
201 struct trace_cpu_data *tcd;
202 struct ptldebug_header header;
205 int known_size, needed, max_nob;
        /* nag developers about format strings missing a trailing newline */
210 if (*(format + strlen(format) - 1) != '\n')
211 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
214 tcd = trace_get_tcd(flags);
        /* tracing torn down (trace_cleanup_on_cpu); drop the message */
215 if (tcd->tcd_shutting_down)
        /* fill in the fixed-size binary record header */
218 do_gettimeofday(&tv);
220 header.ph_subsys = subsys;
221 header.ph_mask = mask;
222 header.ph_cpu_id = smp_processor_id();
223 header.ph_sec = (__u32)tv.tv_sec;
224 header.ph_usec = tv.tv_usec;
225 header.ph_stack = stack;
226 header.ph_pid = current->pid;
227 header.ph_line_num = line;
        /* UML kernels keep the host pid in different places per version */
229 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
230 header.ph_extern_pid = current->thread.extern_pid;
231 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
232 header.ph_extern_pid = current->thread.mode.tt.extern_pid;
234 header.ph_extern_pid = 0;
237 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
        /* first guess: header + names + 40 bytes of slop for the message */
239 page = trace_get_page(tcd, known_size + 40); /* slop */
        /* format the message body directly into the page, after the spot
         * where the header and names will be copied */
244 debug_buf = page_address(page) + page->index + known_size;
246 va_start(ap, format);
247 max_nob = PAGE_SIZE - page->index - known_size;
248 LASSERT(max_nob > 0);
249 needed = vsnprintf(debug_buf, max_nob, format, ap);
        /* message was truncated: get a page with enough room and (in the
         * missing lines, presumably) re-format into it -- TODO confirm */
252 if (needed > max_nob) {
253 /* overflow. oh poop. */
254 page = trace_get_page(tcd, needed + known_size);
258 header.ph_len = known_size + needed;
        /* now lay down header, file name, and function name in front of
         * the message, advancing page->index past each piece */
259 debug_buf = page_address(page) + page->index;
261 memcpy(debug_buf, &header, sizeof(header));
262 page->index += sizeof(header);
263 debug_buf += sizeof(header);
265 strcpy(debug_buf, file);
266 page->index += strlen(file) + 1;
267 debug_buf += strlen(file) + 1;
269 strcpy(debug_buf, fn);
270 page->index += strlen(fn) + 1;
271 debug_buf += strlen(fn) + 1;
273 page->index += needed;
274 if (page->index > PAGE_SIZE)
275 printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
        /* serious messages also go to the console immediately */
278 if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
279 print_to_console(&header, mask, debug_buf, needed, file, fn);
282 trace_put_tcd(tcd, flags);
284 EXPORT_SYMBOL(portals_debug_msg);
/* Per-CPU half of collect_pages(): splice this CPU's trace pages (and,
 * if pc_want_daemon_pages, its daemon pages) onto the shared collection
 * under pc_lock.  Runs on each CPU via smp_call_function(). */
286 static void collect_pages_on_cpu(void *info)
288 struct trace_cpu_data *tcd;
290 struct page_collection *pc = info;
292 tcd = trace_get_tcd(flags);
294 spin_lock(&pc->pc_lock);
        /* move everything; the per-CPU lists are left empty */
295 list_splice(&tcd->tcd_pages, &pc->pc_pages);
296 INIT_LIST_HEAD(&tcd->tcd_pages);
297 tcd->tcd_cur_pages = 0;
298 if (pc->pc_want_daemon_pages) {
299 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
300 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
301 tcd->tcd_cur_daemon_pages = 0;
303 spin_unlock(&pc->pc_lock);
305 trace_put_tcd(tcd, flags);
/* Gather trace pages from every CPU into pc->pc_pages: run the collector
 * locally, then on all other CPUs via smp_call_function (wait=1). */
308 static void collect_pages(struct page_collection *pc)
310 /* needs to be fixed up for preempt */
311 INIT_LIST_HEAD(&pc->pc_pages);
312 collect_pages_on_cpu(pc);
313 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/* Per-CPU half of put_pages_back(): reclaim from the shared collection the
 * pages that belong to this CPU (matched via the CPU id stashed in
 * page->mapping) and re-insert them at the head of its tcd_pages ring so
 * ordering is preserved.  Runs on each CPU via smp_call_function(). */
316 static void put_pages_back_on_cpu(void *info)
318 struct page_collection *pc = info;
319 struct trace_cpu_data *tcd;
320 struct list_head *pos, *tmp, *cur_head;
323 tcd = trace_get_tcd(flags);
        /* remember the current head so reclaimed pages are inserted before
         * any pages added while they were away */
325 cur_head = tcd->tcd_pages.next;
327 spin_lock(&pc->pc_lock);
328 list_for_each_safe(pos, tmp, &pc->pc_pages) {
331 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
332 LASSERT(page->index <= PAGE_SIZE);
333 LASSERT(page_count(page) > 0);
        /* skip pages owned by other CPUs; their own callback takes them */
335 if ((unsigned long)page->mapping != smp_processor_id())
338 list_del(&PAGE_LIST(page));
339 list_add_tail(&PAGE_LIST(page), cur_head);
340 tcd->tcd_cur_pages++;
342 spin_unlock(&pc->pc_lock);
344 trace_put_tcd(tcd, flags);
/* Return collected-but-unconsumed pages to their owning CPUs' rings:
 * run locally, then on all other CPUs via smp_call_function (wait=1). */
347 static void put_pages_back(struct page_collection *pc)
349 /* needs to be fixed up for preempt */
350 put_pages_back_on_cpu(pc);
351 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
354 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
355 * we have a good amount of data at all times for dumping during an LBUG, even
356 * if we have been steadily writing (and otherwise discarding) pages via the
/* Per-CPU half: move this CPU's pages (matched via page->mapping) from the
 * collection onto tcd_daemon_pages, evicting the oldest daemon pages once
 * the tcd_max_pages budget is exceeded (eviction presumably frees the page
 * in the lines not visible here -- TODO confirm). */
358 static void put_pages_on_daemon_list_on_cpu(void *info)
360 struct page_collection *pc = info;
361 struct trace_cpu_data *tcd;
362 struct list_head *pos, *tmp;
365 tcd = trace_get_tcd(flags);
367 spin_lock(&pc->pc_lock);
368 list_for_each_safe(pos, tmp, &pc->pc_pages) {
371 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
372 LASSERT(page->index <= PAGE_SIZE);
373 LASSERT(page_count(page) > 0);
        /* only take pages this CPU owns */
374 if ((unsigned long)page->mapping != smp_processor_id())
377 list_del(&PAGE_LIST(page));
378 list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
379 tcd->tcd_cur_daemon_pages++;
        /* over budget: drop the oldest daemon page */
381 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
382 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
383 page = list_entry(tcd->tcd_daemon_pages.next,
384 struct page, PAGE_LIST_ENTRY);
386 LASSERT(page->index <= PAGE_SIZE);
387 LASSERT(page_count(page) > 0);
390 list_del(&PAGE_LIST(page));
        /* clear the abused ->mapping before the page leaves our hands */
391 page->mapping = NULL;
393 tcd->tcd_cur_daemon_pages--;
396 spin_unlock(&pc->pc_lock);
398 trace_put_tcd(tcd, flags);
/* Distribute written-out pages onto each CPU's daemon ring: run locally,
 * then on all other CPUs via smp_call_function (wait=1). */
401 static void put_pages_on_daemon_list(struct page_collection *pc)
403 put_pages_on_daemon_list_on_cpu(pc);
404 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/* Dump every buffered trace record to the console (used during LBUG/panic):
 * collect all pages from all CPUs, walk the packed records in each page
 * (header, file name, function name, message body back-to-back), print them
 * at D_EMERG, and release the pages. */
407 void trace_debug_print(void)
409 struct page_collection pc;
410 struct list_head *pos, *tmp;
412 spin_lock_init(&pc.pc_lock);
415 list_for_each_safe(pos, tmp, &pc.pc_pages) {
419 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
420 LASSERT(page->index <= PAGE_SIZE);
421 LASSERT(page_count(page) > 0);
        /* walk the records packed into this page */
423 p = page_address(page);
424 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
425 struct ptldebug_header *hdr;
        /* records are: header | file\0 | fn\0 | message */
430 p += strlen(file) + 1;
        /* ph_len covers the whole record, so this leaves the message size */
433 len = hdr->ph_len - (p - (char *)hdr);
435 print_to_console(hdr, D_EMERG, p, len, file, fn);
438 list_del(&PAGE_LIST(page));
        /* clear the abused ->mapping before releasing the page */
439 page->mapping = NULL;
/* Dump the entire trace buffer (including the daemon's retained pages) to a
 * new file at 'filename'.  O_EXCL means the file must not already exist.
 * Returns 0 on success or a negative errno (error paths are in lines not
 * visible in this chunk -- TODO confirm). */
444 int tracefile_dump_all_pages(char *filename)
446 struct page_collection pc;
448 struct list_head *pos, *tmp;
        /* exclude the daemon thread from writing concurrently */
452 down_write(&tracefile_sem);
454 filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
457 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
462 spin_lock_init(&pc.pc_lock);
        /* also drain the per-CPU daemon rings into the collection */
463 pc.pc_want_daemon_pages = 1;
465 if (list_empty(&pc.pc_pages)) {
470 /* ok, for now, just write the pages. in the future we'll be building
471 * iobufs with the pages and calling generic_direct_IO */
474 list_for_each_safe(pos, tmp, &pc.pc_pages) {
477 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
478 LASSERT(page->index <= PAGE_SIZE);
479 LASSERT(page_count(page) > 0);
        /* page->index is the number of bytes used in the page */
481 rc = filp->f_op->write(filp, page_address(page), page->index,
483 if (rc != page->index) {
484 printk(KERN_WARNING "wanted to write %lu but wrote "
485 "%d\n", page->index, rc);
489 list_del(&PAGE_LIST(page));
490 page->mapping = NULL;
        /* make sure the dump reaches disk before we return */
494 rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
496 printk(KERN_ERR "sync returns %d\n", rc);
500 up_write(&tracefile_sem);
/* Discard all buffered trace pages on every CPU: collect them into a local
 * collection and release each page without writing it anywhere. */
504 void trace_flush_pages(void)
506 struct page_collection pc;
507 struct list_head *pos, *tmp;
509 spin_lock_init(&pc.pc_lock);
512 list_for_each_safe(pos, tmp, &pc.pc_pages) {
515 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
516 LASSERT(page->index <= PAGE_SIZE);
517 LASSERT(page_count(page) > 0);
519 list_del(&PAGE_LIST(page));
        /* clear the abused ->mapping before releasing the page */
520 page->mapping = NULL;
/* /proc write handler: copy a user-supplied absolute path and dump the
 * whole trace buffer to it via tracefile_dump_all_pages(). */
525 int trace_dk(struct file *file, const char *buffer, unsigned long count,
532 name = kmalloc(count + 1, GFP_KERNEL);
536 if (copy_from_user(name, buffer, count)) {
        /* require an absolute path for the dump file */
541 if (name[0] != '/') {
546 /* be nice and strip out trailing '\n' */
        /* NOTE(review): the 'off > 2' bound looks odd -- it leaves the first
         * two characters unstripped; presumably harmless for real paths but
         * worth confirming against the missing loop body */
547 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
551 rc = tracefile_dump_all_pages(name);
557 EXPORT_SYMBOL(trace_dk);
/* Body of the "ktracefiled" kernel thread: wake up about once a second (or
 * when woken by trace_get_page), collect buffered pages from all CPUs,
 * append them to 'tracefile' if one is configured, then recycle the pages
 * onto the per-CPU daemon rings so an LBUG dump still has recent history.
 * Exits when tctl_shutdown is set, completing tctl_stop. */
559 static int tracefiled(void *arg)
561 struct page_collection pc;
562 struct tracefiled_ctl *tctl = arg;
563 struct list_head *pos, *tmp;
564 struct ptldebug_header *hdr;
570 /* we're started late enough that we pick up init's fs context */
571 /* this is so broken in uml? what on earth is going on? */
572 kportal_daemonize("ktracefiled");
575 spin_lock_init(&pc.pc_lock);
        /* tell trace_start_thread() we are up */
576 complete(&tctl->tctl_start);
        /* sleep up to 1s, or until trace_get_page wakes us */
581 init_waitqueue_entry(&__wait, current);
582 add_wait_queue(&tctl->tctl_waitq, &__wait);
583 set_current_state(TASK_INTERRUPTIBLE);
584 schedule_timeout(HZ);
585 remove_wait_queue(&tctl->tctl_waitq, &__wait);
587 if (atomic_read(&tctl->tctl_shutdown))
        /* only the live rings; daemon pages are already written out */
590 pc.pc_want_daemon_pages = 0;
592 if (list_empty(&pc.pc_pages))
        /* open the configured output file (if any) under the read lock */
596 down_read(&tracefile_sem);
597 if (tracefile != NULL) {
598 filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE,
601 printk("couldn't open %s: %ld\n", tracefile,
606 up_read(&tracefile_sem);
        /* no output file: keep the pages on the daemon rings and retry */
608 put_pages_on_daemon_list(&pc);
615 /* mark the first header, so we can sort in chunks */
616 page = list_entry(pc.pc_pages.next, struct page,
618 LASSERT(page->index <= PAGE_SIZE);
619 LASSERT(page_count(page) > 0);
621 hdr = page_address(page);
622 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
624 list_for_each_safe(pos, tmp, &pc.pc_pages) {
625 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
626 LASSERT(page->index <= PAGE_SIZE);
627 LASSERT(page_count(page) > 0);
        /* page->index is the number of bytes used in the page */
629 rc = filp->f_op->write(filp, page_address(page),
630 page->index, &filp->f_pos);
631 if (rc != page->index) {
632 printk(KERN_WARNING "wanted to write %lu but "
633 "wrote %d\n", page->index, rc);
        /* retain written pages on the daemon rings for LBUG dumps */
640 put_pages_on_daemon_list(&pc);
        /* tell trace_stop_thread() we are done */
642 complete(&tctl->tctl_stop);
/* Start the ktracefiled daemon thread if it is not already running.
 * Serialized by trace_thread_sem; waits for the thread's start handshake
 * before returning.  Returns 0 on success (error paths are in lines not
 * visible in this chunk -- TODO confirm). */
646 int trace_start_thread(void)
648 struct tracefiled_ctl *tctl = &trace_tctl;
651 down(&trace_thread_sem);
655 init_completion(&tctl->tctl_start);
656 init_completion(&tctl->tctl_stop);
657 init_waitqueue_head(&tctl->tctl_waitq);
658 atomic_set(&tctl->tctl_shutdown, 0);
660 if (kernel_thread(tracefiled, tctl, 0) < 0) {
        /* wait until the thread has called complete(&tctl->tctl_start) */
665 wait_for_completion(&tctl->tctl_start);
668 up(&trace_thread_sem);
/* Stop the ktracefiled daemon thread if it is running: raise the shutdown
 * flag and wait for the thread's exit handshake.  Serialized by
 * trace_thread_sem. */
672 void trace_stop_thread(void)
674 struct tracefiled_ctl *tctl = &trace_tctl;
676 down(&trace_thread_sem);
677 if (thread_running) {
678 printk(KERN_INFO "Shutting down debug daemon thread...\n");
679 atomic_set(&tctl->tctl_shutdown, 1);
680 wait_for_completion(&tctl->tctl_stop);
683 up(&trace_thread_sem);
/* /proc write handler for the daemon output file: "stop" shuts the daemon
 * down; an absolute path sets 'tracefile' (replacing any previous value,
 * presumably freeing it in the missing lines -- TODO confirm) and starts
 * the daemon thread. */
686 int trace_write_daemon_file(struct file *file, const char *buffer,
687 unsigned long count, void *data)
693 name = kmalloc(count + 1, GFP_KERNEL);
697 if (copy_from_user(name, buffer, count)) {
702 /* be nice and strip out trailing '\n' */
        /* NOTE(review): same suspicious 'off > 2' bound as in trace_dk() */
703 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
        /* 'tracefile' is written under the write lock */
708 down_write(&tracefile_sem);
709 if (strcmp(name, "stop") == 0) {
        /* anything other than "stop" must be an absolute path */
715 if (name[0] != '/') {
720 if (tracefile != NULL)
725 trace_start_thread();
728 up_write(&tracefile_sem);
/* /proc read handler: report the current daemon output path.
 * NOTE(review): 'tracefile' can be NULL when no daemon file is set; the
 * visible lines pass it straight to snprintf("%s") -- presumably a guard
 * exists in the lines not shown, TODO confirm. */
736 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
737 int *eof, void *data)
741 down_read(&tracefile_sem);
742 rc = snprintf(page, count, "%s", tracefile);
743 up_read(&tracefile_sem);
/* /proc write handler: set the per-CPU trace buffer size (in pages).
 * Parses a number from the user buffer, refuses values that would consume
 * more than 80% of physical memory across CPUs, and applies the new limit
 * to every CPU's tcd_max_pages. */
748 int trace_write_debug_size(struct file *file, const char *buffer,
749 unsigned long count, void *data)
754 string = kmalloc(count + 1, GFP_KERNEL);
758 if (copy_from_user(string, buffer, count)) {
763 max = simple_strtoul(string, NULL, 0);
        /* cap the aggregate buffer at 80% of physical pages */
770 if (max > num_physpages / 5 * 4) {
771 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
772 "%d pages, which is more than 80%% of physical pages "
773 "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
        /* apply to all possible CPUs, not just online ones */
776 for (i = 0; i < NR_CPUS; i++) {
777 struct trace_cpu_data *tcd;
778 tcd = &trace_data[i].tcd;
779 tcd->tcd_max_pages = max;
/* /proc read handler: report the current CPU's tcd_max_pages limit (all
 * CPUs share the same value, set by trace_write_debug_size). */
786 int trace_read_debug_size(char *page, char **start, off_t off, int count,
787 int *eof, void *data)
789 struct trace_cpu_data *tcd;
793 tcd = trace_get_tcd(flags);
794 rc = snprintf(page, count, "%lu", tcd->tcd_max_pages);
795 trace_put_tcd(tcd, flags);
/* Module init: reset every CPU's trace buffer state to empty with the
 * default page budget.  Pages themselves are allocated lazily by
 * trace_get_page(). */
800 int tracefile_init(void)
802 struct trace_cpu_data *tcd;
805 for (i = 0; i < NR_CPUS; i++) {
806 tcd = &trace_data[i].tcd;
807 INIT_LIST_HEAD(&tcd->tcd_pages);
808 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
809 tcd->tcd_cur_pages = 0;
810 tcd->tcd_cur_daemon_pages = 0;
811 tcd->tcd_max_pages = TCD_MAX_PAGES;
812 tcd->tcd_shutting_down = 0;
/* Per-CPU half of trace_cleanup(): mark the buffer as shutting down (so
 * portals_debug_msg drops new records) and release every buffered page. */
817 static void trace_cleanup_on_cpu(void *info)
819 struct trace_cpu_data *tcd;
820 struct list_head *pos, *tmp;
823 tcd = trace_get_tcd(flags);
        /* stop new records before tearing the page list down */
825 tcd->tcd_shutting_down = 1;
827 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
830 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
831 LASSERT(page->index <= PAGE_SIZE);
832 LASSERT(page_count(page) > 0);
834 list_del(&PAGE_LIST(page));
        /* clear the abused ->mapping before releasing the page */
835 page->mapping = NULL;
838 tcd->tcd_cur_pages = 0;
840 trace_put_tcd(tcd, flags);
/* Tear down tracing on all CPUs: run the cleanup locally, then on every
 * other CPU via smp_call_function (wait=1). */
843 static void trace_cleanup(void)
845 struct page_collection pc;
847 INIT_LIST_HEAD(&pc.pc_pages);
848 spin_lock_init(&pc.pc_lock);
850 trace_cleanup_on_cpu(&pc);
851 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
854 void tracefile_exit(void)