1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/rwsem.h>
28 #include <linux/proc_fs.h>
29 #include <linux/file.h>
30 #include <linux/smp.h>
31 #include <linux/ctype.h>
32 #include <asm/uaccess.h>
34 #include <linux/mm_inline.h>
37 #define DEBUG_SUBSYSTEM S_PORTALS
39 #include <linux/kp30.h>
40 #include <linux/portals_compat25.h>
41 #include <linux/libcfs.h>
43 #define TCD_MAX_PAGES 1280
45 /* XXX move things up to the top, comment */
/*
 * Per-CPU tracing state: a ring of pages holding formatted debug
 * records (tcd_pages), plus a second list reserved for the debug
 * daemon (tcd_daemon_pages).  Padded out to a cache line so that
 * adjacent CPUs' entries do not false-share.
 * NOTE(review): this listing elides some original lines; additional
 * fields may exist that are not visible here.
 */
48 struct trace_cpu_data {
49 struct list_head tcd_pages; /* ring of debug pages for this CPU */
50 unsigned long tcd_cur_pages; /* number of pages currently on tcd_pages */
52 struct list_head tcd_daemon_pages; /* pages kept aside for the daemon */
53 unsigned long tcd_cur_daemon_pages; /* count of pages on tcd_daemon_pages */
55 unsigned long tcd_max_pages; /* cap on tcd_cur_pages (see TCD_MAX_PAGES) */
56 int tcd_shutting_down; /* set at cleanup; new messages are dropped */
58 char __pad[SMP_CACHE_BYTES]; /* keep each entry on its own cache line */
59 } trace_data[NR_CPUS] __cacheline_aligned;
/*
 * A transient collection of trace pages gathered from the per-CPU
 * buffers (for dumping, flushing, or handing to the daemon).
 * NOTE(review): callers below take a pc_lock spinlock on this struct;
 * that field is elided from this listing — confirm against the full
 * source.
 */
61 struct page_collection {
62 struct list_head pc_pages; /* pages pulled off the per-CPU lists */
64 int pc_want_daemon_pages; /* when set, also collect tcd_daemon_pages */
/*
 * Control block for the "tracefiled" writer thread: start/stop
 * handshakes, a wait queue that message producers kick when pages
 * accumulate, and an atomic shutdown request flag.
 */
67 struct tracefiled_ctl {
68 struct completion tctl_start; /* completed once the thread is running */
69 struct completion tctl_stop; /* completed when the thread exits */
70 wait_queue_head_t tctl_waitq; /* woken when pages are ready to write */
72 atomic_t tctl_shutdown; /* nonzero asks the thread to terminate */
/* Protects the tracefile name and serializes access to the output file. */
75 static DECLARE_RWSEM(tracefile_sem);
76 static char *tracefile = NULL; /* daemon output path, or NULL if disabled */
77 static struct tracefiled_ctl trace_tctl; /* singleton daemon control block */
78 static DECLARE_MUTEX(trace_thread_sem); /* serializes thread start/stop */
79 static int thread_running = 0; /* daemon state; guarded by trace_thread_sem */
/* Compatibility stubs for kernels that lack get_cpu()/put_cpu();
 * preemption is assumed off (or handled elsewhere) in that case. */
82 #define get_cpu() smp_processor_id()
83 #define put_cpu() do { } while (0)
/*
 * Pin the calling CPU's trace_cpu_data: disable local interrupts
 * (saving state into FLAGS) and return &trace_data[cpu].tcd.  Must be
 * paired with trace_put_tcd().  NOTE(review): both macro bodies are
 * truncated in this listing — the closing of the statement expression
 * and of the do/while are elided.
 */
86 #define trace_get_tcd(FLAGS) ({ \
87 struct trace_cpu_data *__ret; \
88 int __cpu = get_cpu(); \
89 local_irq_save(FLAGS); \
90 __ret = &trace_data[__cpu].tcd; \
/* Undo trace_get_tcd(): restore the saved interrupt state (and,
 * presumably, drop the CPU reference in the elided remainder). */
94 #define trace_put_tcd(TCD, FLAGS) do { \
95 local_irq_restore(FLAGS); \
99 static void put_pages_on_daemon_list_on_cpu(void *info);
101 /* return a page that has 'len' bytes left at the end */
/*
 * Find (or allocate) a trace page on this CPU's ring with at least
 * 'len' bytes free after page->index.  page->index tracks how much of
 * the page is already used; page->mapping is abused to remember the
 * owning CPU.  When the ring is full, either 10% of pages are shunted
 * to the daemon list (if the daemon runs) or the oldest page is
 * recycled.  Caller must hold the tcd via trace_get_tcd() — interrupts
 * are off, hence GFP_ATOMIC below.
 * NOTE(review): several lines (bounds checks, returns, closing braces)
 * are elided in this listing.
 */
102 static struct page *trace_get_page(struct trace_cpu_data *tcd,
105 struct page *page = NULL;
107 if (len > PAGE_SIZE) {
/* A single record can never exceed one page. */
108 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
/* Fast path: does the most recently used page still have room? */
113 if (!list_empty(&tcd->tcd_pages)) {
114 page = list_entry(tcd->tcd_pages.prev, struct page,
116 if (page->index + len <= PAGE_SIZE)
/* Grow the ring while under the per-CPU page budget. */
120 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
121 page = alloc_page(GFP_ATOMIC);
123 /* the kernel should print a message for us. fall back
124 * to using the last page in the ring buffer. */
/* Tag the page with its owning CPU so cross-CPU splices can filter. */
129 page->mapping = (void *)(long)smp_processor_id();
130 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
131 tcd->tcd_cur_pages++;
/* Nudge the daemon once a few pages have accumulated. */
133 if (tcd->tcd_cur_pages > 8 && thread_running) {
134 struct tracefiled_ctl *tctl = &trace_tctl;
135 wake_up(&tctl->tctl_waitq);
/* Ring full: if the daemon runs, move ~10% of pages onto its list. */
141 if (thread_running) {
142 int pgcount = tcd->tcd_cur_pages / 10;
143 struct page_collection pc;
144 struct list_head *pos, *tmp;
145 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
146 " 10%% of pages (%d)\n", pgcount + 1);
148 INIT_LIST_HEAD(&pc.pc_pages);
149 spin_lock_init(&pc.pc_lock);
151 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
157 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
158 list_del(&PAGE_LIST(page));
159 list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
160 tcd->tcd_cur_pages--;
162 put_pages_on_daemon_list_on_cpu(&pc);
164 LASSERT(!list_empty(&tcd->tcd_pages));
/* No daemon: recycle the oldest page by moving it to the tail. */
166 page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
169 list_del(&PAGE_LIST(page));
170 list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
/*
 * Echo one debug record to the kernel log.  The prefix and printk
 * level are chosen from the message mask: D_EMERG and D_ERROR both
 * print as "LustreError", D_WARNING at KERN_WARNING, and anything else
 * only when portal_printk is enabled.  NOTE(review): the ptype
 * assignments for the EMERG/ERROR branches and the final else body are
 * elided in this listing.
 */
174 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
175 int len, char *file, const char *fn)
177 char *prefix = NULL, *ptype = NULL;
179 if ((mask & D_EMERG) != 0) {
180 prefix = "LustreError";
182 } else if ((mask & D_ERROR) != 0) {
183 prefix = "LustreError";
185 } else if ((mask & D_WARNING) != 0) {
187 ptype = KERN_WARNING;
188 } else if (portal_printk) {
/* pid:extern_pid:(file:line:function()) followed by the message text */
193 printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
194 hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
/*
 * Core entry point for CDEBUG-style messages: format a record
 * (binary ptldebug_header, then NUL-terminated file and function
 * names, then the vsnprintf'd message) into the current CPU's trace
 * pages, and optionally echo serious messages to the console.
 * Runs with local interrupts disabled between trace_get_tcd() and
 * trace_put_tcd().  NOTE(review): several lines (va_end, the retry
 * vsnprintf after overflow, early-exit paths) are elided here.
 */
197 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
198 const int line, unsigned long stack, char *format, ...)
200 struct trace_cpu_data *tcd;
201 struct ptldebug_header header;
204 int known_size, needed, max_nob;
/* Warn developers about format strings missing a trailing newline. */
209 if (*(format + strlen(format) - 1) != '\n')
210 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
213 tcd = trace_get_tcd(flags);
214 if (tcd->tcd_shutting_down)
217 do_gettimeofday(&tv);
/* Fill in the fixed-size record header. */
219 header.ph_subsys = subsys;
220 header.ph_mask = mask;
221 header.ph_cpu_id = smp_processor_id();
222 header.ph_sec = (__u32)tv.tv_sec;
223 header.ph_usec = tv.tv_usec;
224 header.ph_stack = stack;
225 header.ph_pid = current->pid;
226 header.ph_line_num = line;
/* UML host pid lives in different places across kernel versions. */
228 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
229 header.ph_extern_pid = current->thread.extern_pid;
230 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
231 header.ph_extern_pid = current->thread.mode.tt.extern_pid;
233 header.ph_extern_pid = 0;
236 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
/* Reserve room for header + names + a guess at the message length. */
238 page = trace_get_page(tcd, known_size + 40); /* slop */
243 debug_buf = page_address(page) + page->index + known_size;
245 va_start(ap, format);
246 max_nob = PAGE_SIZE - page->index - known_size;
247 LASSERT(max_nob > 0);
248 needed = vsnprintf(debug_buf, max_nob, format, ap);
251 if (needed > max_nob) {
252 /* overflow. oh poop. */
/* Get a page with enough room and (in elided code) reformat into it. */
253 page = trace_get_page(tcd, needed + known_size);
257 header.ph_len = known_size + needed;
258 debug_buf = page_address(page) + page->index;
/* Lay the record down: header, file name, function name, message. */
260 memcpy(debug_buf, &header, sizeof(header));
261 page->index += sizeof(header);
262 debug_buf += sizeof(header);
264 strcpy(debug_buf, file);
265 page->index += strlen(file) + 1;
266 debug_buf += strlen(file) + 1;
268 strcpy(debug_buf, fn);
269 page->index += strlen(fn) + 1;
270 debug_buf += strlen(fn) + 1;
272 page->index += needed;
273 if (page->index > PAGE_SIZE)
274 printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
/* Serious messages (or portal_printk mode) also go to the console. */
277 if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
278 print_to_console(&header, mask, debug_buf, needed, file, fn);
281 trace_put_tcd(tcd, flags);
283 EXPORT_SYMBOL(portals_debug_msg);
/*
 * Splice this CPU's trace pages (and, when pc_want_daemon_pages is
 * set, its daemon pages too) onto the shared collection, leaving the
 * per-CPU lists empty.  Runs on each CPU via smp_call_function() from
 * collect_pages(); pc_lock guards the shared pc_pages list.
 */
285 static void collect_pages_on_cpu(void *info)
287 struct trace_cpu_data *tcd;
289 struct page_collection *pc = info;
291 tcd = trace_get_tcd(flags);
293 spin_lock(&pc->pc_lock);
294 list_splice(&tcd->tcd_pages, &pc->pc_pages);
295 INIT_LIST_HEAD(&tcd->tcd_pages);
296 tcd->tcd_cur_pages = 0;
297 if (pc->pc_want_daemon_pages) {
298 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
299 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
300 tcd->tcd_cur_daemon_pages = 0;
302 spin_unlock(&pc->pc_lock);
304 trace_put_tcd(tcd, flags);
/*
 * Gather trace pages from every CPU into pc->pc_pages: run the
 * collector locally, then on all other CPUs via IPI (wait == 1).
 */
307 static void collect_pages(struct page_collection *pc)
309 /* needs to be fixed up for preempt */
310 INIT_LIST_HEAD(&pc->pc_pages);
311 collect_pages_on_cpu(pc);
312 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/*
 * Return collected pages to their owning CPU's ring.  Each CPU scans
 * the shared collection and reclaims only pages whose ->mapping tag
 * matches its own id, re-inserting them ahead of the pages it still
 * holds (cur_head) to roughly preserve record order.
 */
315 static void put_pages_back_on_cpu(void *info)
317 struct page_collection *pc = info;
318 struct trace_cpu_data *tcd;
319 struct list_head *pos, *tmp, *cur_head;
322 tcd = trace_get_tcd(flags);
/* Remember where this CPU's remaining pages start before inserting. */
324 cur_head = tcd->tcd_pages.next;
326 spin_lock(&pc->pc_lock);
327 list_for_each_safe(pos, tmp, &pc->pc_pages) {
330 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
331 LASSERT(page->index <= PAGE_SIZE);
332 LASSERT(page_count(page) > 0);
/* Skip pages that belong to a different CPU. */
334 if ((unsigned long)page->mapping != smp_processor_id())
337 list_del(&PAGE_LIST(page));
338 list_add_tail(&PAGE_LIST(page), cur_head);
339 tcd->tcd_cur_pages++;
341 spin_unlock(&pc->pc_lock);
343 trace_put_tcd(tcd, flags);
/*
 * Hand collected pages back to all CPUs: locally first, then on every
 * other CPU via IPI (wait == 1).
 */
346 static void put_pages_back(struct page_collection *pc)
348 /* needs to be fixed up for preempt */
349 put_pages_back_on_cpu(pc);
350 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
353 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
354 * we have a good amount of data at all times for dumping during an LBUG, even
355 * if we have been steadily writing (and otherwise discarding) pages via the
/*
 * Per-CPU half of put_pages_on_daemon_list(): each CPU claims its own
 * pages (matched by the ->mapping CPU tag) from the shared collection
 * and appends them to tcd_daemon_pages, freeing the oldest entries
 * once the list exceeds tcd_max_pages.  NOTE(review): the
 * __free_page() call for the evicted page is elided in this listing.
 */
357 static void put_pages_on_daemon_list_on_cpu(void *info)
359 struct page_collection *pc = info;
360 struct trace_cpu_data *tcd;
361 struct list_head *pos, *tmp;
364 tcd = trace_get_tcd(flags);
366 spin_lock(&pc->pc_lock);
367 list_for_each_safe(pos, tmp, &pc->pc_pages) {
370 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
371 LASSERT(page->index <= PAGE_SIZE);
372 LASSERT(page_count(page) > 0);
/* Only take pages this CPU originally produced. */
373 if ((unsigned long)page->mapping != smp_processor_id())
376 list_del(&PAGE_LIST(page));
377 list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
378 tcd->tcd_cur_daemon_pages++;
/* Ring behavior: evict the oldest daemon page when over budget. */
380 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
381 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
382 page = list_entry(tcd->tcd_daemon_pages.next,
383 struct page, PAGE_LIST_ENTRY);
385 LASSERT(page->index <= PAGE_SIZE);
386 LASSERT(page_count(page) > 0);
389 list_del(&PAGE_LIST(page));
390 page->mapping = NULL;
392 tcd->tcd_cur_daemon_pages--;
395 spin_unlock(&pc->pc_lock);
397 trace_put_tcd(tcd, flags);
/*
 * Distribute collected pages onto every CPU's daemon ringbuffer:
 * locally first, then via IPI on the other CPUs (wait == 1).
 */
400 static void put_pages_on_daemon_list(struct page_collection *pc)
402 put_pages_on_daemon_list_on_cpu(pc);
403 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/*
 * Dump every buffered trace record to the console (used at panic/LBUG
 * time).  Collects all pages, walks each page's packed records
 * (header, file name, function name, message), prints them at D_EMERG
 * priority, then releases the pages.  NOTE(review): the collect_pages()
 * call, the hdr/file/fn extraction lines, and the page free are elided
 * in this listing.
 */
406 void trace_debug_print(void)
408 struct page_collection pc;
409 struct list_head *pos, *tmp;
411 spin_lock_init(&pc.pc_lock);
414 list_for_each_safe(pos, tmp, &pc.pc_pages) {
418 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
419 LASSERT(page->index <= PAGE_SIZE);
420 LASSERT(page_count(page) > 0);
422 p = page_address(page);
/* Walk the records packed back-to-back within this page. */
423 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
424 struct ptldebug_header *hdr;
429 p += strlen(file) + 1;
/* Remaining bytes of this record are the message text. */
432 len = hdr->ph_len - (p - (char *)hdr);
434 print_to_console(hdr, D_EMERG, p, len, file, fn);
437 list_del(&PAGE_LIST(page));
438 page->mapping = NULL;
/*
 * Write every buffered trace page (including the daemon ring) to a
 * new file at 'filename'.  Takes tracefile_sem for writing to exclude
 * the daemon thread.  Returns 0 on success or a negative errno
 * (visible error paths elided in this listing).  O_EXCL means an
 * existing file is refused rather than overwritten.
 */
443 int tracefile_dump_all_pages(char *filename)
445 struct page_collection pc;
447 struct list_head *pos, *tmp;
451 down_write(&tracefile_sem);
453 filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
456 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
461 spin_lock_init(&pc.pc_lock);
462 pc.pc_want_daemon_pages = 1; /* include the daemon's reserve pages */
464 if (list_empty(&pc.pc_pages)) {
469 /* ok, for now, just write the pages. in the future we'll be building
470 * iobufs with the pages and calling generic_direct_IO */
473 list_for_each_safe(pos, tmp, &pc.pc_pages) {
476 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
477 LASSERT(page->index <= PAGE_SIZE);
478 LASSERT(page_count(page) > 0);
/* page->index is the number of used bytes in the page. */
480 rc = filp->f_op->write(filp, page_address(page), page->index,
482 if (rc != page->index) {
483 printk(KERN_WARNING "wanted to write %lu but wrote "
484 "%d\n", page->index, rc);
488 list_del(&PAGE_LIST(page));
489 page->mapping = NULL;
/* Flush data to disk before reporting success. */
493 rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
495 printk(KERN_ERR "sync returns %d\n", rc);
499 up_write(&tracefile_sem);
/*
 * Discard all buffered trace pages from every CPU without writing
 * them anywhere.  NOTE(review): the collect_pages() call and the
 * __free_page() for each page are elided in this listing.
 */
503 void trace_flush_pages(void)
505 struct page_collection pc;
506 struct list_head *pos, *tmp;
508 spin_lock_init(&pc.pc_lock);
511 list_for_each_safe(pos, tmp, &pc.pc_pages) {
514 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
515 LASSERT(page->index <= PAGE_SIZE);
516 LASSERT(page_count(page) > 0);
518 list_del(&PAGE_LIST(page));
519 page->mapping = NULL;
524 int trace_dk(struct file *file, const char *buffer, unsigned long count,
531 name = kmalloc(count + 1, GFP_KERNEL);
535 if (copy_from_user(name, buffer, count)) {
540 if (name[0] != '/') {
545 /* be nice and strip out trailing '\n' */
546 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
550 rc = tracefile_dump_all_pages(name);
556 EXPORT_SYMBOL(trace_dk);
/*
 * The debug-daemon kernel thread.  Loop: sleep up to a second (or
 * until woken by trace_get_page), collect the per-CPU pages, and if a
 * tracefile is configured append them to it; pages are then recycled
 * onto the daemon ring via put_pages_on_daemon_list().  Exits when
 * tctl_shutdown is set, completing tctl_stop.  NOTE(review): the main
 * loop's braces, the filp error/cleanup paths, and filp_close are
 * elided in this listing.
 */
558 static int tracefiled(void *arg)
560 struct page_collection pc;
561 struct tracefiled_ctl *tctl = arg;
562 struct list_head *pos, *tmp;
563 struct ptldebug_header *hdr;
569 /* we're started late enough that we pick up init's fs context */
570 /* this is so broken in uml? what on earth is going on? */
571 kportal_daemonize("ktracefiled");
574 spin_lock_init(&pc.pc_lock);
575 complete(&tctl->tctl_start); /* unblock trace_start_thread() */
/* Interruptible 1s sleep, cut short by wake_up on tctl_waitq. */
580 init_waitqueue_entry(&__wait, current);
581 add_wait_queue(&tctl->tctl_waitq, &__wait);
582 set_current_state(TASK_INTERRUPTIBLE);
583 schedule_timeout(HZ);
584 remove_wait_queue(&tctl->tctl_waitq, &__wait);
586 if (atomic_read(&tctl->tctl_shutdown))
589 pc.pc_want_daemon_pages = 0; /* leave the daemon ring alone here */
591 if (list_empty(&pc.pc_pages))
/* Open the configured trace file under the read side of the sem. */
595 down_read(&tracefile_sem);
596 if (tracefile != NULL) {
597 filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE,
600 printk("couldn't open %s: %ld\n", tracefile,
605 up_read(&tracefile_sem);
/* No output file: park the pages on the daemon ring instead. */
607 put_pages_on_daemon_list(&pc);
614 /* mark the first header, so we can sort in chunks */
615 page = list_entry(pc.pc_pages.next, struct page,
617 LASSERT(page->index <= PAGE_SIZE);
618 LASSERT(page_count(page) > 0);
620 hdr = page_address(page);
621 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
623 list_for_each_safe(pos, tmp, &pc.pc_pages) {
624 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
625 LASSERT(page->index <= PAGE_SIZE);
626 LASSERT(page_count(page) > 0);
628 rc = filp->f_op->write(filp, page_address(page),
629 page->index, &filp->f_pos);
630 if (rc != page->index) {
631 printk(KERN_WARNING "wanted to write %lu but "
632 "wrote %d\n", page->index, rc);
/* Written pages are recycled onto the daemon ring for LBUG dumps. */
639 put_pages_on_daemon_list(&pc);
641 complete(&tctl->tctl_stop);
/*
 * Start the tracefiled daemon thread (idempotent: the elided check at
 * the top presumably returns early if thread_running is already set).
 * Initializes the control block, spawns the thread, and waits for its
 * startup handshake.  Serialized by trace_thread_sem.
 */
645 int trace_start_thread(void)
647 struct tracefiled_ctl *tctl = &trace_tctl;
650 down(&trace_thread_sem);
654 init_completion(&tctl->tctl_start);
655 init_completion(&tctl->tctl_stop);
656 init_waitqueue_head(&tctl->tctl_waitq);
657 atomic_set(&tctl->tctl_shutdown, 0);
659 if (kernel_thread(tracefiled, tctl, 0) < 0) {
/* Don't return until the thread has signalled it is running. */
664 wait_for_completion(&tctl->tctl_start);
667 up(&trace_thread_sem);
/*
 * Stop the tracefiled daemon: request shutdown via tctl_shutdown and
 * block until the thread completes tctl_stop.  No-op when the thread
 * is not running.  Serialized by trace_thread_sem.
 */
671 void trace_stop_thread(void)
673 struct tracefiled_ctl *tctl = &trace_tctl;
675 down(&trace_thread_sem);
676 if (thread_running) {
677 printk(KERN_INFO "Shutting down debug daemon thread...\n");
678 atomic_set(&tctl->tctl_shutdown, 1);
679 wait_for_completion(&tctl->tctl_stop);
682 up(&trace_thread_sem);
/*
 * /proc write handler for the daemon file name.  Accepts either the
 * literal "stop" (shut the daemon down, in elided code) or an absolute
 * path, which replaces 'tracefile' and (re)starts the daemon thread.
 * NOTE(review): allocation-failure handling, NUL termination, the old
 * tracefile kfree, and the return are elided in this listing.
 */
685 int trace_write_daemon_file(struct file *file, const char *buffer,
686 unsigned long count, void *data)
692 name = kmalloc(count + 1, GFP_KERNEL);
696 if (copy_from_user(name, buffer, count)) {
701 /* be nice and strip out trailing '\n' */
702 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
707 down_write(&tracefile_sem);
708 if (strcmp(name, "stop") == 0) {
/* Anything other than "stop" must be an absolute path. */
714 if (name[0] != '/') {
/* Free the previous tracefile name before installing the new one. */
719 if (tracefile != NULL)
724 trace_start_thread();
727 up_write(&tracefile_sem);
/*
 * /proc read handler: report the current daemon output path.
 * NOTE(review): snprintf with a NULL 'tracefile' would be unsafe; the
 * elided lines presumably guard that case — confirm in full source.
 */
735 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
736 int *eof, void *data)
740 down_read(&tracefile_sem);
741 rc = snprintf(page, count, "%s", tracefile);
742 up_read(&tracefile_sem);
/*
 * /proc write handler: set the per-CPU trace buffer size (in pages).
 * Parses an integer from userspace and refuses values whose total
 * (max * num CPUs) would exceed 80% of physical memory, then applies
 * the new cap to every CPU's tcd_max_pages.  NOTE(review): the
 * minimum-value check and kfree/return paths are elided here.
 */
747 int trace_write_debug_size(struct file *file, const char *buffer,
748 unsigned long count, void *data)
753 string = kmalloc(count + 1, GFP_KERNEL);
757 if (copy_from_user(string, buffer, count)) {
762 max = simple_strtoul(string, NULL, 0);
/* Sanity cap: do not let trace buffers eat most of RAM. */
769 if (max > num_physpages / 5 * 4) {
770 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
771 "%d pages, which is more than 80%% of physical pages "
772 "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
776 for (i = 0; i < NR_CPUS; i++) {
777 struct trace_cpu_data *tcd;
778 tcd = &trace_data[i].tcd;
779 tcd->tcd_max_pages = max;
/*
 * /proc read handler: report the current per-CPU page cap
 * (tcd_max_pages), read from the calling CPU's trace data.
 */
786 int trace_read_debug_size(char *page, char **start, off_t off, int count,
787 int *eof, void *data)
789 struct trace_cpu_data *tcd;
793 tcd = trace_get_tcd(flags);
794 rc = snprintf(page, count, "%lu", tcd->tcd_max_pages);
795 trace_put_tcd(tcd, flags);
/*
 * Module init: reset every CPU's trace_cpu_data to an empty state
 * with the default page budget.  Pages themselves are allocated
 * lazily by trace_get_page().
 */
800 int tracefile_init(void)
802 struct trace_cpu_data *tcd;
805 for (i = 0; i < NR_CPUS; i++) {
806 tcd = &trace_data[i].tcd;
807 INIT_LIST_HEAD(&tcd->tcd_pages);
808 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
809 tcd->tcd_cur_pages = 0;
810 tcd->tcd_cur_daemon_pages = 0;
811 tcd->tcd_max_pages = TCD_MAX_PAGES; /* default budget per CPU */
812 tcd->tcd_shutting_down = 0;
/*
 * Per-CPU teardown: mark the buffer as shutting down (so
 * portals_debug_msg drops new records) and release every page on the
 * local ring.  NOTE(review): the __free_page() call for each page is
 * elided in this listing.
 */
817 static void trace_cleanup_on_cpu(void *info)
819 struct trace_cpu_data *tcd;
820 struct list_head *pos, *tmp;
823 tcd = trace_get_tcd(flags);
825 tcd->tcd_shutting_down = 1; /* refuse new messages from here on */
827 list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
830 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
831 LASSERT(page->index <= PAGE_SIZE);
832 LASSERT(page_count(page) > 0);
834 list_del(&PAGE_LIST(page));
835 page->mapping = NULL;
838 tcd->tcd_cur_pages = 0;
840 trace_put_tcd(tcd, flags);
/*
 * Tear down tracing on all CPUs: run the per-CPU cleanup locally,
 * then on every other CPU via IPI (wait == 1).
 */
843 static void trace_cleanup(void)
845 struct page_collection pc;
847 INIT_LIST_HEAD(&pc.pc_pages);
848 spin_lock_init(&pc.pc_lock);
850 trace_cleanup_on_cpu(&pc);
851 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
854 void tracefile_exit(void)