1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #define DEBUG_SUBSYSTEM S_PORTALS
26 #define LUSTRE_TRACEFILE_PRIVATE
27 #include "tracefile.h"
29 #include <libcfs/kp30.h>
30 #include <libcfs/libcfs.h>
32 /* XXX move things up to the top, comment */
/* Per-CPU trace buffers, one slot per possible CPU.  Cacheline-aligned so
 * that CPUs appending to their own buffer do not false-share. */
33 union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;
/* Protects 'tracefile' / 'tracefile_size' (readers: daemon; writers: admin). */
35 struct rw_semaphore tracefile_sem;
/* Path of the file the trace daemon appends to; NULL = daemon discards. */
36 char *tracefile = NULL;
/* Maximum size the trace file may grow to before the daemon wraps f_pos
 * (see tracefiled()); default comes from TRACEFILE_SIZE. */
37 long long tracefile_size = TRACEFILE_SIZE;
/* Control block shared with the tracefiled kernel thread. */
38 static struct tracefiled_ctl trace_tctl;
/* Serializes trace_start_thread()/trace_stop_thread(). */
39 struct semaphore trace_thread_sem;
/* Nonzero while the tracefiled daemon thread is alive; read without a lock
 * on the logging fast path (racy but benign — worst case a missed wakeup). */
40 static int thread_running = 0;
/* Forward declaration: trace_get_tage()'s overflow path hands pages to the
 * daemon list before the function is defined below. */
42 static void put_pages_on_daemon_list_on_cpu(void *info);
/* Map a list_head embedded in a trace_page (->linkage) back to the
 * containing trace_page — container_of-style lookup via list_entry(). */
44 static inline struct trace_page *tage_from_list(struct list_head *list)
46         return list_entry(list, struct trace_page, linkage);
/* Allocate a trace_page descriptor plus the backing page that trace text
 * is written into.  'gfp' is the CFS allocation flag (callers pass
 * CFS_ALLOC_ATOMIC from the logging path, which cannot sleep).
 * NOTE(review): this listing is elided — the failure paths (page or
 * descriptor allocation failing, freeing the page on descriptor failure,
 * return statement) are not visible here; confirm against the full file. */
49 static struct trace_page *tage_alloc(int gfp)
52 struct trace_page *tage;
54 page = cfs_alloc_page(gfp);
56 tage = cfs_alloc(sizeof *tage, gfp);
/* Release a trace_page: free the backing page if one was attached.
 * NOTE(review): the free of the 'tage' descriptor itself is elided from
 * this listing but presumably follows — confirm against the full file. */
65 static void tage_free(struct trace_page *tage)
67 LASSERT(tage != NULL);
69 if (tage->page != NULL)
70 cfs_free_page(tage->page);
/* Move 'tage' to the tail of 'queue'.  Uses list_move_tail, so the page
 * is first unlinked from whatever list it is currently on — callers rely
 * on this when recycling the oldest ring-buffer page back to the tail. */
74 static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
76 LASSERT(tage != NULL);
77 LASSERT(queue != NULL);
79 list_move_tail(&tage->linkage, queue);
/* Sanity predicate asserted throughout this file: the page's fill level
 * must not exceed the page size and the backing page must still be held
 * (positive refcount).  Returns nonzero when the invariant holds. */
82 static int tage_invariant(struct trace_page *tage)
86 tage->used <= CFS_PAGE_SIZE &&
87 cfs_page_count(tage->page) > 0;
90 /* return a page that has 'len' bytes left at the end */
/* Called from the logging fast path (IRQs/preemption already handled by
 * the caller via trace_get_tcd).  Strategy, in order:
 *   1. reuse the current tail page if it has >= len bytes free;
 *   2. allocate a new page (atomic) while under tcd_max_pages, waking the
 *      daemon once more than 8 pages have accumulated;
 *   3. on overflow with the daemon running, discard ~10% of the oldest
 *      pages onto the per-CPU daemon list;
 *   4. otherwise treat tcd_pages as a ring buffer and recycle the oldest
 *      page to the tail.
 * NOTE(review): this listing is elided (returns, brace closures, the
 * reset of the recycled page's 'used' count are not visible) — confirm
 * the exact control flow against the full file. */
91 static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
94 struct trace_page *tage;
/* A single record can never span pages; refuse oversized writes. */
96 if (len > CFS_PAGE_SIZE) {
97 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
/* Case 1: tail page has room. */
102 if (!list_empty(&tcd->tcd_pages)) {
103 tage = tage_from_list(tcd->tcd_pages.prev);
104 if (tage->used + len <= CFS_PAGE_SIZE)
/* Case 2: grow the buffer, atomically (may be called from any context). */
108 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
109 tage = tage_alloc(CFS_ALLOC_ATOMIC);
111 /* the kernel should print a message for us. fall back
112 * to using the last page in the ring buffer. */
116 tage->cpu = smp_processor_id();
117 list_add_tail(&tage->linkage, &tcd->tcd_pages);
118 tcd->tcd_cur_pages++;
/* Nudge the flush daemon once a backlog builds up. */
120 if (tcd->tcd_cur_pages > 8 && thread_running) {
121 struct tracefiled_ctl *tctl = &trace_tctl;
122 cfs_waitq_signal(&tctl->tctl_waitq);
/* Case 3: buffer full but daemon alive — shed the oldest ~10% of pages
 * to the daemon ring so recent history survives. */
128 if (thread_running) {
129 int pgcount = tcd->tcd_cur_pages / 10;
130 struct page_collection pc;
131 struct trace_page *tage;
132 struct trace_page *tmp;
134 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
135 " 10%% of pages (%d)\n", pgcount + 1);
137 CFS_INIT_LIST_HEAD(&pc.pc_pages);
138 spin_lock_init(&pc.pc_lock);
140 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
144 list_move_tail(&tage->linkage, &pc.pc_pages);
145 tcd->tcd_cur_pages--;
147 put_pages_on_daemon_list_on_cpu(&pc);
149 LASSERT(!list_empty(&tcd->tcd_pages));
/* Case 4: no daemon — plain ring buffer; recycle the oldest page. */
151 tage = tage_from_list(tcd->tcd_pages.next);
153 tage_to_tail(tage, &tcd->tcd_pages);
/* Core CDEBUG/CERROR entry point: format a debug record into the current
 * CPU's trace page.  Record layout on the page is:
 *   ptldebug_header | file\0 | fn\0 | formatted message
 * Serious messages (D_EMERG/D_ERROR/D_WARNING, or when portal_printk is
 * set) are additionally echoed to the console.
 * NOTE(review): the listing is elided — the retry when the first
 * vsnprintf estimate (85 bytes) is too small, va_end, early-return paths
 * and IRQ-flag handling around trace_get_tcd are not fully visible. */
158 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
159 const int line, unsigned long stack, char *format, ...)
161 struct trace_cpu_data *tcd;
162 struct ptldebug_header header;
163 struct trace_page *tage;
164 char *debug_buf = format;
/* 'needed' starts as an average-length guess so a page can be reserved
 * before the message is actually formatted. */
165 int known_size, needed = 85 /* average message length */, max_nob;
/* D_PORTALS messages are filtered here unless enabled in portal_debug. */
170 if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
/* Trim __FILE__ to its basename to keep records small. */
173 if (strchr(file, '/'))
174 file = strrchr(file, '/') + 1;
/* Warn developers whose format strings forget the trailing newline. */
176 if (*(format + strlen(format) - 1) != '\n')
177 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
180 tcd = trace_get_tcd(flags);
181 if (tcd->tcd_shutting_down)
184 set_ptldebug_header(&header, subsys, mask, line, stack);
/* Fixed part of the record: header + both strings' NUL terminators. */
185 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
188 tage = trace_get_tage(tcd, needed + known_size);
/* If even the estimate exceeds a page, fall back to the format string's
 * own length as the size estimate. */
191 if (needed + known_size > CFS_PAGE_SIZE)
193 needed = strlen(format);
/* Format the message body first (it lives after header/file/fn). */
197 debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
199 max_nob = CFS_PAGE_SIZE - tage->used - known_size;
200 LASSERT(max_nob > 0);
201 va_start(ap, format);
202 needed = vsnprintf(debug_buf, max_nob, format, ap);
205 if (needed > max_nob) /* overflow. oh poop. */
/* Now lay down header, file and function names before the message. */
208 header.ph_len = known_size + needed;
209 debug_buf = cfs_page_address(tage->page) + tage->used;
211 memcpy(debug_buf, &header, sizeof(header));
212 tage->used += sizeof(header);
213 debug_buf += sizeof(header);
215 strcpy(debug_buf, file);
216 tage->used += strlen(file) + 1;
217 debug_buf += strlen(file) + 1;
219 strcpy(debug_buf, fn);
220 tage->used += strlen(fn) + 1;
221 debug_buf += strlen(fn) + 1;
223 tage->used += needed;
224 if (tage->used > CFS_PAGE_SIZE)
226 "tage->used == %u in portals_debug_msg\n", tage->used);
/* Echo serious messages to the console as well. */
229 if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
230 print_to_console(&header, mask, debug_buf, needed, file, fn);
232 trace_put_tcd(tcd, flags);
234 EXPORT_SYMBOL(portals_debug_msg);
/* Per-CPU half of collect_pages(): splice this CPU's live trace pages
 * (and, if pc_want_daemon_pages is set, its daemon-ring pages too) onto
 * the shared collection list under pc_lock.  Runs on each CPU via
 * smp_call_function, so 'info' is the void* page_collection. */
236 static void collect_pages_on_cpu(void *info)
238 struct trace_cpu_data *tcd;
240 struct page_collection *pc = info;
242 tcd = trace_get_tcd(flags);
244 spin_lock(&pc->pc_lock);
245 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
246 tcd->tcd_cur_pages = 0;
247 if (pc->pc_want_daemon_pages) {
248 list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
249 tcd->tcd_cur_daemon_pages = 0;
251 spin_unlock(&pc->pc_lock);
253 trace_put_tcd(tcd, flags);
/* Gather every CPU's trace pages into pc->pc_pages: run the collector
 * locally, then on all other CPUs (smp_call_function does not include
 * the calling CPU; final '1' waits for completion). */
256 static void collect_pages(struct page_collection *pc)
258 /* needs to be fixed up for preempt */
259 CFS_INIT_LIST_HEAD(&pc->pc_pages);
260 collect_pages_on_cpu(pc);
261 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/* Per-CPU half of put_pages_back(): return collected pages that belong
 * to this CPU (tage->cpu matches) to the head region of its tcd_pages
 * list, preserving their original order by inserting before the first
 * pre-existing page ('cur_head' is sampled before the loop). */
264 static void put_pages_back_on_cpu(void *info)
266 struct page_collection *pc = info;
267 struct trace_cpu_data *tcd;
268 struct list_head *cur_head;
270 struct trace_page *tage;
271 struct trace_page *tmp;
273 tcd = trace_get_tcd(flags);
/* Remember where this CPU's existing pages start, so returned pages
 * are queued ahead of them (they are older). */
275 cur_head = tcd->tcd_pages.next;
277 spin_lock(&pc->pc_lock);
278 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
280 LASSERT(tage_invariant(tage));
/* Skip pages owned by other CPUs; their own IPI handler takes them. */
282 if (tage->cpu != smp_processor_id())
285 tage_to_tail(tage, cur_head);
286 tcd->tcd_cur_pages++;
288 spin_unlock(&pc->pc_lock);
290 trace_put_tcd(tcd, flags);
/* Undo a collect_pages(): hand each page in pc back to the CPU that
 * originally owned it, locally and then on all other CPUs. */
293 static void put_pages_back(struct page_collection *pc)
295 /* needs to be fixed up for preempt */
296 put_pages_back_on_cpu(pc);
297 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
300 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
301 * we have a good amount of data at all times for dumping during an LBUG, even
302 * if we have been steadily writing (and otherwise discarding) pages via the
/* Per-CPU worker: move this CPU's pages from the collection onto its
 * tcd_daemon_pages ring; once the ring exceeds tcd_max_pages, evict the
 * oldest page (head of the list).
 * NOTE(review): the eviction's tage_free()/page release is elided from
 * this listing — only the list_del and counter decrement are visible. */
304 static void put_pages_on_daemon_list_on_cpu(void *info)
306 struct page_collection *pc = info;
307 struct trace_cpu_data *tcd;
308 struct trace_page *tage;
309 struct trace_page *tmp;
312 tcd = trace_get_tcd(flags);
314 spin_lock(&pc->pc_lock);
315 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
317 LASSERT(tage_invariant(tage));
/* Only adopt pages this CPU produced. */
319 if (tage->cpu != smp_processor_id())
322 tage_to_tail(tage, &tcd->tcd_daemon_pages);
323 tcd->tcd_cur_daemon_pages++;
/* Ring full: drop the oldest daemon page. */
325 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
326 struct trace_page *victim;
328 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
329 victim = tage_from_list(tcd->tcd_daemon_pages.next);
331 LASSERT(tage_invariant(victim));
333 list_del(&victim->linkage);
335 tcd->tcd_cur_daemon_pages--;
338 spin_unlock(&pc->pc_lock);
340 trace_put_tcd(tcd, flags);
/* Distribute collected pages onto every CPU's daemon ring: run locally,
 * then on all other CPUs (each CPU claims only its own pages). */
343 static void put_pages_on_daemon_list(struct page_collection *pc)
345 put_pages_on_daemon_list_on_cpu(pc);
346 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/* Dump every buffered trace record to the console at D_EMERG priority —
 * used during an LBUG/panic when no file can be written.  Walks each
 * collected page and decodes the on-page record layout produced by
 * portals_debug_msg (header | file\0 | fn\0 | message).
 * NOTE(review): the collect_pages() call, the hdr/file/fn pointer setup
 * inside the loop, the advance of 'p' past each record, and the page
 * free after list_del are elided from this listing. */
349 void trace_debug_print(void)
351 struct page_collection pc;
352 struct trace_page *tage;
353 struct trace_page *tmp;
355 spin_lock_init(&pc.pc_lock);
358 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
362 LASSERT(tage_invariant(tage));
365 p = cfs_page_address(page);
/* Decode back-to-back records until the end of the page. */
366 while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) {
367 struct ptldebug_header *hdr;
372 p += strlen(file) + 1;
/* Message length = total record length minus header+strings. */
375 len = hdr->ph_len - (p - (char *)hdr);
377 print_to_console(hdr, D_EMERG, p, len, file, fn);
380 list_del(&tage->linkage);
/* Dump all buffered trace pages (including the daemon rings) to a new
 * file at 'filename'.  O_EXCL: refuses to overwrite an existing file.
 * Returns 0 on success or a (presumably negative) error code from open.
 * Serialized against the trace daemon via tracefile_sem.
 * NOTE(review): the collect_pages() call, the put_pages_back() on the
 * empty-list path, the error 'goto' labels and the page free after
 * list_del are elided from this listing. */
385 int tracefile_dump_all_pages(char *filename)
387 struct page_collection pc;
389 struct trace_page *tage;
390 struct trace_page *tmp;
394 down_write(&tracefile_sem);
396 filp = cfs_filp_open(filename,
397 O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
399 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
404 spin_lock_init(&pc.pc_lock);
/* Include the LBUG-reserve daemon pages in the dump. */
405 pc.pc_want_daemon_pages = 1;
407 if (list_empty(&pc.pc_pages)) {
412 /* ok, for now, just write the pages. in the future we'll be building
413 * iobufs with the pages and calling generic_direct_IO */
415 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
417 LASSERT(tage_invariant(tage));
419 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
420 tage->used, cfs_filp_poff(filp));
/* Short write: warn but keep going — best-effort dump. */
421 if (rc != tage->used) {
422 printk(KERN_WARNING "wanted to write %u but wrote "
423 "%d\n", tage->used, rc);
427 list_del(&tage->linkage);
431 rc = cfs_filp_fsync(filp);
433 printk(KERN_ERR "sync returns %d\n", rc);
435 cfs_filp_close(filp);
437 up_write(&tracefile_sem);
/* Discard all buffered trace pages without writing them anywhere.
 * NOTE(review): the collect_pages() call and the tage free after
 * list_del are elided from this listing. */
441 void trace_flush_pages(void)
443 struct page_collection pc;
444 struct trace_page *tage;
445 struct trace_page *tmp;
447 spin_lock_init(&pc.pc_lock);
450 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
452 LASSERT(tage_invariant(tage));
454 list_del(&tage->linkage);
/* /proc write handler ("dk" = dump kernel log): copy a user-supplied
 * absolute path, strip trailing whitespace, and dump all trace pages to
 * it via tracefile_dump_all_pages().
 * NOTE(review): the NULL check after cfs_alloc, the error returns, the
 * NUL-termination of 'name', and the cfs_free are elided here. */
459 int trace_dk(struct file *file, const char *buffer, unsigned long count,
466 name = cfs_alloc(count + 1, CFS_ALLOC_STD);
470 if (copy_from_user(name, buffer, count)) {
/* Only absolute paths are accepted. */
475 if (name[0] != '/') {
480 /* be nice and strip out trailing '\n' */
481 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
485 rc = tracefile_dump_all_pages(name);
491 EXPORT_SYMBOL(trace_dk);
/* The trace-flush daemon thread ("ktracefiled").  Loop: sleep up to one
 * second (or until woken by trace_get_tage's backlog signal), collect
 * buffered pages from all CPUs, append them to 'tracefile' if one is
 * configured, and park the pages on the per-CPU daemon rings so recent
 * history survives for an LBUG dump.  Exits when tctl_shutdown is set.
 * NOTE(review): the main loop construct, the error-path gotos after a
 * failed open, the f_pos bookkeeping between writes, and the per-page
 * cleanup are elided from this listing. */
493 static int tracefiled(void *arg)
495 struct page_collection pc;
496 struct tracefiled_ctl *tctl = arg;
497 struct trace_page *tage;
498 struct trace_page *tmp;
499 struct ptldebug_header *hdr;
504 /* we're started late enough that we pick up init's fs context */
505 /* this is so broken in uml? what on earth is going on? */
506 kportal_daemonize("ktracefiled");
509 spin_lock_init(&pc.pc_lock);
/* Let trace_start_thread() return once we are up. */
510 complete(&tctl->tctl_start);
/* Interruptible 1-second wait, shortcut by cfs_waitq_signal. */
513 cfs_waitlink_t __wait;
515 cfs_waitlink_init(&__wait);
516 cfs_waitq_add(&tctl->tctl_waitq, &__wait);
517 set_current_state(TASK_INTERRUPTIBLE);
518 cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
519 cfs_waitq_del(&tctl->tctl_waitq, &__wait);
521 if (atomic_read(&tctl->tctl_shutdown))
/* Collect only live pages; the daemon rings stay in place. */
524 pc.pc_want_daemon_pages = 0;
526 if (list_empty(&pc.pc_pages))
530 down_read(&tracefile_sem);
531 if (tracefile != NULL) {
532 filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
535 printk("couldn't open %s: %d\n", tracefile, rc);
537 up_read(&tracefile_sem);
/* No tracefile configured: just recycle pages to the daemon rings. */
539 put_pages_on_daemon_list(&pc);
545 /* mark the first header, so we can sort in chunks */
546 tage = tage_from_list(pc.pc_pages.next);
547 LASSERT(tage_invariant(tage));
549 hdr = cfs_page_address(tage->page);
550 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
552 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
555 LASSERT(tage_invariant(tage));
/* Wrap the file once it reaches tracefile_size; otherwise clamp
 * f_pos to the current end of file. */
557 if (f_pos >= tracefile_size)
559 else if (f_pos > cfs_filp_size(filp))
560 f_pos = cfs_filp_size(filp);
562 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
564 if (rc != tage->used) {
565 printk(KERN_WARNING "wanted to write %u but "
566 "wrote %d\n", tage->used, rc);
571 cfs_filp_close(filp);
/* Keep the just-written pages around for LBUG dumps. */
573 put_pages_on_daemon_list(&pc);
/* Acknowledge shutdown to trace_stop_thread(). */
575 complete(&tctl->tctl_stop);
/* Start the ktracefiled daemon thread (idempotent — serialized by
 * trace_thread_sem; presumably returns early if already running, though
 * that check is elided from this listing).  Blocks until the thread has
 * signalled tctl_start.  Returns 0 on success. */
579 int trace_start_thread(void)
581 struct tracefiled_ctl *tctl = &trace_tctl;
584 mutex_down(&trace_thread_sem);
588 init_completion(&tctl->tctl_start);
589 init_completion(&tctl->tctl_stop);
590 cfs_waitq_init(&tctl->tctl_waitq);
591 atomic_set(&tctl->tctl_shutdown, 0);
593 if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
/* Wait until the daemon is actually running before returning. */
598 wait_for_completion(&tctl->tctl_start);
601 mutex_up(&trace_thread_sem);
/* Stop the ktracefiled daemon thread if it is running: set the shutdown
 * flag (the daemon polls it once per wakeup) and wait for the daemon's
 * tctl_stop completion.  Serialized by trace_thread_sem. */
605 void trace_stop_thread(void)
607 struct tracefiled_ctl *tctl = &trace_tctl;
609 mutex_down(&trace_thread_sem);
610 if (thread_running) {
611 printk(KERN_INFO "Shutting down debug daemon thread...\n");
612 atomic_set(&tctl->tctl_shutdown, 1);
613 wait_for_completion(&tctl->tctl_stop);
616 mutex_up(&trace_thread_sem);
/* Module init: reset every possible CPU's trace_cpu_data slot — empty
 * page lists, zeroed counters, default page limit, not shutting down.
 * NOTE(review): the return statement is elided from this listing;
 * presumably returns 0. */
619 int tracefile_init(void)
621 struct trace_cpu_data *tcd;
624 for (i = 0; i < NR_CPUS; i++) {
625 tcd = &trace_data[i].tcd;
626 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
627 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
628 tcd->tcd_cur_pages = 0;
629 tcd->tcd_cur_daemon_pages = 0;
630 tcd->tcd_max_pages = TCD_MAX_PAGES;
631 tcd->tcd_shutting_down = 0;
/* Per-CPU teardown: mark this CPU's buffer as shutting down (so
 * portals_debug_msg stops writing into it) and free all of its live
 * trace pages.  Runs on each CPU via smp_call_function from
 * trace_cleanup().  NOTE(review): the tage_free after list_del is
 * elided from this listing. */
636 static void trace_cleanup_on_cpu(void *info)
638 struct trace_cpu_data *tcd;
639 struct trace_page *tage;
640 struct trace_page *tmp;
643 tcd = trace_get_tcd(flags);
645 tcd->tcd_shutting_down = 1;
647 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
648 LASSERT(tage_invariant(tage));
650 list_del(&tage->linkage);
653 tcd->tcd_cur_pages = 0;
655 trace_put_tcd(tcd, flags);
/* Tear down all per-CPU trace buffers: run the cleanup locally, then on
 * every other CPU.  The page_collection is passed only to satisfy the
 * smp_call_function callback signature. */
658 static void trace_cleanup(void)
660 struct page_collection pc;
662 CFS_INIT_LIST_HEAD(&pc.pc_pages);
663 spin_lock_init(&pc.pc_lock);
665 trace_cleanup_on_cpu(&pc);
666 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
669 void tracefile_exit(void)