1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Zach Brown <zab@clusterfs.com>
6 * Author: Phil Schwan <phil@clusterfs.com>
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
25 #define DEBUG_SUBSYSTEM S_PORTALS
26 #define LUSTRE_TRACEFILE_PRIVATE
27 #include "tracefile.h"
29 #include <libcfs/kp30.h>
30 #include <libcfs/libcfs.h>
32 /* XXX move things up to the top, comment */
/* Per-CPU trace state, one slot per possible CPU. tracefile_init() sets up
 * the .tcd member of every slot. */
33 union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;
/* Taken for write by tracefile_dump_all_pages() and for read by tracefiled()
 * while it looks at 'tracefile'. */
35 struct rw_semaphore tracefile_sem;
/* Path of the file tracefiled() writes to; the thread only opens it when
 * this is non-NULL. */
36 char *tracefile = NULL;
/* Upper bound that tracefiled() compares its file position against. */
37 long long tracefile_size = TRACEFILE_SIZE;
/* Control block shared with the tracefiled() thread: wait queue, start/stop
 * completions and the shutdown flag. */
38 static struct tracefiled_ctl trace_tctl;
/* Held across the bodies of trace_start_thread() and trace_stop_thread(). */
39 struct semaphore trace_thread_sem;
/* Flag tested by trace_get_tage() and trace_stop_thread().
 * NOTE(review): the statements that set or clear it are not visible in this
 * excerpt. */
40 static int thread_running = 0;
/* Forward declaration: trace_get_tage() calls this ahead of its definition. */
42 static void put_pages_on_daemon_list_on_cpu(void *info);
/* Convert a list node into the trace_page that embeds it as its 'linkage'
 * member. */
44 static inline struct trace_page *tage_from_list(struct list_head *list)
46 return list_entry(list, struct trace_page, linkage);
/*
 * Allocate a backing page and a trace_page descriptor, both using the
 * allocation flags in 'gfp'.
 *
 * NOTE(review): the failure checks, the linking of the page into the
 * descriptor and the return statement are not visible in this excerpt;
 * presumably NULL is returned when either allocation fails — confirm.
 */
49 static struct trace_page *tage_alloc(int gfp)
52 struct trace_page *tage;
/* Grab the data page first ... */
54 page = cfs_alloc_page(gfp);
/* ... then the small descriptor that tracks it. */
58 tage = cfs_alloc(sizeof(*tage), gfp);
/*
 * Release a trace_page. Asserts that both the descriptor and its page are
 * non-NULL, then frees the backing page.
 *
 * NOTE(review): the release of the descriptor itself is not visible in this
 * excerpt — confirm it happens on an elided line.
 */
68 static void tage_free(struct trace_page *tage)
70 LASSERT(tage != NULL);
71 LASSERT(tage->page != NULL);
73 cfs_free_page(tage->page);
/*
 * Unlink 'tage' from whatever list it is currently on and append it to the
 * tail of 'queue'. Both arguments must be non-NULL (asserted).
 */
77 static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
79 LASSERT(tage != NULL);
80 LASSERT(queue != NULL);
82 list_move_tail(&tage->linkage, queue);
/*
 * Sanity-check a trace_page: the descriptor and its page must be non-NULL,
 * 'used' must not exceed one page, and the page's reference count must be
 * positive. Any violation trips an LASSERT/LASSERTF.
 */
85 static void LASSERT_TAGE_INVARIANT(struct trace_page *tage)
87 LASSERT(tage != NULL);
88 LASSERT(tage->page != NULL);
89 LASSERTF(tage->used <= CFS_PAGE_SIZE, "used = %u, PAGE_SIZE %lu\n",
90 tage->used, CFS_PAGE_SIZE);
91 LASSERTF(cfs_page_count(tage->page) > 0, "count = %d\n",
92 cfs_page_count(tage->page));
95 /* return a page that has 'len' bytes left at the end */
/*
 * Strategy, as far as the visible lines show:
 *  1. refuse requests larger than one page (logged with printk);
 *  2. test whether the newest page (tail of tcd->tcd_pages) still has room
 *     for 'len' more bytes;
 *  3. while the CPU is below tcd_max_pages, allocate a new page atomically,
 *     tag it with the current CPU and append it; once more than 8 pages are
 *     queued and the daemon thread is running, wake the daemon;
 *  4. otherwise, if the daemon is running, move pages from the head of the
 *     list onto this CPU's daemon list (the message speaks of 10%), then
 *     recycle the oldest remaining page by moving it to the tail.
 *
 * NOTE(review): the return statements, the allocation-failure branch, the
 * stop condition of the discard loop and any reset of tage->used are on
 * lines missing from this excerpt — confirm against the full file.
 */
96 static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
99 struct trace_page *tage;
/* A record may never be larger than a single page. */
101 if (len > CFS_PAGE_SIZE) {
102 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
/* Fast path: pages are appended at the tail, so .prev is the newest one. */
107 if (!list_empty(&tcd->tcd_pages)) {
108 tage = tage_from_list(tcd->tcd_pages.prev);
109 if (tage->used + len <= CFS_PAGE_SIZE)
/* Still under the per-CPU page budget: grow the buffer. */
113 if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
114 tage = tage_alloc(CFS_ALLOC_ATOMIC);
116 /* the kernel should print a message for us. fall back
117 * to using the last page in the ring buffer. */
122 tage->cpu = smp_processor_id();
123 list_add_tail(&tage->linkage, &tcd->tcd_pages);
124 tcd->tcd_cur_pages++;
/* Enough data has piled up: nudge the writer thread. */
126 if (tcd->tcd_cur_pages > 8 && thread_running) {
127 struct tracefiled_ctl *tctl = &trace_tctl;
128 cfs_waitq_signal(&tctl->tctl_waitq);
/* Buffer is full (or allocation failed) while the daemon is running: hand a
 * batch of the oldest pages to the daemon list. */
134 if (thread_running) {
135 int pgcount = tcd->tcd_cur_pages / 10;
136 struct page_collection pc;
137 struct trace_page *tage;
138 struct trace_page *tmp;
140 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
141 " 10%% of pages (%d)\n", pgcount + 1);
/* Temporary on-stack collection used only to pass the pages along. */
143 CFS_INIT_LIST_HEAD(&pc.pc_pages);
144 spin_lock_init(&pc.pc_lock);
146 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
150 list_move_tail(&tage->linkage, &pc.pc_pages);
151 tcd->tcd_cur_pages--;
153 put_pages_on_daemon_list_on_cpu(&pc);
155 LASSERT(!list_empty(&tcd->tcd_pages));
158 if (list_empty(&tcd->tcd_pages))
/* Ring-buffer reuse: take the oldest page (list head) and make it the
 * newest by moving it to the tail. */
161 tage = tage_from_list(tcd->tcd_pages.next);
163 tage_to_tail(tage, &tcd->tcd_pages);
/*
 * Record one debug message in the trace buffer and, for urgent masks or when
 * portal_printk is set, also send it to the console.
 *
 * subsys, mask, line, stack - handed to set_ptldebug_header() to fill the
 *                             record header; 'mask' also drives filtering
 *                             and the console decision
 * file, fn                  - source file and function name, stored after
 *                             the header; any directory part of 'file' is
 *                             stripped first
 * format, ...               - printf-style message text
 *
 * Record layout written into the page: header, file\0, fn\0, formatted text.
 * header.ph_len is known_size + needed, i.e. it covers the whole record.
 *
 * NOTE(review): this excerpt omits several lines (declarations such as
 * 'flags' and 'ap', the bodies of the early-exit tests, the retry logic
 * around the size estimate). The comments below describe only statements
 * that are visible.
 */
168 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
169 const int line, unsigned long stack, char *format, ...)
171 struct trace_cpu_data *tcd;
172 struct ptldebug_header header;
173 struct trace_page *tage;
174 char *debug_buf = format;
175 int known_size, needed = 85 /* average message length */, max_nob;
/* Filter for messages whose mask is exactly D_PORTALS while that bit is off
 * in portal_debug (the action taken is on an elided line). */
180 if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
/* Keep only the basename of the source path. */
183 if (strchr(file, '/'))
184 file = strrchr(file, '/') + 1;
/* NOTE(review): for an empty format string this reads the byte before the
 * string — confirm callers never pass an empty format. */
186 if (*(format + strlen(format) - 1) != '\n')
187 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
190 tcd = trace_get_tcd(flags);
191 if (tcd->tcd_shutting_down)
194 set_ptldebug_header(&header, subsys, mask, line, stack);
/* Bytes whose size is known before formatting: header plus both names. */
195 known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
/* Ask for a page with room for the fixed part plus the current estimate of
 * the text length. */
198 tage = trace_get_tage(tcd, needed + known_size);
201 if (needed + known_size > CFS_PAGE_SIZE)
203 needed = strlen(format);
/* Format the text first, past the space reserved for header and names, so
 * that its real length is known before the header is written. */
207 debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
209 max_nob = CFS_PAGE_SIZE - tage->used - known_size;
210 LASSERT(max_nob > 0);
211 va_start(ap, format);
212 needed = vsnprintf(debug_buf, max_nob, format, ap);
/* NOTE(review): vsnprintf reports truncation with a return value >= the
 * buffer size, so needed == max_nob is also a truncated result — confirm
 * whether '>' is intended here. */
215 if (needed > max_nob) /* overflow. oh poop. */
218 header.ph_len = known_size + needed;
/* Rewind to the start of the record and lay down header, file and fn. */
219 debug_buf = cfs_page_address(tage->page) + tage->used;
221 memcpy(debug_buf, &header, sizeof(header));
222 tage->used += sizeof(header);
223 debug_buf += sizeof(header);
225 strcpy(debug_buf, file);
226 tage->used += strlen(file) + 1;
227 debug_buf += strlen(file) + 1;
229 strcpy(debug_buf, fn);
230 tage->used += strlen(fn) + 1;
231 debug_buf += strlen(fn) + 1;
/* Account for the text that was formatted in place earlier. */
233 tage->used += needed;
234 if (tage->used > CFS_PAGE_SIZE)
236 "tage->used == %u in portals_debug_msg\n", tage->used);
/* debug_buf now points at the formatted text that follows fn. */
239 if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
240 print_to_console(&header, mask, debug_buf, needed, file, fn);
242 trace_put_tcd(tcd, flags);
244 EXPORT_SYMBOL(portals_debug_msg);
/*
 * Move the trace pages obtained through trace_get_tcd() into the shared
 * collection.
 *
 * info - really a struct page_collection *; the untyped signature lets the
 *        function be passed to smp_call_function().
 *
 * Under pc->pc_lock, tcd_pages is spliced into pc->pc_pages and reset to an
 * empty list with a zero count. When pc->pc_want_daemon_pages is set, the
 * same is done for tcd_daemon_pages.
 * NOTE(review): list_splice() inserts at the head of the destination, so the
 * daemon pages end up in front of the pages spliced just before — presumably
 * intended to keep older data first; confirm.
 */
246 static void collect_pages_on_cpu(void *info)
248 struct trace_cpu_data *tcd;
250 struct page_collection *pc = info;
252 tcd = trace_get_tcd(flags);
/* pc is shared by every CPU running this callback, hence the lock. */
254 spin_lock(&pc->pc_lock);
255 list_splice(&tcd->tcd_pages, &pc->pc_pages);
256 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
257 tcd->tcd_cur_pages = 0;
258 if (pc->pc_want_daemon_pages) {
259 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
260 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
261 tcd->tcd_cur_daemon_pages = 0;
263 spin_unlock(&pc->pc_lock);
265 trace_put_tcd(tcd, flags);
/*
 * Gather trace pages from every CPU into pc->pc_pages.
 *
 * The list head is initialised here; the caller is expected to have set up
 * pc->pc_lock and pc->pc_want_daemon_pages beforehand (the visible callers
 * do so). The collection callback is run directly on the calling CPU and
 * then on the others through smp_call_function().
 * NOTE(review): the final argument 1 is presumably the 'wait' flag of the
 * older four-argument smp_call_function() — confirm for the target kernel.
 */
268 static void collect_pages(struct page_collection *pc)
270 /* needs to be fixed up for preempt */
271 CFS_INIT_LIST_HEAD(&pc->pc_pages);
272 collect_pages_on_cpu(pc);
273 smp_call_function(collect_pages_on_cpu, pc, 0, 1);
/*
 * Return collected pages to the CPU that produced them.
 *
 * info - really a struct page_collection *.
 *
 * Walks pc->pc_pages under pc->pc_lock and, for pages whose 'cpu' field
 * matches the executing CPU, moves them back onto tcd_pages and bumps
 * tcd_cur_pages.
 * NOTE(review): the statement executed for pages of other CPUs is elided;
 * presumably they are skipped — confirm.
 */
276 static void put_pages_back_on_cpu(void *info)
278 struct page_collection *pc = info;
279 struct trace_cpu_data *tcd;
280 struct list_head *cur_head;
282 struct trace_page *tage;
283 struct trace_page *tmp;
285 tcd = trace_get_tcd(flags);
/* Remember the first entry currently on the list (or the list head itself
 * when empty): returned pages are inserted in front of it, so they precede
 * anything logged since the collection. */
287 cur_head = tcd->tcd_pages.next;
289 spin_lock(&pc->pc_lock);
290 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
292 LASSERT_TAGE_INVARIANT(tage);
294 if (tage->cpu != smp_processor_id())
297 tage_to_tail(tage, cur_head);
298 tcd->tcd_cur_pages++;
300 spin_unlock(&pc->pc_lock);
302 trace_put_tcd(tcd, flags);
/*
 * Give every page in 'pc' back to its originating CPU: run the per-CPU
 * helper on the calling CPU first, then on the other CPUs through
 * smp_call_function().
 */
305 static void put_pages_back(struct page_collection *pc)
307 /* needs to be fixed up for preempt */
308 put_pages_back_on_cpu(pc);
309 smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
312 /* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
313 * we have a good amount of data at all times for dumping during an LBUG, even
314 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
/*
 * Move this CPU's pages from a collection onto its daemon list.
 *
 * info - really a struct page_collection *.
 *
 * Under pc->pc_lock, each page in pc->pc_pages whose 'cpu' field matches the
 * executing CPU is appended to tcd_daemon_pages. Whenever that pushes the
 * daemon list above tcd_max_pages entries, the entry at the head of the
 * list (the one appended longest ago) is unlinked and the count is reduced
 * again.
 * NOTE(review): the handling of pages from other CPUs and the freeing of the
 * evicted page are on elided lines — confirm.
 */
316 static void put_pages_on_daemon_list_on_cpu(void *info)
318 struct page_collection *pc = info;
319 struct trace_cpu_data *tcd;
320 struct trace_page *tage;
321 struct trace_page *tmp;
324 tcd = trace_get_tcd(flags);
326 spin_lock(&pc->pc_lock);
327 list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
329 LASSERT_TAGE_INVARIANT(tage);
331 if (tage->cpu != smp_processor_id())
334 tage_to_tail(tage, &tcd->tcd_daemon_pages);
335 tcd->tcd_cur_daemon_pages++;
/* Keep the daemon list bounded by evicting from its head. */
337 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
338 struct trace_page *victim;
340 LASSERT(!list_empty(&tcd->tcd_daemon_pages));
341 victim = tage_from_list(tcd->tcd_daemon_pages.next);
343 LASSERT_TAGE_INVARIANT(victim);
345 list_del(&victim->linkage);
347 tcd->tcd_cur_daemon_pages--;
350 spin_unlock(&pc->pc_lock);
352 trace_put_tcd(tcd, flags);
/*
 * Distribute the pages in 'pc' onto the per-CPU daemon lists: run the
 * per-CPU helper on the calling CPU, then on the other CPUs through
 * smp_call_function().
 */
355 static void put_pages_on_daemon_list(struct page_collection *pc)
357 put_pages_on_daemon_list_on_cpu(pc);
358 smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
/*
 * Print buffered trace records to the console at D_EMERG level and drop the
 * pages afterwards.
 *
 * Each page is parsed as a sequence of records: a ptldebug_header, the file
 * name, the function name, and the message text whose length is derived from
 * hdr->ph_len.
 *
 * NOTE(review): the call that fills pc.pc_pages (presumably collect_pages()),
 * the assignments of 'page', 'hdr', 'file' and 'fn', and the freeing of each
 * page are on lines missing from this excerpt — confirm.
 */
361 void trace_debug_print(void)
363 struct page_collection pc;
364 struct trace_page *tage;
365 struct trace_page *tmp;
367 spin_lock_init(&pc.pc_lock);
/* Include the daemon lists as well as the live buffers. */
369 pc.pc_want_daemon_pages = 1;
371 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
375 LASSERT_TAGE_INVARIANT(tage);
378 p = cfs_page_address(page);
/* NOTE(review): the bound is the full page size rather than tage->used;
 * unless an elided line stops earlier, bytes beyond the used region would be
 * parsed as records — confirm. */
379 while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) {
380 struct ptldebug_header *hdr;
/* Step over the NUL-terminated file name. */
385 p += strlen(file) + 1;
/* Whatever remains of the record after the current position is text. */
388 len = hdr->ph_len - (p - (char *)hdr);
390 print_to_console(hdr, D_EMERG, p, len, file, fn);
393 list_del(&tage->linkage);
/*
 * Write buffered trace pages, daemon pages included, to a new file.
 *
 * filename - path of the dump file. It is opened with O_CREAT|O_EXCL, so the
 *            open fails if the file already exists.
 *
 * Runs with tracefile_sem held for write from start to finish. Each page's
 * used bytes are written at the file's current position and the page is then
 * unlinked from the collection; a short write is reported with printk. The
 * file is fsync'ed and closed before the semaphore is released.
 *
 * NOTE(review): the call that fills pc.pc_pages (presumably collect_pages()),
 * the error-exit paths, the freeing of pages and the return statement are on
 * lines missing from this excerpt — confirm.
 */
398 int tracefile_dump_all_pages(char *filename)
400 struct page_collection pc;
402 struct trace_page *tage;
403 struct trace_page *tmp;
407 down_write(&tracefile_sem);
409 filp = cfs_filp_open(filename,
410 O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0666, &rc);
412 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
417 spin_lock_init(&pc.pc_lock);
/* A dump should contain everything, including the daemon lists. */
418 pc.pc_want_daemon_pages = 1;
420 if (list_empty(&pc.pc_pages)) {
425 /* ok, for now, just write the pages. in the future we'll be building
426 * iobufs with the pages and calling generic_direct_IO */
428 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
430 LASSERT_TAGE_INVARIANT(tage);
/* Only the used part of each page is written. */
432 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
433 tage->used, cfs_filp_poff(filp));
434 if (rc != tage->used) {
435 printk(KERN_WARNING "wanted to write %u but wrote "
436 "%d\n", tage->used, rc);
440 list_del(&tage->linkage);
444 rc = cfs_filp_fsync(filp);
446 printk(KERN_ERR "sync returns %d\n", rc);
448 cfs_filp_close(filp);
450 up_write(&tracefile_sem);
/*
 * Discard buffered trace pages, daemon pages included, without writing them
 * anywhere: every page in the collection is validated and unlinked.
 *
 * NOTE(review): the call that fills pc.pc_pages (presumably collect_pages())
 * and the freeing of each page are on lines missing from this excerpt —
 * confirm.
 */
454 void trace_flush_pages(void)
456 struct page_collection pc;
457 struct trace_page *tage;
458 struct trace_page *tmp;
460 spin_lock_init(&pc.pc_lock);
462 pc.pc_want_daemon_pages = 1;
464 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
466 LASSERT_TAGE_INVARIANT(tage);
468 list_del(&tage->linkage);
/*
 * Write handler that dumps the trace buffer to a file named by user space.
 *
 * file   - not used by any visible line
 * buffer - user-space pointer to the path name
 * count  - number of bytes available at 'buffer'
 *
 * The name is copied into a kernel buffer of count + 1 bytes, must begin
 * with '/', has trailing whitespace removed, and is then passed to
 * tracefile_dump_all_pages().
 *
 * NOTE(review): the rest of the parameter list, the NUL termination of
 * 'name', the error returns and the freeing of 'name' are on lines missing
 * from this excerpt — confirm.
 */
473 int trace_dk(struct file *file, const char *buffer, unsigned long count,
/* One extra byte, presumably for the terminating NUL. */
480 name = cfs_alloc(count + 1, CFS_ALLOC_STD);
484 if (copy_from_user(name, buffer, count)) {
/* Only paths starting with '/' pass this test. */
489 if (name[0] != '/') {
494 /* be nice and strip out trailing '\n' */
/* The scan stops once 'off' reaches 2, so the first two characters are
 * never stripped. */
495 for (off = count ; off > 2 && isspace(name[off - 1]); off--)
499 rc = tracefile_dump_all_pages(name);
505 EXPORT_SYMBOL(trace_dk);
/*
 * Body of the "ktracefiled" kernel thread.
 *
 * arg - the struct tracefiled_ctl shared with trace_start_thread() and
 *       trace_stop_thread().
 *
 * After signalling tctl_start the thread repeatedly:
 *   - sleeps on tctl_waitq for at most one second;
 *   - tests the tctl_shutdown flag;
 *   - works on a page collection that excludes the daemon lists
 *     (pc_want_daemon_pages = 0);
 *   - opens 'tracefile' under tracefile_sem (read) when one is configured;
 *   - flags the first record of the batch with PH_FLAG_FIRST_RECORD;
 *   - writes each page's data to the file, adjusting f_pos against
 *     tracefile_size and the current file size;
 *   - closes the file and moves the pages onto the per-CPU daemon lists.
 * tctl_stop is completed before the function ends.
 *
 * NOTE(review): the loop statement itself, the break/continue statements,
 * the call that fills pc.pc_pages (presumably collect_pages()), the f_pos
 * wrap-around action and the return are on lines missing from this excerpt —
 * confirm.
 */
507 static int tracefiled(void *arg)
509 struct page_collection pc;
510 struct tracefiled_ctl *tctl = arg;
511 struct trace_page *tage;
512 struct trace_page *tmp;
513 struct ptldebug_header *hdr;
518 /* we're started late enough that we pick up init's fs context */
519 /* this is so broken in uml? what on earth is going on? */
520 kportal_daemonize("ktracefiled");
523 spin_lock_init(&pc.pc_lock);
/* Let trace_start_thread() know the thread is up. */
524 complete(&tctl->tctl_start);
527 cfs_waitlink_t __wait;
/* Sleep until woken through tctl_waitq or until one second has passed. */
529 cfs_waitlink_init(&__wait);
530 cfs_waitq_add(&tctl->tctl_waitq, &__wait);
531 set_current_state(TASK_INTERRUPTIBLE);
532 cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
533 cfs_waitq_del(&tctl->tctl_waitq, &__wait);
535 if (atomic_read(&tctl->tctl_shutdown))
/* The daemon lists are left out of the pages handled here. */
538 pc.pc_want_daemon_pages = 0;
540 if (list_empty(&pc.pc_pages))
/* 'tracefile' is only read while holding the semaphore. */
544 down_read(&tracefile_sem);
545 if (tracefile != NULL) {
546 filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
549 printk("couldn't open %s: %d\n", tracefile, rc);
551 up_read(&tracefile_sem);
/* Presumably the no-file path: keep the pages on the daemon lists instead
 * of writing them — confirm the surrounding condition. */
553 put_pages_on_daemon_list(&pc);
559 /* mark the first header, so we can sort in chunks */
560 tage = tage_from_list(pc.pc_pages.next);
561 LASSERT_TAGE_INVARIANT(tage);
/* The first record header sits at the very start of the first page. */
563 hdr = cfs_page_address(tage->page);
564 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
566 list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
569 LASSERT_TAGE_INVARIANT(tage);
571 if (f_pos >= tracefile_size)
573 else if (f_pos > cfs_filp_size(filp))
574 f_pos = cfs_filp_size(filp);
576 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
578 if (rc != tage->used) {
579 printk(KERN_WARNING "wanted to write %u but "
580 "wrote %d\n", tage->used, rc);
586 cfs_filp_close(filp);
/* Written pages are kept on the daemon lists rather than dropped here. */
587 put_pages_on_daemon_list(&pc);
/* Unblocks trace_stop_thread(), which waits on tctl_stop. */
589 complete(&tctl->tctl_stop);
/*
 * Start the tracefiled() kernel thread.
 *
 * With trace_thread_sem held, the control block is (re)initialised:
 * both completions, the wait queue, and the shutdown flag cleared. The
 * thread is then spawned and the caller blocks until the thread has
 * completed tctl_start.
 *
 * NOTE(review): the check for an already running thread, the update of
 * thread_running, the error path after a failed spawn and the return
 * statement are on lines missing from this excerpt — confirm.
 */
593 int trace_start_thread(void)
595 struct tracefiled_ctl *tctl = &trace_tctl;
598 mutex_down(&trace_thread_sem);
602 init_completion(&tctl->tctl_start);
603 init_completion(&tctl->tctl_stop);
604 cfs_waitq_init(&tctl->tctl_waitq);
605 atomic_set(&tctl->tctl_shutdown, 0);
607 if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
/* Wait until tracefiled() has signalled that it is running. */
612 wait_for_completion(&tctl->tctl_start);
615 mutex_up(&trace_thread_sem);
/*
 * Stop the tracefiled() kernel thread if it is running.
 *
 * With trace_thread_sem held, the shutdown flag is raised and the caller
 * blocks until the thread completes tctl_stop. The thread is not woken
 * explicitly here; it sees the flag after its next wake-up, at the latest
 * when its one-second timed wait expires.
 *
 * NOTE(review): any reset of thread_running is on a line missing from this
 * excerpt — confirm.
 */
619 void trace_stop_thread(void)
621 struct tracefiled_ctl *tctl = &trace_tctl;
623 mutex_down(&trace_thread_sem);
624 if (thread_running) {
625 printk(KERN_INFO "Shutting down debug daemon thread...\n");
626 atomic_set(&tctl->tctl_shutdown, 1);
627 wait_for_completion(&tctl->tctl_stop);
630 mutex_up(&trace_thread_sem);
/*
 * Initialise the per-CPU trace state for all NR_CPUS slots: empty page and
 * daemon-page lists, zero counters, the default page budget
 * (TCD_MAX_PAGES), and the shutting-down flag cleared.
 *
 * NOTE(review): the declaration of 'i', any initialisation of the
 * file-scope semaphores and the return statement are on lines missing from
 * this excerpt — confirm.
 */
633 int tracefile_init(void)
635 struct trace_cpu_data *tcd;
638 for (i = 0; i < NR_CPUS; i++) {
639 tcd = &trace_data[i].tcd;
640 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
641 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
642 tcd->tcd_cur_pages = 0;
643 tcd->tcd_cur_daemon_pages = 0;
644 tcd->tcd_max_pages = TCD_MAX_PAGES;
645 tcd->tcd_shutting_down = 0;
/*
 * Per-CPU teardown: set tcd_shutting_down (portals_debug_msg() tests this
 * flag right after obtaining its tcd), unlink every page from tcd_pages and
 * reset the page count.
 *
 * info - not used by any visible line; present so the function matches the
 *        smp_call_function() callback signature.
 *
 * NOTE(review): the freeing of each unlinked page is on a line missing from
 * this excerpt, and tcd_daemon_pages is not touched by any visible line —
 * confirm.
 */
650 static void trace_cleanup_on_cpu(void *info)
652 struct trace_cpu_data *tcd;
653 struct trace_page *tage;
654 struct trace_page *tmp;
657 tcd = trace_get_tcd(flags);
/* Raise the flag before the pages go away. */
659 tcd->tcd_shutting_down = 1;
661 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
662 LASSERT_TAGE_INVARIANT(tage);
664 list_del(&tage->linkage);
667 tcd->tcd_cur_pages = 0;
669 trace_put_tcd(tcd, flags);
/*
 * Tear down the trace buffers on every CPU: run the per-CPU cleanup on the
 * calling CPU, then on the others through smp_call_function().
 *
 * A page_collection is initialised and passed along, although the visible
 * lines of trace_cleanup_on_cpu() never read it.
 */
672 static void trace_cleanup(void)
674 struct page_collection pc;
676 CFS_INIT_LIST_HEAD(&pc.pc_pages);
677 spin_lock_init(&pc.pc_lock);
679 trace_cleanup_on_cpu(&pc);
680 smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
683 void tracefile_exit(void)