/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Zach Brown <zab@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#define DEBUG_SUBSYSTEM S_PORTALS
#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"

#include <libcfs/kp30.h>
#include <libcfs/libcfs.h>

/* XXX move things up to the top, comment */
union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;

struct rw_semaphore tracefile_sem;
char *tracefile = NULL;
long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
struct semaphore trace_thread_sem;
static int thread_running = 0;

static void put_pages_on_daemon_list_on_cpu(void *info);

static inline struct trace_page *tage_from_list(struct list_head *list)
{
        return list_entry(list, struct trace_page, linkage);
}

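/* Allocate a trace page along with its tracking structure.  Returns NULL
 * if either allocation fails. */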
static struct trace_page *tage_alloc(int gfp)
{
        cfs_page_t        *page;
        struct trace_page *tage;

        page = cfs_alloc_page(gfp);
        if (page == NULL)
                return NULL;

        tage = cfs_alloc(sizeof(*tage), gfp);
        if (tage == NULL) {
                /* drop the page so it isn't leaked */
                cfs_free_page(page);
                return NULL;
        }

        tage->page = page;
        return tage;
}

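/* Free a trace page and the cfs page backing it. */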
static void tage_free(struct trace_page *tage)
{
        LASSERT(tage != NULL);

        if (tage->page != NULL)
                cfs_free_page(tage->page);
        cfs_free(tage);
}

static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
{
        LASSERT(tage != NULL);
        LASSERT(queue != NULL);

        list_move_tail(&tage->linkage, queue);
}

static int tage_invariant(struct trace_page *tage)
{
        return
                tage != NULL &&
                tage->used <= CFS_PAGE_SIZE &&
                cfs_page_count(tage->page) > 0;
}

/* return a page that has 'len' bytes left at the end */
static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
                                         unsigned long len)
{
        struct trace_page *tage;

        if (len > CFS_PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        if (!list_empty(&tcd->tcd_pages)) {
                tage = tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= CFS_PAGE_SIZE)
                        return tage;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                tage = tage_alloc(CFS_ALLOC_ATOMIC);
                if (tage == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }
                tage->used = 0;
                tage->cpu = smp_processor_id();
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        cfs_waitq_signal(&tctl->tctl_waitq);
                }
                return tage;
        }

 ring_buffer:
        if (thread_running) {
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct trace_page *tage;
                struct trace_page *tmp;

                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                CFS_INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                        if (pgcount-- == 0)
                                break;

                        list_move_tail(&tage->linkage, &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);
        }
        LASSERT(!list_empty(&tcd->tcd_pages));

        tage = tage_from_list(tcd->tcd_pages.next);
        tage->used = 0;
        tage_to_tail(tage, &tcd->tcd_pages);

        return tage;
}

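/* Format one debug message into the current CPU's trace buffer.  Each record
 * is laid out as a ptldebug_header, the NUL-terminated file name, the
 * NUL-terminated function name, and then the formatted message text.  If the
 * message doesn't fit in the space left in the current page, a fresh page is
 * fetched and the vsnprintf() is retried with the real length. */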
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
                       const int line, unsigned long stack, char *format, ...)
{
        struct trace_cpu_data *tcd;
        struct ptldebug_header header;
        struct trace_page *tage;
        char *debug_buf = format;
        int known_size, needed = 85 /* average message length */, max_nob;
        va_list       ap;
        unsigned long flags;

#ifdef CRAY_PORTALS
        if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
                return;
#endif
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;

        if (*(format + strlen(format) - 1) != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        tcd = trace_get_tcd(flags);
        if (tcd->tcd_shutting_down)
                goto out;

        set_ptldebug_header(&header, subsys, mask, line, stack);
        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; /* NULs */

 retry:
        /* +1 for the terminating NUL that vsnprintf() writes */
        tage = trace_get_tage(tcd, needed + known_size + 1);
        if (tage == NULL) {
                debug_buf = format;
                if (needed + known_size > CFS_PAGE_SIZE)
                        mask |= D_ERROR;
                needed = strlen(format);
                goto out;
        }

        debug_buf = cfs_page_address(tage->page) + tage->used + known_size;

        max_nob = CFS_PAGE_SIZE - tage->used - known_size;
        LASSERT(max_nob > 0);
        va_start(ap, format);
        needed = vsnprintf(debug_buf, max_nob, format, ap);
        va_end(ap);

        /* vsnprintf() returns the length the message needed; max_nob or
         * more means it was truncated, so retry with the real length. */
        if (needed >= max_nob)
                goto retry;

        header.ph_len = known_size + needed;
        debug_buf = cfs_page_address(tage->page) + tage->used;

        memcpy(debug_buf, &header, sizeof(header));
        tage->used += sizeof(header);
        debug_buf += sizeof(header);

        strcpy(debug_buf, file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        strcpy(debug_buf, fn);
        tage->used += strlen(fn) + 1;
        debug_buf += strlen(fn) + 1;

        tage->used += needed;
        if (tage->used > CFS_PAGE_SIZE)
                printk(KERN_EMERG
                       "tage->used == %u in portals_debug_msg\n", tage->used);

 out:
        if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
                print_to_console(&header, mask, debug_buf, needed, file, fn);

        trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);

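/* Splice this CPU's trace pages (and, if requested, its daemon pages) onto
 * the given page_collection.  Runs on each CPU via smp_call_function(). */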
static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        CFS_INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}

static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *cur_head;
        unsigned long flags;
        struct trace_page *tage;
        struct trace_page *tmp;

        tcd = trace_get_tcd(flags);

        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

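/* Return the collected pages to their owning CPUs, e.g. after a failed
 * write, so that no trace data is lost. */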
static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct trace_page *victim;

                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = tage_from_list(tcd->tcd_daemon_pages.next);

                        LASSERT(tage_invariant(victim));

                        list_del(&victim->linkage);
                        tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}

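/* Decode every collected trace record and print it straight to the console.
 * Used when the trace data cannot be written to a file. */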
void trace_debug_print(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        /* include the daemon ringbuffer pages in the dump */
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                cfs_page_t *page;

                LASSERT(tage_invariant(tage));

                page = tage->page;
                p = cfs_page_address(page);
                /* only the used part of the page holds records */
                while (p < ((char *)cfs_page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        len = hdr->ph_len - (p - (char *)hdr);

                        print_to_console(hdr, D_EMERG, p, len, file, fn);
                }

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

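/* Collect the trace pages from all CPUs (including the daemon lists) and
 * write them out synchronously to the given file. */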
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        cfs_file_t *filp;
        struct trace_page *tage;
        struct trace_page *tmp;
        CFS_DECL_MMSPACE;
        int rc;

        down_write(&tracefile_sem);

        filp = cfs_filp_open(filename,
                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
        if (!filp) {
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        CFS_MMSPACE_OPEN;
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                    tage->used, cfs_filp_poff(filp));
                if (rc != tage->used) {
                        printk(KERN_WARNING "wanted to write %u but wrote "
                               "%d\n", tage->used, rc);
                        put_pages_back(&pc);
                        break;
                }
                list_del(&tage->linkage);
                tage_free(tage);
        }
        CFS_MMSPACE_CLOSE;
        rc = cfs_filp_fsync(filp);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        cfs_filp_close(filp);
 out:
        up_write(&tracefile_sem);
        return rc;
}

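/* Collect and discard all accumulated trace pages. */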
void trace_flush_pages(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        /* flush the daemon pages as well */
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

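/* proc write handler: reads an absolute pathname from userspace and dumps
 * all trace pages to that file. */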
int trace_dk(struct file *file, const char *buffer, unsigned long count,
             void *data)
{
        char *name;
        unsigned long off;
        int rc;

        name = cfs_alloc(count + 1, CFS_ALLOC_STD);
        if (name == NULL)
                return -ENOMEM;

        if (copy_from_user(name, buffer, count)) {
                rc = -EFAULT;
                goto out;
        }

        if (name[0] != '/') {
                rc = -EINVAL;
                goto out;
        }

        /* be nice and strip out trailing '\n' */
        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
                ;

        name[off] = '\0';
        rc = tracefile_dump_all_pages(name);
out:
        cfs_free(name);
        /* propagate errors instead of silently swallowing them */
        return rc < 0 ? rc : count;
}
EXPORT_SYMBOL(trace_dk);

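/* The trace daemon thread.  It wakes up roughly once a second, or sooner
 * when signalled from trace_get_tage(), collects the per-CPU trace pages
 * and appends them to the configured tracefile, wrapping the file offset at
 * tracefile_size.  While no tracefile is configured the pages are parked on
 * the per-CPU daemon lists instead. */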
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct trace_page *tage;
        struct trace_page *tmp;
        struct ptldebug_header *hdr;
        cfs_file_t *filp;
        CFS_DECL_MMSPACE;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                cfs_waitlink_t __wait;

                cfs_waitlink_init(&__wait);
                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
                cfs_waitq_del(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = cfs_filp_open(tracefile,
                                             O_CREAT|O_RDWR|O_LARGEFILE,
                                             0600, &rc);
                        if (filp == NULL)
                                printk(KERN_WARNING "couldn't open %s: %d\n",
                                       tracefile, rc);
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                CFS_MMSPACE_OPEN;

                /* mark the first header, so we can sort in chunks */
                tage = tage_from_list(pc.pc_pages.next);
                LASSERT(tage_invariant(tage));

                hdr = cfs_page_address(tage->page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        static loff_t f_pos;

                        LASSERT(tage_invariant(tage));

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > cfs_filp_size(filp))
                                f_pos = cfs_filp_size(filp);

                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                            tage->used, &f_pos);
                        if (rc != tage->used) {
                                printk(KERN_WARNING "wanted to write %u but "
                                       "wrote %d\n", tage->used, rc);
                                put_pages_back(&pc);
                        }
                }
                CFS_MMSPACE_CLOSE;
                cfs_filp_close(filp);

                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}

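/* Start the trace daemon thread, unless it is already running.  Serialized
 * by trace_thread_sem. */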
int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        mutex_down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        cfs_waitq_init(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        mutex_up(&trace_thread_sem);
        return rc;
}

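/* Signal the trace daemon thread to shut down and wait until it exits. */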
void trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        mutex_down(&trace_thread_sem);
        if (thread_running) {
                printk(KERN_INFO "Shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        mutex_up(&trace_thread_sem);
}

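/* Initialize the per-CPU trace buffers. */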
int tracefile_init(void)
{
        struct trace_cpu_data *tcd;
        int i;

        for (i = 0; i < NR_CPUS; i++) {
                tcd = &trace_data[i].tcd;
                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = TCD_MAX_PAGES;
                tcd->tcd_shutting_down = 0;
        }
        return 0;
}

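/* Free this CPU's trace pages and mark the buffer as shutting down so that
 * no further messages are recorded. */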
static void trace_cleanup_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        tcd->tcd_shutting_down = 1;

        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                LASSERT(tage_invariant(tage));

                list_del(&tage->linkage);
                tage_free(tage);
        }
        tcd->tcd_cur_pages = 0;

        trace_put_tcd(tcd, flags);
}

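/* Run the per-CPU cleanup on every CPU. */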
static void trace_cleanup(void)
{
        struct page_collection pc;

        CFS_INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}

void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}