lnet/libcfs/tracefile.c (fs/lustre-release.git, commit 6ab716b5997bf5581485ec66085a2d62a548f16c)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Zach Brown <zab@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#define DEBUG_SUBSYSTEM S_PORTALS
#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"

#include <libcfs/kp30.h>
#include <libcfs/libcfs.h>

/* XXX move things up to the top, comment */
union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;

struct rw_semaphore tracefile_sem;
char *tracefile = NULL;
long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
struct semaphore trace_thread_sem;
static int thread_running = 0;

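/*
 * Rough data flow, as implemented below: portals_debug_msg() formats a
 * record into a per-CPU page list (trace_data[cpu].tcd).  The tracefiled
 * daemon periodically collects those pages, appends them to 'tracefile'
 * when one is configured, and then parks them on a per-CPU daemon list so
 * that a recent window of debug data is always available for an LBUG dump.
 */
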
static void put_pages_on_daemon_list_on_cpu(void *info);

static inline struct trace_page *tage_from_list(struct list_head *list)
{
        return list_entry(list, struct trace_page, linkage);
}

static struct trace_page *tage_alloc(int gfp)
{
        cfs_page_t        *page;
        struct trace_page *tage;

        page = cfs_alloc_page(gfp);
        if (page == NULL)
                return NULL;

        tage = cfs_alloc(sizeof(*tage), gfp);
        if (tage == NULL) {
                cfs_free_page(page);
                return NULL;
        }

        tage->page = page;
        return tage;
}

static void tage_free(struct trace_page *tage)
{
        LASSERT(tage != NULL);
        LASSERT(tage->page != NULL);

        cfs_free_page(tage->page);
        cfs_free(tage);
}

static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
{
        LASSERT(tage != NULL);
        LASSERT(queue != NULL);

        list_move_tail(&tage->linkage, queue);
}

static int tage_invariant(struct trace_page *tage)
{
        return (tage != NULL &&
                tage->page != NULL &&
                tage->used <= CFS_PAGE_SIZE &&
                cfs_page_count(tage->page) > 0);
}

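/*
 * Page selection below happens in three steps: reuse the current tail page
 * if it still has room, otherwise allocate a fresh page while under the
 * per-CPU limit, and as a last resort recycle the oldest page in place
 * (after offering ~10% of the pages to the daemon so they are not lost).
 */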
/* return a page that has 'len' bytes left at the end */
static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
                                         unsigned long len)
{
        struct trace_page *tage;

        if (len > CFS_PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        if (!list_empty(&tcd->tcd_pages)) {
                tage = tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= CFS_PAGE_SIZE)
                        return tage;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                tage = tage_alloc(CFS_ALLOC_ATOMIC);
                if (tage == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        cfs_waitq_signal(&tctl->tctl_waitq);
                }
                return tage;
        }

 ring_buffer:
        if (thread_running) {
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct trace_page *tage;
                struct trace_page *tmp;

                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                CFS_INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                        if (pgcount-- == 0)
                                break;

                        list_move_tail(&tage->linkage, &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);

                LASSERT(!list_empty(&tcd->tcd_pages));
        }

        if (list_empty(&tcd->tcd_pages))
                return NULL;

        tage = tage_from_list(tcd->tcd_pages.next);
        tage->used = 0;
        tage_to_tail(tage, &tcd->tcd_pages);

        return tage;
}

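/*
 * Each record is laid out in the page exactly as written below:
 *
 *   [struct ptldebug_header][file name\0][function name\0][message]
 *
 * ph_len is the total record length, header included, so a reader can hop
 * from record to record (see trace_debug_print()).
 */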
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
                       const int line, unsigned long stack, char *format, ...)
{
        struct trace_cpu_data *tcd;
        struct ptldebug_header header;
        struct trace_page *tage;
        char *debug_buf = format;
        int known_size, needed = 85 /* average message length */, max_nob;
        va_list       ap;
        unsigned long flags;

#ifdef CRAY_PORTALS
        if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
                return;
#endif
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;

        if (*(format + strlen(format) - 1) != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        tcd = trace_get_tcd(flags);

        /* fill the header before checking for shutdown: the out: path may
         * hand it to print_to_console(), which would otherwise read stack
         * garbage */
        set_ptldebug_header(&header, subsys, mask, line, stack);
        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; /* nulls */

        if (tcd->tcd_shutting_down) {
                needed = strlen(format);
                goto out;
        }

 retry:
        tage = trace_get_tage(tcd, needed + known_size);
        if (tage == NULL) {
                debug_buf = format;
                if (needed + known_size > CFS_PAGE_SIZE)
                        mask |= D_ERROR;
                needed = strlen(format);
                goto out;
        }

        debug_buf = cfs_page_address(tage->page) + tage->used + known_size;

        max_nob = CFS_PAGE_SIZE - tage->used - known_size;
        LASSERT(max_nob > 0);
        va_start(ap, format);
        needed = vsnprintf(debug_buf, max_nob, format, ap);
        va_end(ap);

        /* vsnprintf() returns the untruncated length and truncates whenever
         * that length is >= max_nob (it needs room for the trailing NUL) */
        if (needed >= max_nob) /* overflow.  oh poop. */
                goto retry;

        header.ph_len = known_size + needed;
        debug_buf = cfs_page_address(tage->page) + tage->used;

        memcpy(debug_buf, &header, sizeof(header));
        tage->used += sizeof(header);
        debug_buf += sizeof(header);

        strcpy(debug_buf, file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        strcpy(debug_buf, fn);
        tage->used += strlen(fn) + 1;
        debug_buf += strlen(fn) + 1;

        tage->used += needed;
        if (tage->used > CFS_PAGE_SIZE)
                printk(KERN_EMERG
                       "tage->used == %u in portals_debug_msg\n", tage->used);

 out:
        if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
                print_to_console(&header, mask, debug_buf, needed, file, fn);

        trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);

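/*
 * Collecting pages across CPUs: each _on_cpu() helper below runs once
 * locally and once on every other CPU via smp_call_function(), splicing
 * that CPU's page lists into the shared page_collection under pc_lock.
 */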
static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        CFS_INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}

static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *cur_head;
        unsigned long flags;
        struct trace_page *tage;
        struct trace_page *tmp;

        tcd = trace_get_tcd(flags);

        /* remember the current head: pages are re-inserted before it, so
         * returned pages stay older than anything logged since collection */
        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct trace_page *victim;

                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = tage_from_list(tcd->tcd_daemon_pages.next);

                        LASSERT(tage_invariant(victim));

                        list_del(&victim->linkage);
                        tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}

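/*
 * Walk every collected page and replay each record to the console using the
 * layout described above portals_debug_msg().  Intended for emergency paths
 * (e.g. an LBUG dump), so the pages are freed as they are printed.
 */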
void trace_debug_print(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        /* was left uninitialised on the stack; we want the daemon pages too */
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                cfs_page_t *page;

                LASSERT(tage_invariant(tage));

                page = tage->page;
                p = cfs_page_address(page);
                /* only 'used' bytes hold records; anything beyond is stale */
                while (p < ((char *)cfs_page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        len = hdr->ph_len - (p - (char *)hdr);

                        print_to_console(hdr, D_EMERG, p, len, file, fn);

                        /* advance past the message body to the next record */
                        p += len;
                }

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

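/*
 * Synchronously dump everything (daemon pages included) to 'filename'.
 * The file is created exclusively, so an existing dump is never clobbered.
 */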
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        cfs_file_t *filp;
        struct trace_page *tage;
        struct trace_page *tmp;
        CFS_DECL_MMSPACE;
        int rc;

        down_write(&tracefile_sem);

        filp = cfs_filp_open(filename,
                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
        if (!filp) {
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        CFS_MMSPACE_OPEN;
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                    tage->used, cfs_filp_poff(filp));
                if (rc != tage->used) {
                        printk(KERN_WARNING "wanted to write %u but wrote "
                               "%d\n", tage->used, rc);
                        put_pages_back(&pc);
                        break;
                }
                list_del(&tage->linkage);
                tage_free(tage);
        }
        CFS_MMSPACE_CLOSE;
        rc = cfs_filp_fsync(filp);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        cfs_filp_close(filp);
 out:
        up_write(&tracefile_sem);
        return rc;
}

void trace_flush_pages(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        /* was left uninitialised on the stack; flush the daemon pages too */
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

int trace_dk(struct file *file, const char *buffer, unsigned long count,
             void *data)
{
        char *name;
        unsigned long off;
        int rc;

        name = cfs_alloc(count + 1, CFS_ALLOC_STD);
        if (name == NULL)
                return -ENOMEM;

        if (copy_from_user(name, buffer, count)) {
                rc = -EFAULT;
                goto out;
        }

        if (name[0] != '/') {
                rc = -EINVAL;
                goto out;
        }

        /* be nice and strip out trailing '\n' */
        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
                ;

        name[off] = '\0';
        rc = tracefile_dump_all_pages(name);
out:
        cfs_free(name);
        /* report errors to the caller; a successful dump consumes 'count'
         * (the original discarded rc and always returned count) */
        return rc < 0 ? rc : count;
}
EXPORT_SYMBOL(trace_dk);

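/*
 * The tracefiled daemon: wake up roughly once a second (or sooner, when a
 * CPU's buffer starts filling; see trace_get_tage()), collect the fresh
 * pages, append them to 'tracefile' if one is configured (wrapping the
 * write offset at tracefile_size), and retire them to the per-CPU daemon
 * lists for later LBUG dumps.
 */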
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct trace_page *tage;
        struct trace_page *tmp;
        struct ptldebug_header *hdr;
        cfs_file_t *filp;
        CFS_DECL_MMSPACE;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                cfs_waitlink_t __wait;

                cfs_waitlink_init(&__wait);
                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
                cfs_waitq_del(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
                                        0600, &rc);
                        if (!filp)
                                printk(KERN_WARNING "couldn't open %s: %d\n",
                                       tracefile, rc);
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                CFS_MMSPACE_OPEN;

                /* mark the first header, so we can sort in chunks */
                tage = tage_from_list(pc.pc_pages.next);
                LASSERT(tage_invariant(tage));

                hdr = cfs_page_address(tage->page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        /* static: the write offset must survive loop
                         * iterations and daemon wakeups */
                        static loff_t f_pos;

                        LASSERT(tage_invariant(tage));

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > cfs_filp_size(filp))
                                f_pos = cfs_filp_size(filp);

                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                            tage->used, &f_pos);
                        if (rc != tage->used) {
                                printk(KERN_WARNING "wanted to write %u but "
                                       "wrote %d\n", tage->used, rc);
                                put_pages_back(&pc);
                        }
                }
                CFS_MMSPACE_CLOSE;
                cfs_filp_close(filp);

                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}

int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        mutex_down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        cfs_waitq_init(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        mutex_up(&trace_thread_sem);
        return rc;
}

void trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        mutex_down(&trace_thread_sem);
        if (thread_running) {
                printk(KERN_INFO "Shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        mutex_up(&trace_thread_sem);
}

int tracefile_init(void)
{
        struct trace_cpu_data *tcd;
        int i;

        for (i = 0; i < NR_CPUS; i++) {
                tcd = &trace_data[i].tcd;
                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = TCD_MAX_PAGES;
                tcd->tcd_shutting_down = 0;
        }
        return 0;
}

static void trace_cleanup_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        tcd->tcd_shutting_down = 1;

        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                LASSERT(tage_invariant(tage));

                list_del(&tage->linkage);
                tage_free(tage);
        }
        tcd->tcd_cur_pages = 0;

        trace_put_tcd(tcd, flags);
}

static void trace_cleanup(void)
{
        struct page_collection pc;

        CFS_INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}

void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}
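
/*
 * A minimal sketch of the expected lifecycle, pieced together from the entry
 * points above.  The real module init/exit glue lives elsewhere, so treat the
 * ordering and the example path as assumptions, not the canonical sequence.
 */
#if 0 /* illustration only */
static int __init trace_example_init(void)
{
        int rc;

        rc = tracefile_init();          /* per-CPU page lists must exist first */
        if (rc)
                return rc;

        rc = trace_start_thread();      /* launch ktracefiled */
        if (rc) {
                tracefile_exit();
                return rc;
        }

        down_write(&tracefile_sem);     /* 'tracefile' is guarded by this */
        tracefile = "/tmp/lustre-log";  /* hypothetical output path */
        up_write(&tracefile_sem);
        return 0;
}

static void __exit trace_example_exit(void)
{
        tracefile_exit();               /* stops the thread, frees all pages */
}
#endif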