lnet/libcfs/tracefile.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Zach Brown <zab@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#define DEBUG_SUBSYSTEM S_PORTALS
#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"

#include <libcfs/kp30.h>
#include <libcfs/libcfs.h>

/* XXX move things up to the top, comment */
union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;

struct rw_semaphore tracefile_sem;
char *tracefile = NULL;
long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
struct semaphore trace_thread_sem;
static int thread_running = 0;

static void put_pages_on_daemon_list_on_cpu(void *info);

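/* recover the struct trace_page that embeds this list linkage */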
static inline struct trace_page *tage_from_list(struct list_head *list)
{
        return list_entry(list, struct trace_page, linkage);
}

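/* allocate a page and the trace_page that wraps it; returns NULL (after
 * releasing the page) if either allocation fails */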
static struct trace_page *tage_alloc(int gfp)
{
        cfs_page_t        *page;
        struct trace_page *tage;

        page = cfs_alloc_page(gfp);
        if (page == NULL)
                return NULL;

        tage = cfs_alloc(sizeof(*tage), gfp);
        if (tage == NULL) {
                cfs_free_page(page);
                return NULL;
        }

        tage->page = page;
        return tage;
}

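/* free a trace_page and the page it wraps */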
static void tage_free(struct trace_page *tage)
{
        LASSERT(tage != NULL);
        LASSERT(tage->page != NULL);

        cfs_free_page(tage->page);
        cfs_free(tage);
}

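/* move a trace_page from whatever list it is on to the tail of @queue */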
static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
{
        LASSERT(tage != NULL);
        LASSERT(queue != NULL);

        list_move_tail(&tage->linkage, queue);
}

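/* invariants that hold for every live trace_page: it wraps a page, no more
 * than a full page has been consumed, and the page is still referenced */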
static void LASSERT_TAGE_INVARIANT(struct trace_page *tage)
{
        LASSERT(tage != NULL);
        LASSERT(tage->page != NULL);
        LASSERTF(tage->used <= CFS_PAGE_SIZE, "used = %u, PAGE_SIZE %lu\n",
                 tage->used, CFS_PAGE_SIZE);
        LASSERTF(cfs_page_count(tage->page) > 0, "count = %d\n",
                 cfs_page_count(tage->page));
}

/* return a page that has 'len' bytes left at the end */
static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
                                         unsigned long len)
{
        struct trace_page *tage;

        if (len > CFS_PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        if (!list_empty(&tcd->tcd_pages)) {
                tage = tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= CFS_PAGE_SIZE)
                        return tage;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                tage = tage_alloc(CFS_ALLOC_ATOMIC);
                if (tage == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        cfs_waitq_signal(&tctl->tctl_waitq);
                }
                return tage;
        }

 ring_buffer:
        if (thread_running) {
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct trace_page *tage;
                struct trace_page *tmp;

                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                CFS_INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                        if (pgcount-- == 0)
                                break;

                        list_move_tail(&tage->linkage, &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);

                LASSERT(!list_empty(&tcd->tcd_pages));
        }

        if (list_empty(&tcd->tcd_pages))
                return NULL;

        tage = tage_from_list(tcd->tcd_pages.next);
        tage->used = 0;
        tage_to_tail(tage, &tcd->tcd_pages);

        return tage;
}

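/* Format one debug message into a record on the current CPU's trace page.
 * Each record is laid out as
 *
 *   [ptldebug_header][file\0][fn\0][formatted message]
 *
 * and header.ph_len covers the whole record.  If vsnprintf() reports that
 * the text was truncated, we retry with the now-known length so that
 * trace_get_tage() hands back a page with enough room.  Records flagged
 * D_EMERG/D_ERROR/D_WARNING/D_CONSOLE (or everything, when portal_printk
 * is set) are echoed to the console as well. */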
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
                       const int line, unsigned long stack, char *format, ...)
{
        struct trace_cpu_data *tcd;
        struct ptldebug_header header;
        struct trace_page *tage;
        char *debug_buf = format;
        int known_size, needed = 85 /* average message length */, max_nob;
        va_list       ap;
        unsigned long flags;

#ifdef CRAY_PORTALS
        if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
                return;
#endif
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;

        if (*(format + strlen(format) - 1) != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        tcd = trace_get_tcd(flags);
        if (tcd->tcd_shutting_down)
                goto out;

        set_ptldebug_header(&header, subsys, mask, line, stack);
        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; /* nulls */

 retry:
        tage = trace_get_tage(tcd, needed + known_size);
        if (tage == NULL) {
                debug_buf = format;
                if (needed + known_size > CFS_PAGE_SIZE)
                        mask |= D_ERROR;
                needed = strlen(format);
                goto out;
        }

        debug_buf = cfs_page_address(tage->page) + tage->used + known_size;

        max_nob = CFS_PAGE_SIZE - tage->used - known_size;
        LASSERT(max_nob > 0);
        va_start(ap, format);
        needed = vsnprintf(debug_buf, max_nob, format, ap);
        va_end(ap);

        if (needed >= max_nob) /* vsnprintf() truncated; retry now that we
                                * know how much room we really need */
                goto retry;

        header.ph_len = known_size + needed;
        debug_buf = cfs_page_address(tage->page) + tage->used;

        memcpy(debug_buf, &header, sizeof(header));
        tage->used += sizeof(header);
        debug_buf += sizeof(header);

        strcpy(debug_buf, file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        strcpy(debug_buf, fn);
        tage->used += strlen(fn) + 1;
        debug_buf += strlen(fn) + 1;

        tage->used += needed;
        if (tage->used > CFS_PAGE_SIZE)
                printk(KERN_EMERG
                       "tage->used == %u in portals_debug_msg\n", tage->used);

 out:
        if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
                print_to_console(&header, mask, debug_buf, needed, file, fn);

        trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);

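/* splice this CPU's trace pages (and, when the collection asks for them,
 * its daemon pages) onto pc->pc_pages, leaving the per-cpu lists empty */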
static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_splice(&tcd->tcd_pages, &pc->pc_pages);
        CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

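/* gather trace pages from every CPU: collect locally, then have all the
 * other CPUs run collect_pages_on_cpu() via smp_call_function() */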
static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        CFS_INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}

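/* return the collected pages that belong to this CPU to the front of its
 * tcd_pages list, ahead of anything written since they were collected */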
static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *cur_head;
        unsigned long flags;
        struct trace_page *tage;
        struct trace_page *tmp;

        tcd = trace_get_tcd(flags);

        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT_TAGE_INVARIANT(tage);

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

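/* undo collect_pages() after a failed write: every page goes back to the
 * CPU it came from */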
static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT_TAGE_INVARIANT(tage);

                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct trace_page *victim;

                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = tage_from_list(tcd->tcd_daemon_pages.next);

                        LASSERT_TAGE_INVARIANT(victim);

                        list_del(&victim->linkage);
                        tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}

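/* decode every collected record and print it straight to the console;
 * pages are freed as they are printed.  Used when the trace buffers must
 * be seen immediately, e.g. while handling an LBUG. */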
void trace_debug_print(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                cfs_page_t *page;

                LASSERT_TAGE_INVARIANT(tage);

                page = tage->page;
                p = cfs_page_address(page);
                /* only tage->used bytes of the page hold valid records */
                while (p < ((char *)cfs_page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        len = hdr->ph_len - (p - (char *)hdr);

                        print_to_console(hdr, D_EMERG, p, len, file, fn);

                        /* step past the message text to the next record */
                        p += len;
                }

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

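/* write every collected page, daemon pages included, to @filename (which
 * must not already exist).  On a short write the remaining pages are
 * handed back to the CPUs they came from. */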
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        cfs_file_t *filp;
        struct trace_page *tage;
        struct trace_page *tmp;
        CFS_DECL_MMSPACE;
        int rc;

        down_write(&tracefile_sem);

        filp = cfs_filp_open(filename,
                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0666, &rc);
        if (!filp) {
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        CFS_MMSPACE_OPEN;
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT_TAGE_INVARIANT(tage);

                rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                    tage->used, cfs_filp_poff(filp));
                if (rc != tage->used) {
                        printk(KERN_WARNING "wanted to write %u but wrote "
                               "%d\n", tage->used, rc);
                        put_pages_back(&pc);
                        break;
                }
                list_del(&tage->linkage);
                tage_free(tage);
        }
        CFS_MMSPACE_CLOSE;
        rc = cfs_filp_fsync(filp);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        cfs_filp_close(filp);
 out:
        up_write(&tracefile_sem);
        return rc;
}

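/* collect and free every trace page, daemon pages included, without
 * writing any of them */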
void trace_flush_pages(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                LASSERT_TAGE_INVARIANT(tage);

                list_del(&tage->linkage);
                tage_free(tage);
        }
}

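/* proc write handler: the user buffer holds an absolute path (it must
 * start with '/'); strip trailing whitespace and dump all trace pages
 * there via tracefile_dump_all_pages() */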
int trace_dk(struct file *file, const char *buffer, unsigned long count,
             void *data)
{
        char *name;
        unsigned long off;
        int rc;

        name = cfs_alloc(count + 1, CFS_ALLOC_STD);
        if (name == NULL)
                return -ENOMEM;

        if (copy_from_user(name, buffer, count)) {
                rc = -EFAULT;
                goto out;
        }

        if (name[0] != '/') {
                rc = -EINVAL;
                goto out;
        }

        /* be nice and strip out trailing '\n' */
        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
                ;

        name[off] = '\0';
        rc = tracefile_dump_all_pages(name);
out:
        cfs_free(name);
        /* propagate errors; on success report the whole buffer consumed */
        return rc < 0 ? rc : count;
}
EXPORT_SYMBOL(trace_dk);

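/* the debug daemon: wake up about once a second (or sooner, when
 * trace_get_tage() signals that pages are piling up), collect the freshly
 * filled pages and append them to the tracefile if one is configured,
 * otherwise park them on the per-cpu daemon lists.  The static f_pos
 * below makes the tracefile a ring: the write offset survives between
 * batches and wraps to zero once it passes tracefile_size. */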
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct trace_page *tage;
        struct trace_page *tmp;
        struct ptldebug_header *hdr;
        cfs_file_t *filp;
        CFS_DECL_MMSPACE;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                cfs_waitlink_t __wait;

                cfs_waitlink_init(&__wait);
                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
                cfs_waitq_del(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = cfs_filp_open(tracefile,
                                             O_CREAT|O_RDWR|O_LARGEFILE,
                                             0600, &rc);
                        if (!filp)
                                printk(KERN_ERR "couldn't open %s: %d\n",
                                       tracefile, rc);
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                CFS_MMSPACE_OPEN;

                /* mark the first header, so we can sort in chunks */
                tage = tage_from_list(pc.pc_pages.next);
                LASSERT_TAGE_INVARIANT(tage);

                hdr = cfs_page_address(tage->page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        static loff_t f_pos;

                        LASSERT_TAGE_INVARIANT(tage);

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > cfs_filp_size(filp))
                                f_pos = cfs_filp_size(filp);

                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                            tage->used, &f_pos);
                        if (rc != tage->used) {
                                printk(KERN_WARNING "wanted to write %u but "
                                       "wrote %d\n", tage->used, rc);
                                put_pages_back(&pc);
                        }
                }
                CFS_MMSPACE_CLOSE;

                cfs_filp_close(filp);
                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}

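/* start the tracefiled daemon; a second call while it is already running
 * is a no-op */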
int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        mutex_down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        cfs_waitq_init(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        mutex_up(&trace_thread_sem);
        return rc;
}

void trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        mutex_down(&trace_thread_sem);
        if (thread_running) {
                printk(KERN_INFO "Shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        mutex_up(&trace_thread_sem);
}

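/* set up the per-cpu trace data for every possible CPU */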
int tracefile_init(void)
{
        struct trace_cpu_data *tcd;
        int i;

        for (i = 0; i < NR_CPUS; i++) {
                tcd = &trace_data[i].tcd;
                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = TCD_MAX_PAGES;
                tcd->tcd_shutting_down = 0;
        }
        return 0;
}

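/* flag this CPU's trace data as shutting down and free all of its pages */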
static void trace_cleanup_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        tcd->tcd_shutting_down = 1;

        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                LASSERT_TAGE_INVARIANT(tage);

                list_del(&tage->linkage);
                tage_free(tage);
        }
        tcd->tcd_cur_pages = 0;

        trace_put_tcd(tcd, flags);
}

static void trace_cleanup(void)
{
        struct page_collection pc;

        CFS_INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}

void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}