Whamcloud - gitweb
- ptlrpc_ping_interpret is needless:
[fs/lustre-release.git] / lnet / libcfs / tracefile.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Zach Brown <zab@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24
25 #define DEBUG_SUBSYSTEM S_PORTALS
26 #define LUSTRE_TRACEFILE_PRIVATE
27 #include "tracefile.h"
28
29 #include <libcfs/kp30.h>
30 #include <libcfs/libcfs.h>
31
/* XXX move things up to the top, comment */
/* per-CPU trace buffers; cacheline-aligned to avoid false sharing */
union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;

/* protects 'tracefile'/'tracefile_size' and serializes dumps vs. the daemon */
struct rw_semaphore tracefile_sem;
char *tracefile = NULL;                    /* daemon output path; NULL = disabled */
long long tracefile_size = TRACEFILE_SIZE; /* wrap threshold for the output file */
static struct tracefiled_ctl trace_tctl;   /* control block for the tracefiled daemon */
struct semaphore trace_thread_sem;         /* serializes daemon start/stop */
static int thread_running = 0;             /* daemon state; guarded by trace_thread_sem */

static void put_pages_on_daemon_list_on_cpu(void *info);
43
/* container_of-style accessor: map a 'linkage' list node back to its
 * enclosing trace_page */
static inline struct trace_page *tage_from_list(struct list_head *list)
{
        return list_entry(list, struct trace_page, linkage);
}
48
49 static struct trace_page *tage_alloc(int gfp)
50 {
51         cfs_page_t        *page;
52         struct trace_page *tage;
53
54         page = cfs_alloc_page(gfp);
55         if (page == NULL)
56                 return NULL;
57         
58         tage = cfs_alloc(sizeof(*tage), gfp);
59         if (tage == NULL) {
60                 cfs_free_page(page);
61                 return NULL;
62         }
63         
64         tage->page = page;
65         return tage;
66 }
67
/* release a trace page descriptor and the data page it owns */
static void tage_free(struct trace_page *tage)
{
        LASSERT(tage != NULL);
        LASSERT(tage->page != NULL);

        cfs_free_page(tage->page);
        cfs_free(tage);
}
76
77 static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
78 {
79         LASSERT(tage != NULL);
80         LASSERT(queue != NULL);
81
82         list_move_tail(&tage->linkage, queue);
83 }
84
85 static int tage_invariant(struct trace_page *tage)
86 {
87         return (tage != NULL &&
88                 tage->page != NULL &&
89                 tage->used <= CFS_PAGE_SIZE &&
90                 cfs_page_count(tage->page) > 0);
91 }
92
/* return a page that has 'len' bytes left at the end */
static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
                                         unsigned long len)
{
        struct trace_page *tage;

        /* a single message can never span pages */
        if (len > CFS_PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        /* fast path: the newest page still has room */
        if (!list_empty(&tcd->tcd_pages)) {
                tage = tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= CFS_PAGE_SIZE)
                        return tage;
        }

        /* grow the buffer while we are under the per-CPU page budget */
        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                tage = tage_alloc(CFS_ALLOC_ATOMIC);
                if (tage == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                /* poke the daemon once a few pages have accumulated */
                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        cfs_waitq_signal(&tctl->tctl_waitq);
                }
                return tage;
        }

 ring_buffer:
        if (thread_running) {
                /* buffer full and a daemon is running: push the oldest ~10%
                 * of pages onto this CPU's daemon ring instead of recycling */
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                /* NOTE: shadows the outer 'tage' within this block */
                struct trace_page *tage;
                struct trace_page *tmp;

                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                CFS_INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                        if (pgcount-- == 0)
                                break;

                        list_move_tail(&tage->linkage, &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);

                LASSERT(!list_empty(&tcd->tcd_pages));
        }

        if (list_empty(&tcd->tcd_pages))
                return NULL;

        /* recycle the oldest page in place: reset its fill level and move it
         * to the tail so the caller's write lands on it */
        tage = tage_from_list(tcd->tcd_pages.next);
        tage->used = 0;
        tage_to_tail(tage, &tcd->tcd_pages);

        return tage;
}
165
166 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
167                        const int line, unsigned long stack, char *format, ...)
168 {
169         struct trace_cpu_data *tcd;
170         struct ptldebug_header header;
171         struct trace_page *tage;
172         char *debug_buf = format;
173         int known_size, needed = 85 /* average message length */, max_nob;
174         va_list       ap;
175         unsigned long flags;
176
177 #ifdef CRAY_PORTALS
178         if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
179                 return;
180 #endif
181         if (strchr(file, '/'))
182                 file = strrchr(file, '/') + 1;
183
184         if (*(format + strlen(format) - 1) != '\n')
185                 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
186                        file, line, fn);
187
188         tcd = trace_get_tcd(flags);
189         if (tcd->tcd_shutting_down)
190                 goto out;
191
192         set_ptldebug_header(&header, subsys, mask, line, stack);
193         known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
194
195  retry:
196         tage = trace_get_tage(tcd, needed + known_size);
197         if (tage == NULL) {
198                 debug_buf = format;
199                 if (needed + known_size > CFS_PAGE_SIZE)
200                         mask |= D_ERROR;
201                 needed = strlen(format);
202                 goto out;
203         }
204
205         debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
206
207         max_nob = CFS_PAGE_SIZE - tage->used - known_size;
208         LASSERT(max_nob > 0);
209         va_start(ap, format);
210         needed = vsnprintf(debug_buf, max_nob, format, ap);
211         va_end(ap);
212
213         if (needed > max_nob) /* overflow.  oh poop. */
214                 goto retry;
215
216         header.ph_len = known_size + needed;
217         debug_buf = cfs_page_address(tage->page) + tage->used;
218
219         memcpy(debug_buf, &header, sizeof(header));
220         tage->used += sizeof(header);
221         debug_buf += sizeof(header);
222
223         strcpy(debug_buf, file);
224         tage->used += strlen(file) + 1;
225         debug_buf += strlen(file) + 1;
226
227         strcpy(debug_buf, fn);
228         tage->used += strlen(fn) + 1;
229         debug_buf += strlen(fn) + 1;
230
231         tage->used += needed;
232         if (tage->used > CFS_PAGE_SIZE)
233                 printk(KERN_EMERG
234                        "tage->used == %u in portals_debug_msg\n", tage->used);
235
236  out:
237         if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
238                 print_to_console(&header, mask, debug_buf, needed, file, fn);
239
240         trace_put_tcd(tcd, flags);
241 }
242 EXPORT_SYMBOL(portals_debug_msg);
243
244 static void collect_pages_on_cpu(void *info)
245 {
246         struct trace_cpu_data *tcd;
247         unsigned long flags;
248         struct page_collection *pc = info;
249
250         tcd = trace_get_tcd(flags);
251
252         spin_lock(&pc->pc_lock);
253         list_splice(&tcd->tcd_pages, &pc->pc_pages);
254         CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
255         tcd->tcd_cur_pages = 0;
256         if (pc->pc_want_daemon_pages) {
257                 list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
258                 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
259                 tcd->tcd_cur_daemon_pages = 0;
260         }
261         spin_unlock(&pc->pc_lock);
262
263         trace_put_tcd(tcd, flags);
264 }
265
/* gather every CPU's trace pages onto pc->pc_pages: run the collector
 * locally first, then via IPI on all other CPUs */
static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        CFS_INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}
273
/* IPI handler: return this CPU's pages from the collection back to the
 * front of its tcd_pages list, preserving their original order */
static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *cur_head;
        unsigned long flags;
        struct trace_page *tage;
        struct trace_page *tmp;

        tcd = trace_get_tcd(flags);

        /* snapshot the current first entry: tail-inserting each returned
         * page before it keeps the returned pages ahead of anything logged
         * since they were collected */
        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                /* each CPU reclaims only the pages it originally owned */
                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
302
/* return collected-but-unwritten pages to their owning CPUs' buffers */
static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}
309
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct trace_page *tage;
        struct trace_page *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                LASSERT(tage_invariant(tage));

                /* each CPU takes back only its own pages */
                if (tage->cpu != smp_processor_id())
                        continue;

                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                /* ring over budget: evict the oldest daemon page */
                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct trace_page *victim;

                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = tage_from_list(tcd->tcd_daemon_pages.next);

                        LASSERT(tage_invariant(victim));

                        list_del(&victim->linkage);
                        tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
352
/* distribute collected pages onto every CPU's daemon ring: locally first,
 * then via IPI on the other CPUs */
static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}
358
359 void trace_debug_print(void)
360 {
361         struct page_collection pc;
362         struct trace_page *tage;
363         struct trace_page *tmp;
364
365         spin_lock_init(&pc.pc_lock);
366
367         pc.pc_want_daemon_pages = 1;
368         collect_pages(&pc);
369         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
370                 char *p, *file, *fn;
371                 cfs_page_t *page;
372
373                 LASSERT(tage_invariant(tage));
374
375                 page = tage->page;
376                 p = cfs_page_address(page);
377                 while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) {
378                         struct ptldebug_header *hdr;
379                         int len;
380                         hdr = (void *)p;
381                         p += sizeof(*hdr);
382                         file = p;
383                         p += strlen(file) + 1;
384                         fn = p;
385                         p += strlen(fn) + 1;
386                         len = hdr->ph_len - (p - (char *)hdr);
387
388                         print_to_console(hdr, D_EMERG, p, len, file, fn);
389                 }
390
391                 list_del(&tage->linkage);
392                 tage_free(tage);
393         }
394 }
395
396 int tracefile_dump_all_pages(char *filename)
397 {
398         struct page_collection pc;
399         cfs_file_t *filp;
400         struct trace_page *tage;
401         struct trace_page *tmp;
402         CFS_DECL_MMSPACE;
403         int rc;
404
405         down_write(&tracefile_sem);
406
407         filp = cfs_filp_open(filename,
408                              O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
409         if (!filp) {
410                 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
411                        filename, rc);
412                 goto out;
413         }
414
415         spin_lock_init(&pc.pc_lock);
416         pc.pc_want_daemon_pages = 1;
417         collect_pages(&pc);
418         if (list_empty(&pc.pc_pages)) {
419                 rc = 0;
420                 goto close;
421         }
422
423         /* ok, for now, just write the pages.  in the future we'll be building
424          * iobufs with the pages and calling generic_direct_IO */
425         CFS_MMSPACE_OPEN;
426         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
427
428                 LASSERT(tage_invariant(tage));
429
430                 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
431                                     tage->used, cfs_filp_poff(filp));
432                 if (rc != tage->used) {
433                         printk(KERN_WARNING "wanted to write %u but wrote "
434                                "%d\n", tage->used, rc);
435                         put_pages_back(&pc);
436                         break;
437                 }
438                 list_del(&tage->linkage);
439                 tage_free(tage);
440         }
441         CFS_MMSPACE_CLOSE;
442         rc = cfs_filp_fsync(filp);
443         if (rc)
444                 printk(KERN_ERR "sync returns %d\n", rc);
445  close:
446         cfs_filp_close(filp);
447  out:
448         up_write(&tracefile_sem);
449         return rc;
450 }
451
452 void trace_flush_pages(void)
453 {
454         struct page_collection pc;
455         struct trace_page *tage;
456         struct trace_page *tmp;
457
458         spin_lock_init(&pc.pc_lock);
459
460         pc.pc_want_daemon_pages = 1;
461         collect_pages(&pc);
462         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
463
464                 LASSERT(tage_invariant(tage));
465
466                 list_del(&tage->linkage);
467                 tage_free(tage);
468         }
469 }
470
471 int trace_dk(struct file *file, const char *buffer, unsigned long count,
472              void *data)
473 {
474         char *name;
475         unsigned long off;
476         int rc;
477
478         name = cfs_alloc(count + 1, CFS_ALLOC_STD);
479         if (name == NULL)
480                 return -ENOMEM;
481
482         if (copy_from_user(name, buffer, count)) {
483                 rc = -EFAULT;
484                 goto out;
485         }
486
487         if (name[0] != '/') {
488                 rc = -EINVAL;
489                 goto out;
490         }
491
492         /* be nice and strip out trailing '\n' */
493         for (off = count ; off > 2 && isspace(name[off - 1]); off--)
494                 ;
495
496         name[off] = '\0';
497         rc = tracefile_dump_all_pages(name);
498 out:
499         if (name)
500                 cfs_free(name);
501         return count;
502 }
503 EXPORT_SYMBOL(trace_dk);
504
/* the trace daemon: wakes up once a second (or when trace_get_tage()
 * signals), collects the live trace pages and appends them to 'tracefile',
 * wrapping the file position at tracefile_size.  Handled pages are recycled
 * onto the per-CPU daemon rings so an LBUG can still dump recent history. */
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct trace_page *tage;
        struct trace_page *tmp;
        struct ptldebug_header *hdr;
        cfs_file_t *filp;
        CFS_DECL_MMSPACE;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                cfs_waitlink_t __wait;

                /* sleep up to one second, or until we are signalled */
                cfs_waitlink_init(&__wait);
                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
                cfs_waitq_del(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                /* only grab live pages; leave the daemon rings alone */
                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
                                        0600, &rc);
                        if (!(filp))
                                printk("couldn't open %s: %d\n", tracefile, rc);
                }
                up_read(&tracefile_sem);
                /* no output file configured (or open failed): keep the pages
                 * on the daemon rings for a possible LBUG dump */
                if (filp == NULL) {
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                CFS_MMSPACE_OPEN;

                /* mark the first header, so we can sort in chunks */
                tage = tage_from_list(pc.pc_pages.next);
                LASSERT(tage_invariant(tage));

                hdr = cfs_page_address(tage->page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        /* static: the file position persists across loop
                         * iterations AND across daemon wakeups, wrapping
                         * to 0 once it reaches tracefile_size */
                        static loff_t f_pos;

                        LASSERT(tage_invariant(tage));

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > cfs_filp_size(filp))
                                f_pos = cfs_filp_size(filp);

                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                            tage->used, &f_pos);
                        if (rc != tage->used) {
                                printk(KERN_WARNING "wanted to write %u but "
                                       "wrote %d\n", tage->used, rc);
                                put_pages_back(&pc);
                        }
                }
                CFS_MMSPACE_CLOSE;

                cfs_filp_close(filp);
                /* recycle written pages onto the per-CPU daemon rings */
                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}
590
591 int trace_start_thread(void)
592 {
593         struct tracefiled_ctl *tctl = &trace_tctl;
594         int rc = 0;
595
596         mutex_down(&trace_thread_sem);
597         if (thread_running)
598                 goto out;
599
600         init_completion(&tctl->tctl_start);
601         init_completion(&tctl->tctl_stop);
602         cfs_waitq_init(&tctl->tctl_waitq);
603         atomic_set(&tctl->tctl_shutdown, 0);
604
605         if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
606                 rc = -ECHILD;
607                 goto out;
608         }
609
610         wait_for_completion(&tctl->tctl_start);
611         thread_running = 1;
612 out:
613         mutex_up(&trace_thread_sem);
614         return rc;
615 }
616
617 void trace_stop_thread(void)
618 {
619         struct tracefiled_ctl *tctl = &trace_tctl;
620
621         mutex_down(&trace_thread_sem);
622         if (thread_running) {
623                 printk(KERN_INFO "Shutting down debug daemon thread...\n");
624                 atomic_set(&tctl->tctl_shutdown, 1);
625                 wait_for_completion(&tctl->tctl_stop);
626                 thread_running = 0;
627         }
628         mutex_up(&trace_thread_sem);
629 }
630
631 int tracefile_init(void)
632 {
633         struct trace_cpu_data *tcd;
634         int i;
635
636         for (i = 0; i < NR_CPUS; i++) {
637                 tcd = &trace_data[i].tcd;
638                 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
639                 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
640                 tcd->tcd_cur_pages = 0;
641                 tcd->tcd_cur_daemon_pages = 0;
642                 tcd->tcd_max_pages = TCD_MAX_PAGES;
643                 tcd->tcd_shutting_down = 0;
644         }
645         return 0;
646 }
647
648 static void trace_cleanup_on_cpu(void *info)
649 {
650         struct trace_cpu_data *tcd;
651         struct trace_page *tage;
652         struct trace_page *tmp;
653         unsigned long flags;
654
655         tcd = trace_get_tcd(flags);
656
657         tcd->tcd_shutting_down = 1;
658
659         list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
660                 LASSERT(tage_invariant(tage));
661
662                 list_del(&tage->linkage);
663                 tage_free(tage);
664         }
665         tcd->tcd_cur_pages = 0;
666
667         trace_put_tcd(tcd, flags);
668 }
669
670 static void trace_cleanup(void)
671 {
672         struct page_collection pc;
673
674         CFS_INIT_LIST_HEAD(&pc.pc_pages);
675         spin_lock_init(&pc.pc_lock);
676
677         trace_cleanup_on_cpu(&pc);
678         smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
679 }
680
/* module unload: stop the daemon thread first, then free all trace pages */
void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}