Whamcloud - gitweb
LU-1347 build: remove the vim/emacs modelines
[fs/lustre-release.git] / libcfs / libcfs / tracefile.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * libcfs/libcfs/tracefile.c
35  *
36  * Author: Zach Brown <zab@clusterfs.com>
37  * Author: Phil Schwan <phil@clusterfs.com>
38  */
39
40
41 #define DEBUG_SUBSYSTEM S_LNET
42 #define LUSTRE_TRACEFILE_PRIVATE
43 #include "tracefile.h"
44
45 #include <libcfs/libcfs.h>
46
47 /* XXX move things up to the top, comment */
/*
 * Table of TCD_MAX_TYPES pointers, each pointing at an array of CFS_NR_CPUS
 * trace-data unions (one slot per CPU for every trace context type).
 */
48 union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[CFS_NR_CPUS] __cacheline_aligned;
49
/*
 * Path of the file the debug daemon writes to.  Filled in by
 * cfs_trace_daemon_command() and zeroed again by its "stop" command.
 */
50 char cfs_tracefile[TRACEFILE_NAME_SIZE];
/*
 * Size limit for the trace file, in bytes: the "size=" daemon command takes
 * megabytes and shifts the value left by 20 before storing it here.
 */
51 long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
/* Control block of the tracefiled thread; its tctl_waitq is signalled to wake it. */
52 static struct tracefiled_ctl trace_tctl;
/* NOTE(review): not used in this part of the file - presumably serialises thread start/stop; confirm. */
53 cfs_mutex_t cfs_trace_thread_mutex;
/*
 * Tested before waking tracefiled and before shrinking a full buffer.
 * NOTE(review): presumably set while the tracefiled thread runs; the code
 * that changes it is not in this part of the file.
 */
54 static int thread_running = 0;
55
/* Number of trace pages currently allocated: +1 in cfs_tage_alloc(), -1 in cfs_tage_free(). */
56 cfs_atomic_t cfs_tage_allocated = CFS_ATOMIC_INIT(0);
57
/* Forward declaration: cfs_tcd_shrink() calls this before its definition appears. */
58 static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
59                                          struct cfs_trace_cpu_data *tcd);
60
61 static inline struct cfs_trace_page *
62 cfs_tage_from_list(cfs_list_t *list)
63 {
64         return cfs_list_entry(list, struct cfs_trace_page, linkage);
65 }
66
67 static struct cfs_trace_page *cfs_tage_alloc(int gfp)
68 {
69         cfs_page_t            *page;
70         struct cfs_trace_page *tage;
71
72         /* My caller is trying to free memory */
73         if (!cfs_in_interrupt() && cfs_memory_pressure_get())
74                 return NULL;
75
76         /*
77          * Don't spam console with allocation failures: they will be reported
78          * by upper layer anyway.
79          */
80         gfp |= CFS_ALLOC_NOWARN;
81         page = cfs_alloc_page(gfp);
82         if (page == NULL)
83                 return NULL;
84
85         tage = cfs_alloc(sizeof(*tage), gfp);
86         if (tage == NULL) {
87                 cfs_free_page(page);
88                 return NULL;
89         }
90
91         tage->page = page;
92         cfs_atomic_inc(&cfs_tage_allocated);
93         return tage;
94 }
95
96 static void cfs_tage_free(struct cfs_trace_page *tage)
97 {
98         __LASSERT(tage != NULL);
99         __LASSERT(tage->page != NULL);
100
101         cfs_free_page(tage->page);
102         cfs_free(tage);
103         cfs_atomic_dec(&cfs_tage_allocated);
104 }
105
106 static void cfs_tage_to_tail(struct cfs_trace_page *tage,
107                              cfs_list_t *queue)
108 {
109         __LASSERT(tage != NULL);
110         __LASSERT(queue != NULL);
111
112         cfs_list_move_tail(&tage->linkage, queue);
113 }
114
115 int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, int gfp,
116                            cfs_list_t *stock)
117 {
118         int i;
119
120         /*
121          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
122          * from here: this will lead to infinite recursion.
123          */
124
125         for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
126                 struct cfs_trace_page *tage;
127
128                 tage = cfs_tage_alloc(gfp);
129                 if (tage == NULL)
130                         break;
131                 cfs_list_add_tail(&tage->linkage, stock);
132         }
133         return i;
134 }
135
136 /* return a page that has 'len' bytes left at the end */
137 static struct cfs_trace_page *
138 cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
139 {
140         struct cfs_trace_page *tage;
141
142         if (tcd->tcd_cur_pages > 0) {
143                 __LASSERT(!cfs_list_empty(&tcd->tcd_pages));
144                 tage = cfs_tage_from_list(tcd->tcd_pages.prev);
145                 if (tage->used + len <= CFS_PAGE_SIZE)
146                         return tage;
147         }
148
149         if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
150                 if (tcd->tcd_cur_stock_pages > 0) {
151                         tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
152                         -- tcd->tcd_cur_stock_pages;
153                         cfs_list_del_init(&tage->linkage);
154                 } else {
155                         tage = cfs_tage_alloc(CFS_ALLOC_ATOMIC);
156                         if (tage == NULL) {
157                                 if (printk_ratelimit())
158                                         printk(CFS_KERN_WARNING
159                                                "cannot allocate a tage (%ld)\n",
160                                        tcd->tcd_cur_pages);
161                                 return NULL;
162                         }
163                 }
164
165                 tage->used = 0;
166                 tage->cpu = cfs_smp_processor_id();
167                 tage->type = tcd->tcd_type;
168                 cfs_list_add_tail(&tage->linkage, &tcd->tcd_pages);
169                 tcd->tcd_cur_pages++;
170
171                 if (tcd->tcd_cur_pages > 8 && thread_running) {
172                         struct tracefiled_ctl *tctl = &trace_tctl;
173                         /*
174                          * wake up tracefiled to process some pages.
175                          */
176                         cfs_waitq_signal(&tctl->tctl_waitq);
177                 }
178                 return tage;
179         }
180         return NULL;
181 }
182
183 static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
184 {
185         int pgcount = tcd->tcd_cur_pages / 10;
186         struct page_collection pc;
187         struct cfs_trace_page *tage;
188         struct cfs_trace_page *tmp;
189
190         /*
191          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
192          * from here: this will lead to infinite recursion.
193          */
194
195         if (printk_ratelimit())
196                 printk(CFS_KERN_WARNING "debug daemon buffer overflowed; "
197                        "discarding 10%% of pages (%d of %ld)\n",
198                        pgcount + 1, tcd->tcd_cur_pages);
199
200         CFS_INIT_LIST_HEAD(&pc.pc_pages);
201         cfs_spin_lock_init(&pc.pc_lock);
202
203         cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages,
204                                            struct cfs_trace_page, linkage) {
205                 if (pgcount-- == 0)
206                         break;
207
208                 cfs_list_move_tail(&tage->linkage, &pc.pc_pages);
209                 tcd->tcd_cur_pages--;
210         }
211         put_pages_on_tcd_daemon_list(&pc, tcd);
212 }
213
214 /* return a page that has 'len' bytes left at the end */
215 static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
216                                                  unsigned long len)
217 {
218         struct cfs_trace_page *tage;
219
220         /*
221          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
222          * from here: this will lead to infinite recursion.
223          */
224
225         if (len > CFS_PAGE_SIZE) {
226                 printk(CFS_KERN_ERR
227                        "cowardly refusing to write %lu bytes in a page\n", len);
228                 return NULL;
229         }
230
231         tage = cfs_trace_get_tage_try(tcd, len);
232         if (tage != NULL)
233                 return tage;
234         if (thread_running)
235                 cfs_tcd_shrink(tcd);
236         if (tcd->tcd_cur_pages > 0) {
237                 tage = cfs_tage_from_list(tcd->tcd_pages.next);
238                 tage->used = 0;
239                 cfs_tage_to_tail(tage, &tcd->tcd_pages);
240         }
241         return tage;
242 }
243
/*
 * printf-style front end of libcfs_debug_vmsg2(): packs the variable
 * arguments into a va_list and passes them as the first format, with no
 * second format.  Returns whatever libcfs_debug_vmsg2() returns.
 */
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
                     const char *format, ...)
{
        va_list ap;
        int     status;

        va_start(ap, format);
        status = libcfs_debug_vmsg2(msgdata, format, ap, NULL);
        va_end(ap);

        return status;
}
EXPORT_SYMBOL(libcfs_debug_msg);
257
258 int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
259                        const char *format1, va_list args,
260                        const char *format2, ...)
261 {
262         struct cfs_trace_cpu_data *tcd = NULL;
263         struct ptldebug_header     header = {0};
264         struct cfs_trace_page     *tage;
265         /* string_buf is used only if tcd != NULL, and is always set then */
266         char                      *string_buf = NULL;
267         char                      *debug_buf;
268         int                        known_size;
269         int                        needed = 85; /* average message length */
270         int                        max_nob;
271         va_list                    ap;
272         int                        depth;
273         int                        i;
274         int                        remain;
275         int                        mask = msgdata->msg_mask;
276         char                      *file = (char *)msgdata->msg_file;
277         cfs_debug_limit_state_t   *cdls = msgdata->msg_cdls;
278
279         if (strchr(file, '/'))
280                 file = strrchr(file, '/') + 1;
281
282         tcd = cfs_trace_get_tcd();
283
284         /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
285          * pins us to a particular CPU.  This avoids an smp_processor_id()
286          * warning on Linux when debugging is enabled. */
287         cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
288
289         if (tcd == NULL)                /* arch may not log in IRQ context */
290                 goto console;
291
292         if (tcd->tcd_cur_pages == 0)
293                 header.ph_flags |= PH_FLAG_FIRST_RECORD;
294
295         if (tcd->tcd_shutting_down) {
296                 cfs_trace_put_tcd(tcd);
297                 tcd = NULL;
298                 goto console;
299         }
300
301         depth = __current_nesting_level();
302         known_size = strlen(file) + 1 + depth;
303         if (msgdata->msg_fn)
304                 known_size += strlen(msgdata->msg_fn) + 1;
305
306         if (libcfs_debug_binary)
307                 known_size += sizeof(header);
308
309         /*/
310          * '2' used because vsnprintf return real size required for output
311          * _without_ terminating NULL.
312          * if needed is to small for this format.
313          */
314         for (i = 0; i < 2; i++) {
315                 tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
316                 if (tage == NULL) {
317                         if (needed + known_size > CFS_PAGE_SIZE)
318                                 mask |= D_ERROR;
319
320                         cfs_trace_put_tcd(tcd);
321                         tcd = NULL;
322                         goto console;
323                 }
324
325                 string_buf = (char *)cfs_page_address(tage->page) +
326                                         tage->used + known_size;
327
328                 max_nob = CFS_PAGE_SIZE - tage->used - known_size;
329                 if (max_nob <= 0) {
330                         printk(CFS_KERN_EMERG "negative max_nob: %d\n",
331                                max_nob);
332                         mask |= D_ERROR;
333                         cfs_trace_put_tcd(tcd);
334                         tcd = NULL;
335                         goto console;
336                 }
337
338                 needed = 0;
339                 if (format1) {
340                         va_copy(ap, args);
341                         needed = vsnprintf(string_buf, max_nob, format1, ap);
342                         va_end(ap);
343                 }
344
345                 if (format2) {
346                         remain = max_nob - needed;
347                         if (remain < 0)
348                                 remain = 0;
349
350                         va_start(ap, format2);
351                         needed += vsnprintf(string_buf + needed, remain,
352                                             format2, ap);
353                         va_end(ap);
354                 }
355
356                 if (needed < max_nob) /* well. printing ok.. */
357                         break;
358         }
359
360         if (*(string_buf+needed-1) != '\n')
361                 printk(CFS_KERN_INFO "format at %s:%d:%s doesn't end in "
362                        "newline\n", file, msgdata->msg_line, msgdata->msg_fn);
363
364         header.ph_len = known_size + needed;
365         debug_buf = (char *)cfs_page_address(tage->page) + tage->used;
366
367         if (libcfs_debug_binary) {
368                 memcpy(debug_buf, &header, sizeof(header));
369                 tage->used += sizeof(header);
370                 debug_buf += sizeof(header);
371         }
372
373         /* indent message according to the nesting level */
374         while (depth-- > 0) {
375                 *(debug_buf++) = '.';
376                 ++ tage->used;
377         }
378
379         strcpy(debug_buf, file);
380         tage->used += strlen(file) + 1;
381         debug_buf += strlen(file) + 1;
382
383         if (msgdata->msg_fn) {
384                 strcpy(debug_buf, msgdata->msg_fn);
385                 tage->used += strlen(msgdata->msg_fn) + 1;
386                 debug_buf += strlen(msgdata->msg_fn) + 1;
387         }
388
389         __LASSERT(debug_buf == string_buf);
390
391         tage->used += needed;
392         __LASSERT (tage->used <= CFS_PAGE_SIZE);
393
394 console:
395         if ((mask & libcfs_printk) == 0) {
396                 /* no console output requested */
397                 if (tcd != NULL)
398                         cfs_trace_put_tcd(tcd);
399                 return 1;
400         }
401
402         if (cdls != NULL) {
403                 if (libcfs_console_ratelimit &&
404                     cdls->cdls_next != 0 &&     /* not first time ever */
405                     !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
406                         /* skipping a console message */
407                         cdls->cdls_count++;
408                         if (tcd != NULL)
409                                 cfs_trace_put_tcd(tcd);
410                         return 1;
411                 }
412
413                 if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
414                                                        libcfs_console_max_delay
415                                                        + cfs_time_seconds(10))) {
416                         /* last timeout was a long time ago */
417                         cdls->cdls_delay /= libcfs_console_backoff * 4;
418                 } else {
419                         cdls->cdls_delay *= libcfs_console_backoff;
420
421                         if (cdls->cdls_delay < libcfs_console_min_delay)
422                                 cdls->cdls_delay = libcfs_console_min_delay;
423                         else if (cdls->cdls_delay > libcfs_console_max_delay)
424                                 cdls->cdls_delay = libcfs_console_max_delay;
425                 }
426
427                 /* ensure cdls_next is never zero after it's been seen */
428                 cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
429         }
430
431         if (tcd != NULL) {
432                 cfs_print_to_console(&header, mask, string_buf, needed, file,
433                                      msgdata->msg_fn);
434                 cfs_trace_put_tcd(tcd);
435         } else {
436                 string_buf = cfs_trace_get_console_buffer();
437
438                 needed = 0;
439                 if (format1 != NULL) {
440                         va_copy(ap, args);
441                         needed = vsnprintf(string_buf,
442                                            CFS_TRACE_CONSOLE_BUFFER_SIZE,
443                                            format1, ap);
444                         va_end(ap);
445                 }
446                 if (format2 != NULL) {
447                         remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
448                         if (remain > 0) {
449                                 va_start(ap, format2);
450                                 needed += vsnprintf(string_buf+needed, remain,
451                                                     format2, ap);
452                                 va_end(ap);
453                         }
454                 }
455                 cfs_print_to_console(&header, mask,
456                                      string_buf, needed, file, msgdata->msg_fn);
457
458                 cfs_trace_put_console_buffer(string_buf);
459         }
460
461         if (cdls != NULL && cdls->cdls_count != 0) {
462                 string_buf = cfs_trace_get_console_buffer();
463
464                 needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
465                                   "Skipped %d previous similar message%s\n",
466                                   cdls->cdls_count,
467                                   (cdls->cdls_count > 1) ? "s" : "");
468
469                 cfs_print_to_console(&header, mask,
470                                      string_buf, needed, file, msgdata->msg_fn);
471
472                 cfs_trace_put_console_buffer(string_buf);
473                 cdls->cdls_count = 0;
474         }
475
476         return 0;
477 }
478 EXPORT_SYMBOL(libcfs_debug_vmsg2);
479
/*
 * Log "ASSERTION(<expr>) failed" through libcfs_debug_msg() and then hand
 * control to lbug_with_loc() with the same message data.
 */
void libcfs_assertion_failed(const char *expr,
                             struct libcfs_debug_msg_data *msgdata)
{
        libcfs_debug_msg(msgdata, "ASSERTION(%s) failed\n", expr);

        lbug_with_loc(msgdata);
}
EXPORT_SYMBOL(libcfs_assertion_failed);
488
489 void
490 cfs_trace_assertion_failed(const char *str,
491                            struct libcfs_debug_msg_data *msgdata)
492 {
493         struct ptldebug_header hdr;
494
495         libcfs_panic_in_progress = 1;
496         libcfs_catastrophe = 1;
497         cfs_mb();
498
499         cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
500
501         cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
502                              msgdata->msg_file, msgdata->msg_fn);
503
504         LIBCFS_PANIC("Lustre debug assertion failure\n");
505
506         /* not reached */
507 }
508
509 static void
510 panic_collect_pages(struct page_collection *pc)
511 {
512         /* Do the collect_pages job on a single CPU: assumes that all other
513          * CPUs have been stopped during a panic.  If this isn't true for some
514          * arch, this will have to be implemented separately in each arch.  */
515         int                        i;
516         int                        j;
517         struct cfs_trace_cpu_data *tcd;
518
519         CFS_INIT_LIST_HEAD(&pc->pc_pages);
520
521         cfs_tcd_for_each(tcd, i, j) {
522                 cfs_list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
523                 tcd->tcd_cur_pages = 0;
524
525                 if (pc->pc_want_daemon_pages) {
526                         cfs_list_splice_init(&tcd->tcd_daemon_pages,
527                                              &pc->pc_pages);
528                         tcd->tcd_cur_daemon_pages = 0;
529                 }
530         }
531 }
532
533 static void collect_pages_on_all_cpus(struct page_collection *pc)
534 {
535         struct cfs_trace_cpu_data *tcd;
536         int i, cpu;
537
538         cfs_spin_lock(&pc->pc_lock);
539         cfs_for_each_possible_cpu(cpu) {
540                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
541                         cfs_list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
542                         tcd->tcd_cur_pages = 0;
543                         if (pc->pc_want_daemon_pages) {
544                                 cfs_list_splice_init(&tcd->tcd_daemon_pages,
545                                                      &pc->pc_pages);
546                                 tcd->tcd_cur_daemon_pages = 0;
547                         }
548                 }
549         }
550         cfs_spin_unlock(&pc->pc_lock);
551 }
552
553 static void collect_pages(struct page_collection *pc)
554 {
555         CFS_INIT_LIST_HEAD(&pc->pc_pages);
556
557         if (libcfs_panic_in_progress)
558                 panic_collect_pages(pc);
559         else
560                 collect_pages_on_all_cpus(pc);
561 }
562
563 static void put_pages_back_on_all_cpus(struct page_collection *pc)
564 {
565         struct cfs_trace_cpu_data *tcd;
566         cfs_list_t *cur_head;
567         struct cfs_trace_page *tage;
568         struct cfs_trace_page *tmp;
569         int i, cpu;
570
571         cfs_spin_lock(&pc->pc_lock);
572         cfs_for_each_possible_cpu(cpu) {
573                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
574                         cur_head = tcd->tcd_pages.next;
575
576                         cfs_list_for_each_entry_safe_typed(tage, tmp,
577                                                            &pc->pc_pages,
578                                                            struct cfs_trace_page,
579                                                            linkage) {
580
581                                 __LASSERT_TAGE_INVARIANT(tage);
582
583                                 if (tage->cpu != cpu || tage->type != i)
584                                         continue;
585
586                                 cfs_tage_to_tail(tage, cur_head);
587                                 tcd->tcd_cur_pages++;
588                         }
589                 }
590         }
591         cfs_spin_unlock(&pc->pc_lock);
592 }
593
594 static void put_pages_back(struct page_collection *pc)
595 {
596         if (!libcfs_panic_in_progress)
597                 put_pages_back_on_all_cpus(pc);
598 }
599
600 /* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
601  * we have a good amount of data at all times for dumping during an LBUG, even
602  * if we have been steadily writing (and otherwise discarding) pages via the
603  * debug daemon. */
604 static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
605                                          struct cfs_trace_cpu_data *tcd)
606 {
607         struct cfs_trace_page *tage;
608         struct cfs_trace_page *tmp;
609
610         cfs_spin_lock(&pc->pc_lock);
611         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages,
612                                            struct cfs_trace_page, linkage) {
613
614                 __LASSERT_TAGE_INVARIANT(tage);
615
616                 if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
617                         continue;
618
619                 cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
620                 tcd->tcd_cur_daemon_pages++;
621
622                 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
623                         struct cfs_trace_page *victim;
624
625                         __LASSERT(!cfs_list_empty(&tcd->tcd_daemon_pages));
626                         victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
627
628                         __LASSERT_TAGE_INVARIANT(victim);
629
630                         cfs_list_del(&victim->linkage);
631                         cfs_tage_free(victim);
632                         tcd->tcd_cur_daemon_pages--;
633                 }
634         }
635         cfs_spin_unlock(&pc->pc_lock);
636 }
637
638 static void put_pages_on_daemon_list(struct page_collection *pc)
639 {
640         struct cfs_trace_cpu_data *tcd;
641         int i, cpu;
642
643         cfs_for_each_possible_cpu(cpu) {
644                 cfs_tcd_for_each_type_lock(tcd, i, cpu)
645                         put_pages_on_tcd_daemon_list(pc, tcd);
646         }
647 }
648
649 void cfs_trace_debug_print(void)
650 {
651         struct page_collection pc;
652         struct cfs_trace_page *tage;
653         struct cfs_trace_page *tmp;
654
655         cfs_spin_lock_init(&pc.pc_lock);
656
657         pc.pc_want_daemon_pages = 1;
658         collect_pages(&pc);
659         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
660                                            struct cfs_trace_page, linkage) {
661                 char *p, *file, *fn;
662                 cfs_page_t *page;
663
664                 __LASSERT_TAGE_INVARIANT(tage);
665
666                 page = tage->page;
667                 p = cfs_page_address(page);
668                 while (p < ((char *)cfs_page_address(page) + tage->used)) {
669                         struct ptldebug_header *hdr;
670                         int len;
671                         hdr = (void *)p;
672                         p += sizeof(*hdr);
673                         file = p;
674                         p += strlen(file) + 1;
675                         fn = p;
676                         p += strlen(fn) + 1;
677                         len = hdr->ph_len - (int)(p - (char *)hdr);
678
679                         cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
680
681                         p += len;
682                 }
683
684                 cfs_list_del(&tage->linkage);
685                 cfs_tage_free(tage);
686         }
687 }
688
689 int cfs_tracefile_dump_all_pages(char *filename)
690 {
691         struct page_collection pc;
692         cfs_file_t *filp;
693         struct cfs_trace_page *tage;
694         struct cfs_trace_page *tmp;
695         int rc;
696
697         CFS_DECL_MMSPACE;
698
699         cfs_tracefile_write_lock();
700
701         filp = cfs_filp_open(filename,
702                              O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
703         if (!filp) {
704                 if (rc != -EEXIST)
705                         printk(CFS_KERN_ERR
706                                "LustreError: can't open %s for dump: rc %d\n",
707                                filename, rc);
708                 goto out;
709         }
710
711         cfs_spin_lock_init(&pc.pc_lock);
712         pc.pc_want_daemon_pages = 1;
713         collect_pages(&pc);
714         if (cfs_list_empty(&pc.pc_pages)) {
715                 rc = 0;
716                 goto close;
717         }
718
719         /* ok, for now, just write the pages.  in the future we'll be building
720          * iobufs with the pages and calling generic_direct_IO */
721         CFS_MMSPACE_OPEN;
722         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
723                                            struct cfs_trace_page, linkage) {
724
725                 __LASSERT_TAGE_INVARIANT(tage);
726
727                 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
728                                     tage->used, cfs_filp_poff(filp));
729                 if (rc != (int)tage->used) {
730                         printk(CFS_KERN_WARNING "wanted to write %u but wrote "
731                                "%d\n", tage->used, rc);
732                         put_pages_back(&pc);
733                         __LASSERT(cfs_list_empty(&pc.pc_pages));
734                         break;
735                 }
736                 cfs_list_del(&tage->linkage);
737                 cfs_tage_free(tage);
738         }
739         CFS_MMSPACE_CLOSE;
740         rc = cfs_filp_fsync(filp);
741         if (rc)
742                 printk(CFS_KERN_ERR "sync returns %d\n", rc);
743  close:
744         cfs_filp_close(filp);
745  out:
746         cfs_tracefile_write_unlock();
747         return rc;
748 }
749
750 void cfs_trace_flush_pages(void)
751 {
752         struct page_collection pc;
753         struct cfs_trace_page *tage;
754         struct cfs_trace_page *tmp;
755
756         cfs_spin_lock_init(&pc.pc_lock);
757
758         pc.pc_want_daemon_pages = 1;
759         collect_pages(&pc);
760         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
761                                            struct cfs_trace_page, linkage) {
762
763                 __LASSERT_TAGE_INVARIANT(tage);
764
765                 cfs_list_del(&tage->linkage);
766                 cfs_tage_free(tage);
767         }
768 }
769
/*
 * Copy a string from user space into a kernel buffer, strip trailing
 * whitespace and NUL-terminate it.
 *
 * knl_buffer / knl_buffer_nob: destination buffer and its size in bytes
 * usr_buffer / usr_buffer_nob: user-space source and number of bytes to copy
 *
 * Returns 0 on success, or
 *   -EINVAL    if usr_buffer_nob is negative, or the string is empty or
 *              consists of whitespace only
 *   -EOVERFLOW if the user data is larger than the kernel buffer, or fills
 *              it so completely that no byte is left for the terminator
 *   -EFAULT    if the copy from user space fails
 */
int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
                            const char *usr_buffer, int usr_buffer_nob)
{
        int    nob;

        if (usr_buffer_nob < 0)
                return -EINVAL;

        if (usr_buffer_nob > knl_buffer_nob)
                return -EOVERFLOW;

        if (cfs_copy_from_user((void *)knl_buffer,
                           (void *)usr_buffer, usr_buffer_nob))
                return -EFAULT;

        /* 'nob' becomes the index of the last non-whitespace character.
         * Pre-decrement so that index -1 is never dereferenced when the
         * string is empty or all whitespace. */
        nob = strnlen(knl_buffer, usr_buffer_nob);
        while (--nob >= 0)                      /* strip trailing whitespace */
                if (!isspace((unsigned char)knl_buffer[nob]))
                        break;

        if (nob < 0)                            /* empty string */
                return -EINVAL;

        /* the terminator goes to index nob + 1, which must be in bounds */
        if (nob + 1 >= knl_buffer_nob)          /* no space to terminate */
                return -EOVERFLOW;

        knl_buffer[nob + 1] = 0;                /* terminate */
        return 0;
}
796
/*
 * Copy a kernel string out to user space, truncated to usr_buffer_nob bytes.
 *
 * NB if 'append' != NULL, it's a single character to append to the copied
 * out string - usually "\n", for /proc entries and "" (i.e. a terminating
 * zero byte) for sysctl entries.  It is only added if there is room left.
 *
 * Returns the number of bytes copied out, or -EFAULT if a copy fails.
 */
int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
                             const char *knl_buffer, char *append)
{
        int length = strlen(knl_buffer);
        int copied = (length > usr_buffer_nob) ? usr_buffer_nob : length;

        if (cfs_copy_to_user(usr_buffer, knl_buffer, copied))
                return -EFAULT;

        if (append == NULL || copied >= usr_buffer_nob)
                return copied;

        if (cfs_copy_to_user(usr_buffer + copied, append, 1))
                return -EFAULT;

        return copied + 1;
}
EXPORT_SYMBOL(cfs_trace_copyout_string);
821
822 int cfs_trace_allocate_string_buffer(char **str, int nob)
823 {
824         if (nob > 2 * CFS_PAGE_SIZE)            /* string must be "sensible" */
825                 return -EINVAL;
826
827         *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO);
828         if (*str == NULL)
829                 return -ENOMEM;
830
831         return 0;
832 }
833
/*
 * Free a buffer obtained from cfs_trace_allocate_string_buffer().
 * 'nob' is accepted for symmetry with the allocator but is not used.
 */
void cfs_trace_free_string_buffer(char *str, int nob)
{
        (void)nob;

        cfs_free(str);
}
838
/*
 * Dump the trace buffers to the file named by a user-space string.
 *
 * The name is copied into a temporary kernel buffer of usr_str_nob + 1
 * bytes.  Except on __WINNT__ it must start with '/', otherwise -EINVAL is
 * returned.
 *
 * Returns 0 on success or a negative error from the allocation, the copy-in
 * or cfs_tracefile_dump_all_pages().
 */
int cfs_trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
{
        char *name;
        int   name_nob = usr_str_nob + 1;
        int   rc;

        rc = cfs_trace_allocate_string_buffer(&name, name_nob);
        if (rc != 0)
                return rc;

        rc = cfs_trace_copyin_string(name, name_nob, usr_str, usr_str_nob);
        if (rc == 0) {
#ifndef __WINNT__
                if (name[0] != '/')
                        rc = -EINVAL;
#endif
                if (rc == 0)
                        rc = cfs_tracefile_dump_all_pages(name);
        }

        cfs_trace_free_string_buffer(name, name_nob);
        return rc;
}
864
865 int cfs_trace_daemon_command(char *str)
866 {
867         int       rc = 0;
868
869         cfs_tracefile_write_lock();
870
871         if (strcmp(str, "stop") == 0) {
872                 cfs_tracefile_write_unlock();
873                 cfs_trace_stop_thread();
874                 cfs_tracefile_write_lock();
875                 memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
876
877         } else if (strncmp(str, "size=", 5) == 0) {
878                 cfs_tracefile_size = simple_strtoul(str + 5, NULL, 0);
879                 if (cfs_tracefile_size < 10 || cfs_tracefile_size > 20480)
880                         cfs_tracefile_size = CFS_TRACEFILE_SIZE;
881                 else
882                         cfs_tracefile_size <<= 20;
883
884         } else if (strlen(str) >= sizeof(cfs_tracefile)) {
885                 rc = -ENAMETOOLONG;
886 #ifndef __WINNT__
887         } else if (str[0] != '/') {
888                 rc = -EINVAL;
889 #endif
890         } else {
891                 strcpy(cfs_tracefile, str);
892
893                 printk(CFS_KERN_INFO
894                        "Lustre: debug daemon will attempt to start writing "
895                        "to %s (%lukB max)\n", cfs_tracefile,
896                        (long)(cfs_tracefile_size >> 10));
897
898                 cfs_trace_start_thread();
899         }
900
901         cfs_tracefile_write_unlock();
902         return rc;
903 }
904
/*
 * Run a debug daemon command supplied as a user-space string.
 *
 * The 'usr_str_nob' bytes at 'usr_str' are copied into a temporary kernel
 * buffer and handed to cfs_trace_daemon_command().
 *
 * Returns 0 on success or the negative errno of whichever step failed.
 */
int cfs_trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
{
        const int  cmd_nob = usr_str_nob + 1;   /* room for the terminator */
        char      *cmd;
        int        rc;

        rc = cfs_trace_allocate_string_buffer(&cmd, cmd_nob);
        if (rc != 0)
                return rc;

        rc = cfs_trace_copyin_string(cmd, cmd_nob, usr_str, usr_str_nob);
        if (rc == 0)
                rc = cfs_trace_daemon_command(cmd);

        cfs_trace_free_string_buffer(cmd, cmd_nob);
        return rc;
}
922
923 int cfs_trace_set_debug_mb(int mb)
924 {
925         int i;
926         int j;
927         int pages;
928         int limit = cfs_trace_max_debug_mb();
929         struct cfs_trace_cpu_data *tcd;
930
931         if (mb < cfs_num_possible_cpus()) {
932                 printk(CFS_KERN_WARNING
933                        "Lustre: %d MB is too small for debug buffer size, "
934                        "setting it to %d MB.\n", mb, cfs_num_possible_cpus());
935                 mb = cfs_num_possible_cpus();
936         }
937
938         if (mb > limit) {
939                 printk(CFS_KERN_WARNING
940                        "Lustre: %d MB is too large for debug buffer size, "
941                        "setting it to %d MB.\n", mb, limit);
942                 mb = limit;
943         }
944
945         mb /= cfs_num_possible_cpus();
946         pages = mb << (20 - CFS_PAGE_SHIFT);
947
948         cfs_tracefile_write_lock();
949
950         cfs_tcd_for_each(tcd, i, j)
951                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
952
953         cfs_tracefile_write_unlock();
954
955         return 0;
956 }
957
/*
 * Set the debug buffer size from a number given as a user-space string.
 *
 * The string is copied into a small on-stack buffer, parsed with
 * simple_strtoul() (base auto-detected) and passed on to
 * cfs_trace_set_debug_mb().
 *
 * Returns the negative errno from the copy-in on failure, otherwise the
 * result of cfs_trace_set_debug_mb().
 */
int cfs_trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
{
        char  buf[32];
        int   mb;
        int   rc;

        rc = cfs_trace_copyin_string(buf, sizeof(buf), usr_str, usr_str_nob);
        if (rc < 0)
                return rc;

        mb = simple_strtoul(buf, NULL, 0);
        return cfs_trace_set_debug_mb(mb);
}
969
970 int cfs_trace_get_debug_mb(void)
971 {
972         int i;
973         int j;
974         struct cfs_trace_cpu_data *tcd;
975         int total_pages = 0;
976
977         cfs_tracefile_read_lock();
978
979         cfs_tcd_for_each(tcd, i, j)
980                 total_pages += tcd->tcd_max_pages;
981
982         cfs_tracefile_read_unlock();
983
984         return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1;
985 }
986
987 static int tracefiled(void *arg)
988 {
989         struct page_collection pc;
990         struct tracefiled_ctl *tctl = arg;
991         struct cfs_trace_page *tage;
992         struct cfs_trace_page *tmp;
993         cfs_file_t *filp;
994         int last_loop = 0;
995         int rc;
996
997         CFS_DECL_MMSPACE;
998
999         /* we're started late enough that we pick up init's fs context */
1000         /* this is so broken in uml?  what on earth is going on? */
1001         cfs_daemonize("ktracefiled");
1002
1003         cfs_spin_lock_init(&pc.pc_lock);
1004         cfs_complete(&tctl->tctl_start);
1005
1006         while (1) {
1007                 cfs_waitlink_t __wait;
1008
1009                 pc.pc_want_daemon_pages = 0;
1010                 collect_pages(&pc);
1011                 if (cfs_list_empty(&pc.pc_pages))
1012                         goto end_loop;
1013
1014                 filp = NULL;
1015                 cfs_tracefile_read_lock();
1016                 if (cfs_tracefile[0] != 0) {
1017                         filp = cfs_filp_open(cfs_tracefile,
1018                                              O_CREAT | O_RDWR | O_LARGEFILE,
1019                                              0600, &rc);
1020                         if (!(filp))
1021                                 printk(CFS_KERN_WARNING "couldn't open %s: "
1022                                        "%d\n", cfs_tracefile, rc);
1023                 }
1024                 cfs_tracefile_read_unlock();
1025                 if (filp == NULL) {
1026                         put_pages_on_daemon_list(&pc);
1027                         __LASSERT(cfs_list_empty(&pc.pc_pages));
1028                         goto end_loop;
1029                 }
1030
1031                 CFS_MMSPACE_OPEN;
1032
1033                 cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
1034                                                    struct cfs_trace_page,
1035                                                    linkage) {
1036                         static loff_t f_pos;
1037
1038                         __LASSERT_TAGE_INVARIANT(tage);
1039
1040                         if (f_pos >= (off_t)cfs_tracefile_size)
1041                                 f_pos = 0;
1042                         else if (f_pos > (off_t)cfs_filp_size(filp))
1043                                 f_pos = cfs_filp_size(filp);
1044
1045                         rc = cfs_filp_write(filp, cfs_page_address(tage->page),
1046                                             tage->used, &f_pos);
1047                         if (rc != (int)tage->used) {
1048                                 printk(CFS_KERN_WARNING "wanted to write %u "
1049                                        "but wrote %d\n", tage->used, rc);
1050                                 put_pages_back(&pc);
1051                                 __LASSERT(cfs_list_empty(&pc.pc_pages));
1052                         }
1053                 }
1054                 CFS_MMSPACE_CLOSE;
1055
1056                 cfs_filp_close(filp);
1057                 put_pages_on_daemon_list(&pc);
1058                 if (!cfs_list_empty(&pc.pc_pages)) {
1059                         int i;
1060
1061                         printk(CFS_KERN_ALERT "Lustre: trace pages aren't "
1062                                " empty\n");
1063                         printk(CFS_KERN_ERR "total cpus(%d): ",
1064                                cfs_num_possible_cpus());
1065                         for (i = 0; i < cfs_num_possible_cpus(); i++)
1066                                 if (cpu_online(i))
1067                                         printk(CFS_KERN_ERR "%d(on) ", i);
1068                                 else
1069                                         printk(CFS_KERN_ERR "%d(off) ", i);
1070                         printk(CFS_KERN_ERR "\n");
1071
1072                         i = 0;
1073                         cfs_list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
1074                                                      linkage)
1075                                 printk(CFS_KERN_ERR "page %d belongs to cpu "
1076                                        "%d\n", ++i, tage->cpu);
1077                         printk(CFS_KERN_ERR "There are %d pages unwritten\n",
1078                                i);
1079                 }
1080                 __LASSERT(cfs_list_empty(&pc.pc_pages));
1081 end_loop:
1082                 if (cfs_atomic_read(&tctl->tctl_shutdown)) {
1083                         if (last_loop == 0) {
1084                                 last_loop = 1;
1085                                 continue;
1086                         } else {
1087                                 break;
1088                         }
1089                 }
1090                 cfs_waitlink_init(&__wait);
1091                 cfs_waitq_add(&tctl->tctl_waitq, &__wait);
1092                 cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
1093                 cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
1094                                     cfs_time_seconds(1));
1095                 cfs_waitq_del(&tctl->tctl_waitq, &__wait);
1096         }
1097         cfs_complete(&tctl->tctl_stop);
1098         return 0;
1099 }
1100
1101 int cfs_trace_start_thread(void)
1102 {
1103         struct tracefiled_ctl *tctl = &trace_tctl;
1104         int rc = 0;
1105
1106         cfs_mutex_lock(&cfs_trace_thread_mutex);
1107         if (thread_running)
1108                 goto out;
1109
1110         cfs_init_completion(&tctl->tctl_start);
1111         cfs_init_completion(&tctl->tctl_stop);
1112         cfs_waitq_init(&tctl->tctl_waitq);
1113         cfs_atomic_set(&tctl->tctl_shutdown, 0);
1114
1115         if (cfs_create_thread(tracefiled, tctl, 0) < 0) {
1116                 rc = -ECHILD;
1117                 goto out;
1118         }
1119
1120         cfs_wait_for_completion(&tctl->tctl_start);
1121         thread_running = 1;
1122 out:
1123         cfs_mutex_unlock(&cfs_trace_thread_mutex);
1124         return rc;
1125 }
1126
1127 void cfs_trace_stop_thread(void)
1128 {
1129         struct tracefiled_ctl *tctl = &trace_tctl;
1130
1131         cfs_mutex_lock(&cfs_trace_thread_mutex);
1132         if (thread_running) {
1133                 printk(CFS_KERN_INFO
1134                        "Lustre: shutting down debug daemon thread...\n");
1135                 cfs_atomic_set(&tctl->tctl_shutdown, 1);
1136                 cfs_wait_for_completion(&tctl->tctl_stop);
1137                 thread_running = 0;
1138         }
1139         cfs_mutex_unlock(&cfs_trace_thread_mutex);
1140 }
1141
1142 int cfs_tracefile_init(int max_pages)
1143 {
1144         struct cfs_trace_cpu_data *tcd;
1145         int                    i;
1146         int                    j;
1147         int                    rc;
1148         int                    factor;
1149
1150         rc = cfs_tracefile_init_arch();
1151         if (rc != 0)
1152                 return rc;
1153
1154         cfs_tcd_for_each(tcd, i, j) {
1155                 /* tcd_pages_factor is initialized int tracefile_init_arch. */
1156                 factor = tcd->tcd_pages_factor;
1157                 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
1158                 CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
1159                 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
1160                 tcd->tcd_cur_pages = 0;
1161                 tcd->tcd_cur_stock_pages = 0;
1162                 tcd->tcd_cur_daemon_pages = 0;
1163                 tcd->tcd_max_pages = (max_pages * factor) / 100;
1164                 LASSERT(tcd->tcd_max_pages > 0);
1165                 tcd->tcd_shutting_down = 0;
1166         }
1167
1168         return 0;
1169 }
1170
1171 static void trace_cleanup_on_all_cpus(void)
1172 {
1173         struct cfs_trace_cpu_data *tcd;
1174         struct cfs_trace_page *tage;
1175         struct cfs_trace_page *tmp;
1176         int i, cpu;
1177
1178         cfs_for_each_possible_cpu(cpu) {
1179                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
1180                         tcd->tcd_shutting_down = 1;
1181
1182                         cfs_list_for_each_entry_safe_typed(tage, tmp,
1183                                                            &tcd->tcd_pages,
1184                                                            struct cfs_trace_page,
1185                                                            linkage) {
1186                                 __LASSERT_TAGE_INVARIANT(tage);
1187
1188                                 cfs_list_del(&tage->linkage);
1189                                 cfs_tage_free(tage);
1190                         }
1191
1192                         tcd->tcd_cur_pages = 0;
1193                 }
1194         }
1195 }
1196
/*
 * Tear down the trace buffers: free the pages held on every CPU, then
 * run the architecture-specific finalisation.
 */
static void cfs_trace_cleanup(void)
{
        trace_cleanup_on_all_cpus();

        cfs_tracefile_fini_arch();
}
1208
/*
 * Shut the tracefile subsystem down: stop the debug daemon thread first,
 * then release the trace buffers.
 */
void cfs_tracefile_exit(void)
{
        /* the daemon must be gone before its pages are freed */
        cfs_trace_stop_thread();

        cfs_trace_cleanup();
}