LU-2503 libcfs: Call lbug_with_loc() in LASSERT()
[fs/lustre-release.git] / libcfs / libcfs / tracefile.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * libcfs/libcfs/tracefile.c
35  *
36  * Author: Zach Brown <zab@clusterfs.com>
37  * Author: Phil Schwan <phil@clusterfs.com>
38  */
39
40
41 #define DEBUG_SUBSYSTEM S_LNET
42 #define LUSTRE_TRACEFILE_PRIVATE
43 #include "tracefile.h"
44
45 #include <libcfs/libcfs.h>
46
47 /* XXX move things up to the top, comment */
48 union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[CFS_NR_CPUS] __cacheline_aligned;
49
50 char cfs_tracefile[TRACEFILE_NAME_SIZE];
51 long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
52 static struct tracefiled_ctl trace_tctl;
53 struct mutex cfs_trace_thread_mutex;
54 static int thread_running = 0;
55
56 cfs_atomic_t cfs_tage_allocated = CFS_ATOMIC_INIT(0);
57
58 static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
59                                          struct cfs_trace_cpu_data *tcd);
60
61 static inline struct cfs_trace_page *
62 cfs_tage_from_list(cfs_list_t *list)
63 {
64         return cfs_list_entry(list, struct cfs_trace_page, linkage);
65 }
66
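/* Allocate a trace page descriptor and its backing page.  Returns NULL if the
 * caller is reclaiming memory or if either allocation fails. */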
67 static struct cfs_trace_page *cfs_tage_alloc(int gfp)
68 {
69         cfs_page_t            *page;
70         struct cfs_trace_page *tage;
71
72         /* My caller is trying to free memory */
73         if (!cfs_in_interrupt() && cfs_memory_pressure_get())
74                 return NULL;
75
76         /*
77          * Don't spam console with allocation failures: they will be reported
78          * by upper layer anyway.
79          */
80         gfp |= CFS_ALLOC_NOWARN;
81         page = cfs_alloc_page(gfp);
82         if (page == NULL)
83                 return NULL;
84
85         tage = cfs_alloc(sizeof(*tage), gfp);
86         if (tage == NULL) {
87                 cfs_free_page(page);
88                 return NULL;
89         }
90
91         tage->page = page;
92         cfs_atomic_inc(&cfs_tage_allocated);
93         return tage;
94 }
95
96 static void cfs_tage_free(struct cfs_trace_page *tage)
97 {
98         __LASSERT(tage != NULL);
99         __LASSERT(tage->page != NULL);
100
101         cfs_free_page(tage->page);
102         cfs_free(tage);
103         cfs_atomic_dec(&cfs_tage_allocated);
104 }
105
106 static void cfs_tage_to_tail(struct cfs_trace_page *tage,
107                              cfs_list_t *queue)
108 {
109         __LASSERT(tage != NULL);
110         __LASSERT(queue != NULL);
111
112         cfs_list_move_tail(&tage->linkage, queue);
113 }
114
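/* Allocate pages onto 'stock' until this CPU's stock count plus the new pages
 * reaches TCD_STOCK_PAGES; returns the number of pages actually added. */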
115 int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, int gfp,
116                            cfs_list_t *stock)
117 {
118         int i;
119
120         /*
121          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
122          * from here: this will lead to infinite recursion.
123          */
124
125         for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
126                 struct cfs_trace_page *tage;
127
128                 tage = cfs_tage_alloc(gfp);
129                 if (tage == NULL)
130                         break;
131                 cfs_list_add_tail(&tage->linkage, stock);
132         }
133         return i;
134 }
135
136 /* return a page that has 'len' bytes left at the end */
137 static struct cfs_trace_page *
138 cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
139 {
140         struct cfs_trace_page *tage;
141
142         if (tcd->tcd_cur_pages > 0) {
143                 __LASSERT(!cfs_list_empty(&tcd->tcd_pages));
144                 tage = cfs_tage_from_list(tcd->tcd_pages.prev);
145                 if (tage->used + len <= CFS_PAGE_SIZE)
146                         return tage;
147         }
148
149         if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
150                 if (tcd->tcd_cur_stock_pages > 0) {
151                         tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
152                         -- tcd->tcd_cur_stock_pages;
153                         cfs_list_del_init(&tage->linkage);
154                 } else {
155                         tage = cfs_tage_alloc(CFS_ALLOC_ATOMIC);
156                         if (tage == NULL) {
157                                 if (printk_ratelimit())
158                                         printk(CFS_KERN_WARNING
159                                                "cannot allocate a tage (%ld)\n",
160                                        tcd->tcd_cur_pages);
161                                 return NULL;
162                         }
163                 }
164
165                 tage->used = 0;
166                 tage->cpu = cfs_smp_processor_id();
167                 tage->type = tcd->tcd_type;
168                 cfs_list_add_tail(&tage->linkage, &tcd->tcd_pages);
169                 tcd->tcd_cur_pages++;
170
171                 if (tcd->tcd_cur_pages > 8 && thread_running) {
172                         struct tracefiled_ctl *tctl = &trace_tctl;
173                         /*
174                          * wake up tracefiled to process some pages.
175                          */
176                         cfs_waitq_signal(&tctl->tctl_waitq);
177                 }
178                 return tage;
179         }
180         return NULL;
181 }
182
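/* The trace buffer overflowed while the daemon is running: move roughly the
 * oldest 10% of this CPU's pages onto its daemon list to make room. */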
183 static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
184 {
185         int pgcount = tcd->tcd_cur_pages / 10;
186         struct page_collection pc;
187         struct cfs_trace_page *tage;
188         struct cfs_trace_page *tmp;
189
190         /*
191          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
192          * from here: this will lead to infinite recursion.
193          */
194
195         if (printk_ratelimit())
196                 printk(CFS_KERN_WARNING "debug daemon buffer overflowed; "
197                        "discarding 10%% of pages (%d of %ld)\n",
198                        pgcount + 1, tcd->tcd_cur_pages);
199
200         CFS_INIT_LIST_HEAD(&pc.pc_pages);
201         spin_lock_init(&pc.pc_lock);
202
203         cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages,
204                                            struct cfs_trace_page, linkage) {
205                 if (pgcount-- == 0)
206                         break;
207
208                 cfs_list_move_tail(&tage->linkage, &pc.pc_pages);
209                 tcd->tcd_cur_pages--;
210         }
211         put_pages_on_tcd_daemon_list(&pc, tcd);
212 }
213
214 /* return a page that has 'len' bytes left at the end; if a new page cannot
215  * be obtained, recycle the oldest page already in the buffer */
215 static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
216                                                  unsigned long len)
217 {
218         struct cfs_trace_page *tage;
219
220         /*
221          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
222          * from here: this will lead to infinite recursion.
223          */
224
225         if (len > CFS_PAGE_SIZE) {
226                 printk(CFS_KERN_ERR
227                        "cowardly refusing to write %lu bytes in a page\n", len);
228                 return NULL;
229         }
230
231         tage = cfs_trace_get_tage_try(tcd, len);
232         if (tage != NULL)
233                 return tage;
234         if (thread_running)
235                 cfs_tcd_shrink(tcd);
236         if (tcd->tcd_cur_pages > 0) {
237                 tage = cfs_tage_from_list(tcd->tcd_pages.next);
238                 tage->used = 0;
239                 cfs_tage_to_tail(tage, &tcd->tcd_pages);
240         }
241         return tage;
242 }
243
244 int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
245                      const char *format, ...)
246 {
247         va_list args;
248         int     rc;
249
250         va_start(args, format);
251         rc = libcfs_debug_vmsg2(msgdata, format, args, NULL);
252         va_end(args);
253
254         return rc;
255 }
256 EXPORT_SYMBOL(libcfs_debug_msg);
257
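/* Core debug-message formatter: write the header, file, function and the
 * formatted message into the current CPU's trace buffer and, when the mask
 * requests it, echo a rate-limited copy to the console. */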
258 int libcfs_debug_vmsg2(struct libcfs_debug_msg_data *msgdata,
259                        const char *format1, va_list args,
260                        const char *format2, ...)
261 {
262         struct cfs_trace_cpu_data *tcd = NULL;
263         struct ptldebug_header     header = {0};
264         struct cfs_trace_page     *tage;
265         /* string_buf is used only if tcd != NULL, and is always set then */
266         char                      *string_buf = NULL;
267         char                      *debug_buf;
268         int                        known_size;
269         int                        needed = 85; /* average message length */
270         int                        max_nob;
271         va_list                    ap;
272         int                        depth;
273         int                        i;
274         int                        remain;
275         int                        mask = msgdata->msg_mask;
276         char                      *file = (char *)msgdata->msg_file;
277         cfs_debug_limit_state_t   *cdls = msgdata->msg_cdls;
278
279         if (strchr(file, '/'))
280                 file = strrchr(file, '/') + 1;
281
282         tcd = cfs_trace_get_tcd();
283
284         /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
285          * pins us to a particular CPU.  This avoids an smp_processor_id()
286          * warning on Linux when debugging is enabled. */
287         cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());
288
289         if (tcd == NULL)                /* arch may not log in IRQ context */
290                 goto console;
291
292         if (tcd->tcd_cur_pages == 0)
293                 header.ph_flags |= PH_FLAG_FIRST_RECORD;
294
295         if (tcd->tcd_shutting_down) {
296                 cfs_trace_put_tcd(tcd);
297                 tcd = NULL;
298                 goto console;
299         }
300
301         depth = __current_nesting_level();
302         known_size = strlen(file) + 1 + depth;
303         if (msgdata->msg_fn)
304                 known_size += strlen(msgdata->msg_fn) + 1;
305
306         if (libcfs_debug_binary)
307                 known_size += sizeof(header);
308
309         /*
310          * Loop at most twice: vsnprintf() returns the real size required
311          * for the output _without_ the terminating NUL, so if the initial
312          * 'needed' estimate was too small, retry once with the returned size.
313          */
314         for (i = 0; i < 2; i++) {
315                 tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
316                 if (tage == NULL) {
317                         if (needed + known_size > CFS_PAGE_SIZE)
318                                 mask |= D_ERROR;
319
320                         cfs_trace_put_tcd(tcd);
321                         tcd = NULL;
322                         goto console;
323                 }
324
325                 string_buf = (char *)cfs_page_address(tage->page) +
326                                         tage->used + known_size;
327
328                 max_nob = CFS_PAGE_SIZE - tage->used - known_size;
329                 if (max_nob <= 0) {
330                         printk(CFS_KERN_EMERG "negative max_nob: %d\n",
331                                max_nob);
332                         mask |= D_ERROR;
333                         cfs_trace_put_tcd(tcd);
334                         tcd = NULL;
335                         goto console;
336                 }
337
338                 needed = 0;
339                 if (format1) {
340                         va_copy(ap, args);
341                         needed = vsnprintf(string_buf, max_nob, format1, ap);
342                         va_end(ap);
343                 }
344
345                 if (format2) {
346                         remain = max_nob - needed;
347                         if (remain < 0)
348                                 remain = 0;
349
350                         va_start(ap, format2);
351                         needed += vsnprintf(string_buf + needed, remain,
352                                             format2, ap);
353                         va_end(ap);
354                 }
355
356                 if (needed < max_nob) /* well. printing ok.. */
357                         break;
358         }
359
360         if (*(string_buf+needed-1) != '\n')
361                 printk(CFS_KERN_INFO "format at %s:%d:%s doesn't end in "
362                        "newline\n", file, msgdata->msg_line, msgdata->msg_fn);
363
364         header.ph_len = known_size + needed;
365         debug_buf = (char *)cfs_page_address(tage->page) + tage->used;
366
367         if (libcfs_debug_binary) {
368                 memcpy(debug_buf, &header, sizeof(header));
369                 tage->used += sizeof(header);
370                 debug_buf += sizeof(header);
371         }
372
373         /* indent message according to the nesting level */
374         while (depth-- > 0) {
375                 *(debug_buf++) = '.';
376                 ++ tage->used;
377         }
378
379         strcpy(debug_buf, file);
380         tage->used += strlen(file) + 1;
381         debug_buf += strlen(file) + 1;
382
383         if (msgdata->msg_fn) {
384                 strcpy(debug_buf, msgdata->msg_fn);
385                 tage->used += strlen(msgdata->msg_fn) + 1;
386                 debug_buf += strlen(msgdata->msg_fn) + 1;
387         }
388
389         __LASSERT(debug_buf == string_buf);
390
391         tage->used += needed;
392         __LASSERT (tage->used <= CFS_PAGE_SIZE);
393
394 console:
395         if ((mask & libcfs_printk) == 0) {
396                 /* no console output requested */
397                 if (tcd != NULL)
398                         cfs_trace_put_tcd(tcd);
399                 return 1;
400         }
401
402         if (cdls != NULL) {
403                 if (libcfs_console_ratelimit &&
404                     cdls->cdls_next != 0 &&     /* not first time ever */
405                     !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
406                         /* skipping a console message */
407                         cdls->cdls_count++;
408                         if (tcd != NULL)
409                                 cfs_trace_put_tcd(tcd);
410                         return 1;
411                 }
412
413                 if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
414                                                        libcfs_console_max_delay
415                                                        + cfs_time_seconds(10))) {
416                         /* last timeout was a long time ago */
417                         cdls->cdls_delay /= libcfs_console_backoff * 4;
418                 } else {
419                         cdls->cdls_delay *= libcfs_console_backoff;
420
421                         if (cdls->cdls_delay < libcfs_console_min_delay)
422                                 cdls->cdls_delay = libcfs_console_min_delay;
423                         else if (cdls->cdls_delay > libcfs_console_max_delay)
424                                 cdls->cdls_delay = libcfs_console_max_delay;
425                 }
426
427                 /* ensure cdls_next is never zero after it's been seen */
428                 cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
429         }
430
431         if (tcd != NULL) {
432                 cfs_print_to_console(&header, mask, string_buf, needed, file,
433                                      msgdata->msg_fn);
434                 cfs_trace_put_tcd(tcd);
435         } else {
436                 string_buf = cfs_trace_get_console_buffer();
437
438                 needed = 0;
439                 if (format1 != NULL) {
440                         va_copy(ap, args);
441                         needed = vsnprintf(string_buf,
442                                            CFS_TRACE_CONSOLE_BUFFER_SIZE,
443                                            format1, ap);
444                         va_end(ap);
445                 }
446                 if (format2 != NULL) {
447                         remain = CFS_TRACE_CONSOLE_BUFFER_SIZE - needed;
448                         if (remain > 0) {
449                                 va_start(ap, format2);
450                                 needed += vsnprintf(string_buf+needed, remain,
451                                                     format2, ap);
452                                 va_end(ap);
453                         }
454                 }
455                 cfs_print_to_console(&header, mask,
456                                      string_buf, needed, file, msgdata->msg_fn);
457
458                 cfs_trace_put_console_buffer(string_buf);
459         }
460
461         if (cdls != NULL && cdls->cdls_count != 0) {
462                 string_buf = cfs_trace_get_console_buffer();
463
464                 needed = snprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
465                                   "Skipped %d previous similar message%s\n",
466                                   cdls->cdls_count,
467                                   (cdls->cdls_count > 1) ? "s" : "");
468
469                 cfs_print_to_console(&header, mask,
470                                      string_buf, needed, file, msgdata->msg_fn);
471
472                 cfs_trace_put_console_buffer(string_buf);
473                 cdls->cdls_count = 0;
474         }
475
476         return 0;
477 }
478 EXPORT_SYMBOL(libcfs_debug_vmsg2);
479
480 void
481 cfs_trace_assertion_failed(const char *str,
482                            struct libcfs_debug_msg_data *msgdata)
483 {
484         struct ptldebug_header hdr;
485
486         libcfs_panic_in_progress = 1;
487         libcfs_catastrophe = 1;
488         cfs_mb();
489
490         cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
491
492         cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
493                              msgdata->msg_file, msgdata->msg_fn);
494
495         LIBCFS_PANIC("Lustre debug assertion failure\n");
496
497         /* not reached */
498 }
499
500 static void
501 panic_collect_pages(struct page_collection *pc)
502 {
503         /* Do the collect_pages job on a single CPU: assumes that all other
504          * CPUs have been stopped during a panic.  If this isn't true for some
505          * arch, this will have to be implemented separately in each arch.  */
506         int                        i;
507         int                        j;
508         struct cfs_trace_cpu_data *tcd;
509
510         CFS_INIT_LIST_HEAD(&pc->pc_pages);
511
512         cfs_tcd_for_each(tcd, i, j) {
513                 cfs_list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
514                 tcd->tcd_cur_pages = 0;
515
516                 if (pc->pc_want_daemon_pages) {
517                         cfs_list_splice_init(&tcd->tcd_daemon_pages,
518                                              &pc->pc_pages);
519                         tcd->tcd_cur_daemon_pages = 0;
520                 }
521         }
522 }
523
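/* Splice every CPU's trace pages (and, if requested, daemon pages) onto the
 * collection list under pc_lock. */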
524 static void collect_pages_on_all_cpus(struct page_collection *pc)
525 {
526         struct cfs_trace_cpu_data *tcd;
527         int i, cpu;
528
529         spin_lock(&pc->pc_lock);
530         cfs_for_each_possible_cpu(cpu) {
531                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
532                         cfs_list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
533                         tcd->tcd_cur_pages = 0;
534                         if (pc->pc_want_daemon_pages) {
535                                 cfs_list_splice_init(&tcd->tcd_daemon_pages,
536                                                      &pc->pc_pages);
537                                 tcd->tcd_cur_daemon_pages = 0;
538                         }
539                 }
540         }
541         spin_unlock(&pc->pc_lock);
542 }
543
544 static void collect_pages(struct page_collection *pc)
545 {
546         CFS_INIT_LIST_HEAD(&pc->pc_pages);
547
548         if (libcfs_panic_in_progress)
549                 panic_collect_pages(pc);
550         else
551                 collect_pages_on_all_cpus(pc);
552 }
553
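/* Return collected pages to the per-CPU lists they were taken from, matching
 * each page by its recorded cpu and type. */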
554 static void put_pages_back_on_all_cpus(struct page_collection *pc)
555 {
556         struct cfs_trace_cpu_data *tcd;
557         cfs_list_t *cur_head;
558         struct cfs_trace_page *tage;
559         struct cfs_trace_page *tmp;
560         int i, cpu;
561
562         spin_lock(&pc->pc_lock);
563         cfs_for_each_possible_cpu(cpu) {
564                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
565                         cur_head = tcd->tcd_pages.next;
566
567                         cfs_list_for_each_entry_safe_typed(tage, tmp,
568                                                            &pc->pc_pages,
569                                                            struct cfs_trace_page,
570                                                            linkage) {
571
572                                 __LASSERT_TAGE_INVARIANT(tage);
573
574                                 if (tage->cpu != cpu || tage->type != i)
575                                         continue;
576
577                                 cfs_tage_to_tail(tage, cur_head);
578                                 tcd->tcd_cur_pages++;
579                         }
580                 }
581         }
582         spin_unlock(&pc->pc_lock);
583 }
584
585 static void put_pages_back(struct page_collection *pc)
586 {
587         if (!libcfs_panic_in_progress)
588                 put_pages_back_on_all_cpus(pc);
589 }
590
591 /* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
592  * we have a good amount of data at all times for dumping during an LBUG, even
593  * if we have been steadily writing (and otherwise discarding) pages via the
594  * debug daemon. */
595 static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
596                                          struct cfs_trace_cpu_data *tcd)
597 {
598         struct cfs_trace_page *tage;
599         struct cfs_trace_page *tmp;
600
601         spin_lock(&pc->pc_lock);
602         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages,
603                                            struct cfs_trace_page, linkage) {
604
605                 __LASSERT_TAGE_INVARIANT(tage);
606
607                 if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
608                         continue;
609
610                 cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
611                 tcd->tcd_cur_daemon_pages++;
612
613                 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
614                         struct cfs_trace_page *victim;
615
616                         __LASSERT(!cfs_list_empty(&tcd->tcd_daemon_pages));
617                         victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
618
619                         __LASSERT_TAGE_INVARIANT(victim);
620
621                         cfs_list_del(&victim->linkage);
622                         cfs_tage_free(victim);
623                         tcd->tcd_cur_daemon_pages--;
624                 }
625         }
626         spin_unlock(&pc->pc_lock);
627 }
628
629 static void put_pages_on_daemon_list(struct page_collection *pc)
630 {
631         struct cfs_trace_cpu_data *tcd;
632         int i, cpu;
633
634         cfs_for_each_possible_cpu(cpu) {
635                 cfs_tcd_for_each_type_lock(tcd, i, cpu)
636                         put_pages_on_tcd_daemon_list(pc, tcd);
637         }
638 }
639
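/* Decode every buffered trace record (header, file, function, message),
 * print it to the console and free the pages. */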
640 void cfs_trace_debug_print(void)
641 {
642         struct page_collection pc;
643         struct cfs_trace_page *tage;
644         struct cfs_trace_page *tmp;
645
646         spin_lock_init(&pc.pc_lock);
647
648         pc.pc_want_daemon_pages = 1;
649         collect_pages(&pc);
650         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
651                                            struct cfs_trace_page, linkage) {
652                 char *p, *file, *fn;
653                 cfs_page_t *page;
654
655                 __LASSERT_TAGE_INVARIANT(tage);
656
657                 page = tage->page;
658                 p = cfs_page_address(page);
659                 while (p < ((char *)cfs_page_address(page) + tage->used)) {
660                         struct ptldebug_header *hdr;
661                         int len;
662                         hdr = (void *)p;
663                         p += sizeof(*hdr);
664                         file = p;
665                         p += strlen(file) + 1;
666                         fn = p;
667                         p += strlen(fn) + 1;
668                         len = hdr->ph_len - (int)(p - (char *)hdr);
669
670                         cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
671
672                         p += len;
673                 }
674
675                 cfs_list_del(&tage->linkage);
676                 cfs_tage_free(tage);
677         }
678 }
679
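/* Collect all trace pages and write them to 'filename' (created exclusively);
 * on a short write the remaining pages are put back on their CPUs. */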
680 int cfs_tracefile_dump_all_pages(char *filename)
681 {
682         struct page_collection pc;
683         cfs_file_t *filp;
684         struct cfs_trace_page *tage;
685         struct cfs_trace_page *tmp;
686         int rc;
687
688         CFS_DECL_MMSPACE;
689
690         cfs_tracefile_write_lock();
691
692         filp = cfs_filp_open(filename,
693                              O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
694         if (!filp) {
695                 if (rc != -EEXIST)
696                         printk(CFS_KERN_ERR
697                                "LustreError: can't open %s for dump: rc %d\n",
698                                filename, rc);
699                 goto out;
700         }
701
702         spin_lock_init(&pc.pc_lock);
703         pc.pc_want_daemon_pages = 1;
704         collect_pages(&pc);
705         if (cfs_list_empty(&pc.pc_pages)) {
706                 rc = 0;
707                 goto close;
708         }
709
710         /* ok, for now, just write the pages.  in the future we'll be building
711          * iobufs with the pages and calling generic_direct_IO */
712         CFS_MMSPACE_OPEN;
713         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
714                                            struct cfs_trace_page, linkage) {
715
716                 __LASSERT_TAGE_INVARIANT(tage);
717
718                 rc = cfs_filp_write(filp, cfs_page_address(tage->page),
719                                     tage->used, cfs_filp_poff(filp));
720                 if (rc != (int)tage->used) {
721                         printk(CFS_KERN_WARNING "wanted to write %u but wrote "
722                                "%d\n", tage->used, rc);
723                         put_pages_back(&pc);
724                         __LASSERT(cfs_list_empty(&pc.pc_pages));
725                         break;
726                 }
727                 cfs_list_del(&tage->linkage);
728                 cfs_tage_free(tage);
729         }
730         CFS_MMSPACE_CLOSE;
731         rc = cfs_filp_fsync(filp);
732         if (rc)
733                 printk(CFS_KERN_ERR "sync returns %d\n", rc);
734  close:
735         cfs_filp_close(filp);
736  out:
737         cfs_tracefile_write_unlock();
738         return rc;
739 }
740
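/* Drop all buffered trace pages, including the per-CPU daemon pages. */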
741 void cfs_trace_flush_pages(void)
742 {
743         struct page_collection pc;
744         struct cfs_trace_page *tage;
745         struct cfs_trace_page *tmp;
746
747         spin_lock_init(&pc.pc_lock);
748
749         pc.pc_want_daemon_pages = 1;
750         collect_pages(&pc);
751         cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
752                                            struct cfs_trace_page, linkage) {
753
754                 __LASSERT_TAGE_INVARIANT(tage);
755
756                 cfs_list_del(&tage->linkage);
757                 cfs_tage_free(tage);
758         }
759 }
760
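/* Copy a string from user space into 'knl_buffer', strip trailing whitespace
 * and NUL-terminate it; fails if the result is empty or does not fit. */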
761 int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
762                             const char *usr_buffer, int usr_buffer_nob)
763 {
764         int    nob;
765
766         if (usr_buffer_nob > knl_buffer_nob)
767                 return -EOVERFLOW;
768
769         if (cfs_copy_from_user((void *)knl_buffer,
770                            (void *)usr_buffer, usr_buffer_nob))
771                 return -EFAULT;
772
773         nob = strnlen(knl_buffer, usr_buffer_nob);
774         while (--nob >= 0)                      /* strip trailing whitespace */
775                 if (!isspace(knl_buffer[nob]))
776                         break;
777
778         if (nob < 0)                            /* empty string */
779                 return -EINVAL;
780
781         if (nob == knl_buffer_nob)              /* no space to terminate */
782                 return -EOVERFLOW;
783
784         knl_buffer[nob + 1] = 0;                /* terminate */
785         return 0;
786 }
787 EXPORT_SYMBOL(cfs_trace_copyin_string);
788
789 int cfs_trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
790                              const char *knl_buffer, char *append)
791 {
792         /* NB if 'append' != NULL, it's a single character to append to the
793          * copied-out string: usually "\n" for /proc entries and "" (i.e. a
794          * terminating zero byte) for sysctl entries */
795         int   nob = strlen(knl_buffer);
796
797         if (nob > usr_buffer_nob)
798                 nob = usr_buffer_nob;
799
800         if (cfs_copy_to_user(usr_buffer, knl_buffer, nob))
801                 return -EFAULT;
802
803         if (append != NULL && nob < usr_buffer_nob) {
804                 if (cfs_copy_to_user(usr_buffer + nob, append, 1))
805                         return -EFAULT;
806
807                 nob++;
808         }
809
810         return nob;
811 }
812 EXPORT_SYMBOL(cfs_trace_copyout_string);
813
814 int cfs_trace_allocate_string_buffer(char **str, int nob)
815 {
816         if (nob > 2 * CFS_PAGE_SIZE)            /* string must be "sensible" */
817                 return -EINVAL;
818
819         *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO);
820         if (*str == NULL)
821                 return -ENOMEM;
822
823         return 0;
824 }
825
826 void cfs_trace_free_string_buffer(char *str, int nob)
827 {
828         cfs_free(str);
829 }
830
831 int cfs_trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
832 {
833         char         *str;
834         int           rc;
835
836         rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
837         if (rc != 0)
838                 return rc;
839
840         rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
841                                      usr_str, usr_str_nob);
842         if (rc != 0)
843                 goto out;
844
845 #if !defined(__WINNT__)
846         if (str[0] != '/') {
847                 rc = -EINVAL;
848                 goto out;
849         }
850 #endif
851         rc = cfs_tracefile_dump_all_pages(str);
852 out:
853         cfs_trace_free_string_buffer(str, usr_str_nob + 1);
854         return rc;
855 }
856
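/* Handle a debug daemon control string: "stop", "size=<MB>", or an absolute
 * path of the file to start tracing to. */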
857 int cfs_trace_daemon_command(char *str)
858 {
859         int       rc = 0;
860
861         cfs_tracefile_write_lock();
862
863         if (strcmp(str, "stop") == 0) {
864                 cfs_tracefile_write_unlock();
865                 cfs_trace_stop_thread();
866                 cfs_tracefile_write_lock();
867                 memset(cfs_tracefile, 0, sizeof(cfs_tracefile));
868
869         } else if (strncmp(str, "size=", 5) == 0) {
870                 cfs_tracefile_size = simple_strtoul(str + 5, NULL, 0);
871                 if (cfs_tracefile_size < 10 || cfs_tracefile_size > 20480)
872                         cfs_tracefile_size = CFS_TRACEFILE_SIZE;
873                 else
874                         cfs_tracefile_size <<= 20;
875
876         } else if (strlen(str) >= sizeof(cfs_tracefile)) {
877                 rc = -ENAMETOOLONG;
878 #ifndef __WINNT__
879         } else if (str[0] != '/') {
880                 rc = -EINVAL;
881 #endif
882         } else {
883                 strcpy(cfs_tracefile, str);
884
885                 printk(CFS_KERN_INFO
886                        "Lustre: debug daemon will attempt to start writing "
887                        "to %s (%lukB max)\n", cfs_tracefile,
888                        (long)(cfs_tracefile_size >> 10));
889
890                 cfs_trace_start_thread();
891         }
892
893         cfs_tracefile_write_unlock();
894         return rc;
895 }
896
897 int cfs_trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
898 {
899         char *str;
900         int   rc;
901
902         rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
903         if (rc != 0)
904                 return rc;
905
906         rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
907                                  usr_str, usr_str_nob);
908         if (rc == 0)
909                 rc = cfs_trace_daemon_command(str);
910
911         cfs_trace_free_string_buffer(str, usr_str_nob + 1);
912         return rc;
913 }
914
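/* Resize the trace buffers to 'mb' megabytes: clamp the value, divide it
 * evenly among CPUs and apply each tcd's page factor. */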
915 int cfs_trace_set_debug_mb(int mb)
916 {
917         int i;
918         int j;
919         int pages;
920         int limit = cfs_trace_max_debug_mb();
921         struct cfs_trace_cpu_data *tcd;
922
923         if (mb < cfs_num_possible_cpus()) {
924                 printk(CFS_KERN_WARNING
925                        "Lustre: %d MB is too small for debug buffer size, "
926                        "setting it to %d MB.\n", mb, cfs_num_possible_cpus());
927                 mb = cfs_num_possible_cpus();
928         }
929
930         if (mb > limit) {
931                 printk(CFS_KERN_WARNING
932                        "Lustre: %d MB is too large for debug buffer size, "
933                        "setting it to %d MB.\n", mb, limit);
934                 mb = limit;
935         }
936
937         mb /= cfs_num_possible_cpus();
938         pages = mb << (20 - CFS_PAGE_SHIFT);
939
940         cfs_tracefile_write_lock();
941
942         cfs_tcd_for_each(tcd, i, j)
943                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
944
945         cfs_tracefile_write_unlock();
946
947         return 0;
948 }
949
950 int cfs_trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
951 {
952         char     str[32];
953         int      rc;
954
955         rc = cfs_trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
956         if (rc < 0)
957                 return rc;
958
959         return cfs_trace_set_debug_mb(simple_strtoul(str, NULL, 0));
960 }
961
962 int cfs_trace_get_debug_mb(void)
963 {
964         int i;
965         int j;
966         struct cfs_trace_cpu_data *tcd;
967         int total_pages = 0;
968
969         cfs_tracefile_read_lock();
970
971         cfs_tcd_for_each(tcd, i, j)
972                 total_pages += tcd->tcd_max_pages;
973
974         cfs_tracefile_read_unlock();
975
976         return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1;
977 }
978
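/* Main loop of the debug daemon thread: periodically collect trace pages,
 * append them to cfs_tracefile (wrapping at cfs_tracefile_size) and park the
 * written pages on the per-CPU daemon lists. */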
979 static int tracefiled(void *arg)
980 {
981         struct page_collection pc;
982         struct tracefiled_ctl *tctl = arg;
983         struct cfs_trace_page *tage;
984         struct cfs_trace_page *tmp;
985         cfs_file_t *filp;
986         int last_loop = 0;
987         int rc;
988
989         CFS_DECL_MMSPACE;
990
991         /* we're started late enough that we pick up init's fs context */
992         /* this is so broken in uml?  what on earth is going on? */
993         cfs_daemonize("ktracefiled");
994
995         spin_lock_init(&pc.pc_lock);
996         complete(&tctl->tctl_start);
997
998         while (1) {
999                 cfs_waitlink_t __wait;
1000
1001                 pc.pc_want_daemon_pages = 0;
1002                 collect_pages(&pc);
1003                 if (cfs_list_empty(&pc.pc_pages))
1004                         goto end_loop;
1005
1006                 filp = NULL;
1007                 cfs_tracefile_read_lock();
1008                 if (cfs_tracefile[0] != 0) {
1009                         filp = cfs_filp_open(cfs_tracefile,
1010                                              O_CREAT | O_RDWR | O_LARGEFILE,
1011                                              0600, &rc);
1012                         if (!(filp))
1013                                 printk(CFS_KERN_WARNING "couldn't open %s: "
1014                                        "%d\n", cfs_tracefile, rc);
1015                 }
1016                 cfs_tracefile_read_unlock();
1017                 if (filp == NULL) {
1018                         put_pages_on_daemon_list(&pc);
1019                         __LASSERT(cfs_list_empty(&pc.pc_pages));
1020                         goto end_loop;
1021                 }
1022
1023                 CFS_MMSPACE_OPEN;
1024
1025                 cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
1026                                                    struct cfs_trace_page,
1027                                                    linkage) {
1028                         static loff_t f_pos;
1029
1030                         __LASSERT_TAGE_INVARIANT(tage);
1031
1032                         if (f_pos >= (off_t)cfs_tracefile_size)
1033                                 f_pos = 0;
1034                         else if (f_pos > (off_t)cfs_filp_size(filp))
1035                                 f_pos = cfs_filp_size(filp);
1036
1037                         rc = cfs_filp_write(filp, cfs_page_address(tage->page),
1038                                             tage->used, &f_pos);
1039                         if (rc != (int)tage->used) {
1040                                 printk(CFS_KERN_WARNING "wanted to write %u "
1041                                        "but wrote %d\n", tage->used, rc);
1042                                 put_pages_back(&pc);
1043                                 __LASSERT(cfs_list_empty(&pc.pc_pages));
1044                         }
1045                 }
1046                 CFS_MMSPACE_CLOSE;
1047
1048                 cfs_filp_close(filp);
1049                 put_pages_on_daemon_list(&pc);
1050                 if (!cfs_list_empty(&pc.pc_pages)) {
1051                         int i;
1052
1053                         printk(CFS_KERN_ALERT "Lustre: trace pages aren't "
1054                                "empty\n");
1055                         printk(CFS_KERN_ERR "total cpus(%d): ",
1056                                cfs_num_possible_cpus());
1057                         for (i = 0; i < cfs_num_possible_cpus(); i++)
1058                                 if (cpu_online(i))
1059                                         printk(CFS_KERN_ERR "%d(on) ", i);
1060                                 else
1061                                         printk(CFS_KERN_ERR "%d(off) ", i);
1062                         printk(CFS_KERN_ERR "\n");
1063
1064                         i = 0;
1065                         cfs_list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
1066                                                      linkage)
1067                                 printk(CFS_KERN_ERR "page %d belongs to cpu "
1068                                        "%d\n", ++i, tage->cpu);
1069                         printk(CFS_KERN_ERR "There are %d pages unwritten\n",
1070                                i);
1071                 }
1072                 __LASSERT(cfs_list_empty(&pc.pc_pages));
1073 end_loop:
1074                 if (cfs_atomic_read(&tctl->tctl_shutdown)) {
1075                         if (last_loop == 0) {
1076                                 last_loop = 1;
1077                                 continue;
1078                         } else {
1079                                 break;
1080                         }
1081                 }
1082                 cfs_waitlink_init(&__wait);
1083                 cfs_waitq_add(&tctl->tctl_waitq, &__wait);
1084                 cfs_set_current_state(CFS_TASK_INTERRUPTIBLE);
1085                 cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
1086                                     cfs_time_seconds(1));
1087                 cfs_waitq_del(&tctl->tctl_waitq, &__wait);
1088         }
1089         complete(&tctl->tctl_stop);
1090         return 0;
1091 }
1092
1093 int cfs_trace_start_thread(void)
1094 {
1095         struct tracefiled_ctl *tctl = &trace_tctl;
1096         int rc = 0;
1097
1098         mutex_lock(&cfs_trace_thread_mutex);
1099         if (thread_running)
1100                 goto out;
1101
1102         init_completion(&tctl->tctl_start);
1103         init_completion(&tctl->tctl_stop);
1104         cfs_waitq_init(&tctl->tctl_waitq);
1105         cfs_atomic_set(&tctl->tctl_shutdown, 0);
1106
1107         if (cfs_create_thread(tracefiled, tctl, 0) < 0) {
1108                 rc = -ECHILD;
1109                 goto out;
1110         }
1111
1112         wait_for_completion(&tctl->tctl_start);
1113         thread_running = 1;
1114 out:
1115         mutex_unlock(&cfs_trace_thread_mutex);
1116         return rc;
1117 }
1118
1119 void cfs_trace_stop_thread(void)
1120 {
1121         struct tracefiled_ctl *tctl = &trace_tctl;
1122
1123         mutex_lock(&cfs_trace_thread_mutex);
1124         if (thread_running) {
1125                 printk(CFS_KERN_INFO
1126                        "Lustre: shutting down debug daemon thread...\n");
1127                 cfs_atomic_set(&tctl->tctl_shutdown, 1);
1128                 wait_for_completion(&tctl->tctl_stop);
1129                 thread_running = 0;
1130         }
1131         mutex_unlock(&cfs_trace_thread_mutex);
1132 }
1133
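/* Set up the per-CPU trace data and distribute 'max_pages' between the tcd
 * types according to their page factors. */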
1134 int cfs_tracefile_init(int max_pages)
1135 {
1136         struct cfs_trace_cpu_data *tcd;
1137         int                    i;
1138         int                    j;
1139         int                    rc;
1140         int                    factor;
1141
1142         rc = cfs_tracefile_init_arch();
1143         if (rc != 0)
1144                 return rc;
1145
1146         cfs_tcd_for_each(tcd, i, j) {
1147                 /* tcd_pages_factor is initialized in cfs_tracefile_init_arch(). */
1148                 factor = tcd->tcd_pages_factor;
1149                 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
1150                 CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
1151                 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
1152                 tcd->tcd_cur_pages = 0;
1153                 tcd->tcd_cur_stock_pages = 0;
1154                 tcd->tcd_cur_daemon_pages = 0;
1155                 tcd->tcd_max_pages = (max_pages * factor) / 100;
1156                 LASSERT(tcd->tcd_max_pages > 0);
1157                 tcd->tcd_shutting_down = 0;
1158         }
1159
1160         return 0;
1161 }
1162
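/* Mark every tcd as shutting down and free all of its buffered trace pages. */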
1163 static void trace_cleanup_on_all_cpus(void)
1164 {
1165         struct cfs_trace_cpu_data *tcd;
1166         struct cfs_trace_page *tage;
1167         struct cfs_trace_page *tmp;
1168         int i, cpu;
1169
1170         cfs_for_each_possible_cpu(cpu) {
1171                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
1172                         tcd->tcd_shutting_down = 1;
1173
1174                         cfs_list_for_each_entry_safe_typed(tage, tmp,
1175                                                            &tcd->tcd_pages,
1176                                                            struct cfs_trace_page,
1177                                                            linkage) {
1178                                 __LASSERT_TAGE_INVARIANT(tage);
1179
1180                                 cfs_list_del(&tage->linkage);
1181                                 cfs_tage_free(tage);
1182                         }
1183
1184                         tcd->tcd_cur_pages = 0;
1185                 }
1186         }
1187 }
1188
1189 static void cfs_trace_cleanup(void)
1190 {
1191         struct page_collection pc;
1192
1193         CFS_INIT_LIST_HEAD(&pc.pc_pages);
1194         spin_lock_init(&pc.pc_lock);
1195
1196         trace_cleanup_on_all_cpus();
1197
1198         cfs_tracefile_fini_arch();
1199 }
1200
1201 void cfs_tracefile_exit(void)
1202 {
1203         cfs_trace_stop_thread();
1204         cfs_trace_cleanup();
1205 }