Whamcloud - gitweb
b=13284
[fs/lustre-release.git] / lnet / libcfs / tracefile.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Zach Brown <zab@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24
25 #define DEBUG_SUBSYSTEM S_LNET
26 #define LUSTRE_TRACEFILE_PRIVATE
27 #include "tracefile.h"
28
29 #include <libcfs/kp30.h>
30 #include <libcfs/libcfs.h>
31
32 /* XXX move things up to the top, comment */
33 union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
34
35 char tracefile[TRACEFILE_NAME_SIZE];
36 long long tracefile_size = TRACEFILE_SIZE;
37 static struct tracefiled_ctl trace_tctl;
38 struct semaphore trace_thread_sem;
39 static int thread_running = 0;
40
41 atomic_t tage_allocated = ATOMIC_INIT(0);
42
43 static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
44                                          struct trace_cpu_data *tcd);
45
46 static inline struct trace_page *tage_from_list(struct list_head *list)
47 {
48         return list_entry(list, struct trace_page, linkage);
49 }
50
51 static struct trace_page *tage_alloc(int gfp)
52 {
53         cfs_page_t        *page;
54         struct trace_page *tage;
55
56         /*
57          * Don't spam console with allocation failures: they will be reported
58          * by upper layer anyway.
59          */
60         gfp |= CFS_ALLOC_NOWARN;
61         page = cfs_alloc_page(gfp);
62         if (page == NULL)
63                 return NULL;
64
65         tage = cfs_alloc(sizeof(*tage), gfp);
66         if (tage == NULL) {
67                 cfs_free_page(page);
68                 return NULL;
69         }
70
71         tage->page = page;
72         atomic_inc(&tage_allocated);
73         return tage;
74 }
75
76 static void tage_free(struct trace_page *tage)
77 {
78         __LASSERT(tage != NULL);
79         __LASSERT(tage->page != NULL);
80
81         cfs_free_page(tage->page);
82         cfs_free(tage);
83         atomic_dec(&tage_allocated);
84 }
85
86 static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
87 {
88         __LASSERT(tage != NULL);
89         __LASSERT(queue != NULL);
90
91         list_move_tail(&tage->linkage, queue);
92 }
93
94 int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
95                        struct list_head *stock)
96 {
97         int i;
98
99         /*
100          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
101          * from here: this will lead to infinite recursion.
102          */
103
104         for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
105                 struct trace_page *tage;
106
107                 tage = tage_alloc(gfp);
108                 if (tage == NULL)
109                         break;
110                 list_add_tail(&tage->linkage, stock);
111         }
112         return i;
113 }
114
/* return a page that has 'len' bytes left at the end */
static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd,
                                             unsigned long len)
{
        struct trace_page *tage;

        /* First try the current (tail) page if it still has room. */
        if (tcd->tcd_cur_pages > 0) {
                __LASSERT(!list_empty(&tcd->tcd_pages));
                tage = tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= CFS_PAGE_SIZE)
                        return tage;
        }

        /* Otherwise start a fresh page, if under the per-cpu page cap. */
        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                if (tcd->tcd_cur_stock_pages > 0) {
                        /* prefer a preallocated page from the stock list */
                        tage = tage_from_list(tcd->tcd_stock_pages.prev);
                        -- tcd->tcd_cur_stock_pages;
                        list_del_init(&tage->linkage);
                } else {
                        /* atomic allocation: we may be in interrupt context */
                        tage = tage_alloc(CFS_ALLOC_ATOMIC);
                        if (tage == NULL) {
                                printk(KERN_WARNING
                                       "failure to allocate a tage (%ld)\n",
                                       tcd->tcd_cur_pages);
                                return NULL;
                        }
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                tage->type = tcd->tcd_type;
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        /*
                         * wake up tracefiled to process some pages.
                         */
                        cfs_waitq_signal(&tctl->tctl_waitq);
                }
                return tage;
        }
        /* buffer is full: caller decides whether to shrink or recycle */
        return NULL;
}
160
161 static void tcd_shrink(struct trace_cpu_data *tcd)
162 {
163         int pgcount = tcd->tcd_cur_pages / 10;
164         struct page_collection pc;
165         struct trace_page *tage;
166         struct trace_page *tmp;
167
168         /*
169          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
170          * from here: this will lead to infinite recursion.
171          */
172
173         printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
174                " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages);
175
176         CFS_INIT_LIST_HEAD(&pc.pc_pages);
177         spin_lock_init(&pc.pc_lock);
178
179         list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
180                 if (pgcount-- == 0)
181                         break;
182
183                 list_move_tail(&tage->linkage, &pc.pc_pages);
184                 tcd->tcd_cur_pages--;
185         }
186         put_pages_on_tcd_daemon_list(&pc, tcd);
187 }
188
189 /* return a page that has 'len' bytes left at the end */
190 static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
191                                          unsigned long len)
192 {
193         struct trace_page *tage;
194
195         /*
196          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
197          * from here: this will lead to infinite recursion.
198          */
199
200         if (len > CFS_PAGE_SIZE) {
201                 printk(KERN_ERR
202                        "cowardly refusing to write %lu bytes in a page\n", len);
203                 return NULL;
204         }
205
206         tage = trace_get_tage_try(tcd, len);
207         if (tage != NULL)
208                 return tage;
209         if (thread_running)
210                 tcd_shrink(tcd);
211         if (tcd->tcd_cur_pages > 0) {
212                 tage = tage_from_list(tcd->tcd_pages.next);
213                 tage->used = 0;
214                 tage_to_tail(tage, &tcd->tcd_pages);
215         }
216         return tage;
217 }
218
/*
 * Core debug-message formatter.  Formats (format1, args) followed by
 * (format2, ...) into a per-cpu trace page and, when the mask requests
 * it, echoes the message to the console with rate-limiting driven by
 * 'cdls'.  Returns 0 when the message reached the trace buffer, 1 when
 * console output was suppressed on the no-buffer path.
 */
int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
                       const char *file, const char *fn, const int line,
                       const char *format1, va_list args,
                       const char *format2, ...)
{
        struct trace_cpu_data   *tcd = NULL;
        struct ptldebug_header   header;
        struct trace_page       *tage;
        /* string_buf is used only if tcd != NULL, and is always set then */
        char                    *string_buf = NULL;
        char                    *debug_buf;
        int                      known_size;
        int                      needed = 85; /* average message length */
        int                      max_nob;
        va_list                  ap;
        int                      depth;
        int                      i;
        int                      remain;

        /* record only the basename of the source file */
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;


        set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK());

        tcd = trace_get_tcd();
        if (tcd == NULL)                /* arch may not log in IRQ context */
                goto console;

        if (tcd->tcd_shutting_down) {
                trace_put_tcd(tcd);
                tcd = NULL;
                goto console;
        }

        depth = __current_nesting_level();
        /* fixed part of the record: nesting dots, file name and optional
         * function name, each string NUL-terminated */
        known_size = strlen(file) + 1 + depth;
        if (fn)
                known_size += strlen(fn) + 1;

        if (libcfs_debug_binary)
                known_size += sizeof(header);

        /*
         * Two passes at most: vsnprintf() returns the size the output
         * really requires (without the terminating NUL), so if the
         * initial 'needed' guess was too small, the second pass asks
         * for a page with exactly enough room.
         */
        for (i=0;i<2;i++) {
                tage = trace_get_tage(tcd, needed + known_size + 1);
                if (tage == NULL) {
                        /* promote to D_ERROR so the oversized message at
                         * least reaches the console below */
                        if (needed + known_size > CFS_PAGE_SIZE)
                                mask |= D_ERROR;

                        trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size;

                max_nob = CFS_PAGE_SIZE - tage->used - known_size;
                if (max_nob <= 0) {
                        printk(KERN_EMERG "negative max_nob: %i\n", max_nob);
                        mask |= D_ERROR;
                        trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                needed = 0;
                if (format1) {
                        va_copy(ap, args);
                        needed = vsnprintf(string_buf, max_nob, format1, ap);
                        va_end(ap);
                }


                if (format2) {
                        remain = max_nob - needed;
                        if (remain < 0)
                                remain = 0;

                        va_start(ap, format2);
                        needed += vsnprintf(string_buf+needed, remain, format2, ap);
                        va_end(ap);
                }

                if (needed < max_nob) /* well. printing ok.. */
                        break;
        }

        /* NOTE(review): if both formats produced an empty string
         * (needed == 0) this examines the byte before string_buf --
         * confirm callers never pass formats that expand to "" */
        if (*(string_buf+needed-1) != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        header.ph_len = known_size + needed;
        debug_buf = (char *)cfs_page_address(tage->page) + tage->used;

        /* lay the record down: [header] dots file\0 [fn\0] text */
        if (libcfs_debug_binary) {
                memcpy(debug_buf, &header, sizeof(header));
                tage->used += sizeof(header);
                debug_buf += sizeof(header);
        }

        /* indent message according to the nesting level */
        while (depth-- > 0) {
                *(debug_buf++) = '.';
                ++ tage->used;
        }

        strcpy(debug_buf, file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        if (fn) {
                strcpy(debug_buf, fn);
                tage->used += strlen(fn) + 1;
                debug_buf += strlen(fn) + 1;
        }

        /* the formatted text must begin exactly where it was written above */
        __LASSERT(debug_buf == string_buf);

        tage->used += needed;
        __LASSERT (tage->used <= CFS_PAGE_SIZE);

console:
        if (!((mask & D_CANTMASK) != 0 || (mask & libcfs_printk) != 0)) {
                /* no console output requested */
                if (tcd != NULL)
                        trace_put_tcd(tcd);
                return 1;
        }

        if (cdls != NULL) {
                /* console rate limiting: drop messages arriving before
                 * cdls_next, counting how many were skipped */
                if (libcfs_console_ratelimit &&
                    cdls->cdls_next != 0 &&     /* not first time ever */
                    !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
                        /* skipping a console message */
                        cdls->cdls_count++;
                        if (tcd != NULL)
                                trace_put_tcd(tcd);
                        return 1;
                }

                if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
                                                       libcfs_console_max_delay
                                                       + cfs_time_seconds(10))) {
                        /* last timeout was a long time ago */
                        cdls->cdls_delay /= libcfs_console_backoff * 4;
                } else {
                        /* bursty messages back off exponentially, clamped
                         * to [min_delay, max_delay] */
                        cdls->cdls_delay *= libcfs_console_backoff;

                        if (cdls->cdls_delay < libcfs_console_min_delay)
                                cdls->cdls_delay = libcfs_console_min_delay;
                        else if (cdls->cdls_delay > libcfs_console_max_delay)
                                cdls->cdls_delay = libcfs_console_max_delay;
                }

                /* ensure cdls_next is never zero after it's been seen */
                cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
        }

        if (tcd != NULL) {
                /* message already formatted into the trace page above */
                print_to_console(&header, mask, string_buf, needed, file, fn);
                trace_put_tcd(tcd);
        } else {
                /* no tcd: format again into a dedicated console buffer */
                string_buf = trace_get_console_buffer();

                needed = 0;
                if (format1 != NULL) {
                        va_copy(ap, args);
                        needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap);
                        va_end(ap);
                }
                if (format2 != NULL) {
                        remain = TRACE_CONSOLE_BUFFER_SIZE - needed;
                        if (remain > 0) {
                                va_start(ap, format2);
                                needed += vsnprintf(string_buf+needed, remain, format2, ap);
                                va_end(ap);
                        }
                }
                print_to_console(&header, mask,
                                 string_buf, needed, file, fn);

                trace_put_console_buffer(string_buf);
        }

        if (cdls != NULL && cdls->cdls_count != 0) {
                /* report how many messages the rate limiter suppressed
                 * since the last one got through */
                string_buf = trace_get_console_buffer();

                needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE,
                         "Skipped %d previous similar message%s\n",
                         cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");

                print_to_console(&header, mask,
                                 string_buf, needed, file, fn);

                trace_put_console_buffer(string_buf);
                cdls->cdls_count = 0;
        }

        return 0;
}
EXPORT_SYMBOL(libcfs_debug_vmsg2);
425
/*
 * Entry point for failed LASSERT()s: log the failed expression at
 * D_EMERG and then LBUG().  Does not return.
 */
void
libcfs_assertion_failed(const char *expr, const char *file,
                        const char *func, const int line)
{
        libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
                         "ASSERTION(%s) failed\n", expr);
        LBUG();
}
EXPORT_SYMBOL(libcfs_assertion_failed);
435
/*
 * Called when a trace-internal assertion (__LASSERT) fires: set the
 * global panic/catastrophe flags, push the message straight to the
 * console, and panic.  Does not return.
 */
void
trace_assertion_failed(const char *str,
                       const char *fn, const char *file, int line)
{
        struct ptldebug_header hdr;

        libcfs_panic_in_progress = 1;
        libcfs_catastrophe = 1;
        /* make the flags visible to other CPUs before we start printing */
        mb();

        set_ptldebug_header(&hdr, DEBUG_SUBSYSTEM, D_EMERG, line,
                            CDEBUG_STACK());

        print_to_console(&hdr, D_EMERG, str, strlen(str), file, fn);

        LIBCFS_PANIC("Lustre debug assertion failure\n");

        /* not reached */
}
455
456 static void
457 panic_collect_pages(struct page_collection *pc)
458 {
459         /* Do the collect_pages job on a single CPU: assumes that all other
460          * CPUs have been stopped during a panic.  If this isn't true for some
461          * arch, this will have to be implemented separately in each arch.  */
462         int                    i;
463         int                    j;
464         struct trace_cpu_data *tcd;
465
466         CFS_INIT_LIST_HEAD(&pc->pc_pages);
467
468         tcd_for_each(tcd, i, j) {
469                 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
470                 tcd->tcd_cur_pages = 0;
471
472                 if (pc->pc_want_daemon_pages) {
473                         list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
474                         tcd->tcd_cur_daemon_pages = 0;
475                 }
476         }
477 }
478
479 static void collect_pages_on_cpu(void *info)
480 {
481         struct trace_cpu_data *tcd;
482         struct page_collection *pc = info;
483         int i;
484
485         spin_lock(&pc->pc_lock);
486         tcd_for_each_type_lock(tcd, i) {
487                 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
488                 tcd->tcd_cur_pages = 0;
489                 if (pc->pc_want_daemon_pages) {
490                         list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
491                         tcd->tcd_cur_daemon_pages = 0;
492                 }
493         }
494         spin_unlock(&pc->pc_lock);
495 }
496
497 static void collect_pages(struct page_collection *pc)
498 {
499         CFS_INIT_LIST_HEAD(&pc->pc_pages);
500
501         if (libcfs_panic_in_progress)
502                 panic_collect_pages(pc);
503         else
504                 trace_call_on_all_cpus(collect_pages_on_cpu, pc);
505 }
506
/* Cross-call handler: return collected pages to the per-cpu buffers
 * they were taken from, matching on originating CPU and tcd type. */
static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *cur_head;
        struct trace_page *tage;
        struct trace_page *tmp;
        int i;

        spin_lock(&pc->pc_lock);
        tcd_for_each_type_lock(tcd, i) {
                /* capture the current head so returned pages end up in
                 * front of anything logged after the collection */
                cur_head = tcd->tcd_pages.next;

                list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                        __LASSERT_TAGE_INVARIANT(tage);

                        /* only reclaim pages this CPU/type originally owned */
                        if (tage->cpu != smp_processor_id() || tage->type != i)
                                continue;

                        tage_to_tail(tage, cur_head);
                        tcd->tcd_cur_pages++;
                }
        }
        spin_unlock(&pc->pc_lock);
}
533
/* Return collected pages to their per-cpu buffers.  A no-op during a
 * panic, when other CPUs may be stopped and the pages stay collected. */
static void put_pages_back(struct page_collection *pc)
{
        if (!libcfs_panic_in_progress)
                trace_call_on_all_cpus(put_pages_back_on_cpu, pc);
}
539
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
                                         struct trace_cpu_data *tcd)
{
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock(&pc->pc_lock);
        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {

                __LASSERT_TAGE_INVARIANT(tage);

                /* only take pages belonging to this CPU and this tcd type */
                if (tage->cpu != smp_processor_id() ||
                    tage->type != tcd->tcd_type)
                        continue;

                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                /* ringbuffer behaviour: past the cap, evict the oldest
                 * daemon page to make room for the one just added */
                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        struct trace_page *victim;

                        __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        victim = tage_from_list(tcd->tcd_daemon_pages.next);

                        __LASSERT_TAGE_INVARIANT(victim);

                        list_del(&victim->linkage);
                        tage_free(victim);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);
}
577
/* Cross-call handler: file this CPU's share of the collected pages
 * ('info' is a page_collection) onto each of its daemon ringbuffers. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        int i;

        tcd_for_each_type_lock(tcd, i)
                put_pages_on_tcd_daemon_list(info, tcd);
}
586
/* Distribute a collected set of pages across all CPUs' daemon lists. */
static void put_pages_on_daemon_list(struct page_collection *pc)
{
        trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc);
}
591
/* Decode and print every buffered trace record to the console,
 * consuming (freeing) all pages including the daemon ringbuffers. */
void trace_debug_print(void)
{
        struct page_collection pc;
        struct trace_page *tage;
        struct trace_page *tmp;

        spin_lock_init(&pc.pc_lock);

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                cfs_page_t *page;

                __LASSERT_TAGE_INVARIANT(tage);

                page = tage->page;
                p = cfs_page_address(page);
                /* records are packed back to back as written by
                 * libcfs_debug_vmsg2: header, indent-dots + file\0, fn\0,
                 * message text; hdr->ph_len covers the whole record */
                while (p < ((char *)cfs_page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        /* remaining bytes of the record are the message */
                        len = hdr->ph_len - (p - (char *)hdr);

                        print_to_console(hdr, D_EMERG, p, len, file, fn);

                        p += len;
                }

                list_del(&tage->linkage);
                tage_free(tage);
        }
}
630
/* Write every buffered trace page (including daemon pages) to
 * 'filename', which is created exclusively.  Returns 0 on success or a
 * negative errno; on a short write the remaining pages are returned to
 * the per-cpu buffers instead of being lost. */
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        cfs_file_t *filp;
        struct trace_page *tage;
        struct trace_page *tmp;
        int rc;

        CFS_DECL_MMSPACE;

        /* serialize against the daemon and other dumpers */
        tracefile_write_lock();

        filp = cfs_filp_open(filename,
                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
        if (!filp) {
                /* EEXIST is quietly returned: a dump is already there */
                if (rc != -EEXIST)
                        printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                               filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        CFS_MMSPACE_OPEN;
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {

                __LASSERT_TAGE_INVARIANT(tage);

                rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                    tage->used, cfs_filp_poff(filp));
                if (rc != (int)tage->used) {
                        printk(KERN_WARNING "wanted to write %u but wrote "
                               "%d\n", tage->used, rc);
                        /* short write: return what's left to the per-cpu
                         * buffers rather than dropping it */
                        put_pages_back(&pc);
                        __LASSERT(list_empty(&pc.pc_pages));
                        break;
                }
                list_del(&tage->linkage);
                tage_free(tage);
        }
        CFS_MMSPACE_CLOSE;
        rc = cfs_filp_fsync(filp);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        cfs_filp_close(filp);
 out:
        tracefile_write_unlock();
        return rc;
}
689
690 void trace_flush_pages(void)
691 {
692         struct page_collection pc;
693         struct trace_page *tage;
694         struct trace_page *tmp;
695
696         spin_lock_init(&pc.pc_lock);
697
698         pc.pc_want_daemon_pages = 1;
699         collect_pages(&pc);
700         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
701
702                 __LASSERT_TAGE_INVARIANT(tage);
703
704                 list_del(&tage->linkage);
705                 tage_free(tage);
706         }
707 }
708
709 int trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
710                         const char *usr_buffer, int usr_buffer_nob)
711 {
712         int    nob;
713         
714         if (usr_buffer_nob > knl_buffer_nob)
715                 return -EOVERFLOW;
716         
717         if (copy_from_user((void *)knl_buffer, 
718                            (void *)usr_buffer, usr_buffer_nob))
719                 return -EFAULT;
720
721         nob = strnlen(knl_buffer, usr_buffer_nob);
722         while (nob-- >= 0)                      /* strip trailing whitespace */
723                 if (!isspace(knl_buffer[nob]))
724                         break;
725
726         if (nob < 0)                            /* empty string */
727                 return -EINVAL;
728
729         if (nob == knl_buffer_nob)              /* no space to terminate */
730                 return -EOVERFLOW;
731
732         knl_buffer[nob + 1] = 0;                /* terminate */
733         return 0;
734 }
735
736 int trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
737                          const char *knl_buffer, char *append)
738 {
739         /* NB if 'append' != NULL, it's a single character to append to the
740          * copied out string - usually "\n", for /proc entries and "" (i.e. a
741          * terminating zero byte) for sysctl entries */
742         int   nob = strlen(knl_buffer);
743         
744         if (nob > usr_buffer_nob)
745                 nob = usr_buffer_nob;
746         
747         if (copy_to_user(usr_buffer, knl_buffer, nob))
748                 return -EFAULT;
749         
750         if (append != NULL && nob < usr_buffer_nob) {
751                 if (copy_to_user(usr_buffer + nob, append, 1))
752                         return -EFAULT;
753                 
754                 nob++;
755         }
756
757         return nob;
758 }
759
760 int trace_allocate_string_buffer(char **str, int nob)
761 {
762         if (nob > 2 * CFS_PAGE_SIZE)            /* string must be "sensible" */
763                 return -EINVAL;
764         
765         *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO);
766         if (*str == NULL)
767                 return -ENOMEM;
768
769         return 0;
770 }
771
/* Free a buffer from trace_allocate_string_buffer().  'nob' is unused
 * here but kept for symmetry with the allocator's signature. */
void trace_free_string_buffer(char *str, int nob)
{
        cfs_free(str);
}
776
777 int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
778 {
779         char         *str;
780         int           rc;
781
782         rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
783         if (rc != 0)
784                 return rc;
785
786         rc = trace_copyin_string(str, usr_str_nob + 1,
787                                  usr_str, usr_str_nob);
788         if (rc != 0)
789                 goto out;
790
791 #if !defined(__WINNT__)
792         if (str[0] != '/') {
793                 rc = -EINVAL;
794                 goto out;
795         }
796 #endif
797         rc = tracefile_dump_all_pages(str);
798 out:
799         trace_free_string_buffer(str, usr_str_nob + 1);
800         return rc;
801 }
802
/* Parse a debug-daemon control string:
 *   "stop"     -- stop the daemon thread and forget the trace file name
 *   "size=<n>" -- set the trace file size in MB (10..20480, else default)
 *   <path>     -- trace file path (absolute, except on Windows); starts
 *                 the daemon thread
 * Returns 0 or a negative errno. */
int trace_daemon_command(char *str)
{
        int       rc = 0;

        tracefile_write_lock();

        if (strcmp(str, "stop") == 0) {
                trace_stop_thread();
                memset(tracefile, 0, sizeof(tracefile));

        } else if (strncmp(str, "size=", 5) == 0) {
                tracefile_size = simple_strtoul(str + 5, NULL, 0);
                /* accept 10..20480 MB; anything else reverts to default */
                if (tracefile_size < 10 || tracefile_size > 20480)
                        tracefile_size = TRACEFILE_SIZE;
                else
                        tracefile_size <<= 20;  /* MB -> bytes */

        } else if (strlen(str) >= sizeof(tracefile)) {
                rc = -ENAMETOOLONG;
#ifndef __WINNT__
        } else if (str[0] != '/') {
                rc = -EINVAL;
#endif
        } else {
                strcpy(tracefile, str);

                printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
                       "to %s (%lukB max)\n", tracefile,
                       (long)(tracefile_size >> 10));

                trace_start_thread();
        }

        tracefile_write_unlock();
        return rc;
}
839
/* Userspace front-end for trace_daemon_command(): copy the command
 * string in, run it, clean up.  Returns 0 or a negative errno. */
int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
{
        char *str;
        int   rc;

        rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
        if (rc == 0) {
                rc = trace_copyin_string(str, usr_str_nob + 1,
                                         usr_str, usr_str_nob);
                if (rc == 0)
                        rc = trace_daemon_command(str);

                trace_free_string_buffer(str, usr_str_nob + 1);
        }
        return rc;
}
857
858 int trace_set_debug_mb(int mb)
859 {
860         int i;
861         int j;
862         int pages;
863         int limit = trace_max_debug_mb();
864         struct trace_cpu_data *tcd;
865         
866         if (mb < num_possible_cpus())
867                 return -EINVAL;
868
869         if (mb > limit) {
870                 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
871                        "%dMB - limit is %d\n", mb, limit);
872                 return -EINVAL;
873         }
874
875         mb /= num_possible_cpus();
876         pages = mb << (20 - CFS_PAGE_SHIFT);
877
878         tracefile_write_lock();
879
880         tcd_for_each(tcd, i, j)
881                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
882
883         tracefile_write_unlock();
884
885         return 0;
886 }
887
888 int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
889 {
890         char     str[32];
891         int      rc;
892
893         rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
894         if (rc < 0)
895                 return rc;
896
897         return trace_set_debug_mb(simple_strtoul(str, NULL, 0));
898 }
899
900 int trace_get_debug_mb(void)
901 {
902         int i;
903         int j;
904         struct trace_cpu_data *tcd;
905         int total_pages = 0;
906         
907         tracefile_read_lock();
908
909         tcd_for_each(tcd, i, j)
910                 total_pages += tcd->tcd_max_pages;
911
912         tracefile_read_unlock();
913
914         return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1;
915 }
916
/*
 * Daemon thread body: roughly once a second (or when woken via
 * tctl_waitq), collect the pages of buffered debug messages and append
 * them to the file named by the global 'tracefile', until
 * tctl_shutdown is set.  Started by trace_start_thread(); signals
 * startup/exit through tctl_start/tctl_stop.  Always returns 0.
 */
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct trace_page *tage;
        struct trace_page *tmp;
        struct ptldebug_header *hdr;
        cfs_file_t *filp;
        int rc;

        CFS_DECL_MMSPACE;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        cfs_daemonize("ktracefiled");

        spin_lock_init(&pc.pc_lock);
        /* tell trace_start_thread() we are up */
        complete(&tctl->tctl_start);

        while (1) {
                cfs_waitlink_t __wait;

                /* sleep for up to one second, or until someone pokes
                 * tctl_waitq */
                cfs_waitlink_init(&__wait);
                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
                                    cfs_time_seconds(1));
                cfs_waitq_del(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                /* grab only the pages queued by normal tracing, not the
                 * ones already on the daemon lists */
                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                /* read lock: 'tracefile' may be changed concurrently by
                 * the daemon-command path */
                tracefile_read_lock();
                if (tracefile[0] != 0) {
                        filp = cfs_filp_open(tracefile,
                                             O_CREAT | O_RDWR | O_LARGEFILE,
                                             0600, &rc);
                        if (!(filp))
                                printk(KERN_WARNING "couldn't open %s: %d\n",
                                       tracefile, rc);
                }
                tracefile_read_unlock();
                if (filp == NULL) {
                        /* no output file: requeue the pages for a later
                         * attempt instead of dropping them */
                        put_pages_on_daemon_list(&pc);
                        __LASSERT(list_empty(&pc.pc_pages));
                        continue;
                }

                CFS_MMSPACE_OPEN;

                /* mark the first header, so we can sort in chunks */
                tage = tage_from_list(pc.pc_pages.next);
                __LASSERT_TAGE_INVARIANT(tage);

                hdr = cfs_page_address(tage->page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        /* write offset persists across daemon wakeups so
                         * the file is treated as a wrap-around log capped
                         * at tracefile_size */
                        static loff_t f_pos;

                        __LASSERT_TAGE_INVARIANT(tage);

                        if (f_pos >= (off_t)tracefile_size)
                                f_pos = 0;
                        else if (f_pos > cfs_filp_size(filp))
                                f_pos = cfs_filp_size(filp);

                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
                                            tage->used, &f_pos);
                        if (rc != (int)tage->used) {
                                printk(KERN_WARNING "wanted to write %u but "
                                       "wrote %d\n", tage->used, rc);
                                /* NOTE(review): put_pages_back() empties
                                 * pc.pc_pages while list_for_each_entry_safe
                                 * is still iterating via 'tmp' — presumably
                                 * safe because the pages are moved intact,
                                 * but worth confirming the iteration
                                 * terminates as intended after this. */
                                put_pages_back(&pc);
                                __LASSERT(list_empty(&pc.pc_pages));
                        }
                }
                CFS_MMSPACE_CLOSE;

                cfs_filp_close(filp);
                /* keep written pages on the daemon list so debug dumps can
                 * still see recent history */
                put_pages_on_daemon_list(&pc);
                __LASSERT(list_empty(&pc.pc_pages));
        }
        /* tell trace_stop_thread() we are gone */
        complete(&tctl->tctl_stop);
        return 0;
}
1008
1009 int trace_start_thread(void)
1010 {
1011         struct tracefiled_ctl *tctl = &trace_tctl;
1012         int rc = 0;
1013
1014         mutex_down(&trace_thread_sem);
1015         if (thread_running)
1016                 goto out;
1017
1018         init_completion(&tctl->tctl_start);
1019         init_completion(&tctl->tctl_stop);
1020         cfs_waitq_init(&tctl->tctl_waitq);
1021         atomic_set(&tctl->tctl_shutdown, 0);
1022
1023         if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
1024                 rc = -ECHILD;
1025                 goto out;
1026         }
1027
1028         wait_for_completion(&tctl->tctl_start);
1029         thread_running = 1;
1030 out:
1031         mutex_up(&trace_thread_sem);
1032         return rc;
1033 }
1034
1035 void trace_stop_thread(void)
1036 {
1037         struct tracefiled_ctl *tctl = &trace_tctl;
1038
1039         mutex_down(&trace_thread_sem);
1040         if (thread_running) {
1041                 printk(KERN_INFO "Lustre: shutting down debug daemon thread...\n");
1042                 atomic_set(&tctl->tctl_shutdown, 1);
1043                 wait_for_completion(&tctl->tctl_stop);
1044                 thread_running = 0;
1045         }
1046         mutex_up(&trace_thread_sem);
1047 }
1048
1049 int tracefile_init(void)
1050 {
1051         struct trace_cpu_data *tcd;
1052         int                    i;
1053         int                    j;
1054         int                    rc;
1055         int                    factor;
1056
1057         rc = tracefile_init_arch();
1058         if (rc != 0)
1059                 return rc;
1060
1061         tcd_for_each(tcd, i, j) {
1062                 /* tcd_pages_factor is initialized int tracefile_init_arch. */
1063                 factor = tcd->tcd_pages_factor;
1064                 CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
1065                 CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
1066                 CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
1067                 tcd->tcd_cur_pages = 0;
1068                 tcd->tcd_cur_stock_pages = 0;
1069                 tcd->tcd_cur_daemon_pages = 0;
1070                 tcd->tcd_max_pages = (TCD_MAX_PAGES * factor) / 100;
1071                 LASSERT(tcd->tcd_max_pages > 0);
1072                 tcd->tcd_shutting_down = 0;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void trace_cleanup_on_cpu(void *info)
1079 {
1080         struct trace_cpu_data *tcd;
1081         struct trace_page *tage;
1082         struct trace_page *tmp;
1083         int i;
1084
1085         tcd_for_each_type_lock(tcd, i) {
1086                 tcd->tcd_shutting_down = 1;
1087
1088                 list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
1089                         __LASSERT_TAGE_INVARIANT(tage);
1090
1091                         list_del(&tage->linkage);
1092                         tage_free(tage);
1093                 }
1094                 tcd->tcd_cur_pages = 0;
1095         }
1096 }
1097
1098 static void trace_cleanup(void)
1099 {
1100         struct page_collection pc;
1101
1102         CFS_INIT_LIST_HEAD(&pc.pc_pages);
1103         spin_lock_init(&pc.pc_lock);
1104
1105         trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc);
1106
1107         tracefile_fini_arch();
1108 }
1109
/*
 * Shut the tracing subsystem down: stop the ktracefiled daemon first,
 * then release all trace pages and arch-specific state.
 */
void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}