/* Source: Whamcloud gitweb — fs/lustre-release.git, lnet/libcfs/tracefile.c (bug ref b=5514) */
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2004 Cluster File Systems, Inc.
5  *   Author: Zach Brown <zab@clusterfs.com>
6  *   Author: Phil Schwan <phil@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include <linux/kernel.h>
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/rwsem.h>
28 #include <linux/proc_fs.h>
29 #include <linux/file.h>
30 #include <linux/smp.h>
31 #include <linux/ctype.h>
32 #include <asm/uaccess.h>
33 #ifdef HAVE_MM_INLINE
34 #include <linux/mm_inline.h>
35 #endif
36
37 #define DEBUG_SUBSYSTEM S_PORTALS
38
39 #include <linux/kp30.h>
40 #include <linux/portals_compat25.h>
41 #include <linux/libcfs.h>
42
/* Default per-cpu cap on trace pages: 5MB worth of pages. */
#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))

/* XXX move things up to the top, comment */

/* Per-cpu trace buffer state, padded to a full cache line to avoid false
 * sharing between CPUs.  Always accessed via trace_get_tcd(), which pins
 * the CPU and disables local interrupts. */
static union {
        struct trace_cpu_data {
                /* pages currently accumulating debug records */
                struct list_head        tcd_pages;
                unsigned long           tcd_cur_pages;

                /* ring of already-handled pages kept around so an LBUG
                 * dump still has recent history (see the comment above
                 * put_pages_on_daemon_list_on_cpu()) */
                struct list_head        tcd_daemon_pages;
                unsigned long           tcd_cur_daemon_pages;

                /* limit on tcd_cur_pages (settable via /proc) */
                unsigned long           tcd_max_pages;
                /* set during cleanup so writers stop touching the lists */
                int                     tcd_shutting_down;
        } tcd;
        char __pad[SMP_CACHE_BYTES];
} trace_data[NR_CPUS] __cacheline_aligned;
60
/* Transient collection of trace pages gathered from all CPUs by
 * collect_pages(); pc_lock guards pc_pages against the per-cpu IPI
 * handlers that splice pages in and out. */
struct page_collection {
        struct list_head        pc_pages;
        spinlock_t              pc_lock;
        /* when set, collect_pages_on_cpu() also takes the daemon ring */
        int                     pc_want_daemon_pages;
};
66
/* Control block for the ktracefiled daemon thread: start/stop handshakes,
 * the wait queue it sleeps on, and the shutdown flag polled in its loop. */
struct tracefiled_ctl {
        struct completion        tctl_start;
        struct completion        tctl_stop;
        wait_queue_head_t        tctl_waitq;
        pid_t                    tctl_pid;
        atomic_t                 tctl_shutdown;
};
74
/* default cap on the daemon's output file: 500MB */
#define TRACEFILE_SIZE (500 << 20)
static DECLARE_RWSEM(tracefile_sem);       /* guards tracefile/tracefile_size */
static char *tracefile = NULL;             /* daemon output path, or NULL */
static long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DECLARE_MUTEX(trace_thread_sem);    /* serializes thread start/stop */
static int thread_running = 0;             /* daemon state, under trace_thread_sem */

/* older/uniprocessor kernels may not provide get_cpu/put_cpu */
#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu() do { } while (0)
#endif

/* Return the calling CPU's trace_cpu_data with preemption pinned and local
 * interrupts disabled; must be paired with trace_put_tcd(). */
#define trace_get_tcd(FLAGS) ({                 \
        struct trace_cpu_data *__ret;           \
        int __cpu = get_cpu();                  \
        local_irq_save(FLAGS);                  \
        __ret = &trace_data[__cpu].tcd;         \
        __ret;                                  \
})

/* Undo trace_get_tcd(): restore interrupts, unpin the CPU. */
#define trace_put_tcd(TCD, FLAGS) do {          \
        local_irq_restore(FLAGS);               \
        put_cpu();                              \
} while (0)

static void put_pages_on_daemon_list_on_cpu(void *info);
102
/* Return a page with at least 'len' bytes free at its fill point.
 * Grows the buffer with GFP_ATOMIC allocations up to tcd_max_pages; past
 * that it recycles the oldest page (ring-buffer behavior), first handing
 * ~10% of the pages to the daemon when the daemon thread is running.
 * Returns NULL only when len itself exceeds PAGE_SIZE. */
static struct page *trace_get_page(struct trace_cpu_data *tcd,
                                   unsigned long len)
{
        struct page *page = NULL;

        if (len > PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        /* fast path: the newest page still has room */
        if (!list_empty(&tcd->tcd_pages)) {
                page = list_entry(tcd->tcd_pages.prev, struct page,
                                  PAGE_LIST_ENTRY);
                if (page->index + len <= PAGE_SIZE)
                        return page;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                page = alloc_page(GFP_ATOMIC);
                if (page == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }
                /* page->index tracks the fill offset; page->mapping is
                 * (ab)used to record the owning CPU */
                page->index = 0;
                page->mapping = (void *)(long)smp_processor_id();
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                /* kick the daemon once a few pages have accumulated */
                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        wake_up(&tctl->tctl_waitq);
                }
                return page;
        }

 ring_buffer:
        if (thread_running) {
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct list_head *pos, *tmp;
                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                /* move the oldest ~10% of pages onto the daemon ring */
                list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
                        struct page *page;

                        if (pgcount-- == 0)
                                break;

                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        list_del(&PAGE_LIST(page));
                        list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);
        }
        /* NOTE(review): if the very first alloc_page() fails this list is
         * empty and the assertion fires — assumed unreachable in practice */
        LASSERT(!list_empty(&tcd->tcd_pages));

        /* recycle the oldest page in place: reset its fill point and move
         * it to the tail so it becomes the current write target */
        page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
        page->index = 0;

        list_del(&PAGE_LIST(page));
        list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
        return page;
}
174
175 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
176                              int len, char *file, const char *fn)
177 {
178         char *prefix = NULL, *ptype = NULL;
179
180         if ((mask & D_EMERG) != 0) {
181                 prefix = "LustreError";
182                 ptype = KERN_EMERG;
183         } else if ((mask & D_ERROR) != 0) {
184                 prefix = "LustreError";
185                 ptype = KERN_ERR;
186         } else if ((mask & D_WARNING) != 0) {
187                 prefix = "Lustre";
188                 ptype = KERN_WARNING;
189         } else if (portal_printk) {
190                 prefix = "Lustre";
191                 ptype = KERN_INFO;
192         }
193
194         printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
195                hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
196 }
197
198 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
199                        const int line, unsigned long stack, char *format, ...)
200 {
201         struct trace_cpu_data *tcd;
202         struct ptldebug_header header;
203         struct page *page;
204         char *debug_buf = format;
205         int known_size, needed = 85 /* average message length */, max_nob;
206         va_list       ap;
207         unsigned long flags;
208         struct timeval tv;
209
210         if (strchr(file, '/'))
211                 file = strrchr(file, '/') + 1;
212
213         if (*(format + strlen(format) - 1) != '\n')
214                 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
215                        file, line, fn);
216
217         tcd = trace_get_tcd(flags);
218         if (tcd->tcd_shutting_down)
219                 goto out;
220
221         do_gettimeofday(&tv);
222
223         header.ph_subsys = subsys;
224         header.ph_mask = mask;
225         header.ph_cpu_id = smp_processor_id();
226         header.ph_sec = (__u32)tv.tv_sec;
227         header.ph_usec = tv.tv_usec;
228         header.ph_stack = stack;
229         header.ph_pid = current->pid;
230         header.ph_line_num = line;
231
232 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
233         header.ph_extern_pid = current->thread.extern_pid;
234 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
235         header.ph_extern_pid = current->thread.mode.tt.extern_pid;
236 #else
237         header.ph_extern_pid = 0;
238 #endif
239
240         known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
241
242  retry:
243         page = trace_get_page(tcd, needed + known_size);
244         if (page == NULL) {
245                 debug_buf = format;
246                 if (needed + known_size > PAGE_SIZE)
247                         mask |= D_ERROR;
248                 needed = strlen(format);
249                 goto out;
250         }
251
252         debug_buf = page_address(page) + page->index + known_size;
253
254         max_nob = PAGE_SIZE - page->index - known_size;
255         LASSERT(max_nob > 0);
256         va_start(ap, format);
257         needed = vsnprintf(debug_buf, max_nob, format, ap);
258         va_end(ap);
259
260         if (needed > max_nob) /* overflow.  oh poop. */
261                 goto retry;
262
263         header.ph_len = known_size + needed;
264         debug_buf = page_address(page) + page->index;
265
266         memcpy(debug_buf, &header, sizeof(header));
267         page->index += sizeof(header);
268         debug_buf += sizeof(header);
269
270         strcpy(debug_buf, file);
271         page->index += strlen(file) + 1;
272         debug_buf += strlen(file) + 1;
273
274         strcpy(debug_buf, fn);
275         page->index += strlen(fn) + 1;
276         debug_buf += strlen(fn) + 1;
277
278         page->index += needed;
279         if (page->index > PAGE_SIZE)
280                 printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
281                        page->index);
282
283  out:
284         if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
285                 print_to_console(&header, mask, debug_buf, needed, file, fn);
286
287         trace_put_tcd(tcd, flags);
288 }
289 EXPORT_SYMBOL(portals_debug_msg);
290
/* Per-cpu half of collect_pages(): runs on each CPU (directly or via IPI)
 * and splices that CPU's trace pages — and, if pc_want_daemon_pages is set,
 * its daemon ring too — onto pc->pc_pages under pc_lock. */
static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_splice(&tcd->tcd_pages, &pc->pc_pages);
        /* list_splice leaves the source list in an undefined state */
        INIT_LIST_HEAD(&tcd->tcd_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
312
/* Gather trace pages from every CPU onto pc->pc_pages: run the collector
 * locally, then broadcast it to the other CPUs via smp_call_function(). */
static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}
320
/* Per-cpu half of put_pages_back(): return un-written pages from pc to the
 * CPU that owns them (page->mapping holds the owning CPU id).  Pages are
 * inserted before the snapshot of the old list head, so they land ahead of
 * anything written since collection, preserving record order. */
static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp, *cur_head;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        /* remember where this CPU's current pages start */
        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                /* skip pages belonging to other CPUs */
                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
351
/* Return collected-but-unwritten pages to their owning CPUs: run locally,
 * then on every other CPU via smp_call_function(). */
static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}
358
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);
                /* only take pages this CPU owns (owner id in page->mapping) */
                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                /* ring is full: drop the oldest daemon page */
                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        page = list_entry(tcd->tcd_daemon_pages.next,
                                          struct page, PAGE_LIST_ENTRY);

                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        page->index = 0;
                        list_del(&PAGE_LIST(page));
                        /* clear the CPU-id overload before freeing */
                        page->mapping = NULL;
                        __free_page(page);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
405
/* Move the pages in pc onto each owning CPU's daemon ring: run locally,
 * then on every other CPU via smp_call_function(). */
static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}
411
412 void trace_debug_print(void)
413 {
414         struct page_collection pc;
415         struct list_head *pos, *tmp;
416
417         spin_lock_init(&pc.pc_lock);
418
419         collect_pages(&pc);
420         list_for_each_safe(pos, tmp, &pc.pc_pages) {
421                 struct page *page;
422                 char *p, *file, *fn;
423
424                 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
425                 LASSERT(page->index <= PAGE_SIZE);
426                 LASSERT(page_count(page) > 0);
427
428                 p = page_address(page);
429                 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
430                         struct ptldebug_header *hdr;
431                         int len;
432                         hdr = (void *)p;
433                         p += sizeof(*hdr);
434                         file = p;
435                         p += strlen(file) + 1;
436                         fn = p;
437                         p += strlen(fn) + 1;
438                         len = hdr->ph_len - (p - (char *)hdr);
439
440                         print_to_console(hdr, D_EMERG, p, len, file, fn);
441                 }
442
443                 list_del(&PAGE_LIST(page));
444                 page->mapping = NULL;
445                 __free_page(page);
446         }
447 }
448
/* Synchronously dump every trace page (including the daemon ring) to
 * 'filename', which must not already exist (O_EXCL).  Written pages are
 * freed; on a short write the remainder is handed back to the per-cpu
 * buffers.  Returns 0 or a negative errno. */
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        struct file *filp;
        struct list_head *pos, *tmp;
        mm_segment_t oldfs;
        int rc;

        down_write(&tracefile_sem);

        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
        if (IS_ERR(filp)) {
                rc = PTR_ERR(filp);
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        oldfs = get_fs();
        set_fs(get_ds());
        list_for_each_safe(pos, tmp, &pc.pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                /* page->index is the number of valid bytes in the page */
                rc = filp->f_op->write(filp, page_address(page), page->index,
                                       &filp->f_pos);
                if (rc != page->index) {
                        printk(KERN_WARNING "wanted to write %lu but wrote "
                               "%d\n", page->index, rc);
                        /* return the unwritten pages to the per-cpu buffers
                         * rather than dropping them */
                        put_pages_back(&pc);
                        break;
                }
                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
        set_fs(oldfs);
        /* NOTE(review): a short-write error above is overwritten by fsync's
         * return value here, so callers may see success after a partial
         * dump — confirm whether that is intended */
        rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        filp_close(filp, 0);
 out:
        up_write(&tracefile_sem);
        return rc;
}
508
509 void trace_flush_pages(void)
510 {
511         struct page_collection pc;
512         struct list_head *pos, *tmp;
513
514         spin_lock_init(&pc.pc_lock);
515
516         collect_pages(&pc);
517         list_for_each_safe(pos, tmp, &pc.pc_pages) {
518                 struct page *page;
519
520                 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
521                 LASSERT(page->index <= PAGE_SIZE);
522                 LASSERT(page_count(page) > 0);
523
524                 list_del(&PAGE_LIST(page));
525                 page->mapping = NULL;
526                 __free_page(page);
527         }
528 }
529
530 int trace_dk(struct file *file, const char *buffer, unsigned long count,
531              void *data)
532 {
533         char *name;
534         unsigned long off;
535         int rc;
536
537         name = kmalloc(count + 1, GFP_KERNEL);
538         if (name == NULL)
539                 return -ENOMEM;
540
541         if (copy_from_user(name, buffer, count)) {
542                 rc = -EFAULT;
543                 goto out;
544         }
545
546         if (name[0] != '/') {
547                 rc = -EINVAL;
548                 goto out;
549         }
550
551         /* be nice and strip out trailing '\n' */
552         for (off = count ; off > 2 && isspace(name[off - 1]); off--)
553                 ;
554
555         name[off] = '\0';
556         rc = tracefile_dump_all_pages(name);
557 out:
558         if (name)
559                 kfree(name);
560         return count;
561 }
562 EXPORT_SYMBOL(trace_dk);
563
/* Body of the ktracefiled daemon: wake up roughly once a second (or when
 * kicked by trace_get_page()), collect the per-cpu trace pages, and append
 * them to the configured tracefile, wrapping at tracefile_size.  Written
 * pages are moved to the per-cpu daemon rings so a later LBUG dump still
 * has recent history; pages that fail to write are pushed back. */
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct list_head *pos, *tmp;
        struct ptldebug_header *hdr;
        struct file *filp;
        struct page *page;
        mm_segment_t oldfs;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        /* tell trace_start_thread() we are up */
        complete(&tctl->tctl_start);

        while (1) {
                wait_queue_t __wait;

                /* sleep up to a second, or until woken via tctl_waitq */
                init_waitqueue_entry(&__wait, current);
                add_wait_queue(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ);
                remove_wait_queue(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                /* snapshot the output path under the semaphore */
                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
                                         0600);
                        if (IS_ERR(filp)) {
                                printk("couldn't open %s: %ld\n", tracefile,
                                       PTR_ERR(filp));
                                filp = NULL;
                        }
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        /* no tracefile (or open failed): keep the pages on
                         * the daemon rings instead of losing them */
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                oldfs = get_fs();
                set_fs(get_ds());

                /* mark the first header, so we can sort in chunks */
                page = list_entry(pc.pc_pages.next, struct page,
                                  PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                hdr = page_address(page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_safe(pos, tmp, &pc.pc_pages) {
                        /* NOTE(review): f_pos is static, so the write offset
                         * persists across iterations and across daemon
                         * restarts — confirm this is intended */
                        static loff_t f_pos;
                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        /* wrap at the size cap; never seek past current EOF */
                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > filp->f_dentry->d_inode->i_size)
                                f_pos = filp->f_dentry->d_inode->i_size;

                        rc = filp->f_op->write(filp, page_address(page),
                                               page->index, &f_pos);
                        if (rc != page->index) {
                                printk(KERN_WARNING "wanted to write %lu but "
                                       "wrote %d\n", page->index, rc);
                                put_pages_back(&pc);
                        }
                }
                set_fs(oldfs);
                filp_close(filp, 0);

                put_pages_on_daemon_list(&pc);
        }
        /* tell trace_stop_thread() we are gone */
        complete(&tctl->tctl_stop);
        return 0;
}
656
/* Start the ktracefiled daemon if it is not already running; waits for the
 * thread to signal startup before returning.  Returns 0 on success (or if
 * already running), -ECHILD if the thread could not be created.  Serialized
 * by trace_thread_sem. */
int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        init_waitqueue_head(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        /* wait until the daemon has finished its setup */
        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        up(&trace_thread_sem);
        return rc;
}
682
683 void trace_stop_thread(void)
684 {
685         struct tracefiled_ctl *tctl = &trace_tctl;
686
687         down(&trace_thread_sem);
688         if (thread_running) {
689                 printk(KERN_INFO "Shutting down debug daemon thread...\n");
690                 atomic_set(&tctl->tctl_shutdown, 1);
691                 wait_for_completion(&tctl->tctl_stop);
692                 thread_running = 0;
693         }
694         up(&trace_thread_sem);
695 }
696
697 int trace_write_daemon_file(struct file *file, const char *buffer,
698                             unsigned long count, void *data)
699 {
700         char *name;
701         unsigned long off;
702         int rc;
703
704         name = kmalloc(count + 1, GFP_KERNEL);
705         if (name == NULL)
706                 return -ENOMEM;
707
708         if (copy_from_user(name, buffer, count)) {
709                 rc = -EFAULT;
710                 goto out;
711         }
712
713         /* be nice and strip out trailing '\n' */
714         for (off = count ; off > 2 && isspace(name[off - 1]); off--)
715                 ;
716
717         name[off] = '\0';
718
719         down_write(&tracefile_sem);
720         if (strcmp(name, "stop") == 0) {
721                 tracefile = NULL;
722                 trace_stop_thread();
723                 goto out_sem;
724         } else if (strncmp(name, "size=", 5) == 0) {
725                 tracefile_size = simple_strtoul(name + 5, NULL, 0);
726                 if (tracefile_size < 10 || tracefile_size > 20480)
727                         tracefile_size = TRACEFILE_SIZE;
728                 else
729                         tracefile_size <<= 20;
730                 goto out_sem;
731         }
732
733         if (name[0] != '/') {
734                 rc = -EINVAL;
735                 goto out_sem;
736         }
737
738         if (tracefile != NULL)
739                 kfree(tracefile);
740
741         tracefile = name;
742         name = NULL;
743
744         printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
745                "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
746
747         trace_start_thread();
748
749  out_sem:
750         up_write(&tracefile_sem);
751
752  out:
753         kfree(name);
754         return count;
755 }
756
757 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
758                            int *eof, void *data)
759 {
760         int rc;
761
762         down_read(&tracefile_sem);
763         rc = snprintf(page, count, "%s", tracefile);
764         up_read(&tracefile_sem);
765
766         return rc;
767 }
768
769 int trace_write_debug_mb(struct file *file, const char *buffer,
770                          unsigned long count, void *data)
771 {
772         char string[32];
773         int i;
774         unsigned max;
775
776         if (count >= sizeof(string)) {
777                 printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
778                        count);
779                 return -EOVERFLOW;
780         }
781
782         if (copy_from_user(string, buffer, count))
783                 return -EFAULT;
784
785         max = simple_strtoul(string, NULL, 0);
786         if (max == 0)
787                 return -EINVAL;
788
789         if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
790                 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
791                        "%dMB, which is more than 80%% of available RAM (%lu)\n",
792                        max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
793                 return -EINVAL;
794         }
795
796         max /= smp_num_cpus;
797
798         for (i = 0; i < NR_CPUS; i++) {
799                 struct trace_cpu_data *tcd;
800                 tcd = &trace_data[i].tcd;
801                 tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
802         }
803         return count;
804 }
805
806 int trace_read_debug_mb(char *page, char **start, off_t off, int count,
807                         int *eof, void *data)
808 {
809         struct trace_cpu_data *tcd;
810         unsigned long flags;
811         int rc;
812
813         tcd = trace_get_tcd(flags);
814         rc = snprintf(page, count, "%lu\n",
815                       (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
816         trace_put_tcd(tcd, flags);
817
818         return rc;
819 }
820
821 int tracefile_init(void)
822 {
823         struct trace_cpu_data *tcd;
824         int i;
825
826         for (i = 0; i < NR_CPUS; i++) {
827                 tcd = &trace_data[i].tcd;
828                 INIT_LIST_HEAD(&tcd->tcd_pages);
829                 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
830                 tcd->tcd_cur_pages = 0;
831                 tcd->tcd_cur_daemon_pages = 0;
832                 tcd->tcd_max_pages = TCD_MAX_PAGES;
833                 tcd->tcd_shutting_down = 0;
834         }
835         return 0;
836 }
837
838 static void trace_cleanup_on_cpu(void *info)
839 {
840         struct trace_cpu_data *tcd;
841         struct list_head *pos, *tmp;
842         unsigned long flags;
843
844         tcd = trace_get_tcd(flags);
845
846         tcd->tcd_shutting_down = 1;
847
848         list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
849                 struct page *page;
850
851                 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
852                 LASSERT(page->index <= PAGE_SIZE);
853                 LASSERT(page_count(page) > 0);
854
855                 list_del(&PAGE_LIST(page));
856                 page->mapping = NULL;
857                 __free_page(page);
858         }
859         tcd->tcd_cur_pages = 0;
860
861         trace_put_tcd(tcd, flags);
862 }
863
/* Tear down the trace buffers on every CPU: run the cleanup locally, then
 * broadcast it via smp_call_function().  The page_collection is only
 * passed to satisfy the callback signature; its lists are not used. */
static void trace_cleanup(void)
{
        struct page_collection pc;

        INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}
874
/* Module exit path: stop the daemon thread first, then free all per-cpu
 * trace state. */
void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}