/* Source: Whamcloud gitweb export of fs/lustre-release.git,
 * path lnet/libcfs/tracefile.c (commit note: "removed a diff that
 * crept in somehow"). */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Zach Brown <zab@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rwsem.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/smp.h>
#include <linux/ctype.h>
#include <asm/uaccess.h>
#ifdef HAVE_MM_INLINE
#include <linux/mm_inline.h>
#endif

#define DEBUG_SUBSYSTEM S_PORTALS

#include <linux/kp30.h>
#include <linux/portals_compat25.h>
#include <linux/libcfs.h>
/* Default per-CPU trace buffer cap: 5MB expressed in pages. */
#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))

/* XXX move things up to the top, comment */

/* Per-CPU trace state, padded out to a cache line to avoid false
 * sharing between CPUs.  Indexed by CPU id; accessed via the
 * trace_get_tcd()/trace_put_tcd() macros, which disable local IRQs. */
static union {
        struct trace_cpu_data {
                /* pages currently collecting new debug records */
                struct list_head        tcd_pages;
                unsigned long           tcd_cur_pages;

                /* pages already consumed by the debug daemon, kept as
                 * history so an emergency dump still has recent data */
                struct list_head        tcd_daemon_pages;
                unsigned long           tcd_cur_daemon_pages;

                /* cap on tcd_cur_pages (see trace_write_debug_mb()) */
                unsigned long           tcd_max_pages;
                /* set at cleanup; stops new records on this CPU */
                int                     tcd_shutting_down;
        } tcd;
        char __pad[SMP_CACHE_BYTES];
} trace_data[NR_CPUS] __cacheline_aligned;
60
/* A batch of trace pages gathered from the per-CPU lists for writing,
 * printing, or recycling.  pc_lock serializes the CPUs that splice
 * onto / scan pc_pages concurrently from smp_call_function callbacks. */
struct page_collection {
        struct list_head        pc_pages;
        spinlock_t              pc_lock;
        /* when set, collect_pages_on_cpu() drains the per-CPU daemon
         * history lists as well as the live collection lists */
        int                     pc_want_daemon_pages;
};

/* Control block for the "ktracefiled" daemon thread. */
struct tracefiled_ctl {
        struct completion        tctl_start;    /* daemon finished init */
        struct completion        tctl_stop;     /* daemon has exited */
        wait_queue_head_t        tctl_waitq;    /* wakes the daemon early */
        pid_t                    tctl_pid;
        atomic_t                 tctl_shutdown; /* set to request exit */
};
74
/* Default cap on the on-disk tracefile: 500MB. */
#define TRACEFILE_SIZE (500 << 20)
/* Protects tracefile/tracefile_size; the daemon takes it for read
 * around each open of the output file. */
static DECLARE_RWSEM(tracefile_sem);
static char *tracefile = NULL;          /* kmalloc'd output path, or NULL */
static long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
/* Serializes trace_start_thread()/trace_stop_thread(). */
static DECLARE_MUTEX(trace_thread_sem);
static int thread_running = 0;

/* Older kernels lack get_cpu()/put_cpu(); fall back to plain CPU id. */
#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu() do { } while (0)
#endif

/* Pin this CPU's trace_cpu_data with local IRQs disabled; must always
 * be paired with trace_put_tcd(), which restores FLAGS and releases
 * the CPU reference. */
#define trace_get_tcd(FLAGS) ({                 \
        struct trace_cpu_data *__ret;           \
        int __cpu = get_cpu();                  \
        local_irq_save(FLAGS);                  \
        __ret = &trace_data[__cpu].tcd;         \
        __ret;                                  \
})

#define trace_put_tcd(TCD, FLAGS) do {          \
        local_irq_restore(FLAGS);               \
        put_cpu();                              \
} while (0)

static void put_pages_on_daemon_list_on_cpu(void *info);
102
/* Return a page with at least 'len' bytes free at the end: reuse the
 * newest page if it has room, allocate while under tcd_max_pages, and
 * otherwise recycle the oldest page ring-buffer style (after handing a
 * tenth of the pages to the daemon history if the daemon is running).
 *
 * struct page fields are reused as trace bookkeeping: page->index is
 * the number of bytes already used, page->mapping stores the owning
 * CPU id (see the smp_processor_id() store below). */
static struct page *trace_get_page(struct trace_cpu_data *tcd,
                                   unsigned long len)
{
        struct page *page = NULL;

        if (len > PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        /* newest page is at the tail; reuse it if 'len' still fits */
        if (!list_empty(&tcd->tcd_pages)) {
                page = list_entry(tcd->tcd_pages.prev, struct page,
                                  PAGE_LIST_ENTRY);
                if (page->index + len <= PAGE_SIZE)
                        return page;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                /* GFP_ATOMIC: we run with local IRQs disabled */
                page = alloc_page(GFP_ATOMIC);
                if (page == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }
                page->index = 0;
                page->mapping = (void *)(long)smp_processor_id();
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                /* enough buffered data: nudge the daemon awake */
                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        wake_up(&tctl->tctl_waitq);
                }
                return page;
        }

 ring_buffer:
        if (thread_running) {
                /* buffer is full: move the oldest 10% of pages onto the
                 * daemon history list so they are not simply lost */
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct list_head *pos, *tmp;
                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
                        struct page *page;

                        if (pgcount-- == 0)
                                break;

                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        list_del(&PAGE_LIST(page));
                        list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                /* local CPU only: we already hold IRQs off here */
                put_pages_on_daemon_list_on_cpu(&pc);
        }
        LASSERT(!list_empty(&tcd->tcd_pages));

        /* recycle the oldest page in place: reset it and move it to the
         * tail, overwriting its previous contents */
        page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
        page->index = 0;

        list_del(&PAGE_LIST(page));
        list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
        return page;
}
174
175 static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
176                              int len, char *file, const char *fn)
177 {
178         char *prefix = NULL, *ptype = NULL;
179
180         if ((mask & D_EMERG) != 0) {
181                 prefix = "LustreError";
182                 ptype = KERN_EMERG;
183         } else if ((mask & D_ERROR) != 0) {
184                 prefix = "LustreError";
185                 ptype = KERN_ERR;
186         } else if ((mask & D_WARNING) != 0) {
187                 prefix = "Lustre";
188                 ptype = KERN_WARNING;
189         } else if (portal_printk) {
190                 prefix = "Lustre";
191                 ptype = KERN_INFO;
192         }
193
194         printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
195                hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
196 }
197
/* Core trace entry point: format one debug record into this CPU's
 * trace pages and, for high-severity masks (or portal_printk mode),
 * echo it to the console.
 *
 * Record layout in the page (ph_len covers all four parts):
 *   struct ptldebug_header | file '\0' | fn '\0' | formatted text
 * page->index tracks the bytes used so far in the page.
 *
 * Runs with local IRQs disabled (trace_get_tcd), so it is callable
 * from any context. */
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
                       const int line, unsigned long stack, char *format, ...)
{
        struct trace_cpu_data *tcd;
        struct ptldebug_header header;
        struct page *page;
        char *debug_buf = format;
        int known_size, needed = 85 /* average message length */, max_nob;
        va_list       ap;
        unsigned long flags;
        struct timeval tv;

        if (*(format + strlen(format) - 1) != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        tcd = trace_get_tcd(flags);
        /* NOTE(review): this early exit reaches 'out' with 'header'
         * uninitialized; print_to_console() would then read garbage
         * fields for a high-severity mask - confirm and fix upstream */
        if (tcd->tcd_shutting_down)
                goto out;

        do_gettimeofday(&tv);

        header.ph_subsys = subsys;
        header.ph_mask = mask;
        header.ph_cpu_id = smp_processor_id();
        header.ph_sec = (__u32)tv.tv_sec;
        header.ph_usec = tv.tv_usec;
        header.ph_stack = stack;
        header.ph_pid = current->pid;
        header.ph_line_num = line;

        /* UML records the host pid of the thread; elsewhere there is no
         * meaningful "external" pid */
#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
        header.ph_extern_pid = current->thread.extern_pid;
#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
        header.ph_extern_pid = current->thread.mode.tt.extern_pid;
#else
        header.ph_extern_pid = 0;
#endif

        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls

 retry:
        page = trace_get_page(tcd, needed + known_size);
        if (page == NULL) {
                /* no page: print the raw format string on the console
                 * path below instead of the formatted text */
                debug_buf = format;
                if (needed + known_size > PAGE_SIZE)
                        mask |= D_ERROR;
                needed = strlen(format);
                goto out;
        }

        debug_buf = page_address(page) + page->index + known_size;

        max_nob = PAGE_SIZE - page->index - known_size;
        LASSERT(max_nob > 0);
        va_start(ap, format);
        needed = vsnprintf(debug_buf, max_nob, format, ap);
        va_end(ap);

        /* vsnprintf() returns the size the full text needs; if it did
         * not fit, retry - trace_get_page() will pick a page with room.
         * NOTE(review): needed == max_nob is also a truncation (no room
         * for the NUL), but changing this to >= would make
         * trace_get_page() hand back the same page and loop forever;
         * the two conditions need a coordinated fix - confirm first. */
        if (needed > max_nob) /* overflow.  oh poop. */
                goto retry;

        header.ph_len = known_size + needed;
        debug_buf = page_address(page) + page->index;

        memcpy(debug_buf, &header, sizeof(header));
        page->index += sizeof(header);
        debug_buf += sizeof(header);

        strcpy(debug_buf, file);
        page->index += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        strcpy(debug_buf, fn);
        page->index += strlen(fn) + 1;
        debug_buf += strlen(fn) + 1;

        page->index += needed;
        if (page->index > PAGE_SIZE)
                printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
                       page->index);

 out:
        /* high-severity records also go to the console; debug_buf points
         * at the formatted text (or the raw format string on the
         * no-page path above) */
        if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
                print_to_console(&header, mask, debug_buf, needed, file, fn);

        trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);
287
/* IPI/local callback: splice this CPU's trace pages onto the shared
 * collection in *info.  Runs with local IRQs off (trace_get_tcd), so it
 * cannot race with portals_debug_msg() on the same CPU. */
static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        /* pc_lock serializes pc_pages against the other CPUs running
         * this callback concurrently */
        spin_lock(&pc->pc_lock);
        list_splice(&tcd->tcd_pages, &pc->pc_pages);
        INIT_LIST_HEAD(&tcd->tcd_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                /* also drain the daemon history ring when requested */
                list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
309
/* Gather every CPU's trace pages onto pc->pc_pages: the list is
 * initialized here, then the collect callback runs locally first and on
 * all other CPUs via IPI.  Caller must have set pc_want_daemon_pages
 * and initialized pc_lock. */
static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}
317
/* IPI/local callback: reclaim pages belonging to this CPU (matched via
 * the CPU id stashed in page->mapping) from the shared collection back
 * onto this CPU's tcd_pages list, preserving their original order. */
static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp, *cur_head;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        /* snapshot the old first element: inserting each reclaimed page
         * just before it keeps reclaimed pages ordered ahead of any
         * pages written since collection */
        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                /* pages owned by other CPUs stay on pc_pages for their
                 * own invocation of this callback to pick up */
                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
348
/* Return unconsumed pages to their owning CPUs: locally first, then on
 * the other CPUs via IPI.  Used after a failed tracefile write so the
 * records are not lost. */
static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}
355
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);
                /* only adopt pages this CPU allocated (owner CPU id is
                 * stashed in page->mapping); the rest stay on pc_pages
                 * for their owner's callback */
                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        /* history ring full: drop the oldest page */
                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        page = list_entry(tcd->tcd_daemon_pages.next,
                                          struct page, PAGE_LIST_ENTRY);

                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        page->index = 0;
                        list_del(&PAGE_LIST(page));
                        page->mapping = NULL;
                        __free_page(page);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}
402
/* Move collected pages onto their owning CPUs' daemon history rings:
 * locally first, then on the other CPUs via IPI. */
static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}
408
409 void trace_debug_print(void)
410 {
411         struct page_collection pc;
412         struct list_head *pos, *tmp;
413
414         spin_lock_init(&pc.pc_lock);
415
416         collect_pages(&pc);
417         list_for_each_safe(pos, tmp, &pc.pc_pages) {
418                 struct page *page;
419                 char *p, *file, *fn;
420
421                 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
422                 LASSERT(page->index <= PAGE_SIZE);
423                 LASSERT(page_count(page) > 0);
424
425                 p = page_address(page);
426                 while (p < ((char *)page_address(page) + PAGE_SIZE)) {
427                         struct ptldebug_header *hdr;
428                         int len;
429                         hdr = (void *)p;
430                         p += sizeof(*hdr);
431                         file = p;
432                         p += strlen(file) + 1;
433                         fn = p;
434                         p += strlen(fn) + 1;
435                         len = hdr->ph_len - (p - (char *)hdr);
436
437                         print_to_console(hdr, D_EMERG, p, len, file, fn);
438                 }
439
440                 list_del(&PAGE_LIST(page));
441                 page->mapping = NULL;
442                 __free_page(page);
443         }
444 }
445
/* Dump all trace pages (live and daemon history) to 'filename', which
 * is created exclusively (O_EXCL) with mode 0600.  Holds tracefile_sem
 * for write so the daemon cannot open/write its tracefile concurrently.
 *
 * Returns a negative errno if the file cannot be opened, 0 if there
 * was nothing to dump.  NOTE(review): on the full path, rc ends up
 * holding the fsync() result rather than 0, and a short write's rc is
 * likewise overwritten by fsync() - confirm intended return value. */
int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        struct file *filp;
        struct list_head *pos, *tmp;
        mm_segment_t oldfs;
        int rc;

        down_write(&tracefile_sem);

        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
        if (IS_ERR(filp)) {
                rc = PTR_ERR(filp);
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        oldfs = get_fs();
        /* writing from kernel addresses: lift the user-pointer check */
        set_fs(get_ds());
        list_for_each_safe(pos, tmp, &pc.pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                /* page->index is the number of bytes used in the page */
                rc = filp->f_op->write(filp, page_address(page), page->index,
                                       &filp->f_pos);
                if (rc != page->index) {
                        printk(KERN_WARNING "wanted to write %lu but wrote "
                               "%d\n", page->index, rc);
                        /* return the unwritten pages to their CPUs */
                        put_pages_back(&pc);
                        break;
                }
                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
        set_fs(oldfs);
        rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        filp_close(filp, 0);
 out:
        up_write(&tracefile_sem);
        return rc;
}
505
506 void trace_flush_pages(void)
507 {
508         struct page_collection pc;
509         struct list_head *pos, *tmp;
510
511         spin_lock_init(&pc.pc_lock);
512
513         collect_pages(&pc);
514         list_for_each_safe(pos, tmp, &pc.pc_pages) {
515                 struct page *page;
516
517                 page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
518                 LASSERT(page->index <= PAGE_SIZE);
519                 LASSERT(page_count(page) > 0);
520
521                 list_del(&PAGE_LIST(page));
522                 page->mapping = NULL;
523                 __free_page(page);
524         }
525 }
526
527 int trace_dk(struct file *file, const char *buffer, unsigned long count,
528              void *data)
529 {
530         char *name;
531         unsigned long off;
532         int rc;
533
534         name = kmalloc(count + 1, GFP_KERNEL);
535         if (name == NULL)
536                 return -ENOMEM;
537
538         if (copy_from_user(name, buffer, count)) {
539                 rc = -EFAULT;
540                 goto out;
541         }
542
543         if (name[0] != '/') {
544                 rc = -EINVAL;
545                 goto out;
546         }
547
548         /* be nice and strip out trailing '\n' */
549         for (off = count ; off > 2 && isspace(name[off - 1]); off--)
550                 ;
551
552         name[off] = '\0';
553         rc = tracefile_dump_all_pages(name);
554 out:
555         if (name)
556                 kfree(name);
557         return count;
558 }
559 EXPORT_SYMBOL(trace_dk);
560
/* Body of the "ktracefiled" daemon thread: roughly once a second (or
 * sooner when woken by trace_get_page()), collect all CPUs' trace pages,
 * append them to the configured tracefile, then recycle them onto the
 * per-CPU daemon history rings. */
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct list_head *pos, *tmp;
        struct ptldebug_header *hdr;
        struct file *filp;
        struct page *page;
        mm_segment_t oldfs;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                wait_queue_t __wait;

                /* sleep up to 1s, or until trace_get_page() wakes us */
                init_waitqueue_entry(&__wait, current);
                add_wait_queue(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ);
                remove_wait_queue(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
                                         0600);
                        if (IS_ERR(filp)) {
                                printk("couldn't open %s: %ld\n", tracefile,
                                       PTR_ERR(filp));
                                filp = NULL;
                        }
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        /* no output file: park the pages on the daemon
                         * history rings instead of losing them */
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                oldfs = get_fs();
                /* writing from kernel addresses */
                set_fs(get_ds());

                /* mark the first header, so we can sort in chunks */
                page = list_entry(pc.pc_pages.next, struct page,
                                  PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                hdr = page_address(page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_safe(pos, tmp, &pc.pc_pages) {
                        /* function-static: the write offset survives
                         * across iterations and daemon restarts,
                         * wrapping at tracefile_size to implement a
                         * bounded circular log */
                        static loff_t f_pos;
                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > filp->f_dentry->d_inode->i_size)
                                f_pos = filp->f_dentry->d_inode->i_size;

                        rc = filp->f_op->write(filp, page_address(page),
                                               page->index, &f_pos);
                        if (rc != page->index) {
                                printk(KERN_WARNING "wanted to write %lu but "
                                       "wrote %d\n", page->index, rc);
                                /* give unwritten pages back to their CPUs */
                                put_pages_back(&pc);
                        }
                }
                set_fs(oldfs);
                filp_close(filp, 0);

                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}
653
/* Start the trace daemon thread if it is not already running.
 * Serialized by trace_thread_sem.  Returns 0 on success or if the
 * thread was already running, -ECHILD if it could not be created. */
int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        init_waitqueue_head(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        /* block until the daemon has finished its own setup */
        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        up(&trace_thread_sem);
        return rc;
}
679
/* Stop the trace daemon thread if it is running, blocking until it
 * signals tctl_stop.  Serialized by trace_thread_sem.
 * NOTE(review): a caller holding tracefile_sem for write (see
 * trace_write_daemon_file) waits here while the daemon may be blocked
 * in down_read(&tracefile_sem) - looks like a potential deadlock,
 * confirm. */
void trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        down(&trace_thread_sem);
        if (thread_running) {
                printk(KERN_INFO "Shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        up(&trace_thread_sem);
}
693
694 int trace_write_daemon_file(struct file *file, const char *buffer,
695                             unsigned long count, void *data)
696 {
697         char *name;
698         unsigned long off;
699         int rc;
700
701         name = kmalloc(count + 1, GFP_KERNEL);
702         if (name == NULL)
703                 return -ENOMEM;
704
705         if (copy_from_user(name, buffer, count)) {
706                 rc = -EFAULT;
707                 goto out;
708         }
709
710         /* be nice and strip out trailing '\n' */
711         for (off = count ; off > 2 && isspace(name[off - 1]); off--)
712                 ;
713
714         name[off] = '\0';
715
716         down_write(&tracefile_sem);
717         if (strcmp(name, "stop") == 0) {
718                 tracefile = NULL;
719                 trace_stop_thread();
720                 goto out_sem;
721         } else if (strncmp(name, "size=", 5) == 0) {
722                 tracefile_size = simple_strtoul(name + 5, NULL, 0);
723                 if (tracefile_size < 10 || tracefile_size > 20480)
724                         tracefile_size = TRACEFILE_SIZE;
725                 else
726                         tracefile_size <<= 20;
727                 goto out_sem;
728         }
729
730         if (name[0] != '/') {
731                 rc = -EINVAL;
732                 goto out_sem;
733         }
734
735         if (tracefile != NULL)
736                 kfree(tracefile);
737
738         tracefile = name;
739         name = NULL;
740
741         printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
742                "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
743
744         trace_start_thread();
745
746  out_sem:
747         up_write(&tracefile_sem);
748
749  out:
750         kfree(name);
751         return count;
752 }
753
754 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
755                            int *eof, void *data)
756 {
757         int rc;
758
759         down_read(&tracefile_sem);
760         rc = snprintf(page, count, "%s", tracefile);
761         up_read(&tracefile_sem);
762
763         return rc;
764 }
765
766 int trace_write_debug_mb(struct file *file, const char *buffer,
767                          unsigned long count, void *data)
768 {
769         char string[32];
770         int i;
771         unsigned max;
772
773         if (count >= sizeof(string)) {
774                 printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
775                        count);
776                 return -EOVERFLOW;
777         }
778
779         if (copy_from_user(string, buffer, count))
780                 return -EFAULT;
781
782         max = simple_strtoul(string, NULL, 0);
783         if (max == 0)
784                 return -EINVAL;
785         max /= smp_num_cpus;
786
787         if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) {
788                 printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
789                        "%d MB, which is more than 80%% of physical RAM "
790                        "(%lu).\n", max * smp_num_cpus,
791                        (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
792                 return -EINVAL;
793         }
794
795         for (i = 0; i < NR_CPUS; i++) {
796                 struct trace_cpu_data *tcd;
797                 tcd = &trace_data[i].tcd;
798                 tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
799         }
800         return count;
801 }
802
/* /proc read handler: report the aggregate trace buffer cap in MB.
 * Reads only the local CPU's tcd_max_pages and scales by the CPU
 * count - trace_write_debug_mb() sets all CPUs to the same value, so
 * any one suffices. */
int trace_read_debug_mb(char *page, char **start, off_t off, int count,
                        int *eof, void *data)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        int rc;

        tcd = trace_get_tcd(flags);
        rc = snprintf(page, count, "%lu\n",
                      (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
        trace_put_tcd(tcd, flags);

        return rc;
}
817
818 int tracefile_init(void)
819 {
820         struct trace_cpu_data *tcd;
821         int i;
822
823         for (i = 0; i < NR_CPUS; i++) {
824                 tcd = &trace_data[i].tcd;
825                 INIT_LIST_HEAD(&tcd->tcd_pages);
826                 INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
827                 tcd->tcd_cur_pages = 0;
828                 tcd->tcd_cur_daemon_pages = 0;
829                 tcd->tcd_max_pages = TCD_MAX_PAGES;
830                 tcd->tcd_shutting_down = 0;
831         }
832         return 0;
833 }
834
/* IPI/local callback: mark this CPU's trace state as shutting down and
 * free all of its live collection pages.
 * NOTE(review): only tcd_pages is freed; pages still sitting on
 * tcd_daemon_pages are left allocated - confirm whether that leak at
 * shutdown is intentional. */
static void trace_cleanup_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        /* stop portals_debug_msg() adding new records on this CPU */
        tcd->tcd_shutting_down = 1;

        list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
        tcd->tcd_cur_pages = 0;

        trace_put_tcd(tcd, flags);
}
860
/* Shut down tracing on every CPU: run the cleanup callback locally,
 * then on the other CPUs via IPI.  The page_collection only satisfies
 * the callback signature; trace_cleanup_on_cpu() does not use its
 * list. */
static void trace_cleanup(void)
{
        struct page_collection pc;

        INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}
871
/* Module unload hook: stop the daemon thread first, then free the
 * per-CPU trace pages and mark tracing shut down. */
void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}