/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (C) 2004 Cluster File Systems, Inc.
 *   Author: Zach Brown <zab@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rwsem.h>
#include <linux/proc_fs.h>
#include <linux/file.h>
#include <linux/smp.h>
#include <linux/ctype.h>
#include <asm/uaccess.h>
#ifdef HAVE_MM_INLINE
#include <linux/mm_inline.h>
#endif

#define DEBUG_SUBSYSTEM S_PORTALS

#include <linux/kp30.h>
#include <linux/portals_compat25.h>
#include <linux/libcfs.h>

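/* TCD_MAX_PAGES is the number of pages in 5MB: a page is (1 << PAGE_SHIFT)
 * bytes, so 5MB holds 5 << (20 - PAGE_SHIFT) pages -- with 4K pages
 * (PAGE_SHIFT == 12) that is 5 << 8 == 1280 pages per CPU by default. */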
#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))

/* XXX move things up to the top, comment */

static union {
        struct trace_cpu_data {
                struct list_head        tcd_pages;
                unsigned long           tcd_cur_pages;

                struct list_head        tcd_daemon_pages;
                unsigned long           tcd_cur_daemon_pages;

                unsigned long           tcd_max_pages;
                int                     tcd_shutting_down;
        } tcd;
        char __pad[SMP_CACHE_BYTES];
} trace_data[NR_CPUS] __cacheline_aligned;
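
/* One slot per possible CPU.  Padding each entry out to SMP_CACHE_BYTES and
 * aligning the array keeps two CPUs from ever sharing a cacheline, so the
 * hot tcd_* counters can't false-share between processors. */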

struct page_collection {
        struct list_head        pc_pages;
        spinlock_t              pc_lock;
        int                     pc_want_daemon_pages;
};

struct tracefiled_ctl {
        struct completion        tctl_start;
        struct completion        tctl_stop;
        wait_queue_head_t        tctl_waitq;
        pid_t                    tctl_pid;
        atomic_t                 tctl_shutdown;
};

#define TRACEFILE_SIZE (500 << 20)
static DECLARE_RWSEM(tracefile_sem);
static char *tracefile = NULL;
static long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DECLARE_MUTEX(trace_thread_sem);
static int thread_running = 0;

#ifndef get_cpu
#define get_cpu() smp_processor_id()
#define put_cpu() do { } while (0)
#endif
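
/* Kernels old enough to lack get_cpu()/put_cpu() are assumed to lack kernel
 * preemption as well, so a bare smp_processor_id() is stable here. */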

#define trace_get_tcd(FLAGS) ({                 \
        struct trace_cpu_data *__ret;           \
        int __cpu = get_cpu();                  \
        local_irq_save(FLAGS);                  \
        __ret = &trace_data[__cpu].tcd;         \
        __ret;                                  \
})

#define trace_put_tcd(TCD, FLAGS) do {          \
        local_irq_restore(FLAGS);               \
        put_cpu();                              \
} while (0)
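
/* trace_get_tcd()/trace_put_tcd() must be used as a pair: get_cpu() pins the
 * caller to its CPU and local_irq_save() keeps an interrupt from logging
 * into the same per-cpu state mid-update.  A typical caller looks like:
 *
 *      unsigned long flags;
 *      struct trace_cpu_data *tcd = trace_get_tcd(flags);
 *      ...use tcd...
 *      trace_put_tcd(tcd, flags);
 */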

static void put_pages_on_daemon_list_on_cpu(void *info);

/* return a page that has 'len' bytes left at the end */
static struct page *trace_get_page(struct trace_cpu_data *tcd,
                                   unsigned long len)
{
        struct page *page = NULL;

        if (len > PAGE_SIZE) {
                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                       "page\n", len);
                return NULL;
        }

        if (!list_empty(&tcd->tcd_pages)) {
                page = list_entry(tcd->tcd_pages.prev, struct page,
                                  PAGE_LIST_ENTRY);
                if (page->index + len <= PAGE_SIZE)
                        return page;
        }

        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                page = alloc_page(GFP_ATOMIC);
                if (page == NULL) {
                        /* the kernel should print a message for us.  fall back
                         * to using the last page in the ring buffer. */
                        goto ring_buffer;
                }
                page->index = 0;
                page->mapping = (void *)(long)smp_processor_id();
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                if (tcd->tcd_cur_pages > 8 && thread_running) {
                        struct tracefiled_ctl *tctl = &trace_tctl;
                        wake_up(&tctl->tctl_waitq);
                }
                return page;
        }

 ring_buffer:
        if (thread_running) {
                int pgcount = tcd->tcd_cur_pages / 10;
                struct page_collection pc;
                struct list_head *pos, *tmp;
                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                       " 10%% of pages (%d)\n", pgcount + 1);

                INIT_LIST_HEAD(&pc.pc_pages);
                spin_lock_init(&pc.pc_lock);

                list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
                        struct page *page;

                        if (pgcount-- == 0)
                                break;

                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        list_del(&PAGE_LIST(page));
                        list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
                        tcd->tcd_cur_pages--;
                }
                put_pages_on_daemon_list_on_cpu(&pc);
        }
        LASSERT(!list_empty(&tcd->tcd_pages));

        page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
        page->index = 0;

        list_del(&PAGE_LIST(page));
        list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
        return page;
}

static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
                             int len, char *file, const char *fn)
{
        char *prefix = NULL, *ptype = NULL;

        if ((mask & D_EMERG) != 0) {
                prefix = "LustreError";
                ptype = KERN_EMERG;
        } else if ((mask & D_ERROR) != 0) {
                prefix = "LustreError";
                ptype = KERN_ERR;
        } else if ((mask & D_WARNING) != 0) {
                prefix = "Lustre";
                ptype = KERN_WARNING;
        } else if (portal_printk) {
                prefix = "Lustre";
                ptype = KERN_INFO;
        }

        printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
               hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
}

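/* Each record laid down in a trace page has the form:
 *
 *      struct ptldebug_header  hdr;    (hdr.ph_len = total record length)
 *      char                    file[]; (NUL terminated source file name)
 *      char                    fn[];   (NUL terminated function name)
 *      char                    text[]; (the formatted message)
 *
 * page->index is reused as the fill offset, so the next record starts at
 * page_address(page) + page->index. */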
void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
                       const int line, unsigned long stack, char *format, ...)
{
        struct trace_cpu_data *tcd;
        struct ptldebug_header header;
        struct page *page;
        char *debug_buf = format;
        int known_size, needed = 85 /* average message length */, max_nob;
        va_list       ap;
        unsigned long flags;
        struct timeval tv;

#ifdef CRAY_PORTALS
        if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
                return;
#endif
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;

        if (*format == '\0' || format[strlen(format) - 1] != '\n')
                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
                       file, line, fn);

        tcd = trace_get_tcd(flags);
        if (tcd->tcd_shutting_down)
                goto out;

        do_gettimeofday(&tv);

        header.ph_subsys = subsys;
        header.ph_mask = mask;
        header.ph_cpu_id = smp_processor_id();
        header.ph_sec = (__u32)tv.tv_sec;
        header.ph_usec = tv.tv_usec;
        header.ph_stack = stack;
        header.ph_pid = current->pid;
        header.ph_line_num = line;

#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
        header.ph_extern_pid = current->thread.extern_pid;
#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
        header.ph_extern_pid = current->thread.mode.tt.extern_pid;
#else
        header.ph_extern_pid = 0;
#endif

        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; /* nulls */

 retry:
        page = trace_get_page(tcd, needed + known_size);
        if (page == NULL) {
                debug_buf = format;
                if (needed + known_size > PAGE_SIZE)
                        mask |= D_ERROR;
                needed = strlen(format);
                goto out;
        }

        debug_buf = page_address(page) + page->index + known_size;

        max_nob = PAGE_SIZE - page->index - known_size;
        LASSERT(max_nob > 0);
        va_start(ap, format);
        needed = vsnprintf(debug_buf, max_nob, format, ap);
        va_end(ap);

        if (needed >= max_nob) /* truncated (vsnprintf returns the length the
                                * whole message needed); retry with more room */
                goto retry;

        header.ph_len = known_size + needed;
        debug_buf = page_address(page) + page->index;

        memcpy(debug_buf, &header, sizeof(header));
        page->index += sizeof(header);
        debug_buf += sizeof(header);

        strcpy(debug_buf, file);
        page->index += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        strcpy(debug_buf, fn);
        page->index += strlen(fn) + 1;
        debug_buf += strlen(fn) + 1;

        page->index += needed;
        if (page->index > PAGE_SIZE)
                printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
                       page->index);

 out:
        if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
                print_to_console(&header, mask, debug_buf, needed, file, fn);

        trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);

static void collect_pages_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        struct page_collection *pc = info;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_splice(&tcd->tcd_pages, &pc->pc_pages);
        INIT_LIST_HEAD(&tcd->tcd_pages);
        tcd->tcd_cur_pages = 0;
        if (pc->pc_want_daemon_pages) {
                list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages = 0;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

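/* Drain every CPU's trace pages into pc->pc_pages: the local CPU is handled
 * directly and smp_call_function(..., wait = 1) runs the collector on all
 * other CPUs.  pc_lock serializes the appends to the shared list. */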
static void collect_pages(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        INIT_LIST_HEAD(&pc->pc_pages);
        collect_pages_on_cpu(pc);
        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
}

static void put_pages_back_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp, *cur_head;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        cur_head = tcd->tcd_pages.next;

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), cur_head);
                tcd->tcd_cur_pages++;
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_back(struct page_collection *pc)
{
        /* needs to be fixed up for preempt */
        put_pages_back_on_cpu(pc);
        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
}

/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_daemon_list_on_cpu(void *info)
{
        struct page_collection *pc = info;
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        spin_lock(&pc->pc_lock);
        list_for_each_safe(pos, tmp, &pc->pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);
                if ((unsigned long)page->mapping != smp_processor_id())
                        continue;

                list_del(&PAGE_LIST(page));
                list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
                tcd->tcd_cur_daemon_pages++;

                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
                        page = list_entry(tcd->tcd_daemon_pages.next,
                                          struct page, PAGE_LIST_ENTRY);

                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        page->index = 0;
                        list_del(&PAGE_LIST(page));
                        page->mapping = NULL;
                        __free_page(page);
                        tcd->tcd_cur_daemon_pages--;
                }
        }
        spin_unlock(&pc->pc_lock);

        trace_put_tcd(tcd, flags);
}

static void put_pages_on_daemon_list(struct page_collection *pc)
{
        put_pages_on_daemon_list_on_cpu(pc);
        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
}

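/* Decode every collected record and echo it to the console at D_EMERG,
 * freeing the pages as we go -- presumably for dumping the buffer when
 * things have gone wrong and no daemon or dump file is available. */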
void trace_debug_print(void)
{
        struct page_collection pc;
        struct list_head *pos, *tmp;

        spin_lock_init(&pc.pc_lock);

        collect_pages(&pc);
        list_for_each_safe(pos, tmp, &pc.pc_pages) {
                struct page *page;
                char *p, *file, *fn;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                p = page_address(page);
                /* only page->index bytes of this page were filled; walking
                 * all the way to PAGE_SIZE would decode stale data */
                while (p < ((char *)page_address(page) + page->index)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        len = hdr->ph_len - (p - (char *)hdr);

                        print_to_console(hdr, D_EMERG, p, len, file, fn);
                        p += len; /* step over the message to the next record */
                }

                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
}

int tracefile_dump_all_pages(char *filename)
{
        struct page_collection pc;
        struct file *filp;
        struct list_head *pos, *tmp;
        mm_segment_t oldfs;
        int rc;

        down_write(&tracefile_sem);

        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
        if (IS_ERR(filp)) {
                rc = PTR_ERR(filp);
                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
                       filename, rc);
                goto out;
        }

        spin_lock_init(&pc.pc_lock);
        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
                rc = 0;
                goto close;
        }

        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
        oldfs = get_fs();
        set_fs(get_ds());
        list_for_each_safe(pos, tmp, &pc.pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                rc = filp->f_op->write(filp, page_address(page), page->index,
                                       &filp->f_pos);
                if (rc != page->index) {
                        printk(KERN_WARNING "wanted to write %lu but wrote "
                               "%d\n", page->index, rc);
                        put_pages_back(&pc);
                        break;
                }
                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
        set_fs(oldfs);
        rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
        if (rc)
                printk(KERN_ERR "sync returns %d\n", rc);
 close:
        filp_close(filp, 0);
 out:
        up_write(&tracefile_sem);
        return rc;
}

void trace_flush_pages(void)
{
        struct page_collection pc;
        struct list_head *pos, *tmp;

        spin_lock_init(&pc.pc_lock);

        collect_pages(&pc);
        list_for_each_safe(pos, tmp, &pc.pc_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
}

int trace_dk(struct file *file, const char *buffer, unsigned long count,
             void *data)
{
        char *name;
        unsigned long off;
        int rc = 0;

        name = kmalloc(count + 1, GFP_KERNEL);
        if (name == NULL)
                return -ENOMEM;

        if (copy_from_user(name, buffer, count)) {
                rc = -EFAULT;
                goto out;
        }

        if (name[0] != '/') {
                rc = -EINVAL;
                goto out;
        }

        /* be nice and strip out trailing '\n' */
        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
                ;

        name[off] = '\0';
        rc = tracefile_dump_all_pages(name);
out:
        kfree(name);
        /* propagate failures instead of silently reporting success */
        return rc < 0 ? rc : count;
}
EXPORT_SYMBOL(trace_dk);

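/* Main loop of the debug daemon: wake once a second (or sooner, when
 * trace_get_page() kicks the waitqueue), collect the pages every CPU has
 * filled, and append them to the configured tracefile; when no tracefile is
 * set, park the pages on the per-cpu daemon lists so an LBUG dump still has
 * recent history available. */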
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct list_head *pos, *tmp;
        struct ptldebug_header *hdr;
        struct file *filp;
        struct page *page;
        mm_segment_t oldfs;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */
        kportal_daemonize("ktracefiled");
        reparent_to_init();

        spin_lock_init(&pc.pc_lock);
        complete(&tctl->tctl_start);

        while (1) {
                wait_queue_t __wait;

                init_waitqueue_entry(&__wait, current);
                add_wait_queue(&tctl->tctl_waitq, &__wait);
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(HZ);
                remove_wait_queue(&tctl->tctl_waitq, &__wait);

                if (atomic_read(&tctl->tctl_shutdown))
                        break;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        continue;

                filp = NULL;
                down_read(&tracefile_sem);
                if (tracefile != NULL) {
                        filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
                                         0600);
                        if (IS_ERR(filp)) {
                                printk(KERN_WARNING "couldn't open %s: %ld\n",
                                       tracefile, PTR_ERR(filp));
                                filp = NULL;
                        }
                }
                up_read(&tracefile_sem);
                if (filp == NULL) {
                        put_pages_on_daemon_list(&pc);
                        continue;
                }

                oldfs = get_fs();
                set_fs(get_ds());

                /* mark the first header, so we can sort in chunks */
                page = list_entry(pc.pc_pages.next, struct page,
                                  PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                hdr = page_address(page);
                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;

                list_for_each_safe(pos, tmp, &pc.pc_pages) {
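                        /* f_pos is static so the write offset survives
                         * across batches; wrapping it at tracefile_size
                         * makes the output file a ring. */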
                        static loff_t f_pos;
                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                        LASSERT(page->index <= PAGE_SIZE);
                        LASSERT(page_count(page) > 0);

                        if (f_pos >= tracefile_size)
                                f_pos = 0;
                        else if (f_pos > filp->f_dentry->d_inode->i_size)
                                f_pos = filp->f_dentry->d_inode->i_size;

                        rc = filp->f_op->write(filp, page_address(page),
                                               page->index, &f_pos);
                        if (rc != page->index) {
                                printk(KERN_WARNING "wanted to write %lu but "
                                       "wrote %d\n", page->index, rc);
                                put_pages_back(&pc);
                                /* the pages went back to the per-cpu lists,
                                 * so this walk can't safely continue */
                                break;
                        }
                }
                set_fs(oldfs);
                filp_close(filp, 0);

                put_pages_on_daemon_list(&pc);
        }
        complete(&tctl->tctl_stop);
        return 0;
}

int trace_start_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;
        int rc = 0;

        down(&trace_thread_sem);
        if (thread_running)
                goto out;

        init_completion(&tctl->tctl_start);
        init_completion(&tctl->tctl_stop);
        init_waitqueue_head(&tctl->tctl_waitq);
        atomic_set(&tctl->tctl_shutdown, 0);

        if (kernel_thread(tracefiled, tctl, 0) < 0) {
                rc = -ECHILD;
                goto out;
        }

        wait_for_completion(&tctl->tctl_start);
        thread_running = 1;
out:
        up(&trace_thread_sem);
        return rc;
}

void trace_stop_thread(void)
{
        struct tracefiled_ctl *tctl = &trace_tctl;

        down(&trace_thread_sem);
        if (thread_running) {
                printk(KERN_INFO "Shutting down debug daemon thread...\n");
                atomic_set(&tctl->tctl_shutdown, 1);
                wait_for_completion(&tctl->tctl_stop);
                thread_running = 0;
        }
        up(&trace_thread_sem);
}

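/* /proc write handler controlling the debug daemon.  Accepted strings:
 *
 *      "stop"           - shut the daemon down
 *      "size=<MB>"      - set the tracefile size; values outside 10..20480
 *                         fall back to the 500MB default
 *      <absolute path>  - (re)start the daemon writing to that file
 */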
int trace_write_daemon_file(struct file *file, const char *buffer,
                            unsigned long count, void *data)
{
        char *name;
        unsigned long off;
        int rc = 0;

        name = kmalloc(count + 1, GFP_KERNEL);
        if (name == NULL)
                return -ENOMEM;

        if (copy_from_user(name, buffer, count)) {
                rc = -EFAULT;
                goto out;
        }

        /* be nice and strip out trailing '\n' */
        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
                ;

        name[off] = '\0';

        down_write(&tracefile_sem);
        if (strcmp(name, "stop") == 0) {
                kfree(tracefile);       /* don't leak the old file name */
                tracefile = NULL;
                trace_stop_thread();
                goto out_sem;
        } else if (strncmp(name, "size=", 5) == 0) {
                tracefile_size = simple_strtoul(name + 5, NULL, 0);
                if (tracefile_size < 10 || tracefile_size > 20480)
                        tracefile_size = TRACEFILE_SIZE;
                else
                        tracefile_size <<= 20;
                goto out_sem;
        }

        if (name[0] != '/') {
                rc = -EINVAL;
                goto out_sem;
        }

        if (tracefile != NULL)
                kfree(tracefile);

        tracefile = name;
        name = NULL;

        printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
               "to %s (%lukB max)\n", tracefile,
               (unsigned long)(tracefile_size >> 10));

        trace_start_thread();

 out_sem:
        up_write(&tracefile_sem);

 out:
        kfree(name);
        return rc < 0 ? rc : count;
}

int trace_read_daemon_file(char *page, char **start, off_t off, int count,
                           int *eof, void *data)
{
        int rc;

        down_read(&tracefile_sem);
        /* tracefile is NULL when no daemon file has been configured */
        rc = snprintf(page, count, "%s", tracefile == NULL ? "" : tracefile);
        up_read(&tracefile_sem);

        return rc;
}

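/* /proc write handler for the total debug buffer size in MB.  The value is
 * checked against 80% of RAM ((num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5),
 * split evenly across CPUs, then converted to pages: e.g. writing "64" on a
 * 4-CPU box with 4K pages leaves each CPU 16 << (20 - 12) == 4096 pages. */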
int trace_write_debug_mb(struct file *file, const char *buffer,
                         unsigned long count, void *data)
{
        char string[32];
        int i;
        unsigned max;

        if (count >= sizeof(string)) {
                printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
                       count);
                return -EOVERFLOW;
        }

        if (copy_from_user(string, buffer, count))
                return -EFAULT;
        string[count] = '\0';   /* user data isn't NUL terminated */

        max = simple_strtoul(string, NULL, 0);
        if (max == 0)
                return -EINVAL;

        if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
                       "%dMB, which is more than 80%% of available RAM (%lu)\n",
                       max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
                return -EINVAL;
        }

        max /= smp_num_cpus;

        for (i = 0; i < NR_CPUS; i++) {
                struct trace_cpu_data *tcd;
                tcd = &trace_data[i].tcd;
                tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
        }
        return count;
}

int trace_read_debug_mb(char *page, char **start, off_t off, int count,
                        int *eof, void *data)
{
        struct trace_cpu_data *tcd;
        unsigned long flags;
        int rc;

        tcd = trace_get_tcd(flags);
        rc = snprintf(page, count, "%lu\n",
                      (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
        trace_put_tcd(tcd, flags);

        return rc;
}

int tracefile_init(void)
{
        struct trace_cpu_data *tcd;
        int i;

        for (i = 0; i < NR_CPUS; i++) {
                tcd = &trace_data[i].tcd;
                INIT_LIST_HEAD(&tcd->tcd_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = TCD_MAX_PAGES;
                tcd->tcd_shutting_down = 0;
        }
        return 0;
}

static void trace_cleanup_on_cpu(void *info)
{
        struct trace_cpu_data *tcd;
        struct list_head *pos, *tmp;
        unsigned long flags;

        tcd = trace_get_tcd(flags);

        tcd->tcd_shutting_down = 1;

        list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
                struct page *page;

                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
                LASSERT(page->index <= PAGE_SIZE);
                LASSERT(page_count(page) > 0);

                list_del(&PAGE_LIST(page));
                page->mapping = NULL;
                __free_page(page);
        }
        tcd->tcd_cur_pages = 0;

        trace_put_tcd(tcd, flags);
}

static void trace_cleanup(void)
{
        struct page_collection pc;

        INIT_LIST_HEAD(&pc.pc_pages);
        spin_lock_init(&pc.pc_lock);

        trace_cleanup_on_cpu(&pc);
        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
}

void tracefile_exit(void)
{
        trace_stop_thread();
        trace_cleanup();
}