Whamcloud - gitweb
LU-17744 ldiskfs: mballoc stats fixes
[fs/lustre-release.git] / libcfs / libcfs / tracefile.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * libcfs/libcfs/tracefile.c
32  *
33  * Author: Zach Brown <zab@clusterfs.com>
34  * Author: Phil Schwan <phil@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include "tracefile.h"
39
40 #include <linux/ctype.h>
41 #include <linux/fs.h>
42 #include <linux/kthread.h>
43 #include <linux/pagemap.h>
44 #include <linux/poll.h>
45 #include <linux/uaccess.h>
46 #include <libcfs/linux/linux-fs.h>
47 #include <libcfs/libcfs.h>
48
49
/* Each execution context gets its own class of trace buffers so tracing
 * never has to block on a buffer owned by a different interrupt level.
 */
enum cfs_trace_buf_type {
        CFS_TCD_TYPE_PROC = 0,
        CFS_TCD_TYPE_SOFTIRQ,
        CFS_TCD_TYPE_IRQ,
        CFS_TCD_TYPE_CNT
};

/* Per-type array of per-CPU trace data, indexed [type][cpu] */
union cfs_trace_data_union (*cfs_trace_data[CFS_TCD_TYPE_CNT])[NR_CPUS] __cacheline_aligned;

/* Pages containing records already processed by daemon.
 * Link via ->lru, use size in ->private
 */
static LIST_HEAD(daemon_pages);
static long daemon_pages_count;
static long daemon_pages_max;

/* destination file name and size limit for the trace daemon */
char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;

/* tracefiled daemon task, NULL when the daemon is not running */
struct task_struct *tctl_task;

/* number of trace pages currently allocated (debug accounting) */
static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
/* serializes tracefile configuration and the daemon_pages list */
static DECLARE_RWSEM(cfs_tracefile_sem);
73
74 /* trace file lock routines */
75 /* The walking argument indicates the locking comes from all tcd types
76  * iterator and we must lock it and dissable local irqs to avoid deadlocks
77  * with other interrupt locks that might be happening. See LU-1311
78  * for details.
79  */
80 static int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
81         __acquires(&tcd->tcd_lock)
82 {
83         __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
84         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
85                 spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
86         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
87                 spin_lock_bh(&tcd->tcd_lock);
88         else if (unlikely(walking))
89                 spin_lock_irq(&tcd->tcd_lock);
90         else
91                 spin_lock(&tcd->tcd_lock);
92         return 1;
93 }
94
95 static void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
96         __releases(&tcd->tcd_lock)
97 {
98         __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
99         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
100                 spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
101         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
102                 spin_unlock_bh(&tcd->tcd_lock);
103         else if (unlikely(walking))
104                 spin_unlock_irq(&tcd->tcd_lock);
105         else
106                 spin_unlock(&tcd->tcd_lock);
107 }
108
/* Iterate @tcd over every buffer type @i and every possible CPU @j.
 * NB: takes no locks; only safe when concurrent writers are quiesced
 * (e.g. during a panic dump).
 */
#define cfs_tcd_for_each(tcd, i, j)                                     \
        for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++)     \
                for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd);     \
                     j < num_possible_cpus();                           \
                     j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)

/* Iterate @tcd over every buffer type @i for one @cpu, holding the tcd
 * lock (walking mode) across each iteration's body.
 */
#define cfs_tcd_for_each_type_lock(tcd, i, cpu)                         \
        for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i] &&        \
             (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&                  \
             cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
119
120 static enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
121 {
122         if (in_irq())
123                 return CFS_TCD_TYPE_IRQ;
124         if (in_softirq())
125                 return CFS_TCD_TYPE_SOFTIRQ;
126         return CFS_TCD_TYPE_PROC;
127 }
128
129 static inline struct cfs_trace_cpu_data *
130 cfs_trace_get_tcd(void)
131 {
132         struct cfs_trace_cpu_data *tcd =
133                 &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
134
135         cfs_trace_lock_tcd(tcd, 0);
136
137         return tcd;
138 }
139
/* Drop the tcd lock and re-enable preemption taken by
 * cfs_trace_get_tcd().  Order matters: unlock before put_cpu().
 */
static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
{
        cfs_trace_unlock_tcd(tcd, 0);

        put_cpu();
}
146
/* Convert a ->linkage list node back to its containing trace page */
static inline struct cfs_trace_page *
cfs_tage_from_list(struct list_head *list)
{
        return list_entry(list, struct cfs_trace_page, linkage);
}
152
153 static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
154 {
155         struct page            *page;
156         struct cfs_trace_page *tage;
157
158         /* My caller is trying to free memory */
159         if (!in_interrupt() && (current->flags & PF_MEMALLOC))
160                 return NULL;
161
162         /*
163          * Don't spam console with allocation failures: they will be reported
164          * by upper layer anyway.
165          */
166         gfp |= __GFP_NOWARN;
167         page = alloc_page(gfp);
168         if (page == NULL)
169                 return NULL;
170
171         tage = kmalloc(sizeof(*tage), gfp);
172         if (tage == NULL) {
173                 __free_page(page);
174                 return NULL;
175         }
176
177         tage->page = page;
178         atomic_inc(&cfs_tage_allocated);
179         return tage;
180 }
181
/* Release a trace page descriptor and its backing page; undoes
 * cfs_tage_alloc() and keeps the allocation counter balanced.
 */
static void cfs_tage_free(struct cfs_trace_page *tage)
{
        __LASSERT(tage != NULL);
        __LASSERT(tage->page != NULL);

        __free_page(tage->page);
        kfree(tage);
        atomic_dec(&cfs_tage_allocated);
}
191
/* Move @tage from whatever list it is on to the tail of @queue */
static void cfs_tage_to_tail(struct cfs_trace_page *tage,
                             struct list_head *queue)
{
        __LASSERT(tage != NULL);
        __LASSERT(queue != NULL);

        list_move_tail(&tage->linkage, queue);
}
200
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
        struct cfs_trace_page *tage;
        struct task_struct *tsk;

        /* reuse the current (last) page if it still has room for @len */
        if (tcd->tcd_cur_pages > 0) {
                __LASSERT(!list_empty(&tcd->tcd_pages));
                tage = cfs_tage_from_list(tcd->tcd_pages.prev);
                if (tage->used + len <= PAGE_SIZE)
                        return tage;
        }

        /* otherwise grow, preferring pre-allocated stock pages over a
         * fresh GFP_ATOMIC allocation
         */
        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
                if (tcd->tcd_cur_stock_pages > 0) {
                        tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
                        --tcd->tcd_cur_stock_pages;
                        list_del_init(&tage->linkage);
                } else {
                        tage = cfs_tage_alloc(GFP_ATOMIC);
                        if (unlikely(tage == NULL)) {
                                /* stay quiet when the caller is in direct
                                 * reclaim; otherwise warn (ratelimited)
                                 */
                                if ((!(current->flags & PF_MEMALLOC) ||
                                     in_interrupt()) && printk_ratelimit())
                                        pr_warn("Lustre: cannot allocate a tage (%ld)\n",
                                                tcd->tcd_cur_pages);
                                return NULL;
                        }
                }

                tage->used = 0;
                tage->cpu = smp_processor_id();
                tage->type = tcd->tcd_type;
                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                tcd->tcd_cur_pages++;

                tsk = tctl_task;
                if (tcd->tcd_cur_pages > 8 && tsk)
                        /*
                         * wake up tracefiled to process some pages.
                         */
                        wake_up_process(tsk);

                return tage;
        }
        /* buffer is full: caller decides whether to shrink or recycle */
        return NULL;
}
248
/* Discard the oldest ~10% of @tcd's pages to make room when the buffer
 * overflows faster than the daemon can drain it.
 */
static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
{
        int pgcount = tcd->tcd_cur_pages / 10;
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        /*
         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
         * from here: this will lead to infinite recursion.
         */

        if (printk_ratelimit())
                pr_warn("Lustre: debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
                        pgcount + 1, tcd->tcd_cur_pages);

        INIT_LIST_HEAD(&pc.pc_pages);

        /* drop from the list head, i.e. the oldest records first */
        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                if (pgcount-- == 0)
                        break;

                list_del(&tage->linkage);
                cfs_tage_free(tage);
                tcd->tcd_cur_pages--;
        }
}
276
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
                                                 unsigned long len)
{
        struct cfs_trace_page *tage;

        /*
         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
         * from here: this will lead to infinite recursion.
         */

        /* a single record must fit in one page */
        if (len > PAGE_SIZE) {
                pr_err("LustreError: cowardly refusing to write %lu bytes in a page\n",
                       len);
                return NULL;
        }

        tage = cfs_trace_get_tage_try(tcd, len);
        if (tage != NULL)
                return tage;
        /* buffer full: if the daemon is running, discard old pages and
         * let it catch up ...
         */
        if (tctl_task)
                cfs_tcd_shrink(tcd);
        /* ... then recycle the oldest page in place (its old records
         * are overwritten)
         */
        if (tcd->tcd_cur_pages > 0) {
                tage = cfs_tage_from_list(tcd->tcd_pages.next);
                tage->used = 0;
                cfs_tage_to_tail(tage, &tcd->tcd_pages);
        }
        return tage;
}
306
307 static void cfs_set_ptldebug_header(struct ptldebug_header *header,
308                                     struct libcfs_debug_msg_data *msgdata)
309 {
310         struct timespec64 ts;
311
312         ktime_get_real_ts64(&ts);
313
314         header->ph_subsys = msgdata->msg_subsys;
315         header->ph_mask = msgdata->msg_mask;
316         header->ph_cpu_id = smp_processor_id();
317         header->ph_type = cfs_trace_buf_idx_get();
318         /* y2038 safe since all user space treats this as unsigned, but
319          * will overflow in 2106
320          */
321         header->ph_sec = (u32)ts.tv_sec;
322         header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
323         header->ph_pid = current->pid;
324         header->ph_line_num = msgdata->msg_line;
325         header->ph_extern_pid = 0;
326 }
327
/* Emit a formatted message to the kernel console at a printk level
 * chosen from the debug mask in @hdr.  D_CONSOLE messages omit the
 * pid/file/line decoration; mask bits are tested in decreasing
 * severity order, so the order of the branches matters.
 */
static void cfs_vprint_to_console(struct ptldebug_header *hdr,
                                  struct va_format *vaf, const char *file,
                                  const char *fn)
{
        char *prefix = "Lustre";
        int mask = hdr->ph_mask;

        if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
                prefix = "LNet";

        if (mask & D_CONSOLE) {
                if (mask & D_EMERG)
                        pr_emerg("%sError: %pV", prefix, vaf);
                else if (mask & D_ERROR)
                        pr_err("%sError: %pV", prefix, vaf);
                else if (mask & D_WARNING)
                        pr_warn("%s: %pV", prefix, vaf);
                else if (mask & libcfs_printk)
                        pr_info("%s: %pV", prefix, vaf);
        } else {
                if (mask & D_EMERG)
                        pr_emerg("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
                                 hdr->ph_pid, hdr->ph_extern_pid, file,
                                 hdr->ph_line_num, fn, vaf);
                else if (mask & D_ERROR)
                        pr_err("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
                               hdr->ph_pid, hdr->ph_extern_pid, file,
                               hdr->ph_line_num, fn, vaf);
                else if (mask & D_WARNING)
                        pr_warn("%s: %d:%d:(%s:%d:%s()) %pV", prefix,
                                hdr->ph_pid, hdr->ph_extern_pid, file,
                                hdr->ph_line_num, fn, vaf);
                else if (mask & (D_CONSOLE | libcfs_printk))
                        pr_info("%s: %pV", prefix, vaf);
        }
}
364
365 static void cfs_print_to_console(struct ptldebug_header *hdr, const char *file,
366                                  const char *fn, const char *fmt, ...)
367 {
368         struct va_format vaf;
369         va_list args;
370
371         va_start(args, fmt);
372         vaf.fmt = fmt;
373         vaf.va = &args;
374         cfs_vprint_to_console(hdr, &vaf, file, fn);
375 }
376
/* size chosen so a flags word plus buffer packs into 256 bytes */
#define DEBUG_FORMAT_BUFFER_SIZE        (256 - sizeof(unsigned long))

/* one rewritable format-string buffer; dfb_flags holds PF_INUSE */
struct debug_format_buffer {
        unsigned long dfb_flags;
        char dfb_buf[DEBUG_FORMAT_BUFFER_SIZE];
};
/* per-CPU pool of format buffers (~4KB worth per CPU) */
struct pcpu_format_pool {
        struct debug_format_buffer pf_dfb[4096 / DEBUG_FORMAT_BUFFER_SIZE];
};

enum pool_flags {
        PF_INUSE,
};

/* NULL until debug_format_buffer_alloc_buffers() runs */
static struct pcpu_format_pool __percpu *debug_format_pool;
392
393 int debug_format_buffer_alloc_buffers(void)
394 {
395         struct pcpu_format_pool __percpu *obj;
396         struct pcpu_format_pool *pbuf;
397         int cpu;
398
399         obj = alloc_percpu(struct pcpu_format_pool);
400         if (!obj)
401                 return -ENOMEM;
402         for_each_possible_cpu(cpu) {
403                 pbuf = per_cpu_ptr(obj, cpu);
404                 memset(pbuf, 0, sizeof(*pbuf));
405         }
406         debug_format_pool = obj;
407
408         return 0;
409 }
410 EXPORT_SYMBOL(debug_format_buffer_alloc_buffers);
411
/* Tear down the per-CPU format-buffer pool.  The global pointer is
 * cleared first and synchronize_rcu() waits out any thread that read
 * it just before the clear, so nobody can still be using the pool
 * when it is freed.
 */
void debug_format_buffer_free_buffers(void)
{
        struct pcpu_format_pool __percpu *tmp = debug_format_pool;
        struct pcpu_format_pool *pbuf;
        int cpu;
        int i;

        if (!debug_format_pool)
                return;

        debug_format_pool = NULL;
        synchronize_rcu();

        /* defensively mark every buffer busy before freeing, so a
         * straggler (there should be none after the grace period)
         * cannot claim one — NOTE(review): looks belt-and-braces;
         * confirm against the RCU usage in debug_format()
         */
        for_each_possible_cpu(cpu) {
                pbuf = per_cpu_ptr(tmp, cpu);
                for (i = 0; i < ARRAY_SIZE(pbuf->pf_dfb); i++)
                        set_bit(PF_INUSE, &pbuf->pf_dfb[i].dfb_flags);
        }
        free_percpu(tmp);
}
EXPORT_SYMBOL(debug_format_buffer_free_buffers);
433
/* When true, %p in debug formats is rewritten to %px so pointers are
 * logged unhashed (see rewrite_format() below).
 */
bool libcfs_debug_raw_pointers;

/* Accessor for the raw-pointer logging flag */
bool get_debug_raw_pointers(void)
{
        return libcfs_debug_raw_pointers;
}
EXPORT_SYMBOL(get_debug_raw_pointers);

/* Enable/disable unhashed pointer logging */
void set_debug_raw_pointers(bool value)
{
        libcfs_debug_raw_pointers = value;
}
EXPORT_SYMBOL(set_debug_raw_pointers);
447
/* compat: older kernels lack raw_cpu_ptr(); this_cpu_ptr() is close
 * enough here since callers tolerate migration between CPUs
 */
#ifndef raw_cpu_ptr
#define raw_cpu_ptr(p)  this_cpu_ptr(p)
#endif
451
452 static struct debug_format_buffer *debug_format_buffer_get_locked(void)
453 {
454         struct debug_format_buffer *dfb = NULL;
455         struct pcpu_format_pool *pbuf;
456         int i;
457
458         if (!debug_format_pool)
459                 return NULL;
460
461         pbuf = raw_cpu_ptr(debug_format_pool);
462         for (i = 0; i < ARRAY_SIZE(pbuf->pf_dfb); i++) {
463                 if (!test_and_set_bit(PF_INUSE, &pbuf->pf_dfb[i].dfb_flags)) {
464                         dfb = &pbuf->pf_dfb[i];
465                         break;
466                 }
467         }
468         return dfb;
469 }
470
471 static void debug_format_buffer_put_locked(struct debug_format_buffer *dfb)
472 {
473         if (!debug_format_pool || !dfb)
474                 return;
475         clear_bit(PF_INUSE, &dfb->dfb_flags);
476 }
477
/* Copy @fmt into @new_fmt (capacity @nfsz), replacing each plain "%p"
 * conversion with "%px" so pointers are printed unhashed.  "%%" is
 * copied through untouched and "%p<alnum>" extensions (%pV, %pK, ...)
 * are left alone.
 * Return true when at least one replacement was made and the result
 * fit in @nfsz; false otherwise (no replacement needed, or truncation).
 * (The previous comment claiming a replacement count / negative error
 * was stale: the function returns bool.)
 */
static bool rewrite_format(const char *fmt, size_t nfsz, char *new_fmt)
{
        const char *p = fmt;
        char *q = new_fmt;
        int written = 0;
        int unhashed = 0;

        if (WARN_ON_ONCE(!fmt))
                return 0;

        p = fmt;
        q = new_fmt;
        while (*p) {
                /* always leave room for this char plus the final NUL */
                if (written + 2 >= nfsz)
                        return false;

                *q++ = *p++;
                written++;

                /* Replace %p with %px */
                if (p[-1] == '%') {
                        if (p[0] == '%') {
                                /* literal "%%": copy second '%' verbatim */
                                if (written + 2 >= nfsz)
                                        return false;
                                *q++ = *p++;
                                written++;
                        } else if (p[0] == 'p' && !isalnum(p[1])) {
                                if (written + 3 >= nfsz)
                                        return false;
                                *q++ = *p++;
                                *q++ = 'x';
                                written += 2;
                                unhashed++;
                        }
                }
        }
        *q = '\0';

        return unhashed > 0;
}
519
520 /*
521  * @fmt: caller provided format string
522  * @m: if non-null points to per-cpu object
523  *
524  * return result is format string to use, it will be either:
525  *   fmt: when no changes need to be made to original format
526  *   *m->dfb_buf: when percpu pre-allocated is sufficient to hold updated format
527  */
528 static inline const char *debug_format(const char *fmt,
529                                        struct debug_format_buffer **m)
530 {
531         struct debug_format_buffer *dfb_fmt;
532
533         *m = NULL;
534         if (likely(!libcfs_debug_raw_pointers))
535                 return fmt;
536         if (!strstr(fmt, "%p"))
537                 return fmt;
538
539         /* try to rewrite format into buf */
540         dfb_fmt = debug_format_buffer_get_locked();
541         if (dfb_fmt) {
542                 size_t len = sizeof(dfb_fmt->dfb_buf) - 1;
543
544                 if (rewrite_format(fmt, len, dfb_fmt->dfb_buf)) {
545                         *m = dfb_fmt;
546                         return dfb_fmt->dfb_buf;
547                 }
548                 debug_format_buffer_put_locked(dfb_fmt);
549         }
550
551         return fmt;
552 }
553
/* Main debug-message entry point (CDEBUG and friends land here).
 * Formats the message into the current CPU's trace buffer and, when
 * the mask warrants it, also mirrors it to the console subject to the
 * per-site rate limiting state in @msgdata->msg_cdls.
 */
void libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
                      const char *format, ...)
{
        struct cfs_trace_cpu_data *tcd = NULL;
        struct ptldebug_header header = {0};
        struct cfs_trace_page *tage;
        /* string_buf is used only if tcd != NULL, and is always set then */
        char *string_buf = NULL;
        char *debug_buf;
        int known_size;
        int needed = 85; /* seeded with average message length */
        int max_nob;
        va_list ap;
        int retry;
        const char *file = msgdata->msg_file;
        const char *fn = msgdata->msg_fn;
        struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;
        struct debug_format_buffer *dfb = NULL;

        /* may swap %p -> %px when raw pointer logging is enabled */
        format = debug_format(format, &dfb);

        /* record only the basename of the source file */
        if (strchr(file, '/'))
                file = strrchr(file, '/') + 1;

        tcd = cfs_trace_get_tcd();

        /* cfs_trace_get_tcd() grabs a lock, which disables preemption and
         * pins us to a particular CPU.  This avoids an smp_processor_id()
         * warning on Linux when debugging is enabled.
         */
        cfs_set_ptldebug_header(&header, msgdata);

        if (!tcd)                /* arch may not log in IRQ context */
                goto console;

        if (tcd->tcd_cur_pages == 0)
                header.ph_flags |= PH_FLAG_FIRST_RECORD;

        if (tcd->tcd_shutting_down) {
                cfs_trace_put_tcd(tcd);
                tcd = NULL;
                goto console;
        }

        /* bytes of fixed record payload: file name, function name and
         * (optionally) the binary header
         */
        known_size = strlen(file) + 1;
        if (fn)
                known_size += strlen(fn) + 1;

        if (libcfs_debug_binary)
                known_size += sizeof(header);

        /*
         * May perform an additional pass to update 'needed' and increase
         * tage buffer size to match vsnprintf reported size required
         * On the second pass (retry=1) use vscnprintf [which returns
         * number of bytes written not including the terminating nul]
         * to clarify `needed` is used as number of bytes written
         * for the remainder of this function
         */
        for (retry = 0; retry < 2; retry++) {
                tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
                if (!tage) {
                        if (needed + known_size > PAGE_SIZE)
                                header.ph_mask |= D_ERROR;

                        cfs_trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                string_buf = (char *)page_address(tage->page) +
                             tage->used + known_size;

                max_nob = PAGE_SIZE - tage->used - known_size;
                if (max_nob <= 0) {
                        pr_emerg("LustreError: negative max_nob: %d\n",
                                 max_nob);
                        header.ph_mask |= D_ERROR;
                        cfs_trace_put_tcd(tcd);
                        tcd = NULL;
                        goto console;
                }

                va_start(ap, format);
                if (retry)
                        needed = vscnprintf(string_buf, max_nob, format, ap);
                else
                        needed = vsnprintf(string_buf, max_nob, format, ap);
                va_end(ap);

                if (needed < max_nob) /* well. printing ok.. */
                        break;
        }

        /* `needed` is actual bytes written to string_buf */
        if (unlikely(*(string_buf + needed - 1) != '\n')) {
                pr_info("Lustre: format at %s:%d:%s doesn't end in newline\n",
                        file, header.ph_line_num, fn);
        }

        header.ph_len = known_size + needed;
        debug_buf = (char *)page_address(tage->page) + tage->used;

        /* lay out the record: [header][file\0][fn\0][message] */
        if (libcfs_debug_binary) {
                memcpy(debug_buf, &header, sizeof(header));
                tage->used += sizeof(header);
                debug_buf += sizeof(header);
        }

        snprintf(debug_buf, PAGE_SIZE - tage->used, "%s", file);
        tage->used += strlen(file) + 1;
        debug_buf += strlen(file) + 1;

        if (fn) {
                snprintf(debug_buf, PAGE_SIZE - tage->used, "%s", fn);
                tage->used += strlen(fn) + 1;
                debug_buf += strlen(fn) + 1;
        }

        /* the message body written above must start exactly here */
        __LASSERT(debug_buf == string_buf);

        tage->used += needed;
        __LASSERT(tage->used <= PAGE_SIZE);

console:
        if ((header.ph_mask & libcfs_printk) == 0) {
                /* no console output requested */
                if (tcd != NULL)
                        cfs_trace_put_tcd(tcd);
                goto out;
        }

        if (cdls != NULL) {
                if (libcfs_console_ratelimit &&
                    cdls->cdls_next != 0 &&     /* not first time ever */
                    time_before(jiffies, cdls->cdls_next)) {
                        /* skipping a console message */
                        cdls->cdls_count++;
                        if (tcd != NULL)
                                cfs_trace_put_tcd(tcd);
                        goto out;
                }

                if (time_after(jiffies, cdls->cdls_next +
                                        libcfs_console_max_delay +
                                        cfs_time_seconds(10))) {
                        /* last timeout was a long time ago */
                        cdls->cdls_delay /= libcfs_console_backoff * 4;
                } else {
                        cdls->cdls_delay *= libcfs_console_backoff;
                }

                if (cdls->cdls_delay < libcfs_console_min_delay)
                        cdls->cdls_delay = libcfs_console_min_delay;
                else if (cdls->cdls_delay > libcfs_console_max_delay)
                        cdls->cdls_delay = libcfs_console_max_delay;

                /* ensure cdls_next is never zero after it's been seen */
                cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
        }

        if (tcd) {
                /* message already rendered into the trace page: reuse it */
                cfs_print_to_console(&header, file, fn, "%s", string_buf);
                cfs_trace_put_tcd(tcd);
        } else {
                /* no trace buffer: format directly to the console */
                struct va_format vaf;

                va_start(ap, format);
                vaf.fmt = format;
                vaf.va = &ap;
                cfs_vprint_to_console(&header, &vaf, file, fn);
                va_end(ap);
        }

        if (cdls != NULL && cdls->cdls_count != 0) {
                cfs_print_to_console(&header, file, fn,
                                     "Skipped %d previous similar message%s\n",
                                     cdls->cdls_count,
                                     (cdls->cdls_count > 1) ? "s" : "");

                cdls->cdls_count = 0;
        }
out:
        /* release the %p-rewrite buffer, if debug_format() claimed one */
        debug_format_buffer_put_locked(dfb);
}
EXPORT_SYMBOL(libcfs_debug_msg);
740
/* Report a failed LASSERT: mark the catastrophe flags, force the
 * message to the console at emergency priority, then panic.
 * Never returns.
 */
void
cfs_trace_assertion_failed(const char *str,
                           struct libcfs_debug_msg_data *msgdata)
{
        struct ptldebug_header hdr;

        libcfs_panic_in_progress = 1;
        libcfs_catastrophe = 1;
        /* make the flags visible to other CPUs before printing */
        smp_mb();

        cfs_set_ptldebug_header(&hdr, msgdata);
        hdr.ph_mask = D_EMERG;

        cfs_print_to_console(&hdr, msgdata->msg_file, msgdata->msg_fn,
                             "%s", str);

        panic("Lustre debug assertion failure\n");

        /* not reached */
}
761
762 static void
763 panic_collect_pages(struct page_collection *pc)
764 {
765         /* Do the collect_pages job on a single CPU: assumes that all other
766          * CPUs have been stopped during a panic.  If this isn't true for some
767          * arch, this will have to be implemented separately in each arch.  */
768         int                        i;
769         int                        j;
770         struct cfs_trace_cpu_data *tcd;
771
772         INIT_LIST_HEAD(&pc->pc_pages);
773
774         cfs_tcd_for_each(tcd, i, j) {
775                 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
776                 tcd->tcd_cur_pages = 0;
777         }
778 }
779
/* Move every CPU's trace pages onto @pc->pc_pages, taking each tcd
 * lock in walking mode while its lists are spliced.
 */
static void collect_pages_on_all_cpus(struct page_collection *pc)
{
        struct cfs_trace_cpu_data *tcd;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                        tcd->tcd_cur_pages = 0;
                }
        }
}
792
793 static void collect_pages(struct page_collection *pc)
794 {
795         INIT_LIST_HEAD(&pc->pc_pages);
796
797         if (libcfs_panic_in_progress)
798                 panic_collect_pages(pc);
799         else
800                 collect_pages_on_all_cpus(pc);
801 }
802
/* Return unprocessed pages in @pc to the tcd they came from, matching
 * each page by its recorded cpu and buffer type.
 */
static void put_pages_back_on_all_cpus(struct page_collection *pc)
{
        struct cfs_trace_cpu_data *tcd;
        struct list_head *cur_head;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        int i, cpu;

        for_each_possible_cpu(cpu) {
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        /* remember the current head so returned pages end
                         * up in front of anything logged since collection
                         */
                        cur_head = tcd->tcd_pages.next;

                        list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
                                                 linkage) {

                                __LASSERT_TAGE_INVARIANT(tage);

                                /* only pages belonging to this cpu/type */
                                if (tage->cpu != cpu || tage->type != i)
                                        continue;

                                cfs_tage_to_tail(tage, cur_head);
                                tcd->tcd_cur_pages++;
                        }
                }
        }
}
829
830 static void put_pages_back(struct page_collection *pc)
831 {
832         if (!libcfs_panic_in_progress)
833                 put_pages_back_on_all_cpus(pc);
834 }
835
836 #ifdef LNET_DUMP_ON_PANIC
837 void cfs_trace_debug_print(void)
838 {
839         struct page_collection pc;
840         struct cfs_trace_page *tage;
841         struct cfs_trace_page *tmp;
842         struct page *page;
843
844         collect_pages(&pc);
845         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
846                 char *p, *file, *fn;
847
848                 __LASSERT_TAGE_INVARIANT(tage);
849
850                 page = tage->page;
851                 p = page_address(page);
852                 while (p < ((char *)page_address(page) + tage->used)) {
853                         struct ptldebug_header *hdr;
854                         int len;
855
856                         hdr = (void *)p;
857                         p += sizeof(*hdr);
858                         file = p;
859                         p += strlen(file) + 1;
860                         fn = p;
861                         p += strlen(fn) + 1;
862                         len = hdr->ph_len - (int)(p - (char *)hdr);
863
864                         cfs_print_to_console(hdr, D_EMERG, file, fn,
865                                              "%.*s", len, p);
866
867                         p += len;
868                 }
869
870                 list_del(&tage->linkage);
871                 cfs_tage_free(tage);
872         }
873         down_write(&cfs_tracefile_sem);
874         while ((page = list_first_entry_or_null(&daemon_pages,
875                                                 struct page, lru)) != NULL) {
876                 char *p, *file, *fn;
877
878                 p = page_address(page);
879                 while (p < ((char *)page_address(page) + page->private)) {
880                         struct ptldebug_header *hdr;
881                         int len;
882
883                         hdr = (void *)p;
884                         p += sizeof(*hdr);
885                         file = p;
886                         p += strlen(file) + 1;
887                         fn = p;
888                         p += strlen(fn) + 1;
889                         len = hdr->ph_len - (int)(p - (char *)hdr);
890
891                         cfs_print_to_console(hdr, D_EMERG, file, fn,
892                                              "%.*s", len, p);
893
894                         p += len;
895                 }
896                 list_del_init(&page->lru);
897                 daemon_pages_count -= 1;
898                 put_page(page);
899         }
900         up_write(&cfs_tracefile_sem);
901 }
902 #endif /* LNET_DUMP_ON_PANIC */
903
904 int cfs_tracefile_dump_all_pages(char *filename)
905 {
906         struct page_collection pc;
907         struct file *filp;
908         struct cfs_trace_page *tage;
909         struct cfs_trace_page *tmp;
910         char *buf;
911         struct page *page;
912         int rc;
913
914         down_write(&cfs_tracefile_sem);
915
916         filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
917         if (IS_ERR(filp)) {
918                 rc = PTR_ERR(filp);
919                 filp = NULL;
920                 pr_err("LustreError: can't open %s for dump: rc = %d\n",
921                        filename, rc);
922                 goto out;
923         }
924
925         collect_pages(&pc);
926         if (list_empty(&pc.pc_pages)) {
927                 rc = 0;
928                 goto close;
929         }
930
931         /* ok, for now, just write the pages.  in the future we'll be building
932          * iobufs with the pages and calling generic_direct_IO */
933         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
934
935                 __LASSERT_TAGE_INVARIANT(tage);
936
937                 buf = kmap(tage->page);
938                 rc = cfs_kernel_write(filp, buf, tage->used, &filp->f_pos);
939                 kunmap(tage->page);
940                 if (rc != (int)tage->used) {
941                         pr_warn("Lustre: wanted to write %u but wrote %d\n",
942                                 tage->used, rc);
943                         put_pages_back(&pc);
944                         __LASSERT(list_empty(&pc.pc_pages));
945                         break;
946                 }
947                 list_del(&tage->linkage);
948                 cfs_tage_free(tage);
949         }
950         while ((page = list_first_entry_or_null(&daemon_pages,
951                                                 struct page, lru)) != NULL) {
952                 buf = page_address(page);
953                 rc = cfs_kernel_write(filp, buf, page->private, &filp->f_pos);
954                 if (rc != (int)page->private) {
955                         pr_warn("Lustre: wanted to write %u but wrote %d\n",
956                                 (int)page->private, rc);
957                         break;
958                 }
959                 list_del(&page->lru);
960                 daemon_pages_count -= 1;
961                 put_page(page);
962         }
963         rc = vfs_fsync_range(filp, 0, LLONG_MAX, 1);
964         if (rc)
965                 pr_err("LustreError: sync returns: rc = %d\n", rc);
966 close:
967         filp_close(filp, NULL);
968 out:
969         up_write(&cfs_tracefile_sem);
970         return rc;
971 }
972
973 void cfs_trace_flush_pages(void)
974 {
975         struct page_collection pc;
976         struct cfs_trace_page *tage;
977         struct page *page;
978
979         collect_pages(&pc);
980         while (!list_empty(&pc.pc_pages)) {
981                 tage = list_first_entry(&pc.pc_pages,
982                                         struct cfs_trace_page, linkage);
983                 __LASSERT_TAGE_INVARIANT(tage);
984
985                 list_del(&tage->linkage);
986                 cfs_tage_free(tage);
987         }
988
989         down_write(&cfs_tracefile_sem);
990         while ((page = list_first_entry_or_null(&daemon_pages,
991                                                 struct page, lru)) != NULL) {
992                 list_del(&page->lru);
993                 daemon_pages_count -= 1;
994                 put_page(page);
995         }
996         up_write(&cfs_tracefile_sem);
997 }
998
999 int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
1000                              const char *knl_buffer, char *append)
1001 {
1002         /* NB if 'append' != NULL, it's a single character to append to the
1003          * copied out string - usually "\n", for /proc entries and "" (i.e. a
1004          * terminating zero byte) for sysctl entries */
1005         int   nob = strlen(knl_buffer);
1006
1007         if (nob > usr_buffer_nob)
1008                 nob = usr_buffer_nob;
1009
1010         if (copy_to_user(usr_buffer, knl_buffer, nob))
1011                 return -EFAULT;
1012
1013         if (append != NULL && nob < usr_buffer_nob) {
1014                 if (copy_to_user(usr_buffer + nob, append, 1))
1015                         return -EFAULT;
1016
1017                 nob++;
1018         }
1019
1020         return nob;
1021 }
1022 EXPORT_SYMBOL(cfs_trace_copyout_string);
1023
1024 int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
1025 {
1026         char *str;
1027         char *path;
1028         int rc;
1029
1030         str = memdup_user_nul(usr_str, usr_str_nob);
1031         if (IS_ERR(str))
1032                 return PTR_ERR(str);
1033
1034         path = strim(str);
1035         if (path[0] != '/')
1036                 rc = -EINVAL;
1037         else
1038                 rc = cfs_tracefile_dump_all_pages(path);
1039         kfree(str);
1040
1041         return rc;
1042 }
1043
/* Handle a debug-daemon control command:
 *   "stop"     - stop the tracefile daemon thread and clear the file name
 *   "size=<n>" - set max tracefile size to <n> MB (10..20480; anything
 *                out of range falls back to CFS_TRACEFILE_SIZE)
 *   "/path"    - record the tracefile name and start the daemon thread
 * Returns 0 on success or a negative errno.
 */
int cfs_trace_daemon_command(char *str)
{
	int       rc = 0;

	down_write(&cfs_tracefile_sem);

	if (strcmp(str, "stop") == 0) {
		/* Drop the semaphore while stopping the thread:
		 * tracefiled() itself takes cfs_tracefile_sem, so
		 * holding it across kthread_stop() would deadlock. */
		up_write(&cfs_tracefile_sem);
		cfs_trace_stop_thread();
		down_write(&cfs_tracefile_sem);
		memset(cfs_tracefile, 0, sizeof(cfs_tracefile));

	} else if (strncmp(str, "size=", 5) == 0) {
		unsigned long tmp;

		rc = kstrtoul(str + 5, 10, &tmp);
		if (!rc) {
			/* tmp is in MB; silently clamp bad values to the
			 * compile-time default instead of failing */
			if (tmp < 10 || tmp > 20480)
				cfs_tracefile_size = CFS_TRACEFILE_SIZE;
			else
				cfs_tracefile_size = tmp << 20;
		}
	} else if (strlen(str) >= sizeof(cfs_tracefile)) {
		rc = -ENAMETOOLONG;
	} else if (str[0] != '/') {
		/* tracefile name must be an absolute path */
		rc = -EINVAL;
	} else {
		strcpy(cfs_tracefile, str);

		pr_info("Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
			cfs_tracefile, (long)(cfs_tracefile_size >> 10));

		cfs_trace_start_thread();
	}

	up_write(&cfs_tracefile_sem);
	return rc;
}
1082
1083 int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
1084 {
1085         char *str;
1086         int   rc;
1087
1088         str = memdup_user_nul(usr_str, usr_str_nob);
1089         if (IS_ERR(str))
1090                 return PTR_ERR(str);
1091
1092         rc = cfs_trace_daemon_command(strim(str));
1093         kfree(str);
1094
1095         return rc;
1096 }
1097
1098 int cfs_trace_set_debug_mb(int mb)
1099 {
1100         int i;
1101         int j;
1102         unsigned long pages;
1103         unsigned long total_mb = (cfs_totalram_pages() >> (20 - PAGE_SHIFT));
1104         unsigned long limit = max_t(unsigned long, 512, (total_mb * 4) / 5);
1105         struct cfs_trace_cpu_data *tcd;
1106
1107         if (mb < num_possible_cpus()) {
1108                 pr_warn("Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
1109                         mb, num_possible_cpus());
1110                 mb = num_possible_cpus();
1111         }
1112
1113         if (mb > limit) {
1114                 pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %lu MB.\n",
1115                         mb, limit);
1116                 mb = limit;
1117         }
1118
1119         mb /= num_possible_cpus();
1120         pages = mb << (20 - PAGE_SHIFT);
1121
1122         down_write(&cfs_tracefile_sem);
1123
1124         cfs_tcd_for_each(tcd, i, j)
1125                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
1126
1127         daemon_pages_max = pages;
1128         up_write(&cfs_tracefile_sem);
1129
1130         return mb;
1131 }
1132
1133 int cfs_trace_get_debug_mb(void)
1134 {
1135         int i;
1136         int j;
1137         struct cfs_trace_cpu_data *tcd;
1138         int total_pages = 0;
1139
1140         down_read(&cfs_tracefile_sem);
1141
1142         cfs_tcd_for_each(tcd, i, j)
1143                 total_pages += tcd->tcd_max_pages;
1144
1145         up_read(&cfs_tracefile_sem);
1146
1147         if (total_pages)
1148                 return (total_pages >> (20 - PAGE_SHIFT)) + 1;
1149         else
1150                 return 0;
1151 }
1152
/* Main loop of the "ktracefiled" debug daemon thread.
 *
 * Once per second (and one final pass after kthread_stop()) it collects
 * the filled trace pages from all CPUs, appends them to cfs_tracefile
 * when one is configured, and then parks the raw pages on daemon_pages
 * so a later dump/panic print can still see them.  daemon_pages is
 * trimmed to daemon_pages_max, oldest pages first.
 */
static int tracefiled(void *arg)
{
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	struct file *filp;
	char *buf;
	int last_loop = 0;
	int rc;

	while (!last_loop) {
		LIST_HEAD(for_daemon_pages);
		int for_daemon_pages_count = 0;
		schedule_timeout_interruptible(cfs_time_seconds(1));
		if (kthread_should_stop())
			/* do one final drain pass before exiting */
			last_loop = 1;
		collect_pages(&pc);
		if (list_empty(&pc.pc_pages))
			continue;

		/* open the tracefile, if one has been configured */
		filp = NULL;
		down_read(&cfs_tracefile_sem);
		if (cfs_tracefile[0] != 0) {
			filp = filp_open(cfs_tracefile,
					 O_CREAT | O_RDWR | O_LARGEFILE,
					 0600);
			if (IS_ERR(filp)) {
				rc = PTR_ERR(filp);
				filp = NULL;
				pr_warn("Lustre: couldn't open %s: rc = %d\n",
					cfs_tracefile, rc);
			}
		}
		up_read(&cfs_tracefile_sem);

		list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
			__LASSERT_TAGE_INVARIANT(tage);

			if (filp) {
				struct dentry *de = file_dentry(filp);
				/* static: the write offset persists across
				 * pages and wakeups; only one instance of
				 * this thread runs (tctl_task is claimed
				 * with cmpxchg in cfs_trace_start_thread) */
				static loff_t f_pos;

				/* wrap around once the file reaches
				 * cfs_tracefile_size; never seek past EOF */
				if (f_pos >= (off_t)cfs_tracefile_size)
					f_pos = 0;
				else if (f_pos > i_size_read(de->d_inode))
					f_pos = i_size_read(de->d_inode);

				buf = kmap(tage->page);
				rc = cfs_kernel_write(filp, buf, tage->used,
						      &f_pos);
				kunmap(tage->page);
				if (rc != (int)tage->used) {
					pr_warn("Lustre: wanted to write %u but wrote %d\n",
						tage->used, rc);
					put_pages_back(&pc);
					__LASSERT(list_empty(&pc.pc_pages));
					break;
				}
			}
			/* hand the raw page over to daemon_pages: record
			 * the used byte count in page->private and free
			 * only the tage wrapper, not the page itself */
			list_del_init(&tage->linkage);
			list_add_tail(&tage->page->lru, &for_daemon_pages);
			for_daemon_pages_count += 1;

			tage->page->private = (int)tage->used;
			kfree(tage);
			atomic_dec(&cfs_tage_allocated);
		}

		if (filp)
			filp_close(filp, NULL);

		down_write(&cfs_tracefile_sem);
		list_splice_tail(&for_daemon_pages, &daemon_pages);
		daemon_pages_count += for_daemon_pages_count;
		/* drop the oldest daemon pages above the cap */
		while (daemon_pages_count > daemon_pages_max) {
			struct page *p = list_first_entry(&daemon_pages,
							  struct page, lru);
			list_del(&p->lru);
			put_page(p);
			daemon_pages_count -= 1;
		}
		up_write(&cfs_tracefile_sem);

		/* pages remain on pc only if a write failed and
		 * put_pages_back() could not return them (it is a no-op
		 * while a panic is in progress) */
		if (!list_empty(&pc.pc_pages)) {
			int i;

			pr_alert("Lustre: trace pages aren't empty\n");
			pr_err("Lustre: total cpus(%d): ", num_possible_cpus());
			for (i = 0; i < num_possible_cpus(); i++)
				if (cpu_online(i))
					pr_cont("%d(on) ", i);
				else
					pr_cont("%d(off) ", i);
			pr_cont("\n");

			i = 0;
			list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
						 linkage)
				pr_err("Lustre: page %d belongs to cpu %d\n",
				       ++i, tage->cpu);
			pr_err("Lustre: There are %d pages unwritten\n", i);
		}
		__LASSERT(list_empty(&pc.pc_pages));
	}

	return 0;
}
1260
1261 int cfs_trace_start_thread(void)
1262 {
1263         struct task_struct *tsk;
1264         int rc = 0;
1265
1266         if (tctl_task)
1267                 return 0;
1268
1269         tsk = kthread_create(tracefiled, NULL, "ktracefiled");
1270         if (IS_ERR(tsk))
1271                 rc = -ECHILD;
1272         else if (cmpxchg(&tctl_task, NULL, tsk) != NULL)
1273                 /* already running */
1274                 kthread_stop(tsk);
1275         else
1276                 wake_up_process(tsk);
1277
1278         return rc;
1279 }
1280
1281 void cfs_trace_stop_thread(void)
1282 {
1283         struct task_struct *tsk;
1284
1285         tsk = xchg(&tctl_task, NULL);
1286         if (tsk) {
1287                 pr_info("Lustre: shutting down debug daemon thread...\n");
1288                 kthread_stop(tsk);
1289         }
1290 }
1291
/* percents to share the total debug memory for each type
 * (indexed by enum cfs_trace_buf_type; expected to sum to 100) */
static unsigned int pages_factor[CFS_TCD_TYPE_CNT] = {
	80, /* 80% pages for CFS_TCD_TYPE_PROC */
	10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
	10  /* 10% pages for CFS_TCD_TYPE_IRQ */
};
1298
/**
 * Allocate and initialise the per-CPU trace buffer descriptors.
 *
 * \param[in] max_pages  trace page budget per CPU; split between the
 *                       three buffer types according to pages_factor
 *
 * \retval 0 on success
 * \retval -ENOMEM if allocating any per-type array fails (arrays
 *         already allocated are freed again)
 */
int cfs_tracefile_init(int max_pages)
{
	struct cfs_trace_cpu_data *tcd;
	int i;
	int j;

	/* initialize trace_data */
	memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
	/* one array of per-CPU data per buffer type */
	for (i = 0; i < CFS_TCD_TYPE_CNT; i++) {
		cfs_trace_data[i] =
			kmalloc_array(num_possible_cpus(),
				      sizeof(union cfs_trace_data_union),
				      GFP_KERNEL);
		if (!cfs_trace_data[i])
			goto out_trace_data;
	}

	/* arch related info initialized */
	cfs_tcd_for_each(tcd, i, j) {
		int factor = pages_factor[i];

		/* Note that we have three separate spin_lock_init()
		 * calls so that the locks get three separate classes
		 * and lockdep never thinks they are related.  As they
		 * are used in different interrupt contexts, lockdep
		 * would otherwise think that the usage would conflict.
		 */
		switch(i) {
		case CFS_TCD_TYPE_PROC:
			spin_lock_init(&tcd->tcd_lock);
			break;
		case CFS_TCD_TYPE_SOFTIRQ:
			spin_lock_init(&tcd->tcd_lock);
			break;
		case CFS_TCD_TYPE_IRQ:
			spin_lock_init(&tcd->tcd_lock);
			break;
		}
		tcd->tcd_pages_factor = factor;
		tcd->tcd_type = i;
		tcd->tcd_cpu = j;

		INIT_LIST_HEAD(&tcd->tcd_pages);
		INIT_LIST_HEAD(&tcd->tcd_stock_pages);
		tcd->tcd_cur_pages = 0;
		tcd->tcd_cur_stock_pages = 0;
		tcd->tcd_max_pages = (max_pages * factor) / 100;
		LASSERT(tcd->tcd_max_pages > 0);
		tcd->tcd_shutting_down = 0;
	}
	daemon_pages_max = max_pages;

	return 0;

out_trace_data:
	/* free what was allocated; the loop stops at the first NULL
	 * slot, i.e. the allocation that just failed */
	for (i = 0; cfs_trace_data[i]; i++) {
		kfree(cfs_trace_data[i]);
		cfs_trace_data[i] = NULL;
	}
	pr_err("lnet: Not enough memory\n");
	return -ENOMEM;
}
1361
1362 static void trace_cleanup_on_all_cpus(void)
1363 {
1364         struct cfs_trace_cpu_data *tcd;
1365         struct cfs_trace_page *tage;
1366         int i, cpu;
1367
1368         for_each_possible_cpu(cpu) {
1369                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
1370                         if (!tcd->tcd_pages_factor)
1371                                 /* Not initialised */
1372                                 continue;
1373                         tcd->tcd_shutting_down = 1;
1374
1375                         while (!list_empty(&tcd->tcd_pages)) {
1376                                 tage = list_first_entry(&tcd->tcd_pages,
1377                                                         struct cfs_trace_page,
1378                                                         linkage);
1379                                 __LASSERT_TAGE_INVARIANT(tage);
1380
1381                                 list_del(&tage->linkage);
1382                                 cfs_tage_free(tage);
1383                         }
1384                         tcd->tcd_cur_pages = 0;
1385                 }
1386         }
1387 }
1388
1389 static void cfs_trace_cleanup(void)
1390 {
1391         struct page_collection pc;
1392         int i;
1393
1394         INIT_LIST_HEAD(&pc.pc_pages);
1395
1396         trace_cleanup_on_all_cpus();
1397
1398         for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++) {
1399                 kfree(cfs_trace_data[i]);
1400                 cfs_trace_data[i] = NULL;
1401         }
1402 }
1403
/* Tear down the tracing subsystem.  Order matters: stop the daemon
 * thread first so nothing is writing pages, then flush (free) any
 * remaining buffered pages, then release the per-CPU trace data. */
void cfs_tracefile_exit(void)
{
	cfs_trace_stop_thread();
	cfs_trace_flush_pages();
	cfs_trace_cleanup();
}