Whamcloud - gitweb
06b925a588705963e5238617228e06305ff3b680
[fs/lustre-release.git] / libcfs / libcfs / tracefile.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * libcfs/libcfs/tracefile.c
33  *
34  * Author: Zach Brown <zab@clusterfs.com>
35  * Author: Phil Schwan <phil@clusterfs.com>
36  */
37
38
39 #define DEBUG_SUBSYSTEM S_LNET
40 #define LUSTRE_TRACEFILE_PRIVATE
41 #include "tracefile.h"
42
43 #include <linux/ctype.h>
44 #include <linux/fs.h>
45 #include <linux/kthread.h>
46 #include <linux/pagemap.h>
47 #include <linux/poll.h>
48 #include <linux/tty.h>
49 #include <linux/uaccess.h>
50 #include <libcfs/linux/linux-fs.h>
51 #include <libcfs/libcfs.h>
52
/* XXX move things up to the top, comment */
/* Per-context (process/softirq/irq), per-CPU trace buffers.  Allocated in
 * cfs_tracefile_init_arch(); indexed first by cfs_trace_buf_idx_get(),
 * then by CPU number. */
union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;

/* per-CPU, per-context scratch buffers for formatting console-only output */
char *cfs_trace_console_buffers[NR_CPUS][CFS_TCD_TYPE_MAX];
/* name of the trace dump file (presumably consumed by the tracefiled
 * daemon -- the writer is outside this chunk; TODO confirm) */
char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
/* serializes starting/stopping of the tracefiled thread */
static DEFINE_MUTEX(cfs_trace_thread_mutex);
static int thread_running = 0;  /* non-zero while tracefiled is alive */

/* number of trace pages currently allocated (see cfs_tage_alloc/free) */
static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
static DECLARE_RWSEM(cfs_tracefile_sem);

static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
					struct cfs_trace_cpu_data *tcd);
68
69 /* trace file lock routines */
70 /* The walking argument indicates the locking comes from all tcd types
71  * iterator and we must lock it and dissable local irqs to avoid deadlocks
72  * with other interrupt locks that might be happening. See LU-1311
73  * for details.
74  */
75 int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
76         __acquires(&tcd->tcd_lock)
77 {
78         __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
79         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
80                 spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
81         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
82                 spin_lock_bh(&tcd->tcd_lock);
83         else if (unlikely(walking))
84                 spin_lock_irq(&tcd->tcd_lock);
85         else
86                 spin_lock(&tcd->tcd_lock);
87         return 1;
88 }
89
90 void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
91         __releases(&tcd->tcd_lock)
92 {
93         __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
94         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
95                 spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
96         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
97                 spin_unlock_bh(&tcd->tcd_lock);
98         else if (unlikely(walking))
99                 spin_unlock_irq(&tcd->tcd_lock);
100         else
101                 spin_unlock(&tcd->tcd_lock);
102 }
103
/* Iterate over every tcd type for CPU @cpu, holding each tcd's lock
 * (walking=1, so irqs are disabled -- see cfs_trace_lock_tcd) for the
 * duration of the loop body; the lock is dropped in the loop increment.
 * Iteration stops at the first type whose trace data was never allocated
 * (cfs_trace_data[i] == NULL). */
#define cfs_tcd_for_each_type_lock(tcd, i, cpu)                         \
	for (i = 0; cfs_trace_data[i] &&                                \
	     (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&                  \
	     cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
108
109 enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
110 {
111         if (in_irq())
112                 return CFS_TCD_TYPE_IRQ;
113         if (in_softirq())
114                 return CFS_TCD_TYPE_SOFTIRQ;
115         return CFS_TCD_TYPE_PROC;
116 }
117
118 static inline struct cfs_trace_cpu_data *
119 cfs_trace_get_tcd(void)
120 {
121         struct cfs_trace_cpu_data *tcd =
122                 &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
123
124         cfs_trace_lock_tcd(tcd, 0);
125
126         return tcd;
127 }
128
/* Release a tcd obtained from cfs_trace_get_tcd(): drop its lock and
 * re-enable preemption (pairs with get_cpu() there). */
static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
{
	cfs_trace_unlock_tcd(tcd, 0);

	put_cpu();
}
135
/* percents to share the total debug memory for each type;
 * NOTE: entry order must match the cfs_trace_buf_type enum values */
static unsigned int pages_factor[CFS_TCD_TYPE_MAX] = {
	80,	/* 80% pages for CFS_TCD_TYPE_PROC */
	10,	/* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
	10	/* 10% pages for CFS_TCD_TYPE_IRQ */
};
142
143 int cfs_tracefile_init_arch(void)
144 {
145         struct cfs_trace_cpu_data *tcd;
146         int i;
147         int j;
148
149         /* initialize trace_data */
150         memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
151         for (i = 0; i < CFS_TCD_TYPE_MAX; i++) {
152                 cfs_trace_data[i] =
153                         kmalloc_array(num_possible_cpus(),
154                                       sizeof(union cfs_trace_data_union),
155                                       GFP_KERNEL);
156                 if (!cfs_trace_data[i])
157                         goto out_trace_data;
158         }
159
160         /* arch related info initialized */
161         cfs_tcd_for_each(tcd, i, j) {
162                 spin_lock_init(&tcd->tcd_lock);
163                 tcd->tcd_pages_factor = pages_factor[i];
164                 tcd->tcd_type = i;
165                 tcd->tcd_cpu = j;
166         }
167
168         for (i = 0; i < num_possible_cpus(); i++)
169                 for (j = 0; j < 3; j++) {
170                         cfs_trace_console_buffers[i][j] =
171                                 kmalloc(CFS_TRACE_CONSOLE_BUFFER_SIZE,
172                                         GFP_KERNEL);
173
174                         if (!cfs_trace_console_buffers[i][j])
175                                 goto out_buffers;
176                 }
177
178         return 0;
179
180 out_buffers:
181         for (i = 0; i < num_possible_cpus(); i++)
182                 for (j = 0; j < 3; j++) {
183                         kfree(cfs_trace_console_buffers[i][j]);
184                         cfs_trace_console_buffers[i][j] = NULL;
185                 }
186 out_trace_data:
187         for (i = 0; cfs_trace_data[i]; i++) {
188                 kfree(cfs_trace_data[i]);
189                 cfs_trace_data[i] = NULL;
190         }
191         pr_err("lnet: Not enough memory\n");
192         return -ENOMEM;
193 }
194
195 void cfs_tracefile_fini_arch(void)
196 {
197         int i;
198         int j;
199
200         for (i = 0; i < num_possible_cpus(); i++)
201                 for (j = 0; j < 3; j++) {
202                         kfree(cfs_trace_console_buffers[i][j]);
203                         cfs_trace_console_buffers[i][j] = NULL;
204                 }
205
206         for (i = 0; cfs_trace_data[i]; i++) {
207                 kfree(cfs_trace_data[i]);
208                 cfs_trace_data[i] = NULL;
209         }
210 }
211
212 static inline struct cfs_trace_page *
213 cfs_tage_from_list(struct list_head *list)
214 {
215         return list_entry(list, struct cfs_trace_page, linkage);
216 }
217
218 static struct cfs_trace_page *cfs_tage_alloc(gfp_t gfp)
219 {
220         struct page            *page;
221         struct cfs_trace_page *tage;
222
223         /* My caller is trying to free memory */
224         if (!in_interrupt() && (current->flags & PF_MEMALLOC))
225                 return NULL;
226
227         /*
228          * Don't spam console with allocation failures: they will be reported
229          * by upper layer anyway.
230          */
231         gfp |= __GFP_NOWARN;
232         page = alloc_page(gfp);
233         if (page == NULL)
234                 return NULL;
235
236         tage = kmalloc(sizeof(*tage), gfp);
237         if (tage == NULL) {
238                 __free_page(page);
239                 return NULL;
240         }
241
242         tage->page = page;
243         atomic_inc(&cfs_tage_allocated);
244         return tage;
245 }
246
/* Free a trace page and its bookkeeping struct, keeping the global
 * allocation counter in sync with cfs_tage_alloc(). */
static void cfs_tage_free(struct cfs_trace_page *tage)
{
	__LASSERT(tage != NULL);
	__LASSERT(tage->page != NULL);

	__free_page(tage->page);
	kfree(tage);
	atomic_dec(&cfs_tage_allocated);
}
256
/* Move @tage from whatever list it is currently on to the tail of @queue. */
static void cfs_tage_to_tail(struct cfs_trace_page *tage,
			     struct list_head *queue)
{
	__LASSERT(tage != NULL);
	__LASSERT(queue != NULL);

	list_move_tail(&tage->linkage, queue);
}
265
266 int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
267                            struct list_head *stock)
268 {
269         int i;
270
271         /*
272          * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
273          * from here: this will lead to infinite recursion.
274          */
275
276         for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
277                 struct cfs_trace_page *tage;
278
279                 tage = cfs_tage_alloc(gfp);
280                 if (tage == NULL)
281                         break;
282                 list_add_tail(&tage->linkage, stock);
283         }
284         return i;
285 }
286
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
	struct cfs_trace_page *tage;

	/* fast path: the newest page (list tail) still has room */
	if (tcd->tcd_cur_pages > 0) {
		__LASSERT(!list_empty(&tcd->tcd_pages));
		tage = cfs_tage_from_list(tcd->tcd_pages.prev);
		if (tage->used + len <= PAGE_SIZE)
			return tage;
	}

	if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
		/* prefer a pre-allocated stock page over a GFP_ATOMIC
		 * allocation */
		if (tcd->tcd_cur_stock_pages > 0) {
			tage = cfs_tage_from_list(tcd->tcd_stock_pages.prev);
			--tcd->tcd_cur_stock_pages;
			list_del_init(&tage->linkage);
		} else {
			tage = cfs_tage_alloc(GFP_ATOMIC);
			if (unlikely(tage == NULL)) {
				/* stay quiet when the current task is itself
				 * reclaiming memory (expected to fail) */
				if ((!(current->flags & PF_MEMALLOC) ||
				     in_interrupt()) && printk_ratelimit())
					pr_warn("Lustre: cannot allocate a tage (%ld)\n",
						tcd->tcd_cur_pages);
				return NULL;
			}
		}

		tage->used = 0;
		tage->cpu = smp_processor_id();
		tage->type = tcd->tcd_type;
		list_add_tail(&tage->linkage, &tcd->tcd_pages);
		tcd->tcd_cur_pages++;

		if (tcd->tcd_cur_pages > 8 && thread_running) {
			struct tracefiled_ctl *tctl = &trace_tctl;
			/*
			 * wake up tracefiled to process some pages.
			 */
			wake_up(&tctl->tctl_waitq);
		}
		return tage;
	}
	/* buffer is full: caller decides whether to shrink/recycle */
	return NULL;
}
333
/* Buffer-overflow handling: detach roughly the oldest 10% of this tcd's
 * pages and hand them to the per-CPU daemon list (which bounds its own
 * size), making room for new messages.  Caller holds the tcd lock. */
static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
{
	int pgcount = tcd->tcd_cur_pages / 10;
	struct page_collection pc;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	/* NOTE(review): message reports pgcount + 1 while the loop below
	 * moves exactly pgcount pages -- looks intentional (count includes
	 * the page about to be recycled by the caller), but verify */
	if (printk_ratelimit())
		pr_warn("Lustre: debug daemon buffer overflowed; discarding 10%% of pages (%d of %ld)\n",
			pgcount + 1, tcd->tcd_cur_pages);

	INIT_LIST_HEAD(&pc.pc_pages);

	/* oldest pages are at the list head */
	list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
		if (pgcount-- == 0)
			break;

		list_move_tail(&tage->linkage, &pc.pc_pages);
		tcd->tcd_cur_pages--;
	}
	put_pages_on_tcd_daemon_list(&pc, tcd);
}
361
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
						 unsigned long len)
{
	struct cfs_trace_page *tage;

	/*
	 * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
	 * from here: this will lead to infinite recursion.
	 */

	/* a single debug record can never span pages */
	if (len > PAGE_SIZE) {
		pr_err("LustreError: cowardly refusing to write %lu bytes in a page\n",
		       len);
		return NULL;
	}

	tage = cfs_trace_get_tage_try(tcd, len);
	if (tage != NULL)
		return tage;
	/* no room left: let the daemon drain ~10% of pages, then recycle
	 * the oldest page in place, discarding its contents */
	if (thread_running)
		cfs_tcd_shrink(tcd);
	if (tcd->tcd_cur_pages > 0) {
		tage = cfs_tage_from_list(tcd->tcd_pages.next);
		tage->used = 0;
		cfs_tage_to_tail(tage, &tcd->tcd_pages);
	}
	return tage;
}
391
392 static void cfs_set_ptldebug_header(struct ptldebug_header *header,
393                                     struct libcfs_debug_msg_data *msgdata,
394                                     unsigned long stack)
395 {
396         struct timespec64 ts;
397
398         ktime_get_real_ts64(&ts);
399
400         header->ph_subsys = msgdata->msg_subsys;
401         header->ph_mask = msgdata->msg_mask;
402         header->ph_cpu_id = smp_processor_id();
403         header->ph_type = cfs_trace_buf_idx_get();
404         /* y2038 safe since all user space treats this as unsigned, but
405          * will overflow in 2106
406          */
407         header->ph_sec = (u32)ts.tv_sec;
408         header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
409         header->ph_stack = stack;
410         header->ph_pid = current->pid;
411         header->ph_line_num = msgdata->msg_line;
412         header->ph_extern_pid = 0;
413 }
414
415 /**
416  * tty_write_msg - write a message to a certain tty, not just the console.
417  * @tty: the destination tty_struct
418  * @msg: the message to write
419  *
420  * tty_write_message is not exported, so write a same function for it
421  *
422  */
423 static void tty_write_msg(struct tty_struct *tty, const char *msg)
424 {
425         mutex_lock(&tty->atomic_write_lock);
426         tty_lock(tty);
427         if (tty->ops->write && tty->count > 0)
428                 tty->ops->write(tty, msg, strlen(msg));
429         tty_unlock(tty);
430         mutex_unlock(&tty->atomic_write_lock);
431         wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
432 }
433
434 static void cfs_tty_write_message(const char *prefix, int mask, const char *msg)
435 {
436         struct tty_struct *tty;
437
438         tty = get_current_tty();
439         if (!tty)
440                 return;
441
442         tty_write_msg(tty, prefix);
443         if ((mask & D_EMERG) || (mask & D_ERROR))
444                 tty_write_msg(tty, "Error");
445         tty_write_msg(tty, ": ");
446         tty_write_msg(tty, msg);
447         tty_kref_put(tty);
448 }
449
/* Emit one debug record to the kernel console at a printk level chosen
 * from @mask, and optionally echo it to the controlling tty (D_TTY).
 * D_CONSOLE records are printed bare; all others carry the
 * pid:extern_pid:(file:line:fn()) prefix. */
static void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
				 const char *buf, int len, const char *file,
				 const char *fn)
{
	char *prefix = "Lustre";

	/* LNet/LND records are labelled as LNet rather than Lustre */
	if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
		prefix = "LNet";

	if (mask & D_CONSOLE) {
		if (mask & D_EMERG)
			pr_emerg("%sError: %.*s", prefix, len, buf);
		else if (mask & D_ERROR)
			pr_err("%sError: %.*s", prefix, len, buf);
		else if (mask & D_WARNING)
			pr_warn("%s: %.*s", prefix, len, buf);
		else if (mask & libcfs_printk)
			pr_info("%s: %.*s", prefix, len, buf);
	} else {
		if (mask & D_EMERG)
			pr_emerg("%sError: %d:%d:(%s:%d:%s()) %.*s", prefix,
				 hdr->ph_pid, hdr->ph_extern_pid, file,
				 hdr->ph_line_num, fn, len, buf);
		else if (mask & D_ERROR)
			pr_err("%sError: %d:%d:(%s:%d:%s()) %.*s", prefix,
			       hdr->ph_pid, hdr->ph_extern_pid, file,
			       hdr->ph_line_num, fn, len, buf);
		else if (mask & D_WARNING)
			pr_warn("%s: %d:%d:(%s:%d:%s()) %.*s", prefix,
				hdr->ph_pid, hdr->ph_extern_pid, file,
				hdr->ph_line_num, fn, len, buf);
		else if (mask & (D_CONSOLE | libcfs_printk))
			pr_info("%s: %.*s", prefix, len, buf);
	}

	if (mask & D_TTY)
		cfs_tty_write_message(prefix, mask, buf);
}
488
/* Core entry point behind CDEBUG() and friends: format one debug message,
 * append it (header + file + fn + text) to the current CPU's trace buffer,
 * and optionally echo it to the console, rate-limited via @msgdata->msg_cdls.
 * Returns 1 when the console message was suppressed, 0 otherwise. */
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
		     const char *format, ...)
{
	struct cfs_trace_cpu_data *tcd = NULL;
	struct ptldebug_header header = {0};
	struct cfs_trace_page *tage;
	/* string_buf is used only if tcd != NULL, and is always set then */
	char *string_buf = NULL;
	char *debug_buf;
	int known_size;
	int needed = 85; /* seeded with average message length */
	int max_nob;
	va_list ap;
	int retry;
	int mask = msgdata->msg_mask;
	char *file = (char *)msgdata->msg_file;
	struct cfs_debug_limit_state *cdls = msgdata->msg_cdls;

	/* record only the basename of the source file */
	if (strchr(file, '/'))
		file = strrchr(file, '/') + 1;

	tcd = cfs_trace_get_tcd();

	/* cfs_trace_get_tcd() grabs a lock, which disables preemption and
	 * pins us to a particular CPU.  This avoids an smp_processor_id()
	 * warning on Linux when debugging is enabled.
	 */
	cfs_set_ptldebug_header(&header, msgdata, CDEBUG_STACK());

	if (!tcd)                /* arch may not log in IRQ context */
		goto console;

	if (tcd->tcd_cur_pages == 0)
		header.ph_flags |= PH_FLAG_FIRST_RECORD;

	if (tcd->tcd_shutting_down) {
		cfs_trace_put_tcd(tcd);
		tcd = NULL;
		goto console;
	}

	/* fixed-size part of the record: file name, optional function name,
	 * optional binary header */
	known_size = strlen(file) + 1;
	if (msgdata->msg_fn)
		known_size += strlen(msgdata->msg_fn) + 1;

	if (libcfs_debug_binary)
		known_size += sizeof(header);

	/*
	 * May perform an additional pass to update 'needed' and increase
	 * tage buffer size to match vsnprintf reported size required
	 * On the second pass (retry=1) use vscnprintf [which returns
	 * number of bytes written not including the terminating nul]
	 * to clarify `needed` is used as number of bytes written
	 * for the remainder of this function
	 */
	for (retry = 0; retry < 2; retry++) {
		tage = cfs_trace_get_tage(tcd, needed + known_size + 1);
		if (!tage) {
			if (needed + known_size > PAGE_SIZE)
				mask |= D_ERROR;

			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		/* message text goes after the known-size part */
		string_buf = (char *)page_address(tage->page) +
			     tage->used + known_size;

		max_nob = PAGE_SIZE - tage->used - known_size;
		if (max_nob <= 0) {
			pr_emerg("LustreError: negative max_nob: %d\n",
				 max_nob);
			mask |= D_ERROR;
			cfs_trace_put_tcd(tcd);
			tcd = NULL;
			goto console;
		}

		va_start(ap, format);
		if (retry)
			needed = vscnprintf(string_buf, max_nob, format, ap);
		else
			needed = vsnprintf(string_buf, max_nob, format, ap);
		va_end(ap);

		if (needed < max_nob) /* well. printing ok.. */
			break;
	}

	/* `needed` is actual bytes written to string_buf */
	/* NOTE(review): needed == 0 (empty format) would read one byte
	 * before string_buf here -- presumably no caller passes an empty
	 * format; verify */
	if (*(string_buf + needed - 1) != '\n') {
		pr_info("Lustre: format at %s:%d:%s doesn't end in newline\n",
			file, msgdata->msg_line, msgdata->msg_fn);
	} else if (mask & D_TTY) {
		/* TTY needs '\r\n' to move carriage to leftmost position */
		if (needed < 2 || *(string_buf + needed - 2) != '\r')
			pr_info("Lustre: format at %s:%d:%s doesn't end in '\\r\\n'\n",
				file, msgdata->msg_line, msgdata->msg_fn);
	}

	header.ph_len = known_size + needed;
	debug_buf = (char *)page_address(tage->page) + tage->used;

	/* record layout: [binary header][file\0][fn\0][text] */
	if (libcfs_debug_binary) {
		memcpy(debug_buf, &header, sizeof(header));
		tage->used += sizeof(header);
		debug_buf += sizeof(header);
	}

	strlcpy(debug_buf, file, PAGE_SIZE - tage->used);
	tage->used += strlen(file) + 1;
	debug_buf += strlen(file) + 1;

	if (msgdata->msg_fn) {
		strlcpy(debug_buf, msgdata->msg_fn, PAGE_SIZE - tage->used);
		tage->used += strlen(msgdata->msg_fn) + 1;
		debug_buf += strlen(msgdata->msg_fn) + 1;
	}

	/* debug_buf must have landed exactly where the text was formatted */
	__LASSERT(debug_buf == string_buf);

	tage->used += needed;
	__LASSERT(tage->used <= PAGE_SIZE);

console:
	if ((mask & libcfs_printk) == 0) {
		/* no console output requested */
		if (tcd != NULL)
			cfs_trace_put_tcd(tcd);
		return 1;
	}

	if (cdls != NULL) {
		if (libcfs_console_ratelimit &&
		    cdls->cdls_next != 0 &&     /* not first time ever */
		    time_before(jiffies, cdls->cdls_next)) {
			/* skipping a console message */
			cdls->cdls_count++;
			if (tcd != NULL)
				cfs_trace_put_tcd(tcd);
			return 1;
		}

		if (time_after(jiffies, cdls->cdls_next +
					libcfs_console_max_delay +
					cfs_time_seconds(10))) {
			/* last timeout was a long time ago */
			cdls->cdls_delay /= libcfs_console_backoff * 4;
		} else {
			cdls->cdls_delay *= libcfs_console_backoff;
		}

		/* clamp the adaptive delay to its configured bounds */
		if (cdls->cdls_delay < libcfs_console_min_delay)
			cdls->cdls_delay = libcfs_console_min_delay;
		else if (cdls->cdls_delay > libcfs_console_max_delay)
			cdls->cdls_delay = libcfs_console_max_delay;

		/* ensure cdls_next is never zero after it's been seen */
		cdls->cdls_next = (jiffies + cdls->cdls_delay) | 1;
	}

	if (tcd) {
		/* message already formatted into the trace page */
		cfs_print_to_console(&header, mask, string_buf, needed, file,
				     msgdata->msg_fn);
		cfs_trace_put_tcd(tcd);
	} else {
		/* no trace page: reformat into the per-CPU console buffer
		 * (cfs_trace_get_console_buffer() pins the CPU; released by
		 * put_cpu() below) */
		string_buf = cfs_trace_get_console_buffer();

		va_start(ap, format);
		needed = vscnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
				    format, ap);
		va_end(ap);

		cfs_print_to_console(&header, mask,
				     string_buf, needed, file, msgdata->msg_fn);

		put_cpu();
	}

	if (cdls != NULL && cdls->cdls_count != 0) {
		string_buf = cfs_trace_get_console_buffer();

		needed = scnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
				   "Skipped %d previous similar message%s\n",
				   cdls->cdls_count,
				   (cdls->cdls_count > 1) ? "s" : "");

		/* Do not allow print this to TTY */
		cfs_print_to_console(&header, mask & ~D_TTY, string_buf,
				     needed, file, msgdata->msg_fn);

		put_cpu();
		cdls->cdls_count = 0;
	}

	return 0;
}
EXPORT_SYMBOL(libcfs_debug_msg);
689
/* Called on LASSERT failure: mark the system as panicking, print the
 * assertion text at emergency level and panic.  Never returns. */
void
cfs_trace_assertion_failed(const char *str,
			   struct libcfs_debug_msg_data *msgdata)
{
	struct ptldebug_header hdr;

	libcfs_panic_in_progress = 1;
	libcfs_catastrophe = 1;
	/* make the flags visible to other CPUs before printing/panicking */
	smp_mb();

	cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());

	cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
			     msgdata->msg_file, msgdata->msg_fn);

	panic("Lustre debug assertion failure\n");

	/* not reached */
}
709
710 static void
711 panic_collect_pages(struct page_collection *pc)
712 {
713         /* Do the collect_pages job on a single CPU: assumes that all other
714          * CPUs have been stopped during a panic.  If this isn't true for some
715          * arch, this will have to be implemented separately in each arch.  */
716         int                        i;
717         int                        j;
718         struct cfs_trace_cpu_data *tcd;
719
720         INIT_LIST_HEAD(&pc->pc_pages);
721
722         cfs_tcd_for_each(tcd, i, j) {
723                 list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
724                 tcd->tcd_cur_pages = 0;
725
726                 if (pc->pc_want_daemon_pages) {
727                         list_splice_init(&tcd->tcd_daemon_pages,
728                                                 &pc->pc_pages);
729                         tcd->tcd_cur_daemon_pages = 0;
730                 }
731         }
732 }
733
/* Move all trace pages (and daemon pages, when requested) from every CPU's
 * tcds onto @pc->pc_pages; each tcd is locked while it is being emptied. */
static void collect_pages_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
			tcd->tcd_cur_pages = 0;
			if (pc->pc_want_daemon_pages) {
				list_splice_init(&tcd->tcd_daemon_pages,
							&pc->pc_pages);
				tcd->tcd_cur_daemon_pages = 0;
			}
		}
	}
}
751
752 static void collect_pages(struct page_collection *pc)
753 {
754         INIT_LIST_HEAD(&pc->pc_pages);
755
756         if (libcfs_panic_in_progress)
757                 panic_collect_pages(pc);
758         else
759                 collect_pages_on_all_cpus(pc);
760 }
761
/* Redistribute collected pages back to the tcds they came from (matched by
 * the cpu/type recorded on each tage).  Pages are inserted before whatever
 * was at the head of tcd_pages when we arrived, so returned (older) pages
 * precede anything logged since collection. */
static void put_pages_back_on_all_cpus(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	struct list_head *cur_head;
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu) {
			/* remember the insertion point before adding */
			cur_head = tcd->tcd_pages.next;

			list_for_each_entry_safe(tage, tmp, &pc->pc_pages,
						 linkage) {

				__LASSERT_TAGE_INVARIANT(tage);

				if (tage->cpu != cpu || tage->type != i)
					continue;

				cfs_tage_to_tail(tage, cur_head);
				tcd->tcd_cur_pages++;
			}
		}
	}
}
788
789 static void put_pages_back(struct page_collection *pc)
790 {
791         if (!libcfs_panic_in_progress)
792                 put_pages_back_on_all_cpus(pc);
793 }
794
/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
 * we have a good amount of data at all times for dumping during an LBUG, even
 * if we have been steadily writing (and otherwise discarding) pages via the
 * debug daemon. */
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
					 struct cfs_trace_cpu_data *tcd)
{
	struct cfs_trace_page *tage;
	struct cfs_trace_page *tmp;

	list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
		__LASSERT_TAGE_INVARIANT(tage);

		/* only take pages that belong to this tcd */
		if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
			continue;

		cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
		tcd->tcd_cur_daemon_pages++;

		/* ring behaviour: over the limit, evict the oldest page */
		if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
			struct cfs_trace_page *victim;

			__LASSERT(!list_empty(&tcd->tcd_daemon_pages));
			victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);

			__LASSERT_TAGE_INVARIANT(victim);

			list_del(&victim->linkage);
			cfs_tage_free(victim);
			tcd->tcd_cur_daemon_pages--;
		}
	}
}
828
/* Distribute a collection of pages across every CPU's/type's daemon
 * ringbuffer (each page lands on the tcd matching its recorded cpu/type). */
static void put_pages_on_daemon_list(struct page_collection *pc)
{
	struct cfs_trace_cpu_data *tcd;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		cfs_tcd_for_each_type_lock(tcd, i, cpu)
			put_pages_on_tcd_daemon_list(pc, tcd);
	}
}
839
/* Print every buffered trace message (daemon pages included) straight to
 * the console at D_EMERG level, freeing each page afterwards.  Each page
 * holds a sequence of records laid out as:
 *   struct ptldebug_header | file name '\0' | function name '\0' | message
 * where hdr->ph_len is the total record length. */
void cfs_trace_debug_print(void)
{
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;

        pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
                struct page *page;

                __LASSERT_TAGE_INVARIANT(tage);

                page = tage->page;
                p = page_address(page);
                /* walk the records packed into this page; tage->used is
                 * the number of valid bytes */
                while (p < ((char *)page_address(page) + tage->used)) {
                        struct ptldebug_header *hdr;
                        int len;
                        hdr = (void *)p;
                        p += sizeof(*hdr);
                        file = p;
                        p += strlen(file) + 1;
                        fn = p;
                        p += strlen(fn) + 1;
                        /* message length = record length minus header and
                         * the two NUL-terminated strings just skipped */
                        len = hdr->ph_len - (int)(p - (char *)hdr);

                        cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);

                        p += len;
                }

                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
}
876
877 int cfs_tracefile_dump_all_pages(char *filename)
878 {
879         struct page_collection  pc;
880         struct file             *filp;
881         struct cfs_trace_page   *tage;
882         struct cfs_trace_page   *tmp;
883         char                    *buf;
884         int rc;
885
886         down_write(&cfs_tracefile_sem);
887
888         filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
889         if (IS_ERR(filp)) {
890                 rc = PTR_ERR(filp);
891                 filp = NULL;
892                 pr_err("LustreError: can't open %s for dump: rc = %d\n",
893                       filename, rc);
894                 goto out;
895         }
896
897         pc.pc_want_daemon_pages = 1;
898         collect_pages(&pc);
899         if (list_empty(&pc.pc_pages)) {
900                 rc = 0;
901                 goto close;
902         }
903
904         /* ok, for now, just write the pages.  in the future we'll be building
905          * iobufs with the pages and calling generic_direct_IO */
906         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
907
908                 __LASSERT_TAGE_INVARIANT(tage);
909
910                 buf = kmap(tage->page);
911                 rc = cfs_kernel_write(filp, buf, tage->used, &filp->f_pos);
912                 kunmap(tage->page);
913                 if (rc != (int)tage->used) {
914                         pr_warn("Lustre: wanted to write %u but wrote %d\n",
915                                 tage->used, rc);
916                         put_pages_back(&pc);
917                         __LASSERT(list_empty(&pc.pc_pages));
918                         break;
919                 }
920                 list_del(&tage->linkage);
921                 cfs_tage_free(tage);
922         }
923
924         rc = vfs_fsync_range(filp, 0, LLONG_MAX, 1);
925         if (rc)
926                 pr_err("LustreError: sync returns: rc = %d\n", rc);
927 close:
928         filp_close(filp, NULL);
929 out:
930         up_write(&cfs_tracefile_sem);
931         return rc;
932 }
933
934 void cfs_trace_flush_pages(void)
935 {
936         struct page_collection pc;
937         struct cfs_trace_page *tage;
938         struct cfs_trace_page *tmp;
939
940         pc.pc_want_daemon_pages = 1;
941         collect_pages(&pc);
942         list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
943
944                 __LASSERT_TAGE_INVARIANT(tage);
945
946                 list_del(&tage->linkage);
947                 cfs_tage_free(tage);
948         }
949 }
950
951 int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
952                             const char __user *usr_buffer, int usr_buffer_nob)
953 {
954         int    nob;
955
956         if (usr_buffer_nob > knl_buffer_nob)
957                 return -EOVERFLOW;
958
959         if (copy_from_user(knl_buffer, usr_buffer, usr_buffer_nob))
960                 return -EFAULT;
961
962         nob = strnlen(knl_buffer, usr_buffer_nob);
963         while (--nob >= 0)                      /* strip trailing whitespace */
964                 if (!isspace(knl_buffer[nob]))
965                         break;
966
967         if (nob < 0)                            /* empty string */
968                 return -EINVAL;
969
970         if (nob == knl_buffer_nob)              /* no space to terminate */
971                 return -EOVERFLOW;
972
973         knl_buffer[nob + 1] = 0;                /* terminate */
974         return 0;
975 }
976 EXPORT_SYMBOL(cfs_trace_copyin_string);
977
978 int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
979                              const char *knl_buffer, char *append)
980 {
981         /* NB if 'append' != NULL, it's a single character to append to the
982          * copied out string - usually "\n", for /proc entries and "" (i.e. a
983          * terminating zero byte) for sysctl entries */
984         int   nob = strlen(knl_buffer);
985
986         if (nob > usr_buffer_nob)
987                 nob = usr_buffer_nob;
988
989         if (copy_to_user(usr_buffer, knl_buffer, nob))
990                 return -EFAULT;
991
992         if (append != NULL && nob < usr_buffer_nob) {
993                 if (copy_to_user(usr_buffer + nob, append, 1))
994                         return -EFAULT;
995
996                 nob++;
997         }
998
999         return nob;
1000 }
1001 EXPORT_SYMBOL(cfs_trace_copyout_string);
1002
1003 int cfs_trace_allocate_string_buffer(char **str, int nob)
1004 {
1005         if (nob > 2 * PAGE_SIZE)        /* string must be "sensible" */
1006                 return -EINVAL;
1007
1008         *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
1009         if (*str == NULL)
1010                 return -ENOMEM;
1011
1012         return 0;
1013 }
1014
1015 int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
1016 {
1017         char         *str;
1018         int           rc;
1019
1020         rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
1021         if (rc != 0)
1022                 return rc;
1023
1024         rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
1025                                      usr_str, usr_str_nob);
1026         if (rc != 0)
1027                 goto out;
1028
1029         if (str[0] != '/') {
1030                 rc = -EINVAL;
1031                 goto out;
1032         }
1033         rc = cfs_tracefile_dump_all_pages(str);
1034 out:
1035         kfree(str);
1036         return rc;
1037 }
1038
/* Execute a debug daemon control command:
 *   "stop"     - stop the daemon thread and clear the output file name
 *   "size=<n>" - set the trace file size limit in MB (10..20480,
 *                out-of-range values fall back to the default)
 *   "/path"    - set the output file (absolute path) and start the daemon
 * Returns 0 or a negative errno. */
int cfs_trace_daemon_command(char *str)
{
        int       rc = 0;

        down_write(&cfs_tracefile_sem);

        if (strcmp(str, "stop") == 0) {
                /* drop the semaphore while stopping: the daemon thread
                 * takes cfs_tracefile_sem itself, so waiting for it with
                 * the lock held would deadlock */
                up_write(&cfs_tracefile_sem);
                cfs_trace_stop_thread();
                down_write(&cfs_tracefile_sem);
                memset(cfs_tracefile, 0, sizeof(cfs_tracefile));

        } else if (strncmp(str, "size=", 5) == 0) {
                unsigned long tmp;

                rc = kstrtoul(str + 5, 10, &tmp);
                if (!rc) {
                        /* MB value outside 10..20480 reverts to default */
                        if (tmp < 10 || tmp > 20480)
                                cfs_tracefile_size = CFS_TRACEFILE_SIZE;
                        else
                                cfs_tracefile_size = tmp << 20;
                }
        } else if (strlen(str) >= sizeof(cfs_tracefile)) {
                rc = -ENAMETOOLONG;
        } else if (str[0] != '/') {
                rc = -EINVAL;
        } else {
                strcpy(cfs_tracefile, str);

                pr_info("Lustre: debug daemon will attempt to start writing to %s (%lukB max)\n",
                        cfs_tracefile, (long)(cfs_tracefile_size >> 10));

                cfs_trace_start_thread();
        }

        up_write(&cfs_tracefile_sem);
        return rc;
}
1077
1078 int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
1079 {
1080         char *str;
1081         int   rc;
1082
1083         rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
1084         if (rc != 0)
1085                 return rc;
1086
1087         rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
1088                                  usr_str, usr_str_nob);
1089         if (rc == 0)
1090                 rc = cfs_trace_daemon_command(str);
1091
1092         kfree(str);
1093         return rc;
1094 }
1095
1096 int cfs_trace_set_debug_mb(int mb)
1097 {
1098         int i;
1099         int j;
1100         unsigned long pages;
1101         unsigned long total_mb = (cfs_totalram_pages() >> (20 - PAGE_SHIFT));
1102         unsigned long limit = max_t(unsigned long, 512, (total_mb * 4) / 5);
1103         struct cfs_trace_cpu_data *tcd;
1104
1105         if (mb < num_possible_cpus()) {
1106                 pr_warn("Lustre: %d MB is too small for debug buffer size, setting it to %d MB.\n",
1107                         mb, num_possible_cpus());
1108                 mb = num_possible_cpus();
1109         }
1110
1111         if (mb > limit) {
1112                 pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %lu MB.\n",
1113                         mb, limit);
1114                 mb = limit;
1115         }
1116
1117         mb /= num_possible_cpus();
1118         pages = mb << (20 - PAGE_SHIFT);
1119
1120         down_write(&cfs_tracefile_sem);
1121
1122         cfs_tcd_for_each(tcd, i, j)
1123                 tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
1124
1125         up_write(&cfs_tracefile_sem);
1126
1127         return mb;
1128 }
1129
1130 int cfs_trace_get_debug_mb(void)
1131 {
1132         int i;
1133         int j;
1134         struct cfs_trace_cpu_data *tcd;
1135         int total_pages = 0;
1136
1137         down_read(&cfs_tracefile_sem);
1138
1139         cfs_tcd_for_each(tcd, i, j)
1140                 total_pages += tcd->tcd_max_pages;
1141
1142         up_read(&cfs_tracefile_sem);
1143
1144         return (total_pages >> (20 - PAGE_SHIFT)) + 1;
1145 }
1146
/* Debug daemon thread ("ktracefiled"): repeatedly collects the non-daemon
 * trace pages and appends them to the file named by cfs_tracefile,
 * wrapping at cfs_tracefile_size.  Pages that can't be written (no file
 * configured, or a short write) are pushed onto the per-CPU daemon ring
 * buffers instead of being lost.  Exits after tctl_shutdown is set,
 * making one final pass first.  Returns 0. */
static int tracefiled(void *arg)
{
        struct page_collection pc;
        struct tracefiled_ctl *tctl = arg;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
        struct file *filp;
        char *buf;
        int last_loop = 0;
        int rc;

        /* we're started late enough that we pick up init's fs context */
        /* this is so broken in uml?  what on earth is going on? */

        complete(&tctl->tctl_start);

        while (1) {
                wait_queue_entry_t __wait;

                pc.pc_want_daemon_pages = 0;
                collect_pages(&pc);
                if (list_empty(&pc.pc_pages))
                        goto end_loop;

                /* take the semaphore only around reading cfs_tracefile;
                 * the writes below happen after it is dropped */
                filp = NULL;
                down_read(&cfs_tracefile_sem);
                if (cfs_tracefile[0] != 0) {
                        filp = filp_open(cfs_tracefile,
                                         O_CREAT | O_RDWR | O_LARGEFILE,
                                         0600);
                        if (IS_ERR(filp)) {
                                rc = PTR_ERR(filp);
                                filp = NULL;
                                pr_warn("Lustre: couldn't open %s: rc = %d\n",
                                        cfs_tracefile, rc);
                        }
                }
                up_read(&cfs_tracefile_sem);
                if (filp == NULL) {
                        /* no output file: stash pages on the daemon ring
                         * buffers so they're still available for an LBUG */
                        put_pages_on_daemon_list(&pc);
                        __LASSERT(list_empty(&pc.pc_pages));
                        goto end_loop;
                }

                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                        struct dentry *de = file_dentry(filp);
                        /* NOTE(review): f_pos is function-static, so the
                         * write offset survives reopening the file across
                         * loop iterations — presumably intentional for the
                         * wrap-around behaviour below */
                        static loff_t f_pos;

                        __LASSERT_TAGE_INVARIANT(tage);

                        /* wrap to the start once the size cap is reached;
                         * never seek past the current end of file */
                        if (f_pos >= (off_t)cfs_tracefile_size)
                                f_pos = 0;
                        else if (f_pos > i_size_read(de->d_inode))
                                f_pos = i_size_read(de->d_inode);

                        buf = kmap(tage->page);
                        rc = cfs_kernel_write(filp, buf, tage->used, &f_pos);
                        kunmap(tage->page);
                        if (rc != (int)tage->used) {
                                pr_warn("Lustre: wanted to write %u but wrote %d\n",
                                        tage->used, rc);
                                put_pages_back(&pc);
                                __LASSERT(list_empty(&pc.pc_pages));
                                break;
                        }
                }

                filp_close(filp, NULL);
                put_pages_on_daemon_list(&pc);
                if (!list_empty(&pc.pc_pages)) {
                        int i;

                        /* should never happen: report which CPUs the
                         * orphaned pages belong to for debugging */
                        pr_alert("Lustre: trace pages aren't empty\n");
                        pr_err("Lustre: total cpus(%d): ", num_possible_cpus());
                        for (i = 0; i < num_possible_cpus(); i++)
                                if (cpu_online(i))
                                        pr_cont("%d(on) ", i);
                                else
                                        pr_cont("%d(off) ", i);
                        pr_cont("\n");

                        i = 0;
                        list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
                                                 linkage)
                                pr_err("Lustre: page %d belongs to cpu %d\n",
                                       ++i, tage->cpu);
                        pr_err("Lustre: There are %d pages unwritten\n", i);
                }
                __LASSERT(list_empty(&pc.pc_pages));
end_loop:
                if (atomic_read(&tctl->tctl_shutdown)) {
                        /* make one more pass to flush anything queued
                         * between the last collect and the shutdown flag */
                        if (last_loop == 0) {
                                last_loop = 1;
                                continue;
                        } else {
                                break;
                        }
                }
                /* sleep up to a second (interruptibly, so a wake-up on
                 * tctl_waitq gets prompt service) before the next pass */
                init_waitqueue_entry(&__wait, current);
                add_wait_queue(&tctl->tctl_waitq, &__wait);
                schedule_timeout_interruptible(cfs_time_seconds(1));
                remove_wait_queue(&tctl->tctl_waitq, &__wait);
        }
        complete(&tctl->tctl_stop);
        return 0;
}
1253
1254 int cfs_trace_start_thread(void)
1255 {
1256         struct tracefiled_ctl *tctl = &trace_tctl;
1257         int rc = 0;
1258
1259         mutex_lock(&cfs_trace_thread_mutex);
1260         if (thread_running)
1261                 goto out;
1262
1263         init_completion(&tctl->tctl_start);
1264         init_completion(&tctl->tctl_stop);
1265         init_waitqueue_head(&tctl->tctl_waitq);
1266         atomic_set(&tctl->tctl_shutdown, 0);
1267
1268         if (IS_ERR(kthread_run(tracefiled, tctl, "ktracefiled"))) {
1269                 rc = -ECHILD;
1270                 goto out;
1271         }
1272
1273         wait_for_completion(&tctl->tctl_start);
1274         thread_running = 1;
1275 out:
1276         mutex_unlock(&cfs_trace_thread_mutex);
1277         return rc;
1278 }
1279
1280 void cfs_trace_stop_thread(void)
1281 {
1282         struct tracefiled_ctl *tctl = &trace_tctl;
1283
1284         mutex_lock(&cfs_trace_thread_mutex);
1285         if (thread_running) {
1286                 pr_info("Lustre: shutting down debug daemon thread...\n");
1287                 atomic_set(&tctl->tctl_shutdown, 1);
1288                 wait_for_completion(&tctl->tctl_stop);
1289                 thread_running = 0;
1290         }
1291         mutex_unlock(&cfs_trace_thread_mutex);
1292 }
1293
/* Initialize the tracefile subsystem: set up the arch-specific state,
 * then reset every per-CPU trace data structure and assign it its share
 * of @max_pages according to tcd_pages_factor (a percentage).
 * Returns 0 on success or a negative errno from the arch init. */
int cfs_tracefile_init(int max_pages)
{
        struct cfs_trace_cpu_data *tcd;
        int     i;
        int     j;
        int     rc;
        int     factor;

        rc = cfs_tracefile_init_arch();
        if (rc != 0)
                return rc;

        cfs_tcd_for_each(tcd, i, j) {
                /* tcd_pages_factor is initialized in tracefile_init_arch. */
                factor = tcd->tcd_pages_factor;
                INIT_LIST_HEAD(&tcd->tcd_pages);
                INIT_LIST_HEAD(&tcd->tcd_stock_pages);
                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_stock_pages = 0;
                tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = (max_pages * factor) / 100;
                LASSERT(tcd->tcd_max_pages > 0);
                tcd->tcd_shutting_down = 0;
        }
        return 0;
}
1321
1322 static void trace_cleanup_on_all_cpus(void)
1323 {
1324         struct cfs_trace_cpu_data *tcd;
1325         struct cfs_trace_page *tage;
1326         struct cfs_trace_page *tmp;
1327         int i, cpu;
1328
1329         for_each_possible_cpu(cpu) {
1330                 cfs_tcd_for_each_type_lock(tcd, i, cpu) {
1331                         tcd->tcd_shutting_down = 1;
1332
1333                         list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
1334                                 __LASSERT_TAGE_INVARIANT(tage);
1335
1336                                 list_del(&tage->linkage);
1337                                 cfs_tage_free(tage);
1338                         }
1339                         tcd->tcd_cur_pages = 0;
1340                 }
1341         }
1342 }
1343
/* Release all trace pages and tear down the arch-specific state.
 * Called on unload after the daemon thread has been stopped.
 * (A local struct page_collection used to be declared and initialized
 * here but was never used; trace_cleanup_on_all_cpus() walks the
 * per-CPU lists directly, so the dead local has been removed.) */
static void cfs_trace_cleanup(void)
{
        trace_cleanup_on_all_cpus();

        cfs_tracefile_fini_arch();
}
1354
/* Shut down the tracefile subsystem: stop the debug daemon thread first
 * (it references the per-CPU buffers), then free all trace state. */
void cfs_tracefile_exit(void)
{
        cfs_trace_stop_thread();
        cfs_trace_cleanup();
}