Whamcloud - gitweb
b=5684
[fs/lustre-release.git] / lnet / libcfs / tracefile.c
index 562abcf..e384773 100644 (file)
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rwsem.h>
-#include <linux/proc_fs.h>
-#include <linux/file.h>
-#include <linux/smp.h>
-#include <linux/ctype.h>
-#include <asm/uaccess.h>
-#ifdef HAVE_MM_INLINE
-#include <linux/mm_inline.h>
-#endif
 
 #define DEBUG_SUBSYSTEM S_PORTALS
+#define LUSTRE_TRACEFILE_PRIVATE
+#include "tracefile.h"
 
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/lustre_compat25.h>
-#include <linux/libcfs.h>
-
-#define TCD_MAX_PAGES 1280
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
 
 /* XXX move things up to the top, comment */
+union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;
 
-static union {
-        struct trace_cpu_data {
-                struct list_head        tcd_pages;
-                unsigned long           tcd_cur_pages;
-
-                struct list_head        tcd_daemon_pages;
-                unsigned long           tcd_cur_daemon_pages;
-
-                unsigned long           tcd_max_pages;
-                int                     tcd_shutting_down;
-        } tcd;
-        char __pad[SMP_CACHE_BYTES];
-} trace_data[NR_CPUS] __cacheline_aligned;
-
-struct page_collection {
-        struct list_head        pc_pages;
-        spinlock_t              pc_lock;
-        int                     pc_want_daemon_pages;
-};
-
-struct tracefiled_ctl {
-        struct completion        tctl_start;
-        struct completion        tctl_stop;
-        wait_queue_head_t        tctl_waitq;
-        pid_t                    tctl_pid;
-        atomic_t                 tctl_shutdown;
-};
-
-static DECLARE_RWSEM(tracefile_sem);
-static char *tracefile = NULL;
+struct rw_semaphore tracefile_sem;
+char *tracefile = NULL;
+long long tracefile_size = TRACEFILE_SIZE;
 static struct tracefiled_ctl trace_tctl;
-static DECLARE_MUTEX(trace_thread_sem);
+struct semaphore trace_thread_sem;
 static int thread_running = 0;
 
-#ifndef get_cpu
-#define get_cpu() smp_processor_id()
-#define put_cpu() do { } while (0)
-#endif
+static void put_pages_on_daemon_list_on_cpu(void *info);
 
-#define trace_get_tcd(FLAGS) ({                 \
-        struct trace_cpu_data *__ret;           \
-        int __cpu = get_cpu();                  \
-        local_irq_save(FLAGS);                  \
-        __ret = &trace_data[__cpu].tcd;         \
-        __ret;                                  \
-})
+/* Map a list node back to the trace_page that embeds it as 'linkage'. */
+static inline struct trace_page *tage_from_list(struct list_head *list)
+{
+        struct trace_page *owner;
+
+        owner = list_entry(list, struct trace_page, linkage);
+        return owner;
+}
 
-#define trace_put_tcd(TCD, FLAGS) do {          \
-        local_irq_restore(FLAGS);               \
-        put_cpu();                              \
-} while (0)
+/*
+ * Allocate a trace_page descriptor together with the page it tracks,
+ * passing 'gfp' to both allocations.
+ *
+ * Returns the new descriptor with ->page set, or NULL if either
+ * allocation fails.  Only ->page is initialised here; the caller fills
+ * in the remaining fields.
+ */
+static struct trace_page *tage_alloc(int gfp)
+{
+        cfs_page_t        *page;
+        struct trace_page *tage;
+
+        page = cfs_alloc_page(gfp);
+        if (page == NULL)
+                return NULL;
+
+        tage = cfs_alloc(sizeof *tage, gfp);
+        if (tage == NULL) {
+                /* don't leak the page, and don't write through the NULL
+                 * descriptor */
+                cfs_free_page(page);
+                return NULL;
+        }
+
+        tage->page = page;
+        return tage;
+}
 
-static void put_pages_on_daemon_list_on_cpu(void *info);
+/* Release a trace_page: its data page, if one is attached, then the
+ * descriptor itself. */
+static void tage_free(struct trace_page *tage)
+{
+        cfs_page_t *page;
+
+        LASSERT(tage != NULL);
+
+        page = tage->page;
+        if (page != NULL)
+                cfs_free_page(page);
+
+        cfs_free(tage);
+}
+
+/* Move 'tage' from the list it is currently linked on to the tail of
+ * 'queue'. */
+static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
+{
+        struct list_head *node;
+
+        LASSERT(tage != NULL);
+        LASSERT(queue != NULL);
+
+        node = &tage->linkage;
+        list_move_tail(node, queue);
+}
+
+/*
+ * Sanity check for use in assertions: non-zero when 'tage' is non-NULL,
+ * has no more than CFS_PAGE_SIZE bytes marked used, and its page has a
+ * positive count; zero otherwise.
+ */
+static int tage_invariant(struct trace_page *tage)
+{
+        if (tage == NULL)
+                return 0;
+        if (tage->used > CFS_PAGE_SIZE)
+                return 0;
+        return cfs_page_count(tage->page) > 0;
+}
 
 /* return a page that has 'len' bytes left at the end */
-static struct page *trace_get_page(struct trace_cpu_data *tcd,
-                                   unsigned long len)
+static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
+                                         unsigned long len)
 {
-        struct page *page = NULL;
+        struct trace_page *tage;
 
-        if (len > PAGE_SIZE) {
+        if (len > CFS_PAGE_SIZE) {
                 printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
                        "page\n", len);
                 return NULL;
         }
 
         if (!list_empty(&tcd->tcd_pages)) {
-                page = list_entry(tcd->tcd_pages.prev, struct page,
-                                  PAGE_LIST_ENTRY);
-                if (page->index + len <= PAGE_SIZE)
-                        return page;
+                tage = tage_from_list(tcd->tcd_pages.prev);
+                if (tage->used + len <= CFS_PAGE_SIZE)
+                        return tage;
         }
 
         if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-                page = alloc_page(GFP_ATOMIC);
-                if (page == NULL) {
+                tage = tage_alloc(CFS_ALLOC_ATOMIC);
+                if (tage == NULL) {
                         /* the kernel should print a message for us.  fall back
                          * to using the last page in the ring buffer. */
                         goto ring_buffer;
-                        return NULL;
                 }
-                page->index = 0;
-                page->mapping = (void *)(long)smp_processor_id();
-                list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
+                tage->used = 0;
+                tage->cpu = smp_processor_id();
+                list_add_tail(&tage->linkage, &tcd->tcd_pages);
                 tcd->tcd_cur_pages++;
 
                 if (tcd->tcd_cur_pages > 8 && thread_running) {
                         struct tracefiled_ctl *tctl = &trace_tctl;
-                        wake_up(&tctl->tctl_waitq);
+                        cfs_waitq_signal(&tctl->tctl_waitq);
                 }
-                return page;
+                return tage;
         }
 
  ring_buffer:
         if (thread_running) {
                 int pgcount = tcd->tcd_cur_pages / 10;
                 struct page_collection pc;
-                struct list_head *pos, *tmp;
+                struct trace_page *tage;
+                struct trace_page *tmp;
+
                 printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
                        " 10%% of pages (%d)\n", pgcount + 1);
 
-                INIT_LIST_HEAD(&pc.pc_pages);
+                CFS_INIT_LIST_HEAD(&pc.pc_pages);
                 spin_lock_init(&pc.pc_lock);
 
-                list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-                        struct page *page;
-
+                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
                         if (pgcount-- == 0)
                                 break;
 
-                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                        list_del(&PAGE_LIST(page));
-                        list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
+                        list_move_tail(&tage->linkage, &pc.pc_pages);
                         tcd->tcd_cur_pages--;
                 }
                 put_pages_on_daemon_list_on_cpu(&pc);
         }
         LASSERT(!list_empty(&tcd->tcd_pages));
 
-        page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
-        page->index = 0;
-
-        list_del(&PAGE_LIST(page));
-        list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
-        return page;
-}
+        tage = tage_from_list(tcd->tcd_pages.next);
+        tage->used = 0;
+        tage_to_tail(tage, &tcd->tcd_pages);
 
-static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
-                             int len, char *file, const char *fn)
-{
-        char *prefix = NULL, *ptype = NULL;
-
-        if ((mask & D_EMERG) != 0) {
-                prefix = "LustreError";
-                ptype = KERN_EMERG;
-        } else if ((mask & D_ERROR) != 0) {
-                prefix = "LustreError";
-                ptype = KERN_ERR;
-        } else if ((mask & D_WARNING) != 0) {
-                prefix = "Lustre";
-                ptype = KERN_WARNING;
-        } else if (portal_printk) {
-                prefix = "Lustre";
-                ptype = KERN_INFO;
-        }
-        
-        printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
-               hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+        return tage;
 }
 
 void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
@@ -200,12 +160,18 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
 {
         struct trace_cpu_data *tcd;
         struct ptldebug_header header;
-        struct page *page;
-        char *debug_buf;
-        int known_size, needed, max_nob;
+        struct trace_page *tage;
+        char *debug_buf = format;
+        int known_size, needed = 85 /* average message length */, max_nob;
         va_list       ap;
         unsigned long flags;
-        struct timeval tv;
+
+#ifdef CRAY_PORTALS
+        if (mask == D_PORTALS && !(portal_debug & D_PORTALS))
+                return;
+#endif
+        if (strchr(file, '/'))
+                file = strrchr(file, '/') + 1;
 
         if (*(format + strlen(format) - 1) != '\n')
                 printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
@@ -215,70 +181,54 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
         if (tcd->tcd_shutting_down)
                 goto out;
 
-        do_gettimeofday(&tv);
-
-        header.ph_subsys = subsys;
-        header.ph_mask = mask;
-        header.ph_cpu_id = smp_processor_id();
-        header.ph_sec = (__u32)tv.tv_sec;
-        header.ph_usec = tv.tv_usec;
-        header.ph_stack = stack;
-        header.ph_pid = current->pid;
-        header.ph_line_num = line;
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        header.ph_extern_pid = current->thread.extern_pid;
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        header.ph_extern_pid = current->thread.mode.tt.extern_pid;
-#else
-        header.ph_extern_pid = 0;
-#endif
-
+        set_ptldebug_header(&header, subsys, mask, line, stack);
         known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
 
-        page = trace_get_page(tcd, known_size + 40); /* slop */
  retry:
-        if (page == NULL)
+        tage = trace_get_tage(tcd, needed + known_size);
+        if (tage == NULL) {
+                debug_buf = format;
+                if (needed + known_size > CFS_PAGE_SIZE)
+                        mask |= D_ERROR;
+                needed = strlen(format);
                 goto out;
+        }
 
-        debug_buf = page_address(page) + page->index + known_size;
+        debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
 
-        va_start(ap, format);
-        max_nob = PAGE_SIZE - page->index - known_size;
+        max_nob = CFS_PAGE_SIZE - tage->used - known_size;
         LASSERT(max_nob > 0);
+        va_start(ap, format);
         needed = vsnprintf(debug_buf, max_nob, format, ap);
         va_end(ap);
 
-        if (needed > max_nob) {
-                /* overflow.  oh poop. */
-                page = trace_get_page(tcd, needed + known_size);
+        if (needed > max_nob) /* overflow.  oh poop. */
                 goto retry;
-        }
 
         header.ph_len = known_size + needed;
-        debug_buf = page_address(page) + page->index;
+        debug_buf = cfs_page_address(tage->page) + tage->used;
 
         memcpy(debug_buf, &header, sizeof(header));
-        page->index += sizeof(header);
+        tage->used += sizeof(header);
         debug_buf += sizeof(header);
 
         strcpy(debug_buf, file);
-        page->index += strlen(file) + 1;
+        tage->used += strlen(file) + 1;
         debug_buf += strlen(file) + 1;
 
         strcpy(debug_buf, fn);
-        page->index += strlen(fn) + 1;
+        tage->used += strlen(fn) + 1;
         debug_buf += strlen(fn) + 1;
 
-        page->index += needed;
-        if (page->index > PAGE_SIZE)
-                printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
-                       page->index);
+        tage->used += needed;
+        if (tage->used > CFS_PAGE_SIZE)
+                printk(KERN_EMERG
+                       "tage->used == %u in portals_debug_msg\n", tage->used);
 
-        if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
+ out:
+        if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || portal_printk)
                 print_to_console(&header, mask, debug_buf, needed, file, fn);
 
- out:
         trace_put_tcd(tcd, flags);
 }
 EXPORT_SYMBOL(portals_debug_msg);
@@ -292,12 +242,10 @@ static void collect_pages_on_cpu(void *info)
         tcd = trace_get_tcd(flags);
 
         spin_lock(&pc->pc_lock);
-        list_splice(&tcd->tcd_pages, &pc->pc_pages);
-        INIT_LIST_HEAD(&tcd->tcd_pages);
+        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
         tcd->tcd_cur_pages = 0;
         if (pc->pc_want_daemon_pages) {
-                list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages);
-                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+                list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
                 tcd->tcd_cur_daemon_pages = 0;
         }
         spin_unlock(&pc->pc_lock);
@@ -308,7 +256,7 @@ static void collect_pages_on_cpu(void *info)
 static void collect_pages(struct page_collection *pc)
 {
         /* needs to be fixed up for preempt */
-        INIT_LIST_HEAD(&pc->pc_pages);
+        CFS_INIT_LIST_HEAD(&pc->pc_pages);
         collect_pages_on_cpu(pc);
         smp_call_function(collect_pages_on_cpu, pc, 0, 1);
 }
@@ -317,26 +265,24 @@ static void put_pages_back_on_cpu(void *info)
 {
         struct page_collection *pc = info;
         struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp, *cur_head;
+        struct list_head *cur_head;
         unsigned long flags;
+        struct trace_page *tage;
+        struct trace_page *tmp;
 
         tcd = trace_get_tcd(flags);
 
         cur_head = tcd->tcd_pages.next;
 
         spin_lock(&pc->pc_lock);
-        list_for_each_safe(pos, tmp, &pc->pc_pages) {
-                struct page *page;
+        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
 
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
+                LASSERT(tage_invariant(tage));
 
-                if ((unsigned long)page->mapping != smp_processor_id())
+                if (tage->cpu != smp_processor_id())
                         continue;
 
-                list_del(&PAGE_LIST(page));
-                list_add_tail(&PAGE_LIST(page), cur_head);
+                tage_to_tail(tage, cur_head);
                 tcd->tcd_cur_pages++;
         }
         spin_unlock(&pc->pc_lock);
@@ -359,37 +305,33 @@ static void put_pages_on_daemon_list_on_cpu(void *info)
 {
         struct page_collection *pc = info;
         struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
         unsigned long flags;
 
         tcd = trace_get_tcd(flags);
 
         spin_lock(&pc->pc_lock);
-        list_for_each_safe(pos, tmp, &pc->pc_pages) {
-                struct page *page;
+        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
 
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-                if ((unsigned long)page->mapping != smp_processor_id())
+                LASSERT(tage_invariant(tage));
+
+                if (tage->cpu != smp_processor_id())
                         continue;
 
-                list_del(&PAGE_LIST(page));
-                list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
+                tage_to_tail(tage, &tcd->tcd_daemon_pages);
                 tcd->tcd_cur_daemon_pages++;
 
                 if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
+                        struct trace_page *victim;
+
                         LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-                        page = list_entry(tcd->tcd_daemon_pages.next,
-                                          struct page, PAGE_LIST_ENTRY);
+                        victim = tage_from_list(tcd->tcd_daemon_pages.next);
 
-                        LASSERT(page->index <= PAGE_SIZE);
-                        LASSERT(page_count(page) > 0);
+                        LASSERT(tage_invariant(victim));
 
-                        page->index = 0;
-                        list_del(&PAGE_LIST(page));
-                        page->mapping = NULL;
-                        __free_page(page);
+                        list_del(&victim->linkage);
+                        tage_free(victim);
                         tcd->tcd_cur_daemon_pages--;
                 }
         }
@@ -407,21 +349,21 @@ static void put_pages_on_daemon_list(struct page_collection *pc)
 void trace_debug_print(void)
 {
         struct page_collection pc;
-        struct list_head *pos, *tmp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
 
         spin_lock_init(&pc.pc_lock);
 
         collect_pages(&pc);
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
+        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                 char *p, *file, *fn;
+                cfs_page_t *page;
 
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
+                LASSERT(tage_invariant(tage));
 
-                p = page_address(page);
-                while (p < ((char *)page_address(page) + PAGE_SIZE)) {
+                page = tage->page;
+                p = cfs_page_address(page);
+                while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) {
                         struct ptldebug_header *hdr;
                         int len;
                         hdr = (void *)p;
@@ -435,27 +377,27 @@ void trace_debug_print(void)
                         print_to_console(hdr, D_EMERG, p, len, file, fn);
                 }
 
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
+                list_del(&tage->linkage);
+                tage_free(tage);
         }
 }
 
 int tracefile_dump_all_pages(char *filename)
 {
         struct page_collection pc;
-        struct file *filp;
-        struct list_head *pos, *tmp;
-        mm_segment_t oldfs;
+        cfs_file_t *filp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
+        CFS_DECL_MMSPACE;
         int rc;
 
         down_write(&tracefile_sem);
 
-        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
-        if (IS_ERR(filp)) {
-                rc = PTR_ERR(filp);
+        filp = cfs_filp_open(filename,
+                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
+        if (!filp) {
                 printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
-                      filename, rc);
+                       filename, rc);
                 goto out;
         }
 
@@ -469,33 +411,28 @@ int tracefile_dump_all_pages(char *filename)
 
         /* ok, for now, just write the pages.  in the future we'll be building
          * iobufs with the pages and calling generic_direct_IO */
-        oldfs = get_fs();
-        set_fs(get_ds());
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                rc = filp->f_op->write(filp, page_address(page), page->index,
-                                       &filp->f_pos);
-                if (rc != page->index) {
-                        printk(KERN_WARNING "wanted to write %lu but wrote "
-                               "%d\n", page->index, rc);
+        CFS_MMSPACE_OPEN;
+        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+
+                LASSERT(tage_invariant(tage));
+
+                rc = cfs_filp_write(filp, cfs_page_address(tage->page),
+                                    tage->used, cfs_filp_poff(filp));
+                if (rc != tage->used) {
+                        printk(KERN_WARNING "wanted to write %u but wrote "
+                               "%d\n", tage->used, rc);
                         put_pages_back(&pc);
                         break;
                 }
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
+                list_del(&tage->linkage);
+                tage_free(tage);
         }
-        set_fs(oldfs);
-        rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
+        CFS_MMSPACE_CLOSE;
+        rc = cfs_filp_fsync(filp);
         if (rc)
                 printk(KERN_ERR "sync returns %d\n", rc);
  close:
-        filp_close(filp, 0);
+        cfs_filp_close(filp);
  out:
         up_write(&tracefile_sem);
         return rc;
@@ -504,21 +441,18 @@ int tracefile_dump_all_pages(char *filename)
 void trace_flush_pages(void)
 {
         struct page_collection pc;
-        struct list_head *pos, *tmp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
 
         spin_lock_init(&pc.pc_lock);
 
         collect_pages(&pc);
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
+        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
+                LASSERT(tage_invariant(tage));
 
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
+                list_del(&tage->linkage);
+                tage_free(tage);
         }
 }
 
@@ -529,7 +463,7 @@ int trace_dk(struct file *file, const char *buffer, unsigned long count,
         unsigned long off;
         int rc;
 
-        name = kmalloc(count + 1, GFP_KERNEL);
+        name = cfs_alloc(count + 1, CFS_ALLOC_STD);
         if (name == NULL)
                 return -ENOMEM;
 
@@ -551,7 +485,7 @@ int trace_dk(struct file *file, const char *buffer, unsigned long count,
         rc = tracefile_dump_all_pages(name);
 out:
         if (name)
-                kfree(name);
+                cfs_free(name);
         return count;
 }
 EXPORT_SYMBOL(trace_dk);
@@ -560,11 +494,11 @@ static int tracefiled(void *arg)
 {
         struct page_collection pc;
         struct tracefiled_ctl *tctl = arg;
-        struct list_head *pos, *tmp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
         struct ptldebug_header *hdr;
-        struct file *filp;
-        struct page *page;
-        mm_segment_t oldfs;
+        cfs_file_t *filp;
+        CFS_DECL_MMSPACE;
         int rc;
 
         /* we're started late enough that we pick up init's fs context */
@@ -576,13 +510,13 @@ static int tracefiled(void *arg)
         complete(&tctl->tctl_start);
 
         while (1) {
-                wait_queue_t __wait;
+                cfs_waitlink_t __wait;
 
-                init_waitqueue_entry(&__wait, current);
-                add_wait_queue(&tctl->tctl_waitq, &__wait);
+                cfs_waitlink_init(&__wait);
+                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
                 set_current_state(TASK_INTERRUPTIBLE);
-                schedule_timeout(HZ);
-                remove_wait_queue(&tctl->tctl_waitq, &__wait);
+                cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
+                cfs_waitq_del(&tctl->tctl_waitq, &__wait);
 
                 if (atomic_read(&tctl->tctl_shutdown))
                         break;
@@ -595,13 +529,10 @@ static int tracefiled(void *arg)
                 filp = NULL;
                 down_read(&tracefile_sem);
                 if (tracefile != NULL) {
-                        filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND|O_LARGEFILE,
-                                        0600);
-                        if (IS_ERR(filp)) {
-                                printk("couldn't open %s: %ld\n", tracefile,
-                                       PTR_ERR(filp));
-                                filp = NULL;
-                        }
+                        filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
+                                        0600, &rc);
+                        if (!(filp))
+                                printk("couldn't open %s: %d\n", tracefile, rc);
                 }
                 up_read(&tracefile_sem);
                 if (filp == NULL) {
@@ -609,33 +540,35 @@ static int tracefiled(void *arg)
                         continue;
                 }
 
-                oldfs = get_fs();
-                set_fs(get_ds());
+                CFS_MMSPACE_OPEN;
 
                 /* mark the first header, so we can sort in chunks */
-                page = list_entry(pc.pc_pages.next, struct page,
-                                  PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
+                tage = tage_from_list(pc.pc_pages.next);
+                LASSERT(tage_invariant(tage));
 
-                hdr = page_address(page);
+                hdr = cfs_page_address(tage->page);
                 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
 
-                list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                        LASSERT(page->index <= PAGE_SIZE);
-                        LASSERT(page_count(page) > 0);
+                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+                        static loff_t f_pos;
+
+                        LASSERT(tage_invariant(tage));
 
-                        rc = filp->f_op->write(filp, page_address(page),
-                                        page->index, &filp->f_pos);
-                        if (rc != page->index) {
-                                printk(KERN_WARNING "wanted to write %lu but "
-                                       "wrote %d\n", page->index, rc);
+                        if (f_pos >= tracefile_size)
+                                f_pos = 0;
+                        else if (f_pos > cfs_filp_size(filp))
+                                f_pos = cfs_filp_size(filp);
+
+                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
+                                            tage->used, &f_pos);
+                        if (rc != tage->used) {
+                                printk(KERN_WARNING "wanted to write %u but "
+                                       "wrote %d\n", tage->used, rc);
                                 put_pages_back(&pc);
                         }
                 }
-                set_fs(oldfs);
-                filp_close(filp, 0);
+                CFS_MMSPACE_CLOSE;
+                cfs_filp_close(filp);
 
                 put_pages_on_daemon_list(&pc);
         }
@@ -648,16 +581,16 @@ int trace_start_thread(void)
         struct tracefiled_ctl *tctl = &trace_tctl;
         int rc = 0;
 
-        down(&trace_thread_sem);
+        mutex_down(&trace_thread_sem);
         if (thread_running)
                 goto out;
 
         init_completion(&tctl->tctl_start);
         init_completion(&tctl->tctl_stop);
-        init_waitqueue_head(&tctl->tctl_waitq);
+        cfs_waitq_init(&tctl->tctl_waitq);
         atomic_set(&tctl->tctl_shutdown, 0);
 
-        if (kernel_thread(tracefiled, tctl, 0) < 0) {
+        if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
                 rc = -ECHILD;
                 goto out;
         }
@@ -665,7 +598,7 @@ int trace_start_thread(void)
         wait_for_completion(&tctl->tctl_start);
         thread_running = 1;
 out:
-        up(&trace_thread_sem);
+        mutex_up(&trace_thread_sem);
         return rc;
 }
 
@@ -673,128 +606,14 @@ void trace_stop_thread(void)
 {
         struct tracefiled_ctl *tctl = &trace_tctl;
 
-        down(&trace_thread_sem);
+        mutex_down(&trace_thread_sem);
         if (thread_running) {
                 printk(KERN_INFO "Shutting down debug daemon thread...\n");
                 atomic_set(&tctl->tctl_shutdown, 1);
                 wait_for_completion(&tctl->tctl_stop);
                 thread_running = 0;
         }
-        up(&trace_thread_sem);
-}
-
-int trace_write_daemon_file(struct file *file, const char *buffer,
-                            unsigned long count, void *data)
-{
-        char *name;
-        unsigned long off;
-        int rc;
-
-        name = kmalloc(count + 1, GFP_KERNEL);
-        if (name == NULL)
-                return -ENOMEM;
-
-        if (copy_from_user(name, buffer, count)) {
-                rc = -EFAULT;
-                goto out;
-        }
-
-        /* be nice and strip out trailing '\n' */
-        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
-                ;
-
-        name[off] = '\0';
-
-        down_write(&tracefile_sem);
-        if (strcmp(name, "stop") == 0) {
-                tracefile = NULL;
-                trace_stop_thread();
-                goto out_sem;
-        }
-
-        if (name[0] != '/') {
-                rc = -EINVAL;
-                goto out_sem;
-        }
-
-        if (tracefile != NULL)
-                kfree(tracefile);
-
-        tracefile = name;
-        name = NULL;
-        trace_start_thread();
-
- out_sem:
-        up_write(&tracefile_sem);
-
- out:
-        if (name)
-                kfree(name);
-        return count;
-}
-
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
-                           int *eof, void *data)
-{
-        int rc;
-
-        down_read(&tracefile_sem);
-        rc = snprintf(page, count, "%s", tracefile);
-        up_read(&tracefile_sem);
-
-        return rc;
-}
-
-int trace_write_debug_size(struct file *file, const char *buffer,
-                           unsigned long count, void *data)
-{
-        char *string;
-        int rc, i, max;
-
-        string = kmalloc(count + 1, GFP_KERNEL);
-        if (string == NULL)
-                return -ENOMEM;
-
-        if (copy_from_user(string, buffer, count)) {
-                rc = -EFAULT;
-                goto out;
-        }
-
-        max = simple_strtoul(string, NULL, 0);
-        if (max == 0) {
-                rc = -EINVAL;
-                goto out;
-        }
-        max /= smp_num_cpus;
-
-        if (max > num_physpages / 5 * 4) {
-                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
-                       "%d pages, which is more than 80%% of physical pages "
-                       "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
-                return count;
-        }
-        for (i = 0; i < NR_CPUS; i++) {
-                struct trace_cpu_data *tcd;
-                tcd = &trace_data[i].tcd;
-                tcd->tcd_max_pages = max;
-        }
- out:
-        kfree(string);
-        return count;
-}
-
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
-{
-        struct trace_cpu_data *tcd;
-        unsigned long flags;
-        int rc;
-
-        tcd = trace_get_tcd(flags);
-        rc = snprintf(page, count, "%lu", tcd->tcd_max_pages);
-        trace_put_tcd(tcd, flags);
-
-        return rc;
+        mutex_up(&trace_thread_sem);
 }
 
 int tracefile_init(void)
@@ -804,8 +623,8 @@ int tracefile_init(void)
 
         for (i = 0; i < NR_CPUS; i++) {
                 tcd = &trace_data[i].tcd;
-                INIT_LIST_HEAD(&tcd->tcd_pages);
-                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
+                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                 tcd->tcd_cur_pages = 0;
                 tcd->tcd_cur_daemon_pages = 0;
                 tcd->tcd_max_pages = TCD_MAX_PAGES;
@@ -817,23 +636,19 @@ int tracefile_init(void)
 static void trace_cleanup_on_cpu(void *info)
 {
         struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
         unsigned long flags;
 
         tcd = trace_get_tcd(flags);
 
         tcd->tcd_shutting_down = 1;
 
-        list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
+        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+                LASSERT(tage_invariant(tage));
 
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
+                list_del(&tage->linkage);
+                tage_free(tage);
         }
         tcd->tcd_cur_pages = 0;
 
@@ -844,7 +659,7 @@ static void trace_cleanup(void)
 {
         struct page_collection pc;
 
-        INIT_LIST_HEAD(&pc.pc_pages);
+        CFS_INIT_LIST_HEAD(&pc.pc_pages);
         spin_lock_init(&pc.pc_lock);
 
         trace_cleanup_on_cpu(&pc);