Whamcloud - gitweb
LU-14428 libcfs: separate daemon_list from cfs_trace_data 93/41493/10
authorMr NeilBrown <neilb@suse.de>
Wed, 10 Feb 2021 22:49:54 +0000 (09:49 +1100)
committerOleg Drokin <green@whamcloud.com>
Mon, 22 Nov 2021 04:45:55 +0000 (04:45 +0000)
cfs_trace_data provides a fifo for trace messages.  To minimize
locking, there is a separate fifo for each CPU, and even for different
interrupt levels per-cpu.

When a page is removed from the fifo to be written to a file, the page
is added to a "daemon_list".  Trace messages on the daemon_list have
already been logged to a file, but can be easily dumped to the console
when a bug occurs.

The daemon_list is always accessed from a single thread at a time, so
the per-CPU facilities for cfs_trace_data are not needed.  However
daemon_list is currently managed per-cpu as part of cfs_trace_data.

This patch moves the daemon_list of pages out to a separate structure
- a simple linked list, protected by cfs_tracefile_sem.

Rather than using a 'cfs_trace_page' to hold linkage information and
content size, we use page->lru for linkage and page->private for
the size of the content in each page.

This is a step towards replacing cfs_trace_data with the Linux
ring_buffer which provides similar functionality with even less
locking.

In the current code, if the daemon which writes trace data to a file
cannot keep up with load, excess pages are moved to the daemon_list
temporarily before being discarded.  With the patch, these pages are
simply discarded immediately.
If the daemon thread cannot keep up, that is a configuration problem
and temporarily preserving a few pages is unlikely to really help.

Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: Ie894f7751cadacb515215f18182163ea5d26e969
Reviewed-on: https://review.whamcloud.com/41493
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
contrib/debug_tools/epython_scripts/dk.py
libcfs/libcfs/tracefile.c
libcfs/libcfs/tracefile.h

index 2d9c135..1806e9a 100644 (file)
@@ -88,7 +88,6 @@ def walk_array(options):
         for i in xrange(sys_info.CPUS):
             u = cfstd_array[i]
             walk_pages(tmpfd, options, u.tcd.tcd_pages, trace_page_struct)
         for i in xrange(sys_info.CPUS):
             u = cfstd_array[i]
             walk_pages(tmpfd, options, u.tcd.tcd_pages, trace_page_struct)
-            walk_pages(tmpfd, options, u.tcd.tcd_daemon_pages, trace_page_struct)
             walk_pages(tmpfd, options, u.tcd.tcd_stock_pages, trace_page_struct)
 
     tmpfd.close()
             walk_pages(tmpfd, options, u.tcd.tcd_stock_pages, trace_page_struct)
 
     tmpfd.close()
index 5db7394..c117d1a 100644 (file)
@@ -57,6 +57,13 @@ enum cfs_trace_buf_type {
 
 union cfs_trace_data_union (*cfs_trace_data[CFS_TCD_TYPE_CNT])[NR_CPUS] __cacheline_aligned;
 
 
 union cfs_trace_data_union (*cfs_trace_data[CFS_TCD_TYPE_CNT])[NR_CPUS] __cacheline_aligned;
 
+/* Pages containing records already processed by daemon.
+ * Link via ->lru, use size in ->private
+ */
+static LIST_HEAD(daemon_pages);
+static long daemon_pages_count;
+static long daemon_pages_max;
+
 char cfs_tracefile[TRACEFILE_NAME_SIZE];
 long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
 
 char cfs_tracefile[TRACEFILE_NAME_SIZE];
 long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
 
@@ -65,9 +72,6 @@ struct task_struct *tctl_task;
 static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
 static DECLARE_RWSEM(cfs_tracefile_sem);
 
 static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
 static DECLARE_RWSEM(cfs_tracefile_sem);
 
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-                                       struct cfs_trace_cpu_data *tcd);
-
 /* trace file lock routines */
 /* The walking argument indicates the locking comes from all tcd types
  * iterator and we must lock it and dissable local irqs to avoid deadlocks
 /* trace file lock routines */
 /* The walking argument indicates the locking comes from all tcd types
  * iterator and we must lock it and dissable local irqs to avoid deadlocks
@@ -265,10 +269,10 @@ static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
                if (pgcount-- == 0)
                        break;
 
                if (pgcount-- == 0)
                        break;
 
-               list_move_tail(&tage->linkage, &pc.pc_pages);
+               list_del(&tage->linkage);
+               cfs_tage_free(tage);
                tcd->tcd_cur_pages--;
        }
                tcd->tcd_cur_pages--;
        }
-       put_pages_on_tcd_daemon_list(&pc, tcd);
 }
 
 /* return a page that has 'len' bytes left at the end */
 }
 
 /* return a page that has 'len' bytes left at the end */
@@ -643,12 +647,6 @@ panic_collect_pages(struct page_collection *pc)
        cfs_tcd_for_each(tcd, i, j) {
                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                tcd->tcd_cur_pages = 0;
        cfs_tcd_for_each(tcd, i, j) {
                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                tcd->tcd_cur_pages = 0;
-
-               if (pc->pc_want_daemon_pages) {
-                       list_splice_init(&tcd->tcd_daemon_pages,
-                                               &pc->pc_pages);
-                       tcd->tcd_cur_daemon_pages = 0;
-               }
        }
 }
 
        }
 }
 
@@ -661,11 +659,6 @@ static void collect_pages_on_all_cpus(struct page_collection *pc)
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                        tcd->tcd_cur_pages = 0;
                cfs_tcd_for_each_type_lock(tcd, i, cpu) {
                        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
                        tcd->tcd_cur_pages = 0;
-                       if (pc->pc_want_daemon_pages) {
-                               list_splice_init(&tcd->tcd_daemon_pages,
-                                                       &pc->pc_pages);
-                               tcd->tcd_cur_daemon_pages = 0;
-                       }
                }
        }
 }
                }
        }
 }
@@ -713,63 +706,17 @@ static void put_pages_back(struct page_collection *pc)
                 put_pages_back_on_all_cpus(pc);
 }
 
                 put_pages_back_on_all_cpus(pc);
 }
 
-/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon. */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
-                                        struct cfs_trace_cpu_data *tcd)
-{
-       struct cfs_trace_page *tage;
-       struct cfs_trace_page *tmp;
-
-       list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
-               __LASSERT_TAGE_INVARIANT(tage);
-
-               if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
-                       continue;
-
-               cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
-               tcd->tcd_cur_daemon_pages++;
-
-               if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-                       struct cfs_trace_page *victim;
-
-                       __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-                       victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
-                        __LASSERT_TAGE_INVARIANT(victim);
-
-                       list_del(&victim->linkage);
-                       cfs_tage_free(victim);
-                       tcd->tcd_cur_daemon_pages--;
-               }
-       }
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-        struct cfs_trace_cpu_data *tcd;
-        int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-                cfs_tcd_for_each_type_lock(tcd, i, cpu)
-                        put_pages_on_tcd_daemon_list(pc, tcd);
-        }
-}
-
 #ifdef LNET_DUMP_ON_PANIC
 void cfs_trace_debug_print(void)
 {
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
 #ifdef LNET_DUMP_ON_PANIC
 void cfs_trace_debug_print(void)
 {
        struct page_collection pc;
        struct cfs_trace_page *tage;
        struct cfs_trace_page *tmp;
+       struct page *page;
 
 
-       pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
        collect_pages(&pc);
        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
                char *p, *file, *fn;
-               struct page *page;
 
                __LASSERT_TAGE_INVARIANT(tage);
 
 
                __LASSERT_TAGE_INVARIANT(tage);
 
@@ -795,16 +742,45 @@ void cfs_trace_debug_print(void)
                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
+       down_write(&cfs_tracefile_sem);
+       while ((page = list_first_entry_or_null(&daemon_pages,
+                                               struct page, lru)) != NULL) {
+               char *p, *file, *fn;
+
+               p = page_address(page);
+               while (p < ((char *)page_address(page) + page->private)) {
+                       struct ptldebug_header *hdr;
+                       int len;
+
+                       hdr = (void *)p;
+                       p += sizeof(*hdr);
+                       file = p;
+                       p += strlen(file) + 1;
+                       fn = p;
+                       p += strlen(fn) + 1;
+                       len = hdr->ph_len - (int)(p - (char *)hdr);
+
+                       cfs_print_to_console(hdr, D_EMERG, file, fn,
+                                            "%.*s", len, p);
+
+                       p += len;
+               }
+               list_del_init(&page->lru);
+               daemon_pages_count -= 1;
+               put_page(page);
+       }
+       up_write(&cfs_tracefile_sem);
 }
 #endif /* LNET_DUMP_ON_PANIC */
 
 int cfs_tracefile_dump_all_pages(char *filename)
 {
 }
 #endif /* LNET_DUMP_ON_PANIC */
 
 int cfs_tracefile_dump_all_pages(char *filename)
 {
-       struct page_collection  pc;
-       struct file             *filp;
-       struct cfs_trace_page   *tage;
-       struct cfs_trace_page   *tmp;
-       char                    *buf;
+       struct page_collection pc;
+       struct file *filp;
+       struct cfs_trace_page *tage;
+       struct cfs_trace_page *tmp;
+       char *buf;
+       struct page *page;
        int rc;
 
        down_write(&cfs_tracefile_sem);
        int rc;
 
        down_write(&cfs_tracefile_sem);
@@ -814,16 +790,15 @@ int cfs_tracefile_dump_all_pages(char *filename)
                rc = PTR_ERR(filp);
                filp = NULL;
                pr_err("LustreError: can't open %s for dump: rc = %d\n",
                rc = PTR_ERR(filp);
                filp = NULL;
                pr_err("LustreError: can't open %s for dump: rc = %d\n",
-                     filename, rc);
+                      filename, rc);
                goto out;
        }
 
                goto out;
        }
 
-        pc.pc_want_daemon_pages = 1;
-        collect_pages(&pc);
+       collect_pages(&pc);
        if (list_empty(&pc.pc_pages)) {
        if (list_empty(&pc.pc_pages)) {
-                rc = 0;
-                goto close;
-        }
+               rc = 0;
+               goto close;
+       }
 
        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
 
        /* ok, for now, just write the pages.  in the future we'll be building
         * iobufs with the pages and calling generic_direct_IO */
@@ -842,9 +817,21 @@ int cfs_tracefile_dump_all_pages(char *filename)
                        break;
                }
                list_del(&tage->linkage);
                        break;
                }
                list_del(&tage->linkage);
-                cfs_tage_free(tage);
-        }
-
+               cfs_tage_free(tage);
+       }
+       while ((page = list_first_entry_or_null(&daemon_pages,
+                                               struct page, lru)) != NULL) {
+               buf = page_address(page);
+               rc = cfs_kernel_write(filp, buf, page->private, &filp->f_pos);
+               if (rc != (int)page->private) {
+                       pr_warn("Lustre: wanted to write %u but wrote %d\n",
+                               (int)page->private, rc);
+                       break;
+               }
+               list_del(&page->lru);
+               daemon_pages_count -= 1;
+               put_page(page);
+       }
        rc = vfs_fsync_range(filp, 0, LLONG_MAX, 1);
        if (rc)
                pr_err("LustreError: sync returns: rc = %d\n", rc);
        rc = vfs_fsync_range(filp, 0, LLONG_MAX, 1);
        if (rc)
                pr_err("LustreError: sync returns: rc = %d\n", rc);
@@ -859,8 +846,8 @@ void cfs_trace_flush_pages(void)
 {
        struct page_collection pc;
        struct cfs_trace_page *tage;
 {
        struct page_collection pc;
        struct cfs_trace_page *tage;
+       struct page *page;
 
 
-       pc.pc_want_daemon_pages = 1;
        collect_pages(&pc);
        while (!list_empty(&pc.pc_pages)) {
                tage = list_first_entry(&pc.pc_pages,
        collect_pages(&pc);
        while (!list_empty(&pc.pc_pages)) {
                tage = list_first_entry(&pc.pc_pages,
@@ -870,6 +857,15 @@ void cfs_trace_flush_pages(void)
                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
                list_del(&tage->linkage);
                cfs_tage_free(tage);
        }
+
+       down_write(&cfs_tracefile_sem);
+       while ((page = list_first_entry_or_null(&daemon_pages,
+                                               struct page, lru)) != NULL) {
+               list_del(&page->lru);
+               daemon_pages_count -= 1;
+               put_page(page);
+       }
+       up_write(&cfs_tracefile_sem);
 }
 
 int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
 }
 
 int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
@@ -1000,6 +996,7 @@ int cfs_trace_set_debug_mb(int mb)
        cfs_tcd_for_each(tcd, i, j)
                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
 
        cfs_tcd_for_each(tcd, i, j)
                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
 
+       daemon_pages_max = pages;
        up_write(&cfs_tracefile_sem);
 
        return mb;
        up_write(&cfs_tracefile_sem);
 
        return mb;
@@ -1032,9 +1029,9 @@ static int tracefiled(void *arg)
        int last_loop = 0;
        int rc;
 
        int last_loop = 0;
        int rc;
 
-       pc.pc_want_daemon_pages = 0;
-
        while (!last_loop) {
        while (!last_loop) {
+               LIST_HEAD(for_daemon_pages);
+               int for_daemon_pages_count = 0;
                schedule_timeout_interruptible(cfs_time_seconds(1));
                if (kthread_should_stop())
                        last_loop = 1;
                schedule_timeout_interruptible(cfs_time_seconds(1));
                if (kthread_should_stop())
                        last_loop = 1;
@@ -1056,37 +1053,55 @@ static int tracefiled(void *arg)
                        }
                }
                up_read(&cfs_tracefile_sem);
                        }
                }
                up_read(&cfs_tracefile_sem);
-               if (filp == NULL) {
-                       put_pages_on_daemon_list(&pc);
-                       __LASSERT(list_empty(&pc.pc_pages));
-                       continue;
-               }
 
                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
 
                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-                       struct dentry *de = file_dentry(filp);
-                       static loff_t f_pos;
-
                        __LASSERT_TAGE_INVARIANT(tage);
 
                        __LASSERT_TAGE_INVARIANT(tage);
 
-                       if (f_pos >= (off_t)cfs_tracefile_size)
-                               f_pos = 0;
-                       else if (f_pos > i_size_read(de->d_inode))
-                               f_pos = i_size_read(de->d_inode);
-
-                       buf = kmap(tage->page);
-                       rc = cfs_kernel_write(filp, buf, tage->used, &f_pos);
-                       kunmap(tage->page);
-                       if (rc != (int)tage->used) {
-                               pr_warn("Lustre: wanted to write %u but wrote %d\n",
-                                       tage->used, rc);
-                               put_pages_back(&pc);
-                               __LASSERT(list_empty(&pc.pc_pages));
-                               break;
+                       if (filp) {
+                               struct dentry *de = file_dentry(filp);
+                               static loff_t f_pos;
+
+                               if (f_pos >= (off_t)cfs_tracefile_size)
+                                       f_pos = 0;
+                               else if (f_pos > i_size_read(de->d_inode))
+                                       f_pos = i_size_read(de->d_inode);
+
+                               buf = kmap(tage->page);
+                               rc = cfs_kernel_write(filp, buf, tage->used,
+                                                     &f_pos);
+                               kunmap(tage->page);
+                               if (rc != (int)tage->used) {
+                                       pr_warn("Lustre: wanted to write %u but wrote %d\n",
+                                               tage->used, rc);
+                                       put_pages_back(&pc);
+                                       __LASSERT(list_empty(&pc.pc_pages));
+                                       break;
+                               }
                        }
                        }
+                       list_del_init(&tage->linkage);
+                       list_add_tail(&tage->page->lru, &for_daemon_pages);
+                       for_daemon_pages_count += 1;
+
+                       tage->page->private = (int)tage->used;
+                       kfree(tage);
+                       atomic_dec(&cfs_tage_allocated);
                }
 
                }
 
-               filp_close(filp, NULL);
-               put_pages_on_daemon_list(&pc);
+               if (filp)
+                       filp_close(filp, NULL);
+
+               down_write(&cfs_tracefile_sem);
+               list_splice_tail(&for_daemon_pages, &daemon_pages);
+               daemon_pages_count += for_daemon_pages_count;
+               while (daemon_pages_count > daemon_pages_max) {
+                       struct page *p = list_first_entry(&daemon_pages,
+                                                         struct page, lru);
+                       list_del(&p->lru);
+                       put_page(p);
+                       daemon_pages_count -= 1;
+               }
+               up_write(&cfs_tracefile_sem);
+
                if (!list_empty(&pc.pc_pages)) {
                        int i;
 
                if (!list_empty(&pc.pc_pages)) {
                        int i;
 
@@ -1178,14 +1193,13 @@ int cfs_tracefile_init(int max_pages)
 
                INIT_LIST_HEAD(&tcd->tcd_pages);
                INIT_LIST_HEAD(&tcd->tcd_stock_pages);
 
                INIT_LIST_HEAD(&tcd->tcd_pages);
                INIT_LIST_HEAD(&tcd->tcd_stock_pages);
-               INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_stock_pages = 0;
                tcd->tcd_cur_pages = 0;
                tcd->tcd_cur_stock_pages = 0;
-               tcd->tcd_cur_daemon_pages = 0;
                tcd->tcd_max_pages = (max_pages * factor) / 100;
                LASSERT(tcd->tcd_max_pages > 0);
                tcd->tcd_shutting_down = 0;
        }
                tcd->tcd_max_pages = (max_pages * factor) / 100;
                LASSERT(tcd->tcd_max_pages > 0);
                tcd->tcd_shutting_down = 0;
        }
+       daemon_pages_max = max_pages;
 
        return 0;
 
 
        return 0;
 
@@ -1242,6 +1256,7 @@ static void cfs_trace_cleanup(void)
 
 void cfs_tracefile_exit(void)
 {
 
 void cfs_tracefile_exit(void)
 {
-        cfs_trace_stop_thread();
-        cfs_trace_cleanup();
+       cfs_trace_stop_thread();
+       cfs_trace_flush_pages();
+       cfs_trace_cleanup();
 }
 }
index 9ced175..406a8d5 100644 (file)
@@ -87,22 +87,7 @@ union cfs_trace_data_union {
                unsigned long           tcd_cur_pages;
 
                /*
                unsigned long           tcd_cur_pages;
 
                /*
-                * pages with trace records already processed by
-                * tracefiled. These pages are kept in memory, so that some
-                * portion of log can be written in the event of LBUG. This
-                * list is maintained in LRU order.
-                *
-                * Pages are moved to ->tcd_daemon_pages by tracefiled()
-                * (put_pages_on_daemon_list()). LRU pages from this list are
-                * discarded when list grows too large.
-                */
-               struct list_head        tcd_daemon_pages;
-               /* number of pages on ->tcd_daemon_pages */
-               unsigned long           tcd_cur_daemon_pages;
-
-               /*
-                * Maximal number of pages allowed on ->tcd_pages and
-                * ->tcd_daemon_pages each.
+                * Maximal number of pages allowed on ->tcd_pages
                 * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
                 * implementation.
                 */
                 * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current
                 * implementation.
                 */
@@ -147,12 +132,6 @@ union cfs_trace_data_union {
  * be moved there */
 struct page_collection {
        struct list_head        pc_pages;
  * be moved there */
 struct page_collection {
        struct list_head        pc_pages;
-       /*
-        * if this flag is set, collect_pages() will spill both
-        * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
-        * only ->tcd_pages are spilled.
-        */
-       int                     pc_want_daemon_pages;
 };
 
 /*
 };
 
 /*