Whamcloud - gitweb
Branch b_release_1_8_1
[fs/lustre-release.git] / libcfs / libcfs / tracefile.c
index 5b9543e..af9250d 100644 (file)
@@ -150,8 +150,9 @@ static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd,
                 } else {
                         tage = tage_alloc(CFS_ALLOC_ATOMIC);
                         if (tage == NULL) {
-                                printk(KERN_WARNING
-                                       "failure to allocate a tage (%ld)\n",
+                                if (printk_ratelimit())
+                                        printk(KERN_WARNING
+                                               "cannot allocate a tage (%ld)\n",
                                        tcd->tcd_cur_pages);
                                 return NULL;
                         }
@@ -182,10 +183,10 @@ static void tcd_shrink(struct trace_cpu_data *tcd)
         struct trace_page *tage;
         struct trace_page *tmp;
 
-       /*
-        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-        * from here: this will lead to infinite recursion.
-        */
+        /*
+         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+         * from here: this will lead to infinite recursion.
+         */
 
         if (printk_ratelimit())
                 printk(KERN_WARNING "debug daemon buffer overflowed; "
@@ -195,7 +196,7 @@ static void tcd_shrink(struct trace_cpu_data *tcd)
         CFS_INIT_LIST_HEAD(&pc.pc_pages);
         spin_lock_init(&pc.pc_lock);
 
-        cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages, 
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages,
                                            struct trace_page, linkage) {
                 if (pgcount-- == 0)
                         break;
@@ -212,10 +213,10 @@ static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
 {
         struct trace_page *tage;
 
-       /*
-        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
-        * from here: this will lead to infinite recursion.
-        */
+        /*
+         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+         * from here: this will lead to infinite recursion.
+         */
 
         if (len > CFS_PAGE_SIZE) {
                 printk(KERN_ERR
@@ -315,7 +316,7 @@ int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
                 }
 
                 if (format2) {
-                       remain = max_nob - needed;
+                        remain = max_nob - needed;
                         if (remain < 0)
                                 remain = 0;
 
@@ -450,6 +451,7 @@ libcfs_assertion_failed(const char *expr, const char *file,
 {
         libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
                          "ASSERTION(%s) failed\n", expr);
+        /* cfs_enter_debugger(); */
         lbug_with_loc(file, func, line);
 }
 EXPORT_SYMBOL(libcfs_assertion_failed);
@@ -497,19 +499,21 @@ panic_collect_pages(struct page_collection *pc)
         }
 }
 
-static void collect_pages_on_cpu(void *info)
+static void collect_pages_on_all_cpus(struct page_collection *pc)
 {
         struct trace_cpu_data *tcd;
-        struct page_collection *pc = info;
-        int i;
+        int i, cpu;
 
         spin_lock(&pc->pc_lock);
-        tcd_for_each_type_lock(tcd, i) {
-                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
-                tcd->tcd_cur_pages = 0;
-                if (pc->pc_want_daemon_pages) {
-                        list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
-                        tcd->tcd_cur_daemon_pages = 0;
+        for_each_possible_cpu(cpu) {
+                tcd_for_each_type_lock(tcd, i, cpu) {
+                        list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+                        tcd->tcd_cur_pages = 0;
+                        if (pc->pc_want_daemon_pages) {
+                                list_splice_init(&tcd->tcd_daemon_pages,
+                                                 &pc->pc_pages);
+                                tcd->tcd_cur_daemon_pages = 0;
+                        }
                 }
         }
         spin_unlock(&pc->pc_lock);
@@ -522,32 +526,35 @@ static void collect_pages(struct page_collection *pc)
         if (libcfs_panic_in_progress)
                 panic_collect_pages(pc);
         else
-                trace_call_on_all_cpus(collect_pages_on_cpu, pc);
+                collect_pages_on_all_cpus(pc);
 }
 
-static void put_pages_back_on_cpu(void *info)
+static void put_pages_back_on_all_cpus(struct page_collection *pc)
 {
-        struct page_collection *pc = info;
         struct trace_cpu_data *tcd;
         struct list_head *cur_head;
         struct trace_page *tage;
         struct trace_page *tmp;
-        int i;
+        int i, cpu;
 
         spin_lock(&pc->pc_lock);
-        tcd_for_each_type_lock(tcd, i) {
-                cur_head = tcd->tcd_pages.next;
+        for_each_possible_cpu(cpu) {
+                tcd_for_each_type_lock(tcd, i, cpu) {
+                        cur_head = tcd->tcd_pages.next;
 
-                cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages,
-                                                   struct trace_page, linkage) {
+                        cfs_list_for_each_entry_safe_typed(tage, tmp,
+                                                           &pc->pc_pages,
+                                                           struct trace_page,
+                                                           linkage) {
 
-                        __LASSERT_TAGE_INVARIANT(tage);
+                                __LASSERT_TAGE_INVARIANT(tage);
 
-                        if (tage->cpu != smp_processor_id() || tage->type != i)
-                                continue;
+                                if (tage->cpu != cpu || tage->type != i)
+                                        continue;
 
-                        tage_to_tail(tage, cur_head);
-                        tcd->tcd_cur_pages++;
+                                tage_to_tail(tage, cur_head);
+                                tcd->tcd_cur_pages++;
+                        }
                 }
         }
         spin_unlock(&pc->pc_lock);
@@ -556,7 +563,7 @@ static void put_pages_back_on_cpu(void *info)
 static void put_pages_back(struct page_collection *pc)
 {
         if (!libcfs_panic_in_progress)
-                trace_call_on_all_cpus(put_pages_back_on_cpu, pc);
+                put_pages_back_on_all_cpus(pc);
 }
 
 /* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
@@ -575,8 +582,7 @@ static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
 
                 __LASSERT_TAGE_INVARIANT(tage);
 
-                if (tage->cpu != smp_processor_id() ||
-                    tage->type != tcd->tcd_type)
+                if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
                         continue;
 
                 tage_to_tail(tage, &tcd->tcd_daemon_pages);
@@ -598,18 +604,15 @@ static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
         spin_unlock(&pc->pc_lock);
 }
 
-static void put_pages_on_daemon_list_on_cpu(void *info)
+static void put_pages_on_daemon_list(struct page_collection *pc)
 {
         struct trace_cpu_data *tcd;
-        int i;
-
-        tcd_for_each_type_lock(tcd, i)
-                put_pages_on_tcd_daemon_list(info, tcd);
-}
+        int i, cpu;
 
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-        trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc);
+        for_each_possible_cpu(cpu) {
+                tcd_for_each_type_lock(tcd, i, cpu)
+                        put_pages_on_tcd_daemon_list(pc, tcd);
+        }
 }
 
 void trace_debug_print(void)
@@ -830,20 +833,22 @@ int trace_daemon_command(char *str)
 {
         int       rc = 0;
 
-       tracefile_write_lock();
+        tracefile_write_lock();
 
-       if (strcmp(str, "stop") == 0) {
-               trace_stop_thread();
+        if (strcmp(str, "stop") == 0) {
+                tracefile_write_unlock(); /* tracefiled takes the read   */
+                trace_stop_thread();      /* lock; presumably the stop   */
+                tracefile_write_lock();   /* waits on it - TODO confirm  */
                 memset(tracefile, 0, sizeof(tracefile));
 
-       } else if (strncmp(str, "size=", 5) == 0) {
-               tracefile_size = simple_strtoul(str + 5, NULL, 0);
-               if (tracefile_size < 10 || tracefile_size > 20480)
-                       tracefile_size = TRACEFILE_SIZE;
-               else
-                       tracefile_size <<= 20;
+        } else if (strncmp(str, "size=", 5) == 0) {
+                tracefile_size = simple_strtoul(str + 5, NULL, 0);
+                if (tracefile_size < 10 || tracefile_size > 20480)
+                        tracefile_size = TRACEFILE_SIZE;
+                else
+                        tracefile_size <<= 20;
 
-       } else if (strlen(str) >= sizeof(tracefile)) {
+        } else if (strlen(str) >= sizeof(tracefile)) {
                 rc = -ENAMETOOLONG;
 #ifndef __WINNT__
         } else if (str[0] != '/') {
@@ -859,14 +864,14 @@ int trace_daemon_command(char *str)
                 trace_start_thread();
         }
 
-       tracefile_write_unlock();
-       return rc;
+        tracefile_write_unlock();
+        return rc;
 }
 
 int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
 {
-       char *str;
-       int   rc;
+        char *str;
+        int   rc;
 
         rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
         if (rc != 0)
@@ -878,54 +883,54 @@ int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
                 rc = trace_daemon_command(str);
 
         trace_free_string_buffer(str, usr_str_nob + 1);
-       return rc;
+        return rc;
 }
 
 int trace_set_debug_mb(int mb)
 {
-       int i;
+        int i;
         int j;
         int pages;
         int limit = trace_max_debug_mb();
         struct trace_cpu_data *tcd;
 
-       if (mb < num_possible_cpus())
-               return -EINVAL;
+        if (mb < num_possible_cpus())
+                return -EINVAL;
 
-       if (mb > limit) {
-               printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
-                      "%dMB - limit is %d\n", mb, limit);
-               return -EINVAL;
-       }
+        if (mb > limit) {
+                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
+                       "%dMB - limit is %d\n", mb, limit);
+                return -EINVAL;
+        }
 
-       mb /= num_possible_cpus();
+        mb /= num_possible_cpus();
         pages = mb << (20 - CFS_PAGE_SHIFT);
 
         tracefile_write_lock();
 
         tcd_for_each(tcd, i, j)
-               tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
+                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
 
         tracefile_write_unlock();
 
-       return 0;
+        return 0;
 }
 
 int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
 {
-       char     str[32];
+        char     str[32];
         int      rc;
 
         rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
         if (rc < 0)
                 return rc;
 
-       return trace_set_debug_mb(simple_strtoul(str, NULL, 0));
+        return trace_set_debug_mb(simple_strtoul(str, NULL, 0));
 }
 
 int trace_get_debug_mb(void)
 {
-       int i;
+        int i;
         int j;
         struct trace_cpu_data *tcd;
         int total_pages = 0;
@@ -948,6 +953,7 @@ static int tracefiled(void *arg)
         struct trace_page *tmp;
         struct ptldebug_header *hdr;
         cfs_file_t *filp;
+        int last_loop = 0;
         int rc;
 
         CFS_DECL_MMSPACE;
@@ -962,20 +968,10 @@ static int tracefiled(void *arg)
         while (1) {
                 cfs_waitlink_t __wait;
 
-                cfs_waitlink_init(&__wait);
-                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
-                set_current_state(TASK_INTERRUPTIBLE);
-                cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
-                                    cfs_time_seconds(1));
-                cfs_waitq_del(&tctl->tctl_waitq, &__wait);
-
-                if (atomic_read(&tctl->tctl_shutdown))
-                        break;
-
                 pc.pc_want_daemon_pages = 0;
                 collect_pages(&pc);
                 if (list_empty(&pc.pc_pages))
-                        continue;
+                        goto end_loop;
 
                 filp = NULL;
                 tracefile_read_lock();
@@ -991,7 +987,7 @@ static int tracefiled(void *arg)
                 if (filp == NULL) {
                         put_pages_on_daemon_list(&pc);
                         __LASSERT(list_empty(&pc.pc_pages));
-                        continue;
+                        goto end_loop;
                 }
 
                 CFS_MMSPACE_OPEN;
@@ -1027,7 +1023,41 @@ static int tracefiled(void *arg)
 
                 cfs_filp_close(filp);
                 put_pages_on_daemon_list(&pc);
+                if (!list_empty(&pc.pc_pages)) {
+                        int i;
+
+                        /* CPU ids may be sparse, so walk the possible mask;
+                         * each printk() below emits one complete line. */
+                        printk(KERN_ALERT "Lustre: trace pages aren't empty\n");
+                        printk(KERN_ERR "total cpus(%d)\n",
+                               num_possible_cpus());
+                        for_each_possible_cpu(i)
+                                printk(KERN_ERR "cpu %d(%s)\n", i,
+                                       cpu_online(i) ? "on" : "off");
+
+                        i = 0;
+                        list_for_each_entry_safe(tage, tmp, &pc.pc_pages,
+                                                 linkage)
+                                printk(KERN_ERR "page %d belongs to cpu %d\n",
+                                       ++i, tage->cpu);
+                        printk(KERN_ERR "There are %d pages unwritten\n", i);
+                }
                 __LASSERT(list_empty(&pc.pc_pages));
+end_loop:
+                if (atomic_read(&tctl->tctl_shutdown)) {
+                        if (last_loop == 0) {
+                                last_loop = 1; /* shutdown seen: run one  */
+                                continue;      /* more pass, no sleep     */
+                        } else {
+                                break;         /* final pass is done      */
+                        }
+                }
+                cfs_waitlink_init(&__wait);
+                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
+                set_current_state(TASK_INTERRUPTIBLE);
+                cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
+                                    cfs_time_seconds(1));
+                cfs_waitq_del(&tctl->tctl_waitq, &__wait);
         }
         complete(&tctl->tctl_stop);
         return 0;
@@ -1102,24 +1132,29 @@ int tracefile_init(int max_pages)
         return 0;
 }
 
-static void trace_cleanup_on_cpu(void *info)
+static void trace_cleanup_on_all_cpus(void)
 {
         struct trace_cpu_data *tcd;
         struct trace_page *tage;
         struct trace_page *tmp;
-        int i;
+        int i, cpu;
 
-        tcd_for_each_type_lock(tcd, i) {
-                tcd->tcd_shutting_down = 1;
+        for_each_possible_cpu(cpu) {
+                tcd_for_each_type_lock(tcd, i, cpu) {
+                        tcd->tcd_shutting_down = 1;
 
-                cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages,
-                                                   struct trace_page, linkage) {
-                        __LASSERT_TAGE_INVARIANT(tage);
+                        cfs_list_for_each_entry_safe_typed(tage, tmp,
+                                                           &tcd->tcd_pages,
+                                                           struct trace_page,
+                                                           linkage) {
+                                __LASSERT_TAGE_INVARIANT(tage);
+
+                                list_del(&tage->linkage);
+                                tage_free(tage);
+                        }
 
-                        list_del(&tage->linkage);
-                        tage_free(tage);
+                        tcd->tcd_cur_pages = 0;
                 }
-                tcd->tcd_cur_pages = 0;
         }
 }
 
@@ -1130,7 +1165,7 @@ static void trace_cleanup(void)
         CFS_INIT_LIST_HEAD(&pc.pc_pages);
         spin_lock_init(&pc.pc_lock);
 
-        trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc);
+        trace_cleanup_on_all_cpus();
 
         tracefile_fini_arch();
 }