Whamcloud - gitweb
b=15977 analyse only consistent part of the log
authorVitaly Fertman <Vitaly.Fertman@Sun.COM>
Thu, 11 Feb 2010 20:39:25 +0000 (12:39 -0800)
committerRobert Read <rread@sun.com>
Thu, 11 Feb 2010 20:39:25 +0000 (12:39 -0800)
due to debug log buffer overflow, old messages are lost;
as each CPU has its own buffers which overflows at different time,
it may happen that FREE is lost for still existing ALLOC;
make leak_finder to analyze the log since the oldest overflow time.

i=rread
i=green

libcfs/include/libcfs/libcfs_debug.h
libcfs/libcfs/darwin/darwin-tracefile.c
libcfs/libcfs/linux/linux-tracefile.c
libcfs/libcfs/linux/linux-tracefile.h
libcfs/libcfs/tracefile.c
libcfs/libcfs/winnt/winnt-tracefile.c
lnet/utils/debug.c
lustre/tests/conf-sanity.sh
lustre/tests/leak_finder.pl

index 1cc8844..5380c61 100644 (file)
@@ -65,6 +65,16 @@ extern unsigned int libcfs_catastrophe;
 extern unsigned int libcfs_panic_on_lbug;
 
 /**
+ * three types of trace_data in linux
+ */
+typedef enum {
+       CFS_TCD_TYPE_PROC = 0,
+       CFS_TCD_TYPE_SOFTIRQ,
+       CFS_TCD_TYPE_IRQ,
+       CFS_TCD_TYPE_MAX
+} cfs_trace_buf_type_t;
+
+/**
  * Format for debug message headers
  */
 struct ptldebug_header {
@@ -72,7 +82,8 @@ struct ptldebug_header {
         __u32 ph_flags;
         __u32 ph_subsys;
         __u32 ph_mask;
-        __u32 ph_cpu_id;
+        __u16 ph_cpu_id;
+        __u16 ph_type;
         __u32 ph_sec;
         __u64 ph_usec;
         __u32 ph_stack;
index 74eb1e3..7ab943f 100644 (file)
@@ -173,7 +173,8 @@ set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
        do_gettimeofday(&tv);
        header->ph_subsys = subsys;
        header->ph_mask = mask;
-       header->ph_cpu_id = smp_processor_id();
+       header->ph_cpu_id = cfs_smp_processor_id();
+       header->ph_type = 0;
        header->ph_sec = (__u32)tv.tv_sec;
        header->ph_usec = tv.tv_usec;
        header->ph_stack = stack;
index fd6e6b9..3028269 100644 (file)
@@ -190,7 +190,8 @@ cfs_set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
 
        header->ph_subsys = subsys;
        header->ph_mask = mask;
-       header->ph_cpu_id = smp_processor_id();
+       header->ph_cpu_id = cfs_smp_processor_id();
+       header->ph_type = cfs_trace_buf_idx_get();
        header->ph_sec = (__u32)tv.tv_sec;
        header->ph_usec = tv.tv_usec;
        header->ph_stack = stack;
index b910074..8ad632c 100644 (file)
 #ifndef __LIBCFS_LINUX_TRACEFILE_H__
 #define __LIBCFS_LINUX_TRACEFILE_H__
 
-/**
- * three types of trace_data in linux
- */
-typedef enum {
-        CFS_TCD_TYPE_PROC = 0,
-        CFS_TCD_TYPE_SOFTIRQ,
-        CFS_TCD_TYPE_IRQ,
-        CFS_TCD_TYPE_MAX
-} cfs_trace_buf_type_t;
-
 #endif
index 5a29dfa..63a23ef 100644 (file)
@@ -245,7 +245,7 @@ int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
                        const char *format2, ...)
 {
         struct cfs_trace_cpu_data *tcd = NULL;
-        struct ptldebug_header     header;
+        struct ptldebug_header     header = {0};
         struct cfs_trace_page     *tage;
         /* string_buf is used only if tcd != NULL, and is always set then */
         char                      *string_buf = NULL;
@@ -268,6 +268,9 @@ int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
         if (tcd == NULL)                /* arch may not log in IRQ context */
                 goto console;
 
+        if (tcd->tcd_cur_pages == 0)
+                header.ph_flags |= PH_FLAG_FIRST_RECORD;
+
         if (tcd->tcd_shutting_down) {
                 cfs_trace_put_tcd(tcd);
                 tcd = NULL;
@@ -961,7 +964,6 @@ static int tracefiled(void *arg)
         struct tracefiled_ctl *tctl = arg;
         struct cfs_trace_page *tage;
         struct cfs_trace_page *tmp;
-        struct ptldebug_header *hdr;
         cfs_file_t *filp;
         int last_loop = 0;
         int rc;
@@ -1002,13 +1004,6 @@ static int tracefiled(void *arg)
 
                 CFS_MMSPACE_OPEN;
 
-                /* mark the first header, so we can sort in chunks */
-                tage = cfs_tage_from_list(pc.pc_pages.next);
-                __LASSERT_TAGE_INVARIANT(tage);
-
-                hdr = cfs_page_address(tage->page);
-                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
-
                 cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
                                                    struct cfs_trace_page,
                                                    linkage) {
index 6e63f49..6a99d7b 100644 (file)
@@ -176,6 +176,7 @@ cfs_set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
        header->ph_subsys = subsys;
        header->ph_mask = mask;
        header->ph_cpu_id = cfs_smp_processor_id();
+       header->ph_type = cfs_trace_buf_idx_get();
        header->ph_sec = (__u32)tv.tv_sec;
        header->ph_usec = tv.tv_usec;
        header->ph_stack = stack;
index 91200c6..ca17a3d 100644 (file)
@@ -367,8 +367,10 @@ static void print_rec(struct dbg_line ***linevp, int used, int fdout)
                 int bytes;
                 ssize_t bytes_written;
 
-                bytes = sprintf(out, "%08x:%08x:%u:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s",
-                                hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id,
+                bytes = sprintf(out, "%08x:%08x:%u.%u%s:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s",
+                                hdr->ph_subsys, hdr->ph_mask,
+                                hdr->ph_cpu_id, hdr->ph_type,
+                                hdr->ph_flags & PH_FLAG_FIRST_RECORD ? "F" : "",
                                 hdr->ph_sec, (unsigned long long)hdr->ph_usec,
                                 hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid,
                                 line->file, hdr->ph_line_num, line->fn, line->text);
@@ -415,6 +417,7 @@ static void dump_hdr(unsigned long long offset, struct ptldebug_header *hdr)
         fprintf(stderr, "  subsystem = %x\n", hdr->ph_subsys);
         fprintf(stderr, "  mask = %x\n", hdr->ph_mask);
         fprintf(stderr, "  cpu_id = %u\n", hdr->ph_cpu_id);
+        fprintf(stderr, "  type = %u\n", hdr->ph_type);
         fprintf(stderr, "  seconds = %u\n", hdr->ph_sec);
         fprintf(stderr, "  microseconds = %lu\n", (long)hdr->ph_usec);
         fprintf(stderr, "  stack = %u\n", hdr->ph_stack);
@@ -454,7 +457,7 @@ static int parse_buffer(int fdin, int fdout)
                         goto readhdr;
                 
                 if (hdr->ph_len > 4094 ||       /* is this header bogus? */
-                    hdr->ph_cpu_id > 65536 ||
+                    hdr->ph_type >= CFS_TCD_TYPE_MAX ||
                     hdr->ph_stack > 65536 ||
                     hdr->ph_sec < (1 << 30) ||
                     hdr->ph_usec > 1000000000 ||
index a4badd6..199c73c 100644 (file)
@@ -13,7 +13,7 @@ ONLY=${ONLY:-"$*"}
 
 # bug number for skipped test:
 #               15977
-ALWAYS_EXCEPT=" 39    $CONF_SANITY_EXCEPT"
+ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 SRCDIR=`dirname $0`
index 45d2211..a7c7abb 100644 (file)
@@ -11,10 +11,48 @@ my $debug_line = 0;
 my $total = 0;
 my $max = 0;
 
-while ($line = <>) {
+my @parsed;
+my %cpu;
+my $start_time = 0;
+
+if (!defined($ARGV[0])) {
+        print "No log file specified\n";
+        exit
+}
+
+open(INFILE, $ARGV[0]);
+while ($line = <INFILE>) {
+    if ($line =~ m/^(.*)\((.*):(\d+):(.*)\(\)\)/) {
+        @parsed = split(":", $1);
+        if (substr ($parsed[2], -1, 1) eq "F") {
+            chop $parsed[2];
+            $cpu{$parsed[2]} = 0;
+        } else {
+            if (!defined($cpu{$parsed[2]})) {
+                $cpu{$parsed[2]} = $parsed[3];
+            }
+        }
+    }
+}
+
+foreach $time (values %cpu) {
+    if ($start_time < $time) {
+        $start_time = $time;
+    }
+}
+
+print "Starting analysis since $start_time\n";
+
+seek(INFILE, 0, 0);
+while ($line = <INFILE>) {
     $debug_line++;
     my ($file, $func, $lno, $name, $size, $addr, $type);
-    if ($line =~ m/^.*(\.).*\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at ([\da-f]+)/){
+    if ($line =~ m/^(.*)\((.*):(\d+):(.*)\(\)\) (k|v|slab-)(.*) '(.*)': (\d+) at ([\da-f]+)/){
+        @parsed = split(":", $1);
+        if ($parsed[3] <= $start_time) {
+                next;
+        }
+        
         $file = $2;
         $lno  = $3;
         $func = $4;
@@ -69,6 +107,7 @@ while ($line = <>) {
         $total -= $size;
     }
 }
+close(INFILE);
 
 # Sort leak output by allocation time
 my @sorted = sort {