Whamcloud - gitweb
LU-17253 tools: extract lustre logs from cores 47/52947/5
authorAlexey Lyashkov <alexey.lyashkov@hpe.com>
Mon, 23 Dec 2024 17:24:40 +0000 (12:24 -0500)
committerOleg Drokin <green@whamcloud.com>
Sat, 18 Jan 2025 22:03:02 +0000 (22:03 +0000)
Crash extension to extract lustre logs from crash dumps.

Test-Parameters: trivial
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Change-Id: I20afb41b1cc90923fe68ccb06618ffacc1a8221c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52947
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
contrib/debug_tools/crash-mod/README [new file with mode: 0644]
contrib/debug_tools/crash-mod/lustre-ext.c [new file with mode: 0644]
contrib/debug_tools/crash-mod/mk.sh [new file with mode: 0755]

diff --git a/contrib/debug_tools/crash-mod/README b/contrib/debug_tools/crash-mod/README
new file mode 100644 (file)
index 0000000..832e2e9
--- /dev/null
@@ -0,0 +1,44 @@
+Special module to allow the crash utility to extract only the lustre
+debug information from a dump.
+
+Before building we need to setup the environment. For RHEL systems you
+need to install crash-devel so mk.sh will work. Also install the
+debuginfo kernel rpms as well as the lustre debuginfo rpms.
+
+Build as follows:
+
+  contrib/debug_tools/crash-mod/mk.sh
+
+  ...or if you prefer...
+
+  gcc -nostartfiles -shared -rdynamic -I/usr/include/crash -o lustre-ext.so lustre-ext.c -fPIC -D<arch>
+
+  Then copy lustre-ext.so to /usr/lib64/crash/extensions/
+
+Example usage:
+
+  # Launch crash against a crash dump or live kernel to extract
+  # and re-assemble the binary lustre debug log.
+  > crash <your kernel crash dump> <your vmlinux>
+    ...
+    crash> mod -S
+      ...
+    crash> extend lustre.so
+      lustre.so: shared object loaded
+    crash> lustre -l /tmp/foo.dl
+      lustre: Dumped 2560 debug pages from CPU 0
+      lustre: Dumped 2560 debug pages from CPU 1
+      lustre: Dumped 2560 debug pages from CPU 2
+      lustre: Dumped 2560 debug pages from CPU 3
+      lustre: Dumped 2560 debug pages from CPU 4
+      lustre: Dumped 2560 debug pages from CPU 5
+      lustre: Dumped 2560 debug pages from CPU 6
+      lustre: Dumped 2560 debug pages from CPU 7
+      lustre: Dumped 20480 total debug pages from 8 CPUs to /tmp/foo.dl
+    crash> quit
+
+  # Convert the binary debug log to a text debug log
+  > /usr/sbin/lctl df /tmp/foo.dl >/tmp/foo.dk
+
+  # View your debug log
+  > vim /tmp/foo.dk
diff --git a/contrib/debug_tools/crash-mod/lustre-ext.c b/contrib/debug_tools/crash-mod/lustre-ext.c
new file mode 100644 (file)
index 0000000..e3eb6c3
--- /dev/null
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-v2.0+
+/*
+ * Copyright (C) 2007, Lawrence Livermore National Labs
+ * Author: Brian Behlendorf
+ * Copyright (C) 2017-2023, Cray Inc.
+ * Author: Alexey Lyashkov
+ */
+
+#include "defs.h"
+
+#define UINT32_LEN   10                /* ceil(log10(UINT_MAX)) */
+#define UINT64_LEN   20                /* ceil(log10(ULONG_MAX)) */
+
+/* three types of trace_data in linux */
+enum {
+       TCD_TYPE_PROC = 0,
+       TCD_TYPE_SOFTIRQ,
+       TCD_TYPE_IRQ,
+       TCD_TYPE_MAX
+};
+
+struct cfs_trace_page {
+       /*
+        * page itself
+        */
+       struct page *page;
+       /*
+        * linkage into one of the lists in trace_data_union or
+        * page_collection
+        */
+       struct kernel_list_head linkage;
+       /*
+        * number of bytes used within this page
+        */
+       unsigned int used;
+       /*
+        * cpu that owns this page
+        */
+       unsigned short cpu;
+       /*
+        * type(context) of this page
+        */
+       unsigned short type;
+};
+
+#define LUSTRE_PAGES           1
+#define LUSTRE_DAEMON_PAGES    2
+#define LUSTRE_STOCK_PAGES     3
+
+#define FMT_PAGE_COUNT         "$%*d = %d"
+
+static const char fmt_page_list_head[] =
+    "$%*d = (struct list_head *) %lx";
+static const char lustre2_pfx[] = "cfs_";
+static const char *name_prefix = lustre2_pfx;
+
+void cmd_lustre(void);
+char *help_lustre[];
+int global_daemon_pages;
+/* not all crash tools have  OFFSET(page_private) */
+unsigned int pg_private_off;
+
+static struct command_table_entry command_table[] = {
+       { "lustre", cmd_lustre, help_lustre, 0 },
+       { NULL }
+};
+
+void _init(void)
+{
+       if (symbol_exists("daemon_pages"))
+               global_daemon_pages = 1;
+
+       pg_private_off = MEMBER_OFFSET("page", "private");
+
+       register_extension(command_table);
+}
+
+void _fini(void)
+{
+}
+
+/* Given a pointer to a page frame append the assoicated linear
+ * address range to the passed file descriptor.
+ */
+static int
+lustre_write_page_frame(int fd, ulong tp_addr, ulong kvaddr, int used)
+{
+       char buf[PAGESIZE()];
+       physaddr_t kpaddr;
+       int retry = 0;
+       ssize_t rc, count = 0;
+
+       if (!is_page_ptr(kvaddr, &kpaddr)) {
+               error(WARNING,
+                     "Skipping trace page %p which references an invalid page pointer (%#x)\n",
+                     tp_addr, kvaddr);
+               return -EADDRNOTAVAIL;
+       }
+
+       if (!readmem(kpaddr, PHYSADDR, buf, used,
+                    "trace page data", RETURN_ON_ERROR)) {
+               error(WARNING,
+                     "Skipping trace page %p, unable to read data from physical address (%#x)\n",
+                     tp_addr, kpaddr);
+               return -EIO;
+       }
+
+       while (count < used) {
+               rc = write(fd, buf + count, used - count);
+               if (rc >= 0) {
+                       count += rc;
+
+                       if ((rc == 0) && (retry++ > 5)) {
+                               error(WARNING,
+                                     "Partial trace page %#lx written to output file (%d/%d bytes)\n",
+                                     tp_addr, count, used);
+                               return count;
+                       }
+               } else {
+                       error(WARNING,
+                             "Unable to write to output file: %s (%d)\n",
+                             strerror(errno), errno);
+                       return -rc;
+               }
+       }
+
+       return count;
+}
+
+static int lustre_walk_trace_pages(int cpu, int fd, ulong lh_addr)
+{
+       static const char *name = "cfs_trace_page";
+       struct list_data ld;
+       int count, rc, i = 0, ret = 0;
+       struct cfs_trace_page buf;
+
+
+       BZERO(&ld, sizeof(struct list_data));
+       ld.end = lh_addr;
+       ld.list_head_offset = MEMBER_OFFSET((char *) name, "linkage");
+       ld.member_offset = ld.list_head_offset;
+
+       readmem(lh_addr + ld.member_offset, KVADDR, &ld.start,
+               sizeof(void *), "LIST_HEAD contents", FAULT_ON_ERROR);
+       ld.structname_args = 1;
+       ld.structname =
+           (char **) GETBUF(sizeof(char *) * ld.structname_args);
+       ld.structname[0] = (char *) name;
+       ld.flags |= LIST_ALLOCATE | LIST_HEAD_FORMAT | LIST_HEAD_POINTER;
+
+       hq_open();
+       count = do_list(&ld);
+       hq_close();
+
+       printf("%s(%d, %d, %#lx) = %d\n", __func__, cpu, fd, lh_addr, count);
+       /* count include a list_head itself for empty */
+       if (count <= 1)
+               return 0;
+
+       for (i = 0; i < count; ++i) {
+#if 0
+               printf("i = %d, count = %d, tp_addr = %#lx, tp_page = %p, tp_next = %p, tp_prev = %p, tp_used = %d tcpu = %d\n",
+                    i, count, ld.list_ptr[i], buf.page,
+                    buf.linkage.next, buf.linkage.prev, buf.used, buf.cpu);
+#endif
+
+               readmem(ld.list_ptr[i], KVADDR, (char *) &buf, sizeof(buf),
+                       "cfs_trace_page buffer", FAULT_ON_ERROR);
+
+               /* Validate the list heads for some sanity */
+               if ((buf.linkage.next == 0) || (buf.linkage.prev == 0)) {
+                       error(WARNING,
+                             "Trace page %p has bogus next (%#p) or prev (%#p) pointers\n",
+                             buf.page, buf.linkage.next, buf.linkage.prev);
+                       continue;
+               }
+
+               if ((buf.used < 0) || (buf.used > PAGESIZE())) {
+                       error(WARNING,
+                             "Trace page %#p has bogus used size (%d)\n",
+                             buf.page, buf.used);
+                       continue;
+               }
+
+               rc = lustre_write_page_frame(fd, ld.list_ptr[i],
+                                            (ulong) buf.page, buf.used);
+               if (rc >= 0)
+                       ret += 1;
+       }
+       FREEBUF(ld.list_ptr);
+
+       return ret;
+}
+
+/* Aquire the debug page list head pointer for this CPU and walk them */
+static int lustre_walk_cpus(int type, int cpu, int fd, int mode)
+{
+       static const char cmd_head_fmt[] =
+           "p &((*%strace_data[%i])[%i].tcd.tcd_%s)";
+       static const char cmd_count_fmt[] =
+           "p (*%strace_data[%i])[%i].tcd.tcd_%s";
+       char cmd_head[sizeof(cmd_head_fmt) + 40];
+       char cmd_count[sizeof(cmd_count_fmt) + 40];
+       int count, count1;
+       int rc;
+       ulong lh_addr;
+
+       printf("%s(%d, %d, %d)\n", __func__, cpu, fd, mode);
+
+       if (global_daemon_pages && (mode == LUSTRE_DAEMON_PAGES))
+               return 0;
+
+       switch (mode) {
+       case LUSTRE_PAGES:
+               snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix,
+                        type, cpu, "pages");
+               snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt,
+                        name_prefix, type, cpu, "cur_pages");
+               break;
+       case LUSTRE_DAEMON_PAGES:
+               snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix,
+                        type, cpu, "daemon_pages");
+               snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt,
+                        name_prefix, type, cpu, "cur_daemon_pages");
+               break;
+       case LUSTRE_STOCK_PAGES:
+               snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix,
+                        type, cpu, "stock_pages");
+               snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt,
+                        name_prefix, type, cpu, "cur_stock_pages");
+
+               break;
+       default:
+               return -EINVAL;
+       }
+       printf("cmd:\t%s\n\t%s\n", cmd_head, cmd_count);
+
+       /* Aquire the expected number of debug pages */
+       open_tmpfile();
+       if (!gdb_pass_through(cmd_count, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+               close_tmpfile();
+               error(FATAL, "gdb request failed: '%s'\n", cmd_count);
+               return -EINVAL;
+       }
+
+       rewind(pc->tmpfile);
+       rc = fscanf(pc->tmpfile, FMT_PAGE_COUNT, &count);
+       if (rc != 1) {
+               close_tmpfile();
+               error(FATAL, "gdb unexpected result: '%s', rc = %d\n",
+                     cmd_count, rc);
+               return -EINVAL;
+       }
+       close_tmpfile();
+
+       /* Skip CPUs with no debug pages */
+       if (count == 0)
+               return count;
+
+
+       /* Aquire the list head address for the tage list */
+       open_tmpfile();
+       if (!gdb_pass_through(cmd_head, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+               close_tmpfile();
+               error(FATAL, "gdb request failed: '%s'\n", cmd_head);
+               return -EINVAL;
+       }
+
+       rewind(pc->tmpfile);
+       rc = fscanf(pc->tmpfile, fmt_page_list_head, &lh_addr);
+
+       if (rc != 1) {
+               close_tmpfile();
+               error(FATAL, "gdb unexpected result: '%s', rc = %d\n",
+                     cmd_head, rc);
+               return -EINVAL;
+       }
+       close_tmpfile();
+
+       count1 = lustre_walk_trace_pages(cpu, fd, lh_addr);
+       if (count1 != count)
+               printf("Unexpected write size %d != %d\n", count1, count);
+       return count1;
+}
+
+static int lustre_walk_daemon_pages(int fd)
+{
+       struct list_data ld;
+       int count, rc, i = 0, ret = 0;
+       unsigned long pg_private;
+
+       BZERO(&ld, sizeof(struct list_data));
+       get_symbol_data("daemon_pages", sizeof(void *), &ld.start);
+       ld.end = ld.start;
+       ld.list_head_offset = MEMBER_OFFSET("page", "page_lru");
+       ld.flags |= LIST_ALLOCATE | LIST_HEAD_FORMAT | LIST_HEAD_POINTER;
+
+       hq_open();
+       count = do_list(&ld);
+       hq_close();
+
+       printf("%s(%p:%d)\n", __func__, (void *)ld.start,
+              count);
+       /* count include a list_head itself for empty */
+       if (count <= 1)
+               return 0;
+
+       for (i = 0; i < count; ++i) {
+               readmem(ld.list_ptr[i] + pg_private_off, KVADDR,
+                       (char *) &pg_private, sizeof(void *),
+                       "daemon page private", FAULT_ON_ERROR);
+
+               if ((pg_private < 0) || (pg_private > PAGESIZE())) {
+                       error(WARNING,
+                             "Trace page %p has bogus used size (%d)\n",
+                             ld.list_ptr[i], pg_private);
+                       continue;
+               }
+
+               rc = lustre_write_page_frame(fd, ld.list_ptr[i],
+                                            ld.list_ptr[i], pg_private);
+               if (rc >= 0)
+                       ret += 1;
+       }
+       FREEBUF(ld.list_ptr);
+
+       return ret;
+}
+
+void cmd_lustre_log(char *name)
+{
+       int i, rc, count, total = 0, fd;
+       int type;
+
+       fd = open(name, O_CREAT | O_EXCL | O_APPEND | O_WRONLY,
+                 S_IRUSR | S_IWUSR);
+       if (fd == -1) {
+               error(FATAL, "Unable to open log file '%s': %s (%d)\n",
+                     name, strerror(errno), errno);
+               return;
+       }
+
+       for (type = 0; type < TCD_TYPE_MAX; type++) {
+               for (i = 0; i < kt->cpus; i++) {
+                       count = 0;
+
+                       rc = lustre_walk_cpus(type, i, fd, LUSTRE_PAGES);
+                       if (rc >= 0)
+                               count += rc;
+
+                       rc = lustre_walk_cpus(type, i, fd,
+                                             LUSTRE_DAEMON_PAGES);
+                       if (rc >= 0)
+                               count += rc;
+
+                       rc = lustre_walk_cpus(type, i, fd,
+                                             LUSTRE_STOCK_PAGES);
+                       if (rc >= 0)
+                               count += rc;
+
+                       error(INFO,
+                             "Dumped %d debug pages from type %d - CPU %d\n",
+                             count, type, i);
+                       total += count;
+               }
+       }
+
+       if (global_daemon_pages)
+               lustre_walk_daemon_pages(fd);
+
+       if (fsync(fd) == -1)
+               error(WARNING,
+                     "Unable to sync log file '%s' it may be incomplete: %s (%d)\n",
+                     name, strerror(errno), errno);
+
+       if (close(fd) == -1)
+               error(WARNING, "Unable to close log file '%s': %s %d\n",
+                     name, strerror(errno), errno);
+
+       error(INFO, "Dumped %d total debug pages from %d CPUs to '%s'\n",
+             total, kt->cpus, name);
+}
+
+
+void cmd_lustre(void)
+{
+       int c;
+
+       while ((c = getopt(argcnt, args, "l:")) != EOF) {
+               switch (c) {
+               case 'l':
+                       if (strlen(optarg) == 0)
+                               argerrs++;
+                       else
+                               cmd_lustre_log(optarg);
+
+                       break;
+               default:
+                       argerrs++;
+                       break;
+               }
+       }
+
+       if (argerrs)
+               cmd_usage(pc->curcmd, SYNOPSIS);
+
+       fprintf(fp, "\n");
+}
+
+char *help_lustre[] = {
+       "lustre",               /* command name */
+       "lustre specific debug commands",       /* short description */
+       "[-l <file>]",          /* argument synopsis */
+       "  This command displays lustre specific data.\n",
+       "       -l  Extract lustre kernel debug data to <file>",
+       "           (use 'lctl df <file>' for ascii text)",
+       "\nEXAMPLE",
+       "    crash> lustre -l /tmp/lustre.log",
+       NULL
+};
+
+/*
+ * Local Variables:
+ * mode: C
+ * c-file-style: "stroustrup"
+ * indent-tabs-mode: nil
+ * c-basic-offset: 8
+ * End:
+ *
+ * end of lustre-ext.c
+ */
diff --git a/contrib/debug_tools/crash-mod/mk.sh b/contrib/debug_tools/crash-mod/mk.sh
new file mode 100755 (executable)
index 0000000..3f7102a
--- /dev/null
@@ -0,0 +1,145 @@
+#! /bin/bash
+
+declare -r prog=$(basename $0 .sh)
+declare -r progdir=$(cd $(dirname $0) > /dev/null ; pwd -P)
+declare -r program=${progdir}/$(basename $0)
+declare -r progpid=$$
+
+die() {
+    echo "${prog} fatal error:  $*"
+    kill -TERM $progpid
+    sleep 0.1
+    kill -KILL $progpid
+    exit 1
+} >&2
+
+set_crashdir() {
+    test -d "${CRASHDIR}" && return 0
+    CRASHDIR=$(command -v crash | sed 's@/bin/crash@@')
+    test -d "${CRASHDIR}" && {
+        CRASHDIR=${CRASHDIR}/include/crash
+        return 0
+    }
+    for d in /usr/local /usr "$(
+        rpm -q --list crash | sed -n 's@/bin/crash$@@')"
+    do
+        test -d "$d" || continue
+        test -x ${d}/bin/crash && {
+            CRASHDIR=${d}/include/crash
+            return 0
+        }
+    done
+    die "cannot locate crash installation root"
+}
+
+set_crashexe() {
+    CRASHEXE=${CRASHDIR%/include/crash}/bin/crash
+    test -x "$CRASHEXE" || {
+        CRASHEXE=${CRASHDIR%/include/crash}/crash
+        test -x "$CRASHEXE" || \
+            die "crash executable is not ${CRASHEXE}"
+    }
+}
+
+set_target() {
+    # This is derived from the "configure.c" program in the crash sources.
+    # It is needed to select the correct code from the defs.h header.
+    #
+    declare defs=$(gcc -E -dM - < /dev/null | grep -v '('|\
+        sed -n $'s/^#define[ \t]*__\([a-z]\)/HAVE_\\1/p' | \
+        sed $'s/[ \t].*//;s/^/declare /;s/$/=true/');
+
+    eval "$defs"
+    TARGET=unknown
+    ${HAVE_alpha__:-false}     && TARGET=ALPHA  && return
+    ${HAVE_i386__:-false}      && TARGET=X86    && return
+    ${HAVE_powerpc__:-false}   && TARGET=PPC    && return
+    ${HAVE_ia64__:-false}      && TARGET=IA64   && return
+    ${HAVE_s390__:-false}      && TARGET=S390   && return
+    ${HAVE_s390x__:-false}     && TARGET=S390X  && return
+    ${HAVE_powerpc64__:-false} && TARGET=PPC64  && return
+    ${HAVE_x86_64__:-false}    && TARGET=X86_64 && return
+    ${HAVE_arm__:-false}       && TARGET=ARM    && return
+}
+
+set_gdb_ver() {
+    declare v=$(${CRASHEXE} --version | grep 'GNU gdb ')
+    test ${#v} -eq 0 && die "no gdb version found in $CRASHEXE"
+    set -- $v
+    eval GDB_FLAGS=-DGDB_\${$#}
+    GDB_FLAGS=$(echo $GDB_FLAGS | tr . _)
+}
+
+set_cflags() {
+    declare -A cflags=(
+        [ALPHA]=""
+        [X86]="-D_FILE_OFFSET_BITS=64"
+        [PPC]="-D_FILE_OFFSET_BITS=64"
+        [IA64]=""
+        [S390]="-D_FILE_OFFSET_BITS=64"
+        [S390X]=""
+        [PPC64]="-m64"
+        [X86_64]=""
+        [ARM]="-D_FILE_OFFSET_BITS=64")
+    TARGET_CFLAGS=${cflags[$TARGET]}\ -D${TARGET}\ ${GDB_FLAGS}
+}
+
+build_so() {
+    test ${#CRASHDIR} -eq 0 && set_crashdir
+    test -f ${CRASHDIR}/defs.h || \
+        die "${CRASHDIR} does not contain 'defs.h'"
+    test ${#CRASHEXE} -eq 0 && set_crashexe
+    set_target
+    set_gdb_ver
+    set_cflags
+    declare GCC_OPTS='-Wall -Werror -nostartfiles -shared -rdynamic -fPIC'
+    GCC_CMD="gcc -I${CRASHDIR} ${GCC_OPTS} ${TARGET_CFLAGS}"
+
+    obj='lustre-ext.so'
+    test ! -f $obj -o $obj -ot lustre-ext.c && {
+        echo ${GCC_CMD} -o $obj lustre-ext.c
+        ${GCC_CMD} -o $obj lustre-ext.c || \
+            die could not build lustre-ext.so
+    }
+}
+
+while test $# -gt 0
+do
+    arg=$(echo $1 | sed 's/^-*//')
+
+    case X"$arg" in
+    Xcl* )
+        # check for clean/clobber
+        #
+        rm -f *.so
+        ;;
+
+    Xunin* )
+        # check for uninstall
+        #
+        test -d ~/.crash.d && \
+            rm -f ~/.crash.d/lustre-*.so
+        ;;
+
+    Xall )
+        build_so
+        ;;
+
+    Xin* )
+        test -f lustre-ext.so || build_so
+
+        # check for install
+        #
+        test -d ~/.crash.d || mkdir ~/.crash.d || \
+            die "could not make installation directory $HOME/.crash.d"
+        cp -fp lustre-*.so ~/.crash.d/.
+        ;;
+
+    * )
+        die "invalid option:  $1"
+    esac
+
+    shift
+done
+
+exit 0