From 38433fa7a455487a91bc80f3bfcc37a7844adafb Mon Sep 17 00:00:00 2001 From: Alexey Lyashkov Date: Mon, 23 Dec 2024 12:24:40 -0500 Subject: [PATCH] LU-17253 tools: extract lustre logs from cores Crash extension to extract lustre logs from crash dumps. Test-Parameters: trivial Signed-off-by: Alexey Lyashkov Change-Id: I20afb41b1cc90923fe68ccb06618ffacc1a8221c Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52947 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Artem Blagodarenko Reviewed-by: Oleg Drokin --- contrib/debug_tools/crash-mod/README | 44 +++ contrib/debug_tools/crash-mod/lustre-ext.c | 431 +++++++++++++++++++++++++++++ contrib/debug_tools/crash-mod/mk.sh | 145 ++++++++++ 3 files changed, 620 insertions(+) create mode 100644 contrib/debug_tools/crash-mod/README create mode 100644 contrib/debug_tools/crash-mod/lustre-ext.c create mode 100755 contrib/debug_tools/crash-mod/mk.sh diff --git a/contrib/debug_tools/crash-mod/README b/contrib/debug_tools/crash-mod/README new file mode 100644 index 0000000..832e2e9 --- /dev/null +++ b/contrib/debug_tools/crash-mod/README @@ -0,0 +1,44 @@ +Special module to allow the crash utility to extract only the lustre +debug information from a dump. + +Before building we need to setup the environment. For RHEL systems you +need to install crash-devel so mk.sh will work. Also install the +debuginfo kernel rpms as well as the lustre debuginfo rpms. + +Build as follows: + + contrib/debug_tools/crash-mod/mk.sh + + ...or if you prefer... + + gcc -nostartfiles -shared -rdynamic -I/usr/include/crash -o lustre-ext.so lustre-ext.c -fPIC -D + + Then copy lustre-ext.so to /usr/lib64/crash/extensions/ + +Example usage: + + # Launch crash against a crash dump or live kernel to extract + # and re-assemble the binary lustre debug log. + > crash + ... + crash> mod -S + ... + crash> extend lustre.so + lustre.so: shared object loaded + crash> lustre -l /tmp/foo.dl + lustre: Dumped 2560 debug pages from CPU 0 + lustre: Dumped 2560 debug pages from CPU 1 + lustre: Dumped 2560 debug pages from CPU 2 + lustre: Dumped 2560 debug pages from CPU 3 + lustre: Dumped 2560 debug pages from CPU 4 + lustre: Dumped 2560 debug pages from CPU 5 + lustre: Dumped 2560 debug pages from CPU 6 + lustre: Dumped 2560 debug pages from CPU 7 + lustre: Dumped 20480 total debug pages from 8 CPUs to /tmp/foo.dl + crash> quit + + # Convert the binary debug log to a text debug log + > /usr/sbin/lctl df /tmp/foo.dl >/tmp/foo.dk + + # View your debug log + > vim /tmp/foo.dk diff --git a/contrib/debug_tools/crash-mod/lustre-ext.c b/contrib/debug_tools/crash-mod/lustre-ext.c new file mode 100644 index 0000000..e3eb6c3 --- /dev/null +++ b/contrib/debug_tools/crash-mod/lustre-ext.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: GPL-v2.0+ +/* + * Copyright (C) 2007, Lawrence Livermore National Labs + * Author: Brian Behlendorf + * Copyright (C) 2017-2023, Cray Inc. + * Author: Alexey Lyashkov + */ + +#include "defs.h" + +#define UINT32_LEN 10 /* ceil(log10(UINT_MAX)) */ +#define UINT64_LEN 20 /* ceil(log10(ULONG_MAX)) */ + +/* three types of trace_data in linux */ +enum { + TCD_TYPE_PROC = 0, + TCD_TYPE_SOFTIRQ, + TCD_TYPE_IRQ, + TCD_TYPE_MAX +}; + +struct cfs_trace_page { + /* + * page itself + */ + struct page *page; + /* + * linkage into one of the lists in trace_data_union or + * page_collection + */ + struct kernel_list_head linkage; + /* + * number of bytes used within this page + */ + unsigned int used; + /* + * cpu that owns this page + */ + unsigned short cpu; + /* + * type(context) of this page + */ + unsigned short type; +}; + +#define LUSTRE_PAGES 1 +#define LUSTRE_DAEMON_PAGES 2 +#define LUSTRE_STOCK_PAGES 3 + +#define FMT_PAGE_COUNT "$%*d = %d" + +static const char fmt_page_list_head[] = + "$%*d = (struct list_head *) %lx"; +static const char lustre2_pfx[] = "cfs_"; +static const char *name_prefix = lustre2_pfx; + +void cmd_lustre(void); +char *help_lustre[]; +int global_daemon_pages; +/* not all crash tools have OFFSET(page_private) */ +unsigned int pg_private_off; + +static struct command_table_entry command_table[] = { + { "lustre", cmd_lustre, help_lustre, 0 }, + { NULL } +}; + +void _init(void) +{ + if (symbol_exists("daemon_pages")) + global_daemon_pages = 1; + + pg_private_off = MEMBER_OFFSET("page", "private"); + + register_extension(command_table); +} + +void _fini(void) +{ +} + +/* Given a pointer to a page frame append the assoicated linear + * address range to the passed file descriptor. + */ +static int +lustre_write_page_frame(int fd, ulong tp_addr, ulong kvaddr, int used) +{ + char buf[PAGESIZE()]; + physaddr_t kpaddr; + int retry = 0; + ssize_t rc, count = 0; + + if (!is_page_ptr(kvaddr, &kpaddr)) { + error(WARNING, + "Skipping trace page %p which references an invalid page pointer (%#x)\n", + tp_addr, kvaddr); + return -EADDRNOTAVAIL; + } + + if (!readmem(kpaddr, PHYSADDR, buf, used, + "trace page data", RETURN_ON_ERROR)) { + error(WARNING, + "Skipping trace page %p, unable to read data from physical address (%#x)\n", + tp_addr, kpaddr); + return -EIO; + } + + while (count < used) { + rc = write(fd, buf + count, used - count); + if (rc >= 0) { + count += rc; + + if ((rc == 0) && (retry++ > 5)) { + error(WARNING, + "Partial trace page %#lx written to output file (%d/%d bytes)\n", + tp_addr, count, used); + return count; + } + } else { + error(WARNING, + "Unable to write to output file: %s (%d)\n", + strerror(errno), errno); + return -rc; + } + } + + return count; +} + +static int lustre_walk_trace_pages(int cpu, int fd, ulong lh_addr) +{ + static const char *name = "cfs_trace_page"; + struct list_data ld; + int count, rc, i = 0, ret = 0; + struct cfs_trace_page buf; + + + BZERO(&ld, sizeof(struct list_data)); + ld.end = lh_addr; + ld.list_head_offset = MEMBER_OFFSET((char *) name, "linkage"); + ld.member_offset = ld.list_head_offset; + + readmem(lh_addr + ld.member_offset, KVADDR, &ld.start, + sizeof(void *), "LIST_HEAD contents", FAULT_ON_ERROR); + ld.structname_args = 1; + ld.structname = + (char **) GETBUF(sizeof(char *) * ld.structname_args); + ld.structname[0] = (char *) name; + ld.flags |= LIST_ALLOCATE | LIST_HEAD_FORMAT | LIST_HEAD_POINTER; + + hq_open(); + count = do_list(&ld); + hq_close(); + + printf("%s(%d, %d, %#lx) = %d\n", __func__, cpu, fd, lh_addr, count); + /* count include a list_head itself for empty */ + if (count <= 1) + return 0; + + for (i = 0; i < count; ++i) { +#if 0 + printf("i = %d, count = %d, tp_addr = %#lx, tp_page = %p, tp_next = %p, tp_prev = %p, tp_used = %d tcpu = %d\n", + i, count, ld.list_ptr[i], buf.page, + buf.linkage.next, buf.linkage.prev, buf.used, buf.cpu); +#endif + + readmem(ld.list_ptr[i], KVADDR, (char *) &buf, sizeof(buf), + "cfs_trace_page buffer", FAULT_ON_ERROR); + + /* Validate the list heads for some sanity */ + if ((buf.linkage.next == 0) || (buf.linkage.prev == 0)) { + error(WARNING, + "Trace page %p has bogus next (%#p) or prev (%#p) pointers\n", + buf.page, buf.linkage.next, buf.linkage.prev); + continue; + } + + if ((buf.used < 0) || (buf.used > PAGESIZE())) { + error(WARNING, + "Trace page %#p has bogus used size (%d)\n", + buf.page, buf.used); + continue; + } + + rc = lustre_write_page_frame(fd, ld.list_ptr[i], + (ulong) buf.page, buf.used); + if (rc >= 0) + ret += 1; + } + FREEBUF(ld.list_ptr); + + return ret; +} + +/* Aquire the debug page list head pointer for this CPU and walk them */ +static int lustre_walk_cpus(int type, int cpu, int fd, int mode) +{ + static const char cmd_head_fmt[] = + "p &((*%strace_data[%i])[%i].tcd.tcd_%s)"; + static const char cmd_count_fmt[] = + "p (*%strace_data[%i])[%i].tcd.tcd_%s"; + char cmd_head[sizeof(cmd_head_fmt) + 40]; + char cmd_count[sizeof(cmd_count_fmt) + 40]; + int count, count1; + int rc; + ulong lh_addr; + + printf("%s(%d, %d, %d)\n", __func__, cpu, fd, mode); + + if (global_daemon_pages && (mode == LUSTRE_DAEMON_PAGES)) + return 0; + + switch (mode) { + case LUSTRE_PAGES: + snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix, + type, cpu, "pages"); + snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt, + name_prefix, type, cpu, "cur_pages"); + break; + case LUSTRE_DAEMON_PAGES: + snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix, + type, cpu, "daemon_pages"); + snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt, + name_prefix, type, cpu, "cur_daemon_pages"); + break; + case LUSTRE_STOCK_PAGES: + snprintf(cmd_head, sizeof(cmd_head), cmd_head_fmt, name_prefix, + type, cpu, "stock_pages"); + snprintf(cmd_count, sizeof(cmd_head), cmd_count_fmt, + name_prefix, type, cpu, "cur_stock_pages"); + + break; + default: + return -EINVAL; + } + printf("cmd:\t%s\n\t%s\n", cmd_head, cmd_count); + + /* Aquire the expected number of debug pages */ + open_tmpfile(); + if (!gdb_pass_through(cmd_count, pc->tmpfile, GNU_RETURN_ON_ERROR)) { + close_tmpfile(); + error(FATAL, "gdb request failed: '%s'\n", cmd_count); + return -EINVAL; + } + + rewind(pc->tmpfile); + rc = fscanf(pc->tmpfile, FMT_PAGE_COUNT, &count); + if (rc != 1) { + close_tmpfile(); + error(FATAL, "gdb unexpected result: '%s', rc = %d\n", + cmd_count, rc); + return -EINVAL; + } + close_tmpfile(); + + /* Skip CPUs with no debug pages */ + if (count == 0) + return count; + + + /* Aquire the list head address for the tage list */ + open_tmpfile(); + if (!gdb_pass_through(cmd_head, pc->tmpfile, GNU_RETURN_ON_ERROR)) { + close_tmpfile(); + error(FATAL, "gdb request failed: '%s'\n", cmd_head); + return -EINVAL; + } + + rewind(pc->tmpfile); + rc = fscanf(pc->tmpfile, fmt_page_list_head, &lh_addr); + + if (rc != 1) { + close_tmpfile(); + error(FATAL, "gdb unexpected result: '%s', rc = %d\n", + cmd_head, rc); + return -EINVAL; + } + close_tmpfile(); + + count1 = lustre_walk_trace_pages(cpu, fd, lh_addr); + if (count1 != count) + printf("Unexpected write size %d != %d\n", count1, count); + return count1; +} + +static int lustre_walk_daemon_pages(int fd) +{ + struct list_data ld; + int count, rc, i = 0, ret = 0; + unsigned long pg_private; + + BZERO(&ld, sizeof(struct list_data)); + get_symbol_data("daemon_pages", sizeof(void *), &ld.start); + ld.end = ld.start; + ld.list_head_offset = MEMBER_OFFSET("page", "page_lru"); + ld.flags |= LIST_ALLOCATE | LIST_HEAD_FORMAT | LIST_HEAD_POINTER; + + hq_open(); + count = do_list(&ld); + hq_close(); + + printf("%s(%p:%d)\n", __func__, (void *)ld.start, + count); + /* count include a list_head itself for empty */ + if (count <= 1) + return 0; + + for (i = 0; i < count; ++i) { + readmem(ld.list_ptr[i] + pg_private_off, KVADDR, + (char *) &pg_private, sizeof(void *), + "daemon page private", FAULT_ON_ERROR); + + if ((pg_private < 0) || (pg_private > PAGESIZE())) { + error(WARNING, + "Trace page %p has bogus used size (%d)\n", + ld.list_ptr[i], pg_private); + continue; + } + + rc = lustre_write_page_frame(fd, ld.list_ptr[i], + ld.list_ptr[i], pg_private); + if (rc >= 0) + ret += 1; + } + FREEBUF(ld.list_ptr); + + return ret; +} + +void cmd_lustre_log(char *name) +{ + int i, rc, count, total = 0, fd; + int type; + + fd = open(name, O_CREAT | O_EXCL | O_APPEND | O_WRONLY, + S_IRUSR | S_IWUSR); + if (fd == -1) { + error(FATAL, "Unable to open log file '%s': %s (%d)\n", + name, strerror(errno), errno); + return; + } + + for (type = 0; type < TCD_TYPE_MAX; type++) { + for (i = 0; i < kt->cpus; i++) { + count = 0; + + rc = lustre_walk_cpus(type, i, fd, LUSTRE_PAGES); + if (rc >= 0) + count += rc; + + rc = lustre_walk_cpus(type, i, fd, + LUSTRE_DAEMON_PAGES); + if (rc >= 0) + count += rc; + + rc = lustre_walk_cpus(type, i, fd, + LUSTRE_STOCK_PAGES); + if (rc >= 0) + count += rc; + + error(INFO, + "Dumped %d debug pages from type %d - CPU %d\n", + count, type, i); + total += count; + } + } + + if (global_daemon_pages) + lustre_walk_daemon_pages(fd); + + if (fsync(fd) == -1) + error(WARNING, + "Unable to sync log file '%s' it may be incomplete: %s (%d)\n", + name, strerror(errno), errno); + + if (close(fd) == -1) + error(WARNING, "Unable to close log file '%s': %s %d\n", + name, strerror(errno), errno); + + error(INFO, "Dumped %d total debug pages from %d CPUs to '%s'\n", + total, kt->cpus, name); +} + + +void cmd_lustre(void) +{ + int c; + + while ((c = getopt(argcnt, args, "l:")) != EOF) { + switch (c) { + case 'l': + if (strlen(optarg) == 0) + argerrs++; + else + cmd_lustre_log(optarg); + + break; + default: + argerrs++; + break; + } + } + + if (argerrs) + cmd_usage(pc->curcmd, SYNOPSIS); + + fprintf(fp, "\n"); +} + +char *help_lustre[] = { + "lustre", /* command name */ + "lustre specific debug commands", /* short description */ + "[-l ]", /* argument synopsis */ + " This command displays lustre specific data.\n", + " -l Extract lustre kernel debug data to ", + " (use 'lctl df ' for ascii text)", + "\nEXAMPLE", + " crash> lustre -l /tmp/lustre.log", + NULL +}; + +/* + * Local Variables: + * mode: C + * c-file-style: "stroustrup" + * indent-tabs-mode: nil + * c-basic-offset: 8 + * End: + * + * end of lustre-ext.c + */ diff --git a/contrib/debug_tools/crash-mod/mk.sh b/contrib/debug_tools/crash-mod/mk.sh new file mode 100755 index 0000000..3f7102a --- /dev/null +++ b/contrib/debug_tools/crash-mod/mk.sh @@ -0,0 +1,145 @@ +#! /bin/bash + +declare -r prog=$(basename $0 .sh) +declare -r progdir=$(cd $(dirname $0) > /dev/null ; pwd -P) +declare -r program=${progdir}/$(basename $0) +declare -r progpid=$$ + +die() { + echo "${prog} fatal error: $*" + kill -TERM $progpid + sleep 0.1 + kill -KILL $progpid + exit 1 +} >&2 + +set_crashdir() { + test -d "${CRASHDIR}" && return 0 + CRASHDIR=$(command -v crash | sed 's@/bin/crash@@') + test -d "${CRASHDIR}" && { + CRASHDIR=${CRASHDIR}/include/crash + return 0 + } + for d in /usr/local /usr "$( + rpm -q --list crash | sed -n 's@/bin/crash$@@')" + do + test -d "$d" || continue + test -x ${d}/bin/crash && { + CRASHDIR=${d}/include/crash + return 0 + } + done + die "cannot locate crash installation root" +} + +set_crashexe() { + CRASHEXE=${CRASHDIR%/include/crash}/bin/crash + test -x "$CRASHEXE" || { + CRASHEXE=${CRASHDIR%/include/crash}/crash + test -x "$CRASHEXE" || \ + die "crash executable is not ${CRASHEXE}" + } +} + +set_target() { + # This is derived from the "configure.c" program in the crash sources. + # It is needed to select the correct code from the defs.h header. + # + declare defs=$(gcc -E -dM - < /dev/null | grep -v '('|\ + sed -n $'s/^#define[ \t]*__\([a-z]\)/HAVE_\\1/p' | \ + sed $'s/[ \t].*//;s/^/declare /;s/$/=true/'); + + eval "$defs" + TARGET=unknown + ${HAVE_alpha__:-false} && TARGET=ALPHA && return + ${HAVE_i386__:-false} && TARGET=X86 && return + ${HAVE_powerpc__:-false} && TARGET=PPC && return + ${HAVE_ia64__:-false} && TARGET=IA64 && return + ${HAVE_s390__:-false} && TARGET=S390 && return + ${HAVE_s390x__:-false} && TARGET=S390X && return + ${HAVE_powerpc64__:-false} && TARGET=PPC64 && return + ${HAVE_x86_64__:-false} && TARGET=X86_64 && return + ${HAVE_arm__:-false} && TARGET=ARM && return +} + +set_gdb_ver() { + declare v=$(${CRASHEXE} --version | grep 'GNU gdb ') + test ${#v} -eq 0 && die "no gdb version found in $CRASHEXE" + set -- $v + eval GDB_FLAGS=-DGDB_\${$#} + GDB_FLAGS=$(echo $GDB_FLAGS | tr . _) +} + +set_cflags() { + declare -A cflags=( + [ALPHA]="" + [X86]="-D_FILE_OFFSET_BITS=64" + [PPC]="-D_FILE_OFFSET_BITS=64" + [IA64]="" + [S390]="-D_FILE_OFFSET_BITS=64" + [S390X]="" + [PPC64]="-m64" + [X86_64]="" + [ARM]="-D_FILE_OFFSET_BITS=64") + TARGET_CFLAGS=${cflags[$TARGET]}\ -D${TARGET}\ ${GDB_FLAGS} +} + +build_so() { + test ${#CRASHDIR} -eq 0 && set_crashdir + test -f ${CRASHDIR}/defs.h || \ + die "${CRASHDIR} does not contain 'defs.h'" + test ${#CRASHEXE} -eq 0 && set_crashexe + set_target + set_gdb_ver + set_cflags + declare GCC_OPTS='-Wall -Werror -nostartfiles -shared -rdynamic -fPIC' + GCC_CMD="gcc -I${CRASHDIR} ${GCC_OPTS} ${TARGET_CFLAGS}" + + obj='lustre-ext.so' + test ! -f $obj -o $obj -ot lustre-ext.c && { + echo ${GCC_CMD} -o $obj lustre-ext.c + ${GCC_CMD} -o $obj lustre-ext.c || \ + die could not build lustre-ext.so + } +} + +while test $# -gt 0 +do + arg=$(echo $1 | sed 's/^-*//') + + case X"$arg" in + Xcl* ) + # check for clean/clobber + # + rm -f *.so + ;; + + Xunin* ) + # check for uninstall + # + test -d ~/.crash.d && \ + rm -f ~/.crash.d/lustre-*.so + ;; + + Xall ) + build_so + ;; + + Xin* ) + test -f lustre-ext.so || build_so + + # check for install + # + test -d ~/.crash.d || mkdir ~/.crash.d || \ + die "could not make installation directory $HOME/.crash.d" + cp -fp lustre-*.so ~/.crash.d/. + ;; + + * ) + die "invalid option: $1" + esac + + shift +done + +exit 0 -- 1.8.3.1