Whamcloud - gitweb
LU-12461 contrib: Add epython scripts for crash dump analysis 82/35282/4
author Ann Koehler <amk@cray.com>
Thu, 20 Jun 2019 18:25:02 +0000 (13:25 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 14 Feb 2020 05:50:16 +0000 (05:50 +0000)
This mod creates a new subdirectory, debug_tools/epython_scripts,
in ./contrib to contain PyKdump scripts. These scripts written in
an extended version of Python aid in memory dump analysis by
extracting and formatting the content of Lustre data structures.

The scripts are written using Python 2.7 and tested on Lustre 2.11
client dumps.

Test-Parameters: trivial

Cray-bug-id: LUS-7501
Signed-off-by: Ann Koehler <amk@cray.com>
Change-Id: I0a15eb9025fb604742f4ae99508a080ce04163dc
Reviewed-on: https://review.whamcloud.com/35282
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
31 files changed:
contrib/debug_tools/epython_scripts/README [new file with mode: 0644]
contrib/debug_tools/epython_scripts/cfs_hashes.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/cfs_hnodes.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/addrlib.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/__init__.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/page.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/time.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/util.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/debug_flags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/dk.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/jiffies2date.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ldlm_dumplocks.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ldlm_lockflags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/lu_object.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/lustrelib.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/obd_devs.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ptlrpc.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/rpc_opcode.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/rpc_stats.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/sbi_ptrs.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/uniqueStacktrace.py [new file with mode: 0644]

diff --git a/contrib/debug_tools/epython_scripts/README b/contrib/debug_tools/epython_scripts/README
new file mode 100644 (file)
index 0000000..d33af91
--- /dev/null
@@ -0,0 +1,37 @@
+These epython scripts extract the content of various Lustre data
+structures from crash dumps and formats the output for readability.
+They are written for use with PyKdump, a framework for using Python
+as an extension language for the crash tool.
+
+Documentation and source for PyKdump are available at:
+   https://sourceforge.net/p/pykdump/wiki/Home/
+
+These scripts are written in Python 2. The above wiki page has instructions
+for converting them to Python 3. The language extensions are documented in:
+   https://sourceforge.net/p/pykdump/code/ci/master/tree/docs/pykdump.lyx
+
+The scripts were tested with Lustre 2.11, primarily with dumps of client
+nodes. The scripts will work with server dumps as long as the data
+structures match the ones used on clients.
+
+Summary of scripts:
+ * cfs_hashes.py         Displays summary of cfs_hash tables.
+ * cfs_hnodes.py         Displays the specified Lustre hash table.
+ * debug_flags.py        Prints Lustre libcfs_debug flags as strings.
+ * dk.py                 Dumps and sorts the Lustre dk logs.
+ * jiffies2date.py       Prints the date and time of a given jiffies timestamp.
+ * ldlm_dumplocks.py     Lists granted and waiting locks by namespace/resource.
+ * ldlm_lockflags.py     Prints string identifiers for specified LDLM flags.
+ * lu_object.py          Prints contents of an lu_object.
+ * rpc_opcode.py         Maps Lustre rpc opcodes to string identifiers.
+ * obd_devs.py           Displays the contents of global 'obd_devs'.
+ * ptlrpc.py             Displays the RPC queues of the Lustre ptlrpcd daemons.
+ * rpc_stats.py          Dumps the client_obd structure given by client argument.
+ * sbi_ptrs.py           Prints Lustre structs associated with inode.
+ * uniqueStacktrace.py   Prints stack traces for each task.
+
+The scripts require symbols from the Lustre and LNet modules to be loaded
+(mod command in crash). A script is invoked with the command
+"epython <script name>" followed by any parameters. To get usage information
+for a particular script, enter the following at the crash prompt:
+   epython <script_name> -h
diff --git a/contrib/debug_tools/epython_scripts/cfs_hashes.py b/contrib/debug_tools/epython_scripts/cfs_hashes.py
new file mode 100644 (file)
index 0000000..4ca16fb
--- /dev/null
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+
+"""
+Utility to display Lustre cfs_hash tables
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+import argparse
+
+import lustrelib as ll
+
+description_short = 'Displays summary of cfs_hash tables'
+
CFS_HASH_THETA_BITS = 10

def cfs_hash_cur_theta(hs):
    """Return the current load factor (theta) of hash table *hs*.

    Theta is a fixed-point value scaled by 2**CFS_HASH_THETA_BITS,
    i.e. item count per 2**hs_cur_bits bucket slots.
    """
    hs_cnt = readSU('atomic_t', hs.hs_count).counter
    return (hs_cnt << CFS_HASH_THETA_BITS) >> hs.hs_cur_bits

def cfs_hash_theta_int(theta):
    """Return the integer part of a fixed-point theta value."""
    return theta >> CFS_HASH_THETA_BITS

def cfs_hash_theta_frac(theta):
    """Return the fractional part of theta in thousandths (0..999)."""
    return ((theta * 1000) >> CFS_HASH_THETA_BITS) - \
           (cfs_hash_theta_int(theta) * 1000)

def cfs_hash_format_theta(theta):
    """Format a fixed-point theta as 'int.frac' with a 3-digit fraction.

    The fraction is zero padded so that, e.g., 1 + 4/1000 renders as
    '1.004' rather than the misleading '1.4' (matches the kernel's
    "%d.%03d" theta formatting in libcfs cfs_hash debug output).
    """
    return "%d.%03d" % (cfs_hash_theta_int(theta),
                        cfs_hash_theta_frac(theta))
+
def print_theta(hs):
    """Print the current theta of hash table *hs*, raw and formatted."""
    theta = cfs_hash_cur_theta(hs)
    print("Theta: %d %s" % (theta, cfs_hash_format_theta(theta)))

def print_thetas(name, hashtable):
    """Print theta for the cfs_hash at address *hashtable*, if non-NULL."""
    hs = readSU('struct cfs_hash', hashtable)
    if hs:
        print_theta(hs)

def print_separator(count):
    """Print a separator line of *count* '=' characters."""
    print("=" * count)

def print_hash_labels():
    """Print the column header matching print_hash_summary() output."""
    print("%-15s %-17s\t %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
          "%-5s %-5s %-5s %-5s %-11s %-11s %-11s %-5s" %
          ("name", "cfs_hash", "cnt", "rhcnt", "xtr", "cur", "min", "max", "rhash",
           "bkt", "nbkt", "nhlst", "flags", "theta", "minT", "maxT", "bktsz"))
+
def print_hash_summary(name, hashtable):
    """Print a one-line summary of the cfs_hash at address *hashtable*.

    Columns match the header emitted by print_hash_labels().  A NULL
    table prints only the name and address.
    """
    hs = readSU('struct cfs_hash', hashtable)
    if hs:
        # Read the atomic counter once instead of twice (the original
        # also computed an unused hs_refcount value).
        hs_cnt = readSU('atomic_t', hs.hs_count).counter
        print("%-15s %-17x\t %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5x %-11s %-11s %-11s %-5d" %
              (name, Addr(hs),
               hs_cnt,
               hs.hs_rehash_count,
               hs.hs_extra_bytes,
               hs.hs_cur_bits,
               hs.hs_min_bits,
               hs.hs_max_bits,
               hs.hs_rehash_bits,
               hs.hs_bkt_bits,
               ll.CFS_HASH_NBKT(hs),
               ll.CFS_HASH_BKT_NHLIST(hs),
               hs.hs_flags,
               cfs_hash_format_theta(cfs_hash_cur_theta(hs)),
               cfs_hash_format_theta(hs.hs_min_theta),
               cfs_hash_format_theta(hs.hs_max_theta),
               ll.cfs_hash_bucket_size(hs)))
    else:
        print("%-15s %-17x" % (name, Addr(hs)))
+
def obd_print_export_hashes(obd, exp_list, fld):
    """Print the per-export lock hash tables for every export on *exp_list*."""
    print("\nExport list head %x %s" % (exp_list, fld))
    for exp in readSUListFromHead(exp_list, fld, 'struct obd_export'):
        print_hash_summary('exp_lock', exp.exp_lock_hash)
        print_hash_summary('exp_flock', exp.exp_flock_hash)

def obd_print_one_device_hashes(obd):
    """Print all hash tables owned by one obd_device.

    Returns 0 on success, 1 if the device name cannot be decoded;
    callers use a non-zero return to stop walking the device table.
    """
    try:
        nm = ll.obd2str(obd)
    except Exception:
        # Unreadable/uninitialized obd_device slot: tell caller to stop.
        return 1

    print("obd_device %-17x %-22s" % (Addr(obd), nm))
    print_hash_labels()

    print_hash_summary("uuid", obd.obd_uuid_hash)
    print_hash_summary("nid", obd.obd_nid_hash)
    print_hash_summary("nid_stats", obd.obd_nid_stats_hash)

    if "clilov" in nm:
        print_hash_summary("lov_pools", obd.u.lov.lov_pools_hash_body)
    elif "clilmv" in nm:
        pass        # LMV devices have no extra per-device hash to show
    else:
        print_hash_summary("cl_quota0", obd.u.cli.cl_quota_hash[0])
        print_hash_summary("cl_quota1", obd.u.cli.cl_quota_hash[1])

#    obd_print_export_hashes(obd, obd.obd_exports, 'exp_obd_chain')
#    obd_print_export_hashes(obd, obd.obd_exports_timed, 'exp_obd_chain_timed')
    print("")
    return 0
+
def obd_devs_hash():
    """Walk the global 'obd_devs' table, printing hashes for each device."""
    devices = readSymbol('obd_devs')

    for obd in devices:
        if obd_print_one_device_hashes(obd) != 0:
            break
    print_separator(150)

def ldlm_print_ns_hashes(ns, type):
    """Print the resource hash of every namespace on list symbol *ns*."""
    ns_list = readSymbol(ns)
    print("\n%s namespaces-resources" % type)
    print_hash_labels()
    for ns in readSUListFromHead(ns_list, 'ns_list_chain', 'struct ldlm_namespace'):
        nm = ll.obd2str(ns.ns_obd)[0:20]
        print_hash_summary(nm, ns.ns_rs_hash)

def ldlm_namespaces_hash():
    """Print resource hashes for client, inactive and server namespaces."""
    ldlm_print_ns_hashes('ldlm_cli_active_namespace_list', "Client")
    ldlm_print_ns_hashes('ldlm_cli_inactive_namespace_list', "Inactive")
    ldlm_print_ns_hashes('ldlm_srv_namespace_list', "Server")

def lu_sites_hashes():
    """Print the object hash of every lu_site on the global 'lu_sites' list."""
    lu_sites = readSymbol('lu_sites')
    print_hash_labels()
    for site in readSUListFromHead(lu_sites, 'ls_linkage', 'struct lu_site'):
        print_hash_summary("lu_site_vvp", site.ls_obj_hash)
    print("")
+
+
def global_hashes():
    """Print the global connection, jobid and cl_env hash tables."""
    print_hash_labels()
    print_hash_summary("conn_hash", readSymbol('conn_hash'))
    # These symbols only exist on some Lustre versions; probe first.
    if symbol_exists('jobid_hash'):
        print_hash_summary("jobid_hash", readSymbol('jobid_hash'))
    if symbol_exists('cl_env_hash'):
        print_hash_summary("cl_env_hash", readSymbol('cl_env_hash'))
    print("")

if __name__ == "__main__":
    description = "Displays summary of hash tables in 'obd_devs'"
    parser = argparse.ArgumentParser(description=description)
    args = parser.parse_args()

    global_hashes()
    lu_sites_hashes()
    obd_devs_hash()
    ldlm_namespaces_hash()
diff --git a/contrib/debug_tools/epython_scripts/cfs_hnodes.py b/contrib/debug_tools/epython_scripts/cfs_hnodes.py
new file mode 100644 (file)
index 0000000..b47a81d
--- /dev/null
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+"""
+Utility to display a Lustre cfs_hash table
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+#from struct import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = "Displays the specified Lustre hash table "
+
+DEPTH = 3
+RULER = "........................................"
+
# Map a cfs_hash_ops.hs_object callback symbol to the container struct
# type and the member name of its embedded hash node.
hash_objects = {
    'ldlm_res_hop_object': ['struct ldlm_resource', 'lr_hash'],
    'jobid_object':        ['struct jobid_to_pid_map', 'jp_hash'],
    'lu_obj_hop_object':   ['struct lu_object_header', 'loh_hash'],
    'uuid_export_object':  ['struct obd_export', 'export_uuid_hash'],
    'nid_export_object':   ['struct obd_export', 'exp_nid_hash'],
    'nidstats_object':     ['struct nid_stat', 'nid_hash'],
    'gen_export_object':   ['struct obd_export', 'exp_gen_hash'],
    'oqi_object':          ['struct osc_quota_info', 'oqi_hash'],
    'conn_object':         ['struct ptlrpc_connection', 'c_hash']}

def get_hash_object(hs, hnode):
    """Return '<struct type> <address>' of the object containing *hnode*.

    Returns '' when the table's hs_object callback is not recognized.
    """
    entry = hash_objects.get(addr2sym(hs.hs_ops.hs_object))
    if entry is None:
        return ''
    struct_name, member = entry
    return "%s %x" % (struct_name,
                      Addr(hnode) - member_offset(struct_name, member))
+
def dump_hnodes(hs, hlist, hnode, depth=0, ruler=RULER):
    """Print each hlist_node on one hash chain with its container object."""
    # NOTE(review): pykdump appears to overload '&' here so the loop stops
    # when the chain wraps back to the list head — TODO confirm.
    while(hnode != hlist & hnode):
        s = get_hash_object(hs, hnode)
        print("%*.*shlist_node 0x%x  %s" % (depth, depth, ruler, Addr(hnode), s))
        hnode = hnode.next

def dump_hlist(hs, hlist, depth=0, ruler=RULER):
    """Print one non-empty hash chain head followed by its nodes."""
    if hlist.first:
        print("%*.*shlist_head 0x%x" % (depth, depth, ruler, Addr(hlist)))
        dump_hnodes(hs, hlist, hlist.first, depth + DEPTH, ruler)

def dump_hash_bucket(hs, bd_bkt, depth=0, ruler=RULER):
    """Print one cfs_hash bucket and each of its hash chains."""
    print("%*.*scfs_hash_bucket 0x%x" % (depth, depth, ruler, Addr(bd_bkt)))
    for bd_offset in range(ll.CFS_HASH_BKT_NHLIST(hs)):
        hlist = ll.cfs_hash_hhead(hs, bd_bkt, bd_offset)
        if hlist:
            dump_hlist(hs, hlist, depth + DEPTH, ruler)

def dump_hash_table(hs):
    """Print an entire cfs_hash table, bucket by bucket."""
    print("cfs_hash@0x%x" % Addr(hs))

    for bd_bkt in ll.cfs_hash_get_buckets(hs):
        dump_hash_bucket(hs, bd_bkt, DEPTH, RULER)
+
if __name__ == "__main__":
    # Parse the single positional argument: the cfs_hash address.
    parser = argparse.ArgumentParser(
        description="Displays the specified Lustre hash table ")
    parser.add_argument("htable", default=False, type=toint,
                        help="address of a cfs_hash struct")
    args = parser.parse_args()

    dump_hash_table(readSU('struct cfs_hash', args.htable))
diff --git a/contrib/debug_tools/epython_scripts/crashlib/addrlib.py b/contrib/debug_tools/epython_scripts/crashlib/addrlib.py
new file mode 100644 (file)
index 0000000..d06a769
--- /dev/null
@@ -0,0 +1,22 @@
+
+"""
+Set of routines for manipulating addresses.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+import crashlib.cid
+import crashlib.cid.machdep_table
+
+# --------------------------------------------------------------------------
+
def ptov(physaddr):
    """Convert a physical address to a kernel virtual address."""
    mdtbl = crashlib.cid.mdtbl
    return int(physaddr) + mdtbl.kvbase

def phys2pfn(physaddr):
    """Convert a physical address to a page frame number (page offset)."""
    mdtbl = crashlib.cid.mdtbl
    return physaddr >> mdtbl.pageshift

def pfn2phys(pfn):
    """Convert a page frame number (page offset) into a physical address."""
    mdtbl = crashlib.cid.mdtbl
    return pfn << mdtbl.pageshift
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py b/contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py
new file mode 100644 (file)
index 0000000..e199455
--- /dev/null
@@ -0,0 +1,19 @@
+
+"""
+Provide access to internal crash data.
+Copyright 2014 Cray Inc.  All Rights Reserved
+
+Much of the data this package provides is available by reading the dump file,
+but some is information that crash 'knows' about the kernel based on the
+kernel version.
+
+The data is generally extracted by executing various crash commands, parsing
+the output and storing it within a Python object.
+"""
+
+
class ParseError(Exception):
    """Exception indicating an error while parsing crash information."""

    def __init__(self, msg=None):
        # Derive from Exception: raising an old-style class instance is
        # Python-2 only; under Python 3 'raise ParseError(...)' would
        # fail with "exceptions must derive from BaseException".
        super(ParseError, self).__init__(msg)
        self.message = msg
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py
new file mode 100644 (file)
index 0000000..0dc65e0
--- /dev/null
@@ -0,0 +1,80 @@
+
+"""
+Provide access to kernel_table data.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class KernelInfo:
    """Provide access to the crash kernel_table.

    The data is collected by parsing the output of the 'help -k' command.
    """

    def __init__(self):
        """Extract selected kernel data from crash 'help -k' output.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError if any expected field is missing.
        """

        # 'help -k' prints bare numbers, so the number base of every
        # field we keep must be known in advance.
        base_of = {}
        for field in ('cpus', 'NR_CPUS', 'kernel_NR_CPUS'):
            base_of[field] = 10
        for field in ('flags', 'stext', 'etext', 'stext_init', 'etext_init',
                      'init_begin', 'init_end', 'end', 'module_list',
                      'kernel_module'):
            base_of[field] = 16

        # Typical 'help -k' output:
        #          flags: b02600
        #          stext: ffffffff810001f0
        #           cpus: 48
        #        NR_CPUS: 4096 (compiled-in to this version of crash)
        #    module_list: ffffffffa05c96e0
        #  ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -k').splitlines():
            tokens = line.split()
            if len(tokens) < 2:
                continue
            field = tokens[0].rstrip(':')
            base = base_of.get(field)
            if base is not None:
                self.__dict__[field] = int(tokens[1], base)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != len(base_of):
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    len(base_of), len(self.__dict__)))

# --------------------------------------------------------------------------

# Create a shared instance.

crashlib.cid.krntbl = KernelInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py
new file mode 100644 (file)
index 0000000..f904a4a
--- /dev/null
@@ -0,0 +1,98 @@
+
+"""
+Provide access to machine-dependent data.
+Copyright 2014, 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class MachDepInfo:
    """Provide access to the crash machdep_table.

    The data is collected by parsing the output of the 'help -m' command.
    """

    def __init__(self):
        """Extract selected machine-dependent data from crash 'help -m'.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError for an unsupported machine type or when
        not every expected field was found.
        """

        # 'help -m' prints bare numbers, so the number base in use for
        # each numeric field must be known in advance.
        fieldBase = {
            'bits' : 10, 'flags' : 16, 'hz' : 10, 'identity_map_base' : 16,
            'kvbase' : 16, 'last_pgd_read' : 16, 'last_pmd_read' : 16,
            'last_ptbl_read' : 16, 'machspec' : 16, 'max_physmem_bits' : 10,
            'memsize' : 10, 'mhz' : 10, 'modules_vaddr' : 16, 'nr_irqs' : 10,
            'page_offset' : 16, 'pagemask' : 16, 'pageshift' : 10,
            'pagesize' : 10, 'pgd' : 16, 'pmd' : 16, 'ptbl' : 16,
            'ptrs_per_pgd' : 10, 'section_size_bits' : 10,
            'sections_per_root' : 10, 'stacksize' : 10, 'userspace_top' : 16,
            'vmalloc_end' : 16, 'vmalloc_start_addr' : 16, 'vmemmap_end' : 16,
            'vmemmap_vaddr' : 16
        }

        if sys_info.machine in ("x86_64", "k1om", "x86"):
            # Additional x86_64 fields.
            # Attic: 'last_pml4_read': 16, 'last_umpl_read': 16,
            #        'umpl': 16, 'pml4': 16
            fieldBase.update({
                'irq_eframe_link' : 10, 'irqstack' : 16,
                'page_protnone' : 16, 'phys_base' : 16,
                'thread_return' : 16, 'vsyscall_page' : 16,
            })
        elif sys_info.machine == 'aarch64':
            pass    # no additional aarch64 fields yet
        else:
            raise crashlib.cid.ParseError(
                    'Invalid machine type {0}.'.format(sys_info.machine))

        expected_key_count = len(fieldBase)

        # Typical 'help -m' output:
        #              kvbase: ffff880000000000
        #            pagesize: 4096
        #           pageshift: 12
        #  ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -m').splitlines():
            tokens = line.split()
            if len(tokens) < 2:
                continue

            field = tokens[0].rstrip(':')
            base = fieldBase.get(field)
            if base is not None:
                self.__dict__[field] = int(tokens[1], base)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != expected_key_count:
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    expected_key_count, len(self.__dict__)))

# --------------------------------------------------------------------------

# Create a shared instance.

crashlib.cid.mdtbl = MachDepInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py b/contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py
new file mode 100644 (file)
index 0000000..20cb0b7
--- /dev/null
@@ -0,0 +1,61 @@
+
+"""
+Provide access to the page flags known by crash.
+Copyright 2014 Cray Inc.  All Rights Reserved
+
+The data is gathered from the 'kmem -g' command.
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
+
class PageFlag:
    """A single page flag, held as both a bit shift and a bit mask.

    Note: this is really a generic bit/bitmask helper; it lives here
    until a shared abstraction exists elsewhere.
    """

    def __init__(self, name, shift_val):
        self.name = name
        self.shift = int(shift_val)
        self.mask = 1 << self.shift

    def __call__(self):
        """Calling the flag object returns its bit mask."""
        return self.mask
+
+
class MachPageFlags:
    """Extract the machine-specific page flags from crash.

    Instantiating this class yields an object with one PageFlag data
    member per kernel page flag that crash knows about for the running
    kernel version.  Example:

        page = readSU('struct page', page_addr)
        kpf = MachPageFlags()
        if page.flags & kpf.PG_slab.mask:
            ...
    """

    def __init__(self):
        """Parse the output of the crash 'kmem -g' command.

        Expected output shape:
            PAGE-FLAG       BIT  VALUE
            PG_locked         0  0000001
            PG_waiters        1  0000002
        """
        for line in exec_crash_command('kmem -g').splitlines():
            fields = line.split()
            # Keep only 'PG_*' rows; skip the header and short lines.
            if len(fields) >= 3 and fields[0].startswith('PG_'):
                flag_name = fields[0]
                self.__dict__[flag_name] = PageFlag(flag_name, int(fields[1]))

# --------------------------------------------------------------------------

# Create a shared instance of the above class.

crashlib.cid.pgflags = MachPageFlags()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py b/contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py
new file mode 100644 (file)
index 0000000..9eff1dc
--- /dev/null
@@ -0,0 +1,111 @@
+
+"""
+Provide access to physical memory information.
+Copyright 2014, 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class Resource:
    """Wrap one 'struct resource'; walking the tree reproduces /proc/iomem."""

    def __init__(self, resource):
        self.resource = resource
        self.lvl = 0    # tree depth; filled in during _walk()

    def resource_start(self):
        return self.resource.start
    start = property(resource_start)

    def resource_end(self):
        return self.resource.end
    end = property(resource_end)

    def resource_name(self):
        return self.resource.name
    name = property(resource_name)

    def addr(self):
        """Address of the wrapped struct resource."""
        return Addr(self.resource)

    def __str__(self):
        # Same "start-end : name" layout as /proc/iomem.
        return '{0:08x}-{1:08x} : {2}'.format(self.start, self.end, self.name)

    def get_child(self):
        """First child resource wrapped in Resource, or None."""
        child = self.resource.child
        return Resource(child) if child else None

    def get_sibling(self):
        """Next sibling resource wrapped in Resource, or None."""
        sibling = self.resource.sibling
        return Resource(sibling) if sibling else None

    def _walk(self, lvl=0):
        """Yield self, then children depth-first, then siblings."""
        self.lvl = lvl
        yield self
        child = self.get_child()
        if child is not None:
            for res in child._walk(lvl + 1):
                yield res
        sibling = self.get_sibling()
        if sibling is not None:
            for res in sibling._walk(lvl):
                yield res

    def iomem(self):
        """Return a /proc/iomem tree generator (root itself is skipped)."""
        return self.get_child()._walk()

    def is_System_RAM(self):
        """True if this resource describes a "System RAM" range."""
        return self.name == "System RAM"

def get_iomem():
    """Generator wrapper: walk the kernel's global iomem resource tree."""
    return Resource(readSymbol('iomem_resource')).iomem()
+
class MemMapEntry:
    """Define a single entry for a memory map.

    A MemMapEntry consists of three attributes:

        start - first address within the range
        end   - first address past the end of the range
        name  - name of address space type
    """
    # Class-level defaults, kept for compatibility with existing callers.
    start = None
    end = None
    name = None

    def __init__(self, start_addr, end_addr, name_str):
        self.start = int(start_addr)
        self.end = int(end_addr)
        self.name = name_str
+
+
def GetPhysMemMap():
    """Define a physical memory map.

    Returns the physical address map as a list of MemMapEntry objects,
    one for every "System RAM" range found in the iomem resource tree.
    """
    # iomem end addresses are inclusive; MemMapEntry.end is exclusive,
    # hence the +1.
    return [MemMapEntry(ent.start, ent.end + 1, ent.name)
            for ent in get_iomem() if ent.is_System_RAM()]

# --------------------------------------------------------------------------

# Create shared objects.

crashlib.cid.physmap = GetPhysMemMap()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py
new file mode 100644 (file)
index 0000000..7760723
--- /dev/null
@@ -0,0 +1,73 @@
+
+"""
+Provide access to the crash's vm table.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class VmInfo:
    """Make data from the crash vmtbl easily available."""

    def __init__(self):
        """Extract selected VM table data from crash 'help -v' output.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError if any expected field is missing.
        """

        # crash 'help -v' doesn't use prefixes on numbers, so we must
        # know what number base is in use for each numeric field.
        decFields = ('total_pages', 'max_mapnr', 'totalram_pages',
            'totalhigh_pages', 'num_physpages',
            'page_hash_table_len', 'kmem_max_c_num',
            'kmem_max_limit', 'kmem_max_cpus', 'kmem_cache_count',
            'kmem_cache_namelen', 'kmem_cache_len_nodes', 'PG_slab',
            'paddr_prlen', 'numnodes', 'nr_zones', 'nr_free_areas',
            'cpu_slab_type', 'nr_swapfiles', 'ZONE_HIGHMEM',
            'node_online_map_len', 'nr_vm_stat_items',
            'nr_vm_event_items')

        hexFields = ('flags', 'high_memory', 'vmalloc_start',
            'mem_map', 'page_hash_table', 'PG_reserved',
            'PG_head_tail_mask', 'slab_data', 'last_swap_read',
            'swap_info_struct', 'mem_sec', 'mem_section')

        expected_key_count = len(decFields) + len(hexFields)

        # Typical 'help -v' output:
        #         high_memory: ffff880880000000
        #       vmalloc_start: ffffc90000000000
        #             mem_map: 0
        # ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -v').splitlines():
            parts = line.split()
            if len(parts) < 2:
                continue
            key = parts[0].rstrip(':')
            if key in decFields:
                self.__dict__[key] = int(parts[1], 10)
            elif key in hexFields:
                self.__dict__[key] = int(parts[1], 16)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != expected_key_count:
            # Bug fix: this previously raised via 'crashlib.ida', a module
            # that does not exist (cf. kernel_table.py / machdep_table.py
            # which both use crashlib.cid.ParseError); the typo would have
            # produced an AttributeError instead of the intended error.
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    expected_key_count, len(self.__dict__)))

# --------------------------------------------------------------------------

# Declare a shared instance.

crashlib.cid.vmtbl = VmInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/__init__.py b/contrib/debug_tools/epython_scripts/crashlib/input/__init__.py
new file mode 100644 (file)
index 0000000..3e4f0d7
--- /dev/null
@@ -0,0 +1,241 @@
+
+"""
+Input handling routines
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+
+import itertools
+
+
+# Define some common integer multiplier suffixes
+
+# Powers of two; both upper and lower case are accepted.
+binary_suffixes={
+    'k': 2**10, 'K': 2**10,
+    'm': 2**20, 'M': 2**20,
+    'g': 2**30, 'G': 2**30,
+    't': 2**40, 'T': 2**40,
+    'p': 2**50, 'P': 2**50
+}
+# Memory sizes conventionally use binary multipliers.
+memory_suffixes = binary_suffixes
+
+# Powers of ten; both upper and lower case are accepted.
+decimal_suffixes={
+    'k': 10**3,  'K': 10**3,
+    'm': 10**6,  'M': 10**6,
+    'g': 10**9,  'G': 10**9,
+    't': 10**12, 'T': 10**12,
+    'p': 10**15, 'P': 10**15
+}
+# Disk sizes conventionally use decimal multipliers.
+disk_suffixes = decimal_suffixes
+
+# Bases tried by toint() by default: 0 (auto-detect from a '0b'/'0o'/'0x'
+# prefix), then plain base 16.
+default_bases = [0, 16]
+
+def toint(string, base=default_bases, suffixes=binary_suffixes):
+    """Convert to integer with flexible base and multiplier support.
+
+    Provide a way to handle input that may be in any of several number
+    bases but may not use the appropriate prefix, e.g. 'deadbeef' rather
+    than the more pedantic '0xdeadbeef'. Also provide support for
+    multiplier suffixes, such as 'K' for kilo.
+
+    Arguments:
+
+        string      - string to convert to integer
+        base        - a single number, as used in int() or an iterable
+                      of base values to try
+        suffixes    - dictionary keyed by the string suffix with a value
+                      to be used as a multiplier
+
+    The default base of [0, 16] allows the automatic recognition of numbers
+    with the standard prefixes and if that fails, tries a base 16 conversion.
+
+    Raises ValueError if no base/suffix combination yields an integer,
+    or if any requested base is out of range for int().
+    """
+    try:
+        bases = list(base)
+    except TypeError:
+        # Object isn't iterable, so create one that is
+        bases = [base]
+
+    # Validate up front so a bad base isn't masked by the ValueError
+    # retry logic below.
+    for b in bases:
+        if not (b == 0 or 2 <= b <= 36):
+            raise ValueError(
+                "toint() base {!s:s} must be >= 2 and <= 36".format(b))
+
+    multiplier = 1
+    try:
+        # Second iteration is after removing any suffix.  This way, if
+        # a suffix happens to contain valid numeric characters, we'll
+        # try the numeric interpretation before we try their multiplier
+        # meaning, e.g. 'g' is a valid numeric value in base 17.
+        for i in xrange(2):
+            for b in bases:
+                try:
+                    return int(string, b) * multiplier
+                except ValueError:
+                    pass
+
+            if i != 0:
+                # The suffix was already stripped on the first pass and
+                # the conversion still failed; give up.
+                raise ValueError
+
+            # Find a suffix that matches the end of the string and use it
+            for k, v in suffixes.iteritems():
+                if string.endswith(k):
+                    multiplier = v
+                    string = string[0:-len(k)]
+                    break
+            else:
+                # No suffix matched; nothing more to try.
+                raise ValueError
+
+    except ValueError:
+        # Sort the suffixes so the error message is deterministic.
+        suffix_list = suffixes.keys()
+        suffix_list.sort()
+        raise ValueError(
+            "invalid literal '{:s}' for toint() with base {!s:s} "
+            "and suffixes {!s:s}".format(string, list(bases), suffix_list))
+
+
+def hex2int(string):
+    """Wrapper for toint() which prefers base 16 input
+
+    This function is useful in situations where a callable must be passed,
+    such as with argparse.add_argument(type=hex2int, ...
+
+    Note that unprefixed digits are read as hex ('10' -> 16), while
+    explicit prefixes such as '0o10' are still honored via base 0.
+    """
+    return toint(string, base=[16, 0])
+
+
+def to_rangelist(args, default=xrange(0), base=[0,16],
+                  suffixes=binary_suffixes):
+    """Convert a bunch of range list strings into a list of ranges
+
+    The arguments are:
+
+        args     - iterable containing rangelist strings
+        default  - iterator to return if args is empty
+        base     - number base to use for integer conversion
+        suffixes - integer multiplier suffixes
+
+    Each arg is taken to be a range list, where a range list may be:
+
+        rangelist ::= range[,range]...
+        range     ::= <first>-<last> | <first>#<count> | <value>
+
+    where the range first-last is inclusive.
+
+    Returns a list of xrange objects, or 'default' when args is empty.
+    Malformed range strings raise ValueError via toint().
+
+    NOTE(review): 'base' has a mutable default list; toint() only reads
+    it, so this is safe as written.
+    """
+    if len(args) == 0:
+        return default
+
+    ranges = []
+    for range_list_str in args:
+        # Each argument may itself be a comma-separated list of ranges.
+        range_strs = range_list_str.split(',')
+        for range_str in range_strs:
+            if "-" in range_str:
+                # <first>-<last> is inclusive, so extend the end by one.
+                fields = range_str.split('-', 1)
+                start = toint(fields[0], base, suffixes=suffixes)
+                end = toint(fields[1], base, suffixes=suffixes) + 1
+                ranges.append(xrange(start, end))
+            elif "#" in range_str:
+                # <first>#<count>
+                fields = range_str.split('#', 1)
+                start = toint(fields[0], base, suffixes=suffixes)
+                end = start + toint(fields[1], base, suffixes=suffixes)
+                ranges.append(xrange(start, end))
+            else:
+                # A lone <value> becomes a one-element range.
+                start = toint(range_str, base, suffixes=suffixes)
+                end = start + 1
+                ranges.append(xrange(start, end))
+
+    return ranges
+
+
+def iter_rangestr(*args, **kwargs):
+    """Convert a bunch of range list strings into a single iterator
+
+    The arguments are the same as for to_rangelist().  The individual
+    ranges are chained into one flat iterator over all values.
+    """
+    return itertools.chain(*to_rangelist(*args, **kwargs))
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    # toint()
+    class Test_toint(unittest.TestCase):
+        """Verify the toint() conversion function"""
+        def test_base_zero(self):
+            self.assertEqual(toint('0b10', 0), 2)
+            self.assertEqual(toint('0o10', 0), 8)
+            self.assertEqual(toint('10', 0), 10)
+            self.assertEqual(toint('0x10', 0), 16)
+
+        def test_base_out_of_range(self):
+            self.assertRaises(ValueError, toint, '10', -1)
+            self.assertRaises(ValueError, toint, '10',  1)
+            self.assertRaises(ValueError, toint, '10', 37)
+
+        def test_base_search(self):
+            bases = [0, 16]
+            self.assertEqual(toint('10', bases), 10)
+            self.assertEqual(toint('f', bases), 15)
+
+            self.assertEqual(toint('0b10', bases), 2)
+            self.assertEqual(toint('0o10', bases), 8)
+            self.assertEqual(toint('10', bases), 10)
+            self.assertEqual(toint('0x10', bases), 16)
+
+        def test_suffixes(self):
+            for k, v in binary_suffixes.iteritems():
+                self.assertEqual(toint('0b10'+k), 0b10*v)
+                self.assertEqual(toint('0o10'+k), 0o10*v)
+                self.assertEqual(toint('10'+k), 10*v)
+                self.assertEqual(toint('0x10'+k), 0x10*v)
+
+        def test_suffix_number_overlap(self):
+            # Verify a valid numeric isn't used as a suffix
+            # ('g' is a digit in base 17, so '1g' == 17 + 16 == 33).
+            self.assertEqual(toint('1g', 17), 33)
+            self.assertEqual(toint('1gk', 17), 33*binary_suffixes['k'])
+
+
+    # hex2int()
+    class Test_hex2int(unittest.TestCase):
+        """Verify the hex2int() function"""
+        def test_explicit_base(self):
+            """Verify that explicit base syntax is honored"""
+            self.assertEqual(hex2int('0x10'), 16)
+            self.assertEqual(hex2int('0o10'), 8)
+
+        def test_default_base(self):
+            """Verify that base 16 is preferred"""
+            # '0b10' parses as hex 0xb10 == 2832, not binary 2.
+            self.assertEqual(hex2int('10'), 16)
+            self.assertEqual(hex2int('0b10'), 2832)
+
+
+    # iter_rangelist()
+    class Test_iter_rangelist(unittest.TestCase):
+        """Test both iter_rangestr and the underlying to_rangelist."""
+        def test_good_single_ranges(self):
+            self.assertEqual(list(iter_rangestr([])), [])
+            self.assertEqual(list(iter_rangestr(['1-2'])), list(xrange(1,3)))
+            self.assertEqual(list(iter_rangestr(['1#2'])), list(xrange(1,3)))
+            self.assertEqual(list(iter_rangestr(['1'])), list(xrange(1,2)))
+
+        def test_good_multiple_ranges(self):
+            test_rangestrs = [
+                # Test params,        Expected result
+                (['1', '3-5', '1#2'], [1, 3, 4, 5, 1, 2]),
+                ]
+
+            for ranges, expected in test_rangestrs:
+                # Test the ranges as separate list elements
+                self.assertEqual(list(iter_rangestr(ranges)), expected)
+
+                # Test the ranges joined by commas
+                joined = [','.join(ranges)]
+                self.assertEqual(list(iter_rangestr(joined)), expected)
+
+        def test_bad_single_ranges(self):
+            self.assertRaises(ValueError, iter_rangestr, ['1#2#3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1#2-3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1-2#3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1-2-3'])
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py b/contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py
new file mode 100644 (file)
index 0000000..a052f13
--- /dev/null
@@ -0,0 +1,231 @@
+
+"""
+Module which provides extensions for the standard Python argparse module.
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+import argparse
+import copy
+
+from argparse import _ensure_value, Action, ArgumentTypeError
+
+
+class ExtendAction(Action):
+    """Action to extend a list of argument values
+
+    This action is similar to the standard AppendAction, but uses the
+    extend() attribute of lists rather than the append() attribute.  As
+    such, it also has an additional requirement:
+
+    -   This action must receive an iterable 'values' argument from the
+        parser.  There are two ways to make this happen:
+
+        1.  Use type= to produce an iterable, e.g. type=str or type=list
+        2.  Use nargs= to cause the parser to produce a list, which it
+            does for any nargs= setting that is not None (default) and
+            is not '?'
+    """
+
+    def __call__(self, parser, namespace, values, option_string):
+        """Extend the list at namespace.<dest> with each element of values.
+
+        Raises ArgumentTypeError if 'values' is not iterable.
+        """
+        # Copy so that a list shared as a default value is never
+        # mutated in place.
+        items = copy.copy(_ensure_value(namespace, self.dest, []))
+
+        try:
+            items.extend(values)
+        except TypeError:
+            # Assume the TypeError is because values is not iterable
+            raise ArgumentTypeError(
+                "argument type '{:s}' is not iterable".format(
+                    type(values).__name__))
+
+        setattr(namespace, self.dest, items)
+
+
+def str2list(string, sep=',', totype=None, choices=None):
+    """Split a string into a list with conversion and validation
+
+    Split a string into a list, optionally convert each element to a
+    given type and optionally validate that all resulting values are
+    in a collection of valid values.
+
+    Arguments:
+        string   - string to split; anything with a split() method works
+        sep      - separator to split on
+        totype   - optional callable applied to each element
+        choices  - optional container of permitted (converted) values
+
+    Raises ArgumentTypeError on any split, conversion, or choice failure.
+    """
+
+    # For pluralizing messages that may cover several bad values.
+    plural = {False: '', True: 's'}
+
+    # Values should be string or an iterable container of strings.
+    # Split values on the separator into a list
+    try:
+        lst = string.split(sep)
+    except AttributeError:
+        raise ArgumentTypeError(
+            "argument type '{:s}' does not have split() attribute".format(
+                type(string).__name__))
+
+    # Perform type conversion
+    if totype is not None:
+        # Collect all failures so they can be reported together.
+        errs = []
+        for i, v in enumerate(lst):
+            try:
+                lst[i] = totype(v)
+            except (TypeError, ValueError):
+                errs.append(v)
+        if errs:
+            msg = "invalid {:s} value{:s}: {!r:s}".format(
+                totype.__name__, plural[len(errs) > 1], errs)
+            raise ArgumentTypeError(msg)
+
+    # Verify each separate value
+    if choices is not None:
+        errs = filter(lambda x:x not in choices, lst)
+        if errs:
+            msg = "invalid choice{:s}: {!r:s} (choose from {!s:s})".format(
+                plural[len(errs) > 1], errs, choices)
+            raise ArgumentTypeError(msg)
+
+    return lst
+
+
+def tolist(sep=',', totype=None, choices=None):
+    """Returns a parameterized callable for argument parser type conversion
+
+    This function returns a function which accepts a single argument at
+    call time and which uses the supplied arguments to modify its conversion
+    behavior, e.g. parser.add_argument(..., type=tolist(totype=int)).
+    """
+    # Bind the configuration into a one-argument callable for argparse.
+    return lambda x:str2list(x, sep=sep, totype=totype, choices=choices)
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    class Test_Action_Base(unittest.TestCase):
+        """Create a base class for testing argparse Action classes"""
+
+        def setUp(self):
+            """Create the ExtendAction object and args Namespace"""
+            self.action = ExtendAction([], dest='dest')
+            self.args   = argparse.Namespace()
+
+        def actionRun(self, values):
+            """Run the Action instance using values"""
+            self.action(None, self.args, values, '')
+
+        def actionEqual(self, values, expected):
+            """Run the Action and check the expected results"""
+            self.actionRun(values)
+            self.assertEqual(self.args.dest, expected)
+
+        def actionArgTypeErr(self, values):
+            """Run the Action and verify it raises ArgumentTypeError"""
+            self.assertRaises(
+                ArgumentTypeError, self.action, None, self.args, values, '')
+
+
+    class Test_ExtendAction(Test_Action_Base):
+        """Test the ExtendAction class"""
+
+        def test_non_iterable(self):
+            """Test ExtendAction with a non-iterable type
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None, type=int ...)
+
+                parser.parse_args(['-z', '0'])
+            """
+            self.actionArgTypeErr(0)
+
+        def test_single_value(self):
+            """Test ExtendAction with a single value
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None ...)
+
+                parser.parse_args(['-z', 'a'])
+            """
+            self.actionEqual('a', ['a'])
+
+        def test_single_string(self):
+            """Test ExtendAction with a single multi-character string
+
+            A bare string is iterable, so it is extended char-by-char.
+            This is similar to:
+                parser.add_argument('-z', nargs=None ...)
+
+                parser.parse_args(['-z', 'abc'])
+            """
+            self.actionEqual('abc', ['a', 'b', 'c'])
+
+        def test_single_value_multiple_calls(self):
+            """Test ExtendAction with a single value and multiple calls
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None, type=int ...)
+
+                parser.parse_args(['-z', 'a', '-z', 'b'])
+            """
+            self.actionEqual('a', ['a'])
+            self.actionEqual('b', ['a', 'b'])
+
+        def test_value_list(self):
+            """Test ExtendAction with a value list
+
+            This is similar to:
+                parser.add_argument('-z', nargs=1 ...)
+
+                parser.parse_args(['-z', 'abc'])
+            """
+            self.actionEqual(['abc'], ['abc'])
+
+        def test_value_list_multiple_calls(self):
+            """Test ExtendAction with a single value and multiple calls
+
+            This is similar to:
+                parser.add_argument('-z', nargs=1 ...)
+
+                parser.parse_args(['-z', 'abc', '-z', 'def'])
+            """
+            self.actionRun(['abc'])
+            self.actionEqual(['def'], ['abc', 'def'])
+
+        def test_value_list_multiple_values(self):
+            """Test ExtendAction with a value list of length > 1
+
+            This is similar to:
+                parser.add_argument('-z', nargs=2 ...)
+                -or-
+                parser.add_argument('-z', nargs='+' ...)
+                -or-
+                parser.add_argument('-z', nargs='*' ...)
+
+                parser.parse_args(['-z', 'abc', 'def'])
+            """
+            self.actionEqual(['abc', 'def'], ['abc', 'def'])
+
+
+    class Test_tolist_str2list(unittest.TestCase):
+        """Test the str2list and tolist conversion functions"""
+
+        def test_sep(self):
+            """Verify default and non-default separators work"""
+            f = tolist()
+            self.assertEqual(f('a,b,c'), ['a','b','c'])
+            f = tolist(sep=':')
+            self.assertEqual(f('a:b:c'), ['a','b','c'])
+
+        def test_non_iterable(self):
+            """Verify a non-iterable string is caught"""
+            f = tolist()
+            self.assertRaises(ArgumentTypeError, f, 0)
+
+        def test_type_conversion(self):
+            """Verify type conversion works properly"""
+            f = tolist(totype=int)
+            self.assertEqual(f('0,1,2'), [0, 1, 2])
+            self.assertRaises(
+                ArgumentTypeError, f, '1,z,2,q')
+
+        def test_choices(self):
+            """Verify the choices validation works properly"""
+            f = tolist(totype=int, choices=[0, 1, 2, 3])
+            self.assertEqual(f('0,1,2'), [0, 1, 2])
+            self.assertRaises(ArgumentTypeError, f, '0,5,2')
+
+
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py b/contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py
new file mode 100644 (file)
index 0000000..f9373c9
--- /dev/null
@@ -0,0 +1,211 @@
+
+"""
+Routines for handling enums (or other symbolic names)
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+import uflookup
+
+class NameSet:
+    """Two-way translation between int values (enums, #defines) and strings.
+    Also provides access to value by e.g.:
+    vms = NameSet()  # vmstat nameset
+    vms.AddName("NR_FILE_MAPPED", 8)
+    vms.NR_FILE_MAPPED == 8
+
+    The advantages over just using a dict include:
+
+    * Define the values once, and get value->string, string->value,
+      and python identifier ns.<name> as above.
+    * The auto-incrementing _next_value
+      """
+
+    def __init__(self, mapping=None):
+        """Create and initialize a NameSet object
+
+        Arguments:
+            mapping:    if specified, provides a mapping object, e.g. dict,
+                        that supplies the initial key(name)/value pairs.
+        """
+        self.value_to_name = {}
+        self.name_to_value = {}
+
+        self._next_value = 0
+#        self._sorted_values = []
+#        self._sorted_names = []
+
+        if mapping is not None:
+            self.addMap(mapping)
+
+    def addName(self, name, value=None):
+        """Add a single name, by default using the next value.
+
+        If two names end up with the same value, the value will map
+        only to the first of them.
+        """
+
+        if name in self.name_to_value.keys():
+            raise ValueError("Name {0} already defined (value {1})".format(
+                name, self.name_to_value[name]))
+        try:
+            getattr(self, name)
+        except AttributeError:
+            pass
+        else:
+            raise ValueError("Value {0} already used by NameSet object!".
+                             format(value))
+
+        if value is None:
+            value = self._next_value
+        self._next_value = value + 1
+
+        self.name_to_value[name] = value
+        if value not in self.value_to_name:
+            self.value_to_name[value] = name
+ #       self._sorted_values = []
+ #       self._sorted_names = []
+
+        setattr(self, name, value)
+
+    def addNames(self, *namelist):
+        """Add a list of names, each using the respective next value"""
+        map(self.addName, namelist)
+
+    def addMap(self, mapping):
+        """Add the key/value pairs from a mapping type"""
+        for k, v in mapping.items():
+            self.addName(k, v)
+
+    def UFLookup(self, key, **kwargs):
+        return uflookup.UFLookup(self.name_to_value, key, **kwargs)
+
+#    def somethingUsingSortedArrays:
+#        if not self._sorted_values:
+#            self._sorted_values = sorted(self.value_to_name.keys())
+#            self._sorted_names = sorted(self.name_to_value.keys())
+
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    class Test_NameSet(unittest.TestCase):
+        """Test the NameSet class"""
+
+        def VerifyName(self, name, value):
+            """Verify that self.ns has name <-> value"""
+            self.assertEqual(value, self.ns.name_to_value[name])
+            self.assertEqual(value, getattr(self.ns, name))
+            self.assertEqual(name, self.ns.value_to_name[value])
+
+    class Test_Empty(Test_NameSet):
+        """Test an empty NameSet"""
+        def setUp(self):
+            self.ns = NameSet()
+
+        def test_empty_vtn(self):
+            self.assertEqual(0, len(self.ns.value_to_name))
+        def test_empty_ntv(self):
+            self.assertEqual(0, len(self.ns.name_to_value))
+
+
+    class Test_addName(Test_NameSet):
+        """Test addName"""
+        def setUp(self):
+            self.ns = NameSet()
+
+        def test_add_one_name(self):
+            self.ns.addName("FOO")
+
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.FOO)
+
+        def test_add_two_names(self):
+            self.ns.addName("BAR")
+            self.ns.addName("BAZ")
+
+            self.VerifyName("BAR", 0)
+            self.VerifyName("BAZ", 1)
+            self.assertEqual(0, self.ns.BAR)
+            self.assertEqual(1, self.ns.BAZ)
+
+
+        def test_add_namevalue(self):
+            self.ns.addName("FOO", 87)
+            self.VerifyName("FOO", 87)
+            self.assertEqual(87, self.ns.FOO)
+
+        def test_reuse_existing_value(self):
+            self.ns.addName("FOO", 2)
+            self.ns.addName("B0",0)
+            self.ns.addName("B1")
+            self.ns.addName("B2")
+            self.ns.addName("B3")
+
+            self.VerifyName("FOO", 2)
+            self.VerifyName("B0", 0)
+            self.VerifyName("B1", 1)
+            # Value 2 already maps back to FOO, so only the forward
+            # (name -> value) direction can be checked for B2.
+            self.assertEqual(2, self.ns.name_to_value["B2"])
+            self.VerifyName("B3", 3)
+
+            self.assertEqual(2, self.ns.FOO)
+            self.assertEqual(0, self.ns.B0)
+            self.assertEqual(1, self.ns.B1)
+            self.assertEqual(3, self.ns.B3)
+
+        def test_addNames(self):
+            self.ns.addNames("FOO", "BAR", "BAZ")
+            self.VerifyName("FOO", 0)
+            self.VerifyName("BAR", 1)
+            self.VerifyName("BAZ", 2)
+
+            self.assertEqual(0, self.ns.FOO)
+            self.assertEqual(1, self.ns.BAR)
+            self.assertEqual(2, self.ns.BAZ)
+
+        def test_addDupName(self):
+            self.ns.addName("FOO", 1)
+            self.assertRaises(ValueError, self.ns.addName, "FOO", 2)
+
+        def test_addDupValue(self):
+            self.ns.addName("FOO")
+            self.ns.addName("BAR", 0)
+
+            # The reverse mapping keeps the first name added for value 0.
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.name_to_value["BAR"])
+
+        def test_addMoreDupValues(self):
+            self.ns.addName("FOO")
+            self.ns.addName("BAR", 0)
+            self.ns.addName("BAZ", 0)
+
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.name_to_value["BAR"])
+            self.assertEqual(0, self.ns.name_to_value["BAZ"])
+
+
+        def test_addConflicting(self):
+            # "addName" collides with the NameSet method attribute.
+            self.assertRaises(ValueError, self.ns.addName, "addName")
+
+
+    class Test_mapping(Test_NameSet):
+        """Test map handling"""
+        def setUp(self):
+            self.ns = NameSet(mapping={"SLEEPY":1, "GRUMPY": 0})
+
+        def test_constructor(self):
+            self.VerifyName("SLEEPY", 1)
+            self.VerifyName("GRUMPY", 0)
+
+        def test_addMap(self):
+            self.ns.addMap({"DOC": 9, "BASHFUL": 3})
+
+            self.VerifyName("SLEEPY", 1)
+            self.VerifyName("GRUMPY", 0)
+            self.VerifyName("DOC", 9)
+            self.VerifyName("BASHFUL", 3)
+
+
+    # Run all unit tests
+    unittest.main()
+
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py b/contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py
new file mode 100644 (file)
index 0000000..e2c767b
--- /dev/null
@@ -0,0 +1,355 @@
+
+"""
+Flag handling routines
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+
+### TBD: The "Simple" in the addSimple* interfaces refers to a flag
+### that's a single bit.  It's meant to distinguish from flags that
+### have multibit fields, such as the node/zone indices stuck in the
+### high end of struct page.flags; or a field that's mostly a pointer
+### but with some flags in the low bits.
+#
+### To add cases like that will mean redoing most of the
+### implementation, but all the current interfaces should be ok, with
+### new interfaces added to let users define the non-simple flags.
+
+import uflookup
+
+
+class FlagSet:
+    """A collection of flags and values, with routines for translating
+
+    For decoding a flag int to a string, encoding a flag string to an
+    int, and providing python identifiers for testing by name, e.g.,
+
+    jafs = FlagSet() # job_attach flagset
+    jafs.addSimpleFlag("disable_affinity_apply")
+    if job_attach.flags & jafs.disable_affinity_apply: ...
+
+    The advantages over just using a dict include:
+    * Define the values once, and get value->string, string->value,
+      and python identifiers ns.<name> and ns.<name>_shift as above.
+    * The auto-incrementing _next_bit
+    """
+    def __init__(self, mapping=None):
+        """Create and initialize a FlagSet object
+
+        Arguments:
+            mapping:    if specified, provides a mapping object, e.g. dict,
+                        that supplies the initial key(name)/value pairs.
+        """
+        # Public dict of flag names to flag values (not the bit number)
+        self.str_to_value = {}
+        # Public dict of flag values to flag names
+        self.value_to_str = {}
+
+        self._next_bit = 0
+
+        # sorted_values is so that translating a value to a string
+        # will report the strings in the same order every time.  That
+        # order is by numerically increasing value.
+        self._sorted_values = []
+        self._sorted_strs = []
+
+        if mapping is not None:
+            self.addMap(mapping)
+
+    def addSimpleFlag(self, s, bit=None):
+        """Add a single-bit flag.
+
+        If bit is not specified, uses the bit one greater than the
+        previously defined bit.  If multiple flags are defined to use
+        the same bit, value_to_str will remember only the first."""
+
+        if s in self.str_to_value.keys():
+            raise ValueError("Flag {0} already defined (value {1:x})".format(
+                s, self.str_to_value[s]))
+        if s + "_shift" in self.str_to_value.keys():
+            raise ValueError("Flag {0} conflicts with another "
+                             "flag ({1})".format(s, s + "_shift"))
+
+        try:
+            getattr(self, s)
+        except AttributeError:
+            pass
+        else:
+            raise ValueError("Value {0} already used by FlagSet object!".
+                             format(s))
+
+        try:
+            getattr(self, s + "_shift")
+        except AttributeError:
+            pass
+        else:
+            raise valueError("{0}_shift already used by FlagSet object!".
+                             format(s))
+
+
+        if bit is None:
+            bit = self._next_bit;
+        self._next_bit = bit + 1
+
+        value = 1 << bit
+        if value not in self.value_to_str:
+            self.value_to_str[value] = s
+        self.str_to_value[s] = value
+
+        self._sorted_values = []
+
+        setattr(self, s, value)
+        setattr(self, s+"_shift", bit)
+
+    def addSimpleFlags(self, *l):
+        """Adds a list of single-bit flags."""
+        map(self.addSimpleFlag, l)
+
+    def addMap(self, mapping):
+        """Add the key/value pairs from a mapping type"""
+        for k, v in mapping.items():
+            self.addSimpleFlag(k, v)
+
+    def _EnsureSorted(self):
+        if self._sorted_values:
+            return
+        self._sorted_values = sorted(self.value_to_str.keys())
+#        self._sorted_strs = sorted(self.str_to_value.keys())
+
+
+    def flagsToStringList(self, flagint):
+        """Translate a given flag int to a list of flag strings."""
+        self._EnsureSorted()
+        strs = []
+        for v in self._sorted_values:
+            if flagint & v != 0:
+                strs.append(self.value_to_str[v])
+                flagint &= ~v
+        if flagint != 0:
+            strs.append("{0:#x}".format(flagint))
+        return strs
+
+    def UFLookup(self, key, **kwargs):
+        return uflookup.UFLookup(self.str_to_value, key, **kwargs)
+
+    # TBD: interface to enable a script --dump-flag-translations argument?
+
+
+
+def join_flaglist(fl, sep = "|", empty = "0"):
+    """Helper function to join a list of flag strings.
+
+    Returns the strings joined by 'sep', or the 'empty' placeholder
+    (default "0") when the list is empty.
+    """
+    if fl:
+        return sep.join(fl)
+    else:
+        return empty
+
+
+### Tests
+
+# I'm trying to follow the convention of
+
+#   assertEquals(expectedvalue, function_under_test(args))
+
+# I didn't discover that (on some unittest page) until I was halfway
+# through, so I may not have gotten them all in the right order.
+
+if __name__ == '__main__':
+    import unittest
+
+    class Test_join_flaglist(unittest.TestCase):
+        """Test the join_flaglist function"""
+
+        def assertJoinFlaglistEqual(self, expectedstring, flaglist):
+            self.assertEqual(expectedstring, join_flaglist(flaglist))
+
+        def test_single_value(self):
+            """Test join_flaglist() with a single value"""
+            self.assertJoinFlaglistEqual("aflag", ["aflag"])
+
+        def test_two_values(self):
+            """Test join_flaglist() with two values"""
+            self.assertJoinFlaglistEqual("aflag|bflag",["aflag", "bflag"])
+
+        def test_three_values(self):
+            """Test join_flaglist() with three values"""
+            self.assertJoinFlaglistEqual("af|bf|cf", ["af", "bf", "cf"])
+
+        def test_comma_sep(self):
+            """Test join_flaglist() with a non-default sep"""
+            self.assertEqual("af,bf,cf",
+                             join_flaglist(["af", "bf", "cf"], sep=','))
+
+        def test_join_empty(self):
+            """Test join_flaglist() with an empty list"""
+            self.assertEqual("0", join_flaglist([]))
+
+        def test_join_empty_nondefault(self):
+            """Test join_flaglist() with a non-default value of empty"""
+            self.assertEqual(" ", join_flaglist([], empty=" "))
+
+
+    class Test_FlagSet(unittest.TestCase):
+        """Test the FlagSet class"""
+
+        def setUp(self):
+            self.fs = FlagSet()
+
+        def VerifyFlag(self, string, value):
+            """Test string->value and value->string"""
+            self.assertEqual(value, self.fs.str_to_value[string])
+            self.assertEqual(string, self.fs.value_to_str[value])
+            self.assertEqual(value, getattr(self.fs, string))
+            self.assertEqual(value, 1<<getattr(self.fs, string+"_shift"))
+
+    class Test_FlagSet_Constructor(Test_FlagSet):
+        def test_constructor(self):
+            """Too much?"""
+            self.assertEqual(self.fs._next_bit, 0)
+            self.assertFalse(self.fs.value_to_str)
+            # etc.
+
+    class Test_Add_Simple_Flag(Test_FlagSet):
+        def test_add_simple_flag(self):
+            """Test that adding a simple flag to an empty FlagSet works"""
+            self.fs.addSimpleFlag("FOO")
+            self.VerifyFlag("FOO", 1)
+
+        def test_3_add_simple_flag(self):
+            """Test multiple addSimpleFlag calls"""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR")
+            self.fs.addSimpleFlag("BAZ")
+
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+
+            self.assertEqual(1, self.fs.FOO)
+            self.assertEqual(2, self.fs.BAR)
+            self.assertEqual(4, self.fs.BAZ)
+
+            self.assertEqual(0, self.fs.FOO_shift)
+            self.assertEqual(1, self.fs.BAR_shift)
+            self.assertEqual(2, self.fs.BAZ_shift)
+
+            self.fs._EnsureSorted()
+#            self.assertEqual(self.fs._sorted_strs, ["BAR", "BAZ", "FOO"])
+            self.assertEqual(self.fs._sorted_values, [1, 2, 4])
+
+        def test_add_simple_flag_with_value(self):
+            """Test addSimpleFlag calls with explicit bit="""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR", bit=1)
+            self.fs.addSimpleFlag("BAZ")
+            self.fs.addSimpleFlag("BLAT", bit=17)
+            self.fs.addSimpleFlag("FROB")
+            self.fs.addSimpleFlag("SNARF", bit=5)
+
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+            self.VerifyFlag("SNARF", 32)
+            self.VerifyFlag("BLAT", 1<<17)
+            self.VerifyFlag("FROB", 1<<18)
+
+            self.fs._EnsureSorted()
+#            self.assertEqual(self.fs._sorted_strs,
+#                             ["BAR", "BAZ", "BLAT", "FOO", "FROB"])
+            self.assertEqual(self.fs._sorted_values,
+                             [1, 2, 4, 32, 1<<17, 1<<18])
+
+
+        def test_add_simple_flag_dup_name(self):
+            """Test exception on duplicate flag name"""
+            self.fs.addSimpleFlag("FOO")
+            self.assertRaises(ValueError, self.fs.addSimpleFlag, "FOO")
+
+        def test_add_simple_flag_dup_value(self):
+            """Test exception on duplicate flag value"""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR", bit=0)
+
+            self.VerifyFlag("FOO", 1)
+            self.assertEqual(1, self.fs.str_to_value["BAR"])
+
+        def test_add_shift_duplicated_name(self):
+            """Test that name and name_shift can't both be added"""
+            self.fs.addSimpleFlag("FOO_shift")
+            self.assertRaises(ValueError, self.fs.addSimpleFlag, "FOO")
+            self.assertRaises(ValueError,
+                              self.fs.addSimpleFlag, "FOO_shift_shift")
+
+        def test_attr_name_conflict(self):
+            """Test that adding a flag won't clobber an object attribute"""
+            self.assertRaises(ValueError,
+                              self.fs.addSimpleFlag, "addSimpleFlag")
+
+    class Test_Add_Simple_Flags(Test_FlagSet):
+        def test_add_simple_flags(self):
+            """Test that addSimpleFlags() can add several flags"""
+
+            self.fs.addSimpleFlags("FOO", "BAR", "BAZ")
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+
+    class Test_FlagSet_mapping(Test_FlagSet):
+        def setUp(self):
+            self.fs = FlagSet(mapping={"FOO": 9, "BAR": 1})
+
+        def test_constructor(self):
+            self.VerifyFlag("FOO", 1<<9)
+            self.VerifyFlag("BAR", 1<<1)
+
+        def test_addMap(self):
+            self.fs.addMap({"BAZ": 3, "ZING": 7})
+
+            self.VerifyFlag("FOO", 1<<9)
+            self.VerifyFlag("BAR", 1<<1)
+            self.VerifyFlag("BAZ", 1<<3)
+            self.VerifyFlag("ZING", 1<<7)
+
+    class Test_FlagSet_FBBZZ(Test_FlagSet):
+        """FlagSet with certain set of flags"""
+        def setUp(self):
+            self.fs = FlagSet()
+            self.fs.addSimpleFlags("FOO", "BAR", "BAZ")
+            self.fs.addSimpleFlag("ZING", bit=13)
+            self.fs.addSimpleFlag("ZOING", bit=42)
+
+        def Verify_F2SL(self, expectedstrlist, flags):
+            self.assertEqual(expectedstrlist, self.fs.flagsToStringList(flags))
+
+    class Test_FlagSet_FBBZZ_flagsToStringList(Test_FlagSet_FBBZZ):
+        def test_F(self):
+            self.Verify_F2SL(["FOO"], 1)
+        def test_B(self):
+            self.Verify_F2SL(["BAR"], 2)
+        def test_B2(self):
+            self.Verify_F2SL(["BAZ"], 4)
+        def test_Z(self):
+            self.Verify_F2SL(["ZING"], 1<<13)
+        def test_Z2(self):
+            self.Verify_F2SL(["ZOING"], 1<<42)
+
+        def test_FB(self):
+            self.Verify_F2SL(["FOO", "BAR"], 3)
+        def test_FBB(self):
+            self.Verify_F2SL(["FOO", "BAR", "BAZ"], 7)
+        def test_FB2(self):
+            self.Verify_F2SL(["BAR", "BAZ"], 6)
+
+        def test_FBBZZ(self):
+            self.Verify_F2SL(["FOO", "BAR", "BAZ", "ZING", "ZOING"],
+                             7|1<<13|1<<42)
+
+        def test_unknownflag(self):
+            self.Verify_F2SL(["0x10"], 0x10)
+        def test_unknownflags(self):
+            self.Verify_F2SL(["0x30"], 0x30)
+        def test_knownandunknownflags(self):
+            self.Verify_F2SL(["FOO", "0x30"], 0x31)
+
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py b/contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py
new file mode 100644 (file)
index 0000000..8f9d4b6
--- /dev/null
@@ -0,0 +1,127 @@
+
+"""
+User Friendly Lookup routine
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+# TBD: Maybe it would be more useful to replace prefixok with
+# substringok, for cases with lots of common prefix, like
+# CAP_SYS_PTRACE and CAP_SYS_TTY_CONFIG
+#
+# Wait until there's a user for it.
+
+def UFLookup(d, key, casesensitive=False, prefixok=True):
+    """User Friendly Lookup
+
+    By default, case-insensitive, unique-prefix-accepting lookups on
+    dict d"""
+
+    def _casesensitive_prefixok(d, key):
+        """case sensitive, prefixes ok"""
+        matches = []
+        for s in d.keys():
+            if s == key:
+                return d[s]
+            if s.startswith(key):
+                matches.append(s)
+        if len(matches) == 1:
+            return d[matches[0]]
+        raise KeyError("{0} matches multiple keys: {1}".format(
+            key, ", ".join(matches)))
+
+    def _caseinsensitive_prefixok(d, key):
+        """case insensitive, prefixes ok"""
+        matches = []
+        lkey = key.lower()
+        for s in d.keys():
+            if s.lower() == lkey:
+                return d[s]
+            if s.lower().startswith(lkey):
+                matches.append(s)
+        if len(matches) == 0:
+            raise KeyError("No match for {0}".format(key))
+        if len(matches) == 1:
+            return d[matches[0]]
+        raise KeyError("{0} matches multiple keys: {1}".format(
+            key, ", ".join(matches)))
+
+    def _caseinsensitive_noprefix(d, key):
+        """case insensitive, prefixes not ok"""
+        lkey = key.lower()
+        for s in d.keys():
+            if s.lower() == lkey:
+                return d[s]
+        raise KeyError("No match for {0}".format(key))
+
+    def _casesensitive_noprefix(d, key):
+        """case sensitive, prefixes not ok"""
+        return d[key]
+
+    if casesensitive and not prefixok:
+        return _casesensitive_noprefix(d, key)
+    if casesensitive:
+        return _casesensitive_prefixok(d, key)
+    if prefixok:
+        return _caseinsensitive_prefixok(d, key)
+    return _caseinsensitive_noprefix(d, key)
+
+
+
+if __name__ == '__main__':
+    import unittest
+
+    class Test_UFLookup_FBBZZ(unittest.TestCase):
+        def setUp(self):
+            self.d = { "FOO": 1,
+                       "BAR": 2,
+                       "baz": 3,
+                       "zing": 4,
+                       "zinGlinG": 5 }
+
+        def checkall(self, expectedlist, key):
+            """Test UFLookup(self.d, key) for all four flags combinations.
+
+            expectedlist[] contains the four expected results,
+            [0]: casesensitive = False, prefixok = False
+            [1]: casesensitive = False, prefixok = True
+            [2]: casesensitive = True,  prefixok = False
+            [3]: casesensitive = True,  prefixok = True
+
+            If expectedlist[i] is None, then UFLookup should raise
+            KeyError for that case.  Otherwise, it's the value that
+            should be returned."""
+
+            kdicts = [{"casesensitive": False, "prefixok": False},
+                      {"casesensitive": False, "prefixok": True},
+                      {"casesensitive": True,  "prefixok": False},
+                      {"casesensitive": True,  "prefixok": True}]
+            for i in xrange(len(expectedlist)):
+                e = expectedlist[i]
+                if e is None:
+                    self.assertRaises(KeyError,
+                                      UFLookup, self.d, key, **kdicts[i])
+                else:
+                    self.assertEqual(e, UFLookup(self.d, key, **kdicts[i]))
+
+        def test_FOO(self):
+            self.checkall([1, 1, 1, 1], "FOO")
+        def test_foo(self):
+            self.checkall([1, 1, None, None], "foo")
+        def test_F(self):
+            self.checkall([None, 1, None, 1], "F")
+        def test_f(self):
+            self.checkall([None, 1, None, None], "f")
+
+
+        def test_ambig_prefix_zin(self):
+            self.checkall([None, None, None, None], "zin")
+        def test_semiambig_prefix_ba(self):
+            self.checkall([None, None, None, 3], "ba")
+        def test_prefix_exactmatch_zing(self):
+            self.checkall([4, 4, 4, 4], "zing")
+        def test_prefix_semiexact_zinG(self):
+            self.checkall([4, 4, None, 5], "zinG")
+
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/page.py b/contrib/debug_tools/epython_scripts/crashlib/page.py
new file mode 100644 (file)
index 0000000..ef87d05
--- /dev/null
@@ -0,0 +1,127 @@
+"""
+Constants and routines for manipulating kernel page struct.
+Copyright 2014-2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+import crashlib.cid.machdep_table
+import crashlib.cid.page_flags
+import crashlib.cid.phys_mem_map
+import crashlib.memarray
+
+# --------------------------------------------------------------------------
+
+page_struct_size = getSizeOf('struct page')
+
+# --------------------------------------------------------------------------
+
+# Create a function for determining whether a page is controlled by the
+# buddy allocator.  Note that earlier kernels (< 3.0) have a page flag, while
+# later kernels use the _mapcount field.
+
+if hasattr(crashlib.cid.pgflags, 'PG_buddy'):
+    def is_buddy_page(page):
+        return page.flags & crashlib.cid.pgflags.PG_buddy.mask;
+else:
+    def is_buddy_page(page):
+        # Early implementations used -2, later use -128
+        return page._mapcount.counter == -128 or page._mapcount.counter == -2
+
+if hasattr(crashlib.cid.pgflags, 'PG_compound'):
+    def is_compound_page_head(page):
+        return (page.flags & (crashlib.cid.pgflags.PG_reclaim.mask |
+                              crashlib.cid.pgflags.PG_compound.mask)
+               ) == crashlib.cid.pgflags.PG_compound
+
+    def is_compound_page_tail(page):
+        return (page.flags & (crashlib.cid.pgflags.PG_reclaim.mask |
+                              crashlib.cid.pgflags.PG_compound.mask)
+               ) == (crashlib.cid.pgflags.PG_reclaim.mask |
+                     crashlib.cid.pgflags.PG_compound.mask)
+
+    def is_compound_page(page):
+        return page.flags & crashlib.cid.pgflags.PG_compound.mask
+
+elif hasattr(crashlib.cid.pgflags, 'PG_tail'):
+    # PG_head and PG_tail defined
+    def is_compound_page_head(page):
+        return page.flags & (crashlib.cid.pgflags.PG_head.mask)
+
+    def is_compound_page_tail(page):
+        return page.flags & (crashlib.cid.pgflags.PG_tail.mask)
+
+    def is_compound_page(page):
+        return is_compound_page_head(page) or is_compound_page_tail(page)
+
+else:
+    # Only PG_head is defined
+    def is_compound_page_head(page):
+        return page.flags & (crashlib.cid.pgflags.PG_head.mask)
+
+    def is_compound_page_tail(page):
+        return page.compound_head & 1
+
+    def is_compound_page(page):
+        return is_compound_page_head(page) or is_compound_page_tail(page)
+
+# --------------------------------------------------------------------------
+
+# Find the page order of a buddy page
+
+def buddy_order(page):
+    """Retrieve the order of a page in the buddy allocator"""
+    return page.private
+
+# --------------------------------------------------------------------------
+
+# Create a function to determine the page order of a compound page
+
+if member_offset('struct page', 'compound_order') > -1:
+    def compound_order(page):
+        """Retrieve the page order for a compound page."""
+        # A compound page is a series of contiguous pages, thus there are
+        # at least two page structs.  The second page struct (first tail page)
+        # contains the page order; the head page uses the space for a
+        # different purpose.
+        return page[1].compound_order
+
+else:
+    def compound_order(page):
+        """Retrieve the page order for a compound page."""
+        # A compound page is a series of contiguous pages, thus there are
+        # at least two page structs.  The second page struct (first tail page)
+        # contains the page order stored in the lru.prev field; the head page
+        # uses the space for a different purpose.
+        return page[1].lru.prev
+
+# --------------------------------------------------------------------------
+
+def pfn(page):
+    """Returns the pfn for the supplied page struct or page struct address."""
+    vmemmap_vaddr = crashlib.cid.mdtbl.vmemmap_vaddr
+    return (page - vmemmap_vaddr) / page_struct_size
+
+# --------------------------------------------------------------------------
+
+def page_list():
+    """Return a list-like class of page structs indexed by pfn.
+
+    This implementation assumes the kernel is configured with a virtually
+    contiguous mem_map.
+    """
+    # If the kernel doesn't have a virtually contiguous mem_map, this could
+    # be changed to return a chained list of MemCArray objects.
+
+    PAGE_SHIFT = crashlib.cid.mdtbl.pageshift
+    pfn_start  = crashlib.cid.physmap[0].start >> PAGE_SHIFT
+    pfn_end    = crashlib.cid.physmap[-1].end >> PAGE_SHIFT
+
+    # Find page map and create an array of page_struct
+    vmemmap_addr = crashlib.cid.mdtbl.vmemmap_vaddr
+
+    return crashlib.memarray.MemCArray(vmemmap_addr,
+                                        lambda a:readSU('struct page',a),
+                                        getSizeOf('struct page'),
+                                        pfn_end-pfn_start)
diff --git a/contrib/debug_tools/epython_scripts/crashlib/time.py b/contrib/debug_tools/epython_scripts/crashlib/time.py
new file mode 100644 (file)
index 0000000..e276591
--- /dev/null
@@ -0,0 +1,42 @@
+"""
+Routines for retrieving and manipulating kernel time
+Copyright 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import readSymbol, symbol_exists
+from crashlib.exceptions import *
+
+# --------------------------------------------------------------------------
+# get_wallclock_seconds()
+#
+# There are multiple variants, depending on kernel version.  Attempt to
+# discern the proper method for retrieving the current wall clock time.
+#
+
+if symbol_exists('xtime'):
+    # SLES 11 uses struct timespec xtime to hold the wall time.
+    _wallclock_xtime = readSymbol('xtime')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_xtime.tv_sec
+
+elif symbol_exists('timekeeper'):
+    # SLES 12 has a new timekeeper struct for that purpose
+    _wallclock_timekeeper = readSymbol('timekeeper')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_timekeeper.xtime_sec
+
+elif symbol_exists('tk_core'):
+    # SLES 12 SP2 embeds the timekeeper struct in tk_core
+    _wallclock_tk_core = readSymbol('tk_core')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_tk_core.timekeeper.xtime_sec
+
+else:
+    # Unknown how to read wallclock time in this kernel
+    def get_wallclock_seconds():
+        raise CompatibilityError('Could not find wallclock time in the kernel')
+
+# --------------------------------------------------------------------------
diff --git a/contrib/debug_tools/epython_scripts/crashlib/util.py b/contrib/debug_tools/epython_scripts/crashlib/util.py
new file mode 100644 (file)
index 0000000..aa6a2e1
--- /dev/null
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017-2018 Cray Inc. All Rights Reserved.
+
+from collections import namedtuple
+from math import ceil
+
+from crash import addr2sym, PAGESIZE
+from pykdump.API import (Addr, exec_crash_command, getSizeOf, member_offset,
+                         readmem, readU8, readULong, sys_info)
+
+from crashlib.cl import (cl_err, cl_warn, cl_info, cl_trace)
+
+BYTES_1K = 1024
+BYTES_1M = BYTES_1K * 1024
+BYTES_1G = BYTES_1M * 1024
+BYTES_1T = BYTES_1G * 1024
+
+def bytes2size(bytes):
+    '''Return a string representation of bytes, including order,
+    ie '15.0M' or '2.1G'.'''
+    suffix = ""
+    if bytes >= BYTES_1T:
+        suffix = "T"
+        size = BYTES_1T
+    elif bytes >= BYTES_1G:
+        suffix = "G"
+        size = BYTES_1G
+    elif bytes >= BYTES_1M:
+        suffix = "M"
+        size = BYTES_1M
+    elif bytes >= BYTES_1K:
+        suffix = "K"
+        size = BYTES_1K
+    else:
+        size = 1
+    full = bytes / size
+    rem = ((bytes % size) * 10) / size
+    return "%d.%d%s" % (full, rem, suffix)
+
+def pages2size(npages):
+    '''Return a string representation of the number of bytes contained
+    in npages.'''
+    return bytes2size(npages * PAGESIZE)
+
+def page_to_virt(page):
+    # run kmem -p to get the pys addr for the page
+    cmd = "kmem -p %#x" % page
+    kmemp = exec_crash_command(cmd)
+    paddr = kmemp.splitlines()[1].split()[1]
+    cl_trace("*>>> page_to_virt #### phys_addr = %s" % paddr)
+    # find vaddr from crash ptov command
+    res = exec_crash_command("ptov " + paddr)
+    vaddr = res.splitlines()[1].split()[0]
+    cl_trace("*>>> page_to_virt #### vaddr = %s" % vaddr)
+    return long(vaddr, 16)
+
+def get_config(name):
+    cl_trace(">>> get_config: searching system config for %s" % name)
+    res = exec_crash_command("sys config")
+    for line in res.splitlines():
+        if not "=" in line:
+            continue
+        (key, value) = line.split("=", 1)
+        if key == name:
+            cl_trace(">>> get_config: %s has a value of '%s'" % (name, value))
+            return value
+    raise ValueError("Name %s not found in system config" % name)
+
+def atoi(arg):
+    # See if the user specified the format.
+    try:
+        val = int(arg, 0)
+    except:
+        # Nope, be generous and try again as hex.
+        try:
+            val = int(arg, 16)
+        except:
+            # No luck. Return an error.
+            print("Invalid number: %s" % arg)
+            val = None
+    return val
+
+def is_kernel_address(addr):
+    # The top 17 bits should all be ones.
+    val = (1 << 17) - 1
+    if (addr >> 47) != val:
+        return False
+    return True
+
+def is_kernel_text_address(addr):
+    # The top 33 bits should all be ones.
+    val = (1 << 33) - 1
+    if (addr >> 31) != val:
+        return False
+    return True
+
+def is_valid_address(addr):
+    if addr < 0x10000:
+        return False
+    if addr & 7:
+        return False
+    return True
+
+def readString(addr):
+    res = readmem(addr, 64)
+    return res.split('\0')[0]
+
+def symbol_name(addr):
+    if not is_kernel_text_address(addr):
+        return ""
+    (name, offset) = addr2sym(addr, True)
+    if name == None:
+        return ""
+    if offset != 0:
+        name += "+" + hex(offset)
+    return name
+
+def read_bool(addr):
+    '''pykdump can't read bools on its own.'''
+    return bool(readU8(addr))
+
+def read_bool_member(struct, member_name):
+    '''struct must be a pykdump object, member name is the string name
+    of the bool member in the struct.'''
+    struct_type = struct.PYT_symbol
+    return read_bool(Addr(struct) + member_offset(struct_type, member_name))
+
+def read_bitmap(addr, num_bits):
+    '''Return an integer representation of the 'num_bits' sized bitmap
+    at 'addr'. Note Python has arbitrary precision ints so the return
+    value may be very large.'''
+    bits_per_long = 8 * getSizeOf('long')
+    num_longs = int(ceil(float(num_bits) / bits_per_long))
+    total = 0
+    for i in range(num_longs):
+        total |= (readULong(addr + i * getSizeOf('long'))
+                 << ((num_longs - i - 1) * bits_per_long))
+    # Mask off unused bits when num_bits not a multiple of bits/long.
+    mask = 2 ** num_bits - 1
+    return total & mask
+
+def read_cpumask(cpumask_addr):
+    '''Return an integer representation of the cpumask bitmap.'''
+    return read_bitmap(cpumask_addr, sys_info.CPUS)
+
+def read_cpumask_var_t(container_struct, member_name):
+    '''Return an integer representation of the cpumask_var_t bitmap.
+    'container_struct' is the struct object which has a cpumask_var_t
+    as a member. 'member_name' is the name of the cpumask_var_t field
+    within the container struct.
+
+    Pykdump crashes when trying to read a cpumask_var_t. This function
+    provides a workaround which does not read a cpumask_var_t directly.'''
+    container_type = container_struct.PYT_symbol
+    offset = member_offset(container_type, member_name)
+    cpumask_addr = Addr(container_struct) + offset
+    return read_cpumask(cpumask_addr)
+
+
+# Bit offsets and masks for read_qspinlock
+# Copied from linux/include/asm-generic/qspinlock_types.h.
+#
+# Bitfields in the atomic value:
+#
+# When NR_CPUS < 16K
+# 0- 7: locked byte
+#    8: pending
+# 9-15: not used
+# 16-17: tail index
+# 18-31: tail cpu (+1)
+#
+# When NR_CPUS >= 16K
+# 0- 7: locked byte
+#    8: pending
+# 9-10: tail index
+# 11-31: tail cpu (+1)'''
+
+if sys_info.CPUS < 2 ** 14:
+    _q_pending_bits = 8
+else:
+    _q_pending_bits = 1
+_q_tail_index_offset = 9
+_q_tail_index_bits = 2
+_q_tail_index_mask = (2 ** _q_tail_index_bits - 1) << _q_tail_index_offset
+_q_tail_cpu_offset = _q_tail_index_offset + _q_tail_index_bits
+_q_tail_cpu_bits = 32 - _q_tail_cpu_offset
+_q_tail_cpu_mask = (2 ** _q_tail_cpu_bits - 1) << _q_tail_cpu_offset
+
+qspinlock_tuple = namedtuple('qspinlock',
+                             ['locked', 'pending', 'tail_index', 'tail_cpu'])
+
+def read_qspinlock(qspinlock):
+    '''Given a struct qspinlock, which consists of a single 32 bit atomic
+    value, return a namedtuple of ints (locked, pending, tail_index, tail_cpu),
+    representing the bit fields of the qspinlock.'''
+
+    val = qspinlock.val.counter
+    locked_byte = val & 0xff
+    pending = (val & 0x100) >> 8
+
+    tail_index = (val & _q_tail_index_mask) >> _q_tail_index_offset
+
+    _q_tail_cpu_offset = _q_tail_index_offset + _q_tail_index_bits
+    _q_tail_cpu_bits = 32 - _q_tail_cpu_offset
+    _q_tail_cpu_mask = (2 ** _q_tail_cpu_bits - 1) << _q_tail_cpu_offset
+    tail_cpu = ((val & _q_tail_cpu_mask) >> _q_tail_cpu_offset) - 1
+
+    return qspinlock_tuple(locked=locked_byte, pending=pending,
+                           tail_index=tail_index, tail_cpu=tail_cpu)
diff --git a/contrib/debug_tools/epython_scripts/debug_flags.py b/contrib/debug_tools/epython_scripts/debug_flags.py
new file mode 100644 (file)
index 0000000..ddfb376
--- /dev/null
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+"""
+Utility to print Lustre libcfs_debug flags
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+from crashlib.input import toint
+import argparse
+
+description_short = "Prints Lustre libcfs_debug flags as strings"
+
+debug_flags_tbl = {
+    0x00000001: 'trace',      #define D_TRACE
+    0x00000002: 'inode',      #define D_INODE
+    0x00000004: 'super',      #define D_SUPER
+    0x00000008: 'ext2',       #define D_EXT2
+    0x00000010: 'malloc',     #define D_MALLOC
+    0x00000020: 'cache',      #define D_CACHE
+    0x00000040: 'info',       #define D_INFO
+    0x00000080: 'ioctl',      #define D_IOCTL
+    0x00000100: 'neterror',   #define D_NETERROR
+    0x00000200: 'net',        #define D_NET
+    0x00000400: 'warning',    #define D_WARNING
+    0x00000800: 'buffs',      #define D_BUFFS
+    0x00001000: 'other',      #define D_OTHER
+    0x00002000: 'dentry',     #define D_DENTRY
+    0x00004000: 'nettrace',   #define D_NETTRACE
+    0x00008000: 'page',       #define D_PAGE
+    0x00010000: 'dlmtrace',   #define D_DLMTRACE
+    0x00020000: 'error',      #define D_ERROR
+    0x00040000: 'emerg',      #define D_EMERG
+    0x00080000: 'ha',         #define D_HA
+    0x00100000: 'rpctrace',   #define D_RPCTRACE
+    0x00200000: 'vfstrace',   #define D_VFSTRACE
+    0x00400000: 'reada',      #define D_READA
+    0x00800000: 'mmap',       #define D_MMAP
+    0x01000000: 'config',     #define D_CONFIG
+    0x02000000: 'console',    #define D_CONSOLE
+    0x04000000: 'quota',      #define D_QUOTA
+    0x08000000: 'sec',        #define D_SEC
+    0x10000000: 'lfsck',      #define D_LFSCK
+    0x20000000: 'hsm',        #define D_HSM
+    0x40000000: 'snapshot',   #define D_SNAPSHOT
+    0x80000000: 'layout'      #define D_LAYOUT
+}
+
+def print_flags(flag_tbl, mask):
+    flags = ""
+    tmp = mask
+    for key, value in flag_tbl.iteritems():
+            if key & mask:
+               flags = flags + value + " "
+               tmp &= ~key
+    print "mask: 0x%x = %s" % (mask, flags)
+    if tmp != 0:
+        print "unknown bits set in mask: 0x%x" % tmp
+
+def dump_debug_flags(bitmask):
+    print bitmask
+    if not bitmask:
+        bitmask = readSymbol('libcfs_debug')
+    print_flags(debug_flags_tbl, bitmask)
+
+if __name__ == "__main__":
+    description = "Prints libcfs_debug flags as strings"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("bitmask", nargs="?", type=toint, default=[],
+        help="debug bit mask to be translated; default is current libcfs_debug value")
+    args = parser.parse_args()
+    dump_debug_flags(args.bitmask)
diff --git a/contrib/debug_tools/epython_scripts/dk.py b/contrib/debug_tools/epython_scripts/dk.py
new file mode 100644 (file)
index 0000000..2d9c135
--- /dev/null
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Utility to dump the Lustre dk logs.
+Based on dump_cfs_trace_data.py
+"""
+
+import sys
+import crash
+import argparse
+from time import localtime
+from operator import itemgetter
+from pykdump.API import getSizeOf, readSU, readmem, readSUListFromHead, readSymbol, sys_info
+from crashlib import page, addrlib
+import os
+
+description_short = 'Dump and sort the Lustre dk logs.'
+
+def do_shell_cmd(cmd):
+    return os.popen(cmd).read()
+
+# ---------------------------------------------------------------------------
+# pfn: 2582e8c, physaddr: 2582e8c000, vaddr: ffff002582e8c000
+def dump_dk_line(tmpfd, options, pfn, used):
+    """Dump the cfs debug messages in the dk format."""
+    physaddr = addrlib.pfn2phys(pfn)
+    vaddr = addrlib.ptov(physaddr)
+    hdr_size = getSizeOf("struct ptldebug_header")
+
+    while (used):
+        hdr = readSU('struct ptldebug_header', vaddr)
+        laddr = vaddr + hdr_size
+        try:
+            line = readmem(laddr, hdr.ph_len - hdr_size)
+       except:
+            print "Skipping pfn: %x, physaddr: %x, vaddr: %x, laddr: %x" % \
+                (pfn, physaddr, vaddr, laddr)
+            return
+
+        (filename,function,text) = line.split('\0')
+        text = text.rstrip()
+
+        used -= hdr.ph_len
+        vaddr += hdr.ph_len
+
+        type = hdr.ph_type
+        prefix = "%08x:%08x:%u.%u%s:%u.%u" % \
+            (hdr.ph_subsys, hdr.ph_mask, hdr.ph_cpu_id, hdr.ph_type,
+            "F" if (hdr.ph_flags & 1) else "", hdr.ph_sec, hdr.ph_usec)
+
+        buf = "%s:%06u:%u:%u:(%s:%d:%s()) %s" % \
+            (prefix, hdr.ph_stack, hdr.ph_pid, hdr.ph_extern_pid, filename,
+            hdr.ph_line_num, function, text)
+
+        tmpfd.write(buf + '\n')
+
+# ---------------------------------------------------------------------------
+def walk_pages(tmpfd, options, cfs_page_head, trace_page_struct):
+
+    cfs_pages = readSUListFromHead(cfs_page_head, 'linkage',
+                                   trace_page_struct,
+                                   maxel=100000, inchead=False)
+
+    for p in cfs_pages:
+        dump_dk_line(tmpfd, options, page.pfn(p.page), p.used)
+
+# ---------------------------------------------------------------------------
+def walk_array(options):
+    """Walk the cfs_trace_data array of array pointers."""
+
+    fname = do_shell_cmd('mktemp .dklogXXXX').rstrip()
+    tmpfd = file(fname, 'w')
+
+    try:
+        cfs_trace_data = readSymbol('cfs_trace_data')
+        trace_page_struct = 'struct cfs_trace_page'
+    except TypeError:
+        try:
+            cfs_trace_data = readSymbol('trace_data')
+            trace_page_struct = 'struct trace_page'
+        except:
+            print "Ensure you have loaded the Lustre modules"
+            return 1
+
+    for cfstd_array in cfs_trace_data:
+        if not cfstd_array: continue
+
+        for i in xrange(sys_info.CPUS):
+            u = cfstd_array[i]
+            walk_pages(tmpfd, options, u.tcd.tcd_pages, trace_page_struct)
+            walk_pages(tmpfd, options, u.tcd.tcd_daemon_pages, trace_page_struct)
+            walk_pages(tmpfd, options, u.tcd.tcd_stock_pages, trace_page_struct)
+
+    tmpfd.close()
+    print do_shell_cmd('sort -n -s -t: -k4,4 ' + fname)
+    print do_shell_cmd('rm ' + fname)
+
+# ---------------------------------------------------------------------------
+def dump_dk_log():
+    parser = argparse.ArgumentParser(
+        description= "Dump and sort the Lustre dk logs.",
+        epilog= "NOTE: the Lustre kernel modules must be loaded.")
+    args = parser.parse_args()
+    return walk_array(args)
+
+if __name__ == '__main__':
+    dump_dk_log()
diff --git a/contrib/debug_tools/epython_scripts/jiffies2date.py b/contrib/debug_tools/epython_scripts/jiffies2date.py
new file mode 100644 (file)
index 0000000..152c00f
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+from pykdump.API import *
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to print jiffies as date and time
+"""
+
+import argparse
+import time
+import crashlib.time
+
+description_short = "Print the date and time for a jiffies timestamp."
+
+# Get current time in jiffies and in seconds. Compute the offset of
+# the timestamp in jiffies from current time and convert to seconds.
+# Subtract the offset from current time in seconds and convert result
+# to a datetime string.
+def jiffies2date(jts):
+    scur = crashlib.time.get_wallclock_seconds()
+
+    jcur = readSymbol('jiffies')
+    if jts == 0:
+        jts = jcur
+    soffset = (jcur - int(jts)) / sys_info.HZ
+
+    stime = scur - soffset
+    date = time.asctime(time.localtime(stime))
+    print '%s (epoch: %d)' % (date, stime)
+
+if __name__ == "__main__":
+    description = "Print the date and time of a given jiffies timestamp. " + \
+                  "Also includes seconds since epoch."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("timestamp", nargs="?", default=0, type=int,
+        help="the timestamp in jiffies to be converted to date/time")
+    args = parser.parse_args()
+    jiffies2date(args.timestamp)
diff --git a/contrib/debug_tools/epython_scripts/ldlm_dumplocks.py b/contrib/debug_tools/epython_scripts/ldlm_dumplocks.py
new file mode 100644 (file)
index 0000000..2f695d2
--- /dev/null
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Utility to list granted and waiting ldlm locks.
+"""
+
+from pykdump.API import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+from traceback import print_exc
+
+description = "Dumps lists of granted and waiting ldlm locks for each namespace."
+
+''' Lock Types '''
+enum_LDLM_PLAIN = 10
+enum_LDLM_EXTENT = 11
+enum_LDLM_FLOCK = 12
+enum_LDLM_IBITS = 13
+
# ldlm lock-mode bit values mapped to their display names.
LOCKMODES = {
    0: "--",
    1: "EX",
    2: "PW",
    4: "PR",
    8: "CW",
    16: "CR",
    32: "NL",
    64: "GROUP",
}

def lockmode2str(mode):
    """Return the symbolic name of an LDLM lock mode ("??" if unknown)."""
    return LOCKMODES.get(mode, "??")
+
def ldlm_dump_lock(lock, pos, lstname):
    """Print one ldlm_lock: handle, node (export/import/local), resource,
    modes/flags, and per-type (extent/flock/ibits) policy data.

    pos: 1-based position within its list; lstname: list tag ("grnt"/"wait").
    """
    obd = None
    imp = None
    if(lock == None):
        print "   NULL LDLM lock"
        return
    print "   -- Lock: (ldlm_lock) %#x/%#x (rc: %d) (pos: %d/%s) (pid: %d)" % \
          (Addr(lock), lock.l_handle.h_cookie, lock.l_refc.counter,
          pos, lstname, lock.l_pid)
    if(lock.l_conn_export):
        obd = lock.l_conn_export.exp_obd
    # A lock with an export+connection belongs to a remote client; with a
    # connected obd it is held via an import; otherwise it is local.
    if(lock.l_export and lock.l_export.exp_connection):
        print "       Node: NID %s (remote: %#x) export" % \
              (ll.nid2str(lock.l_export.exp_connection.c_peer.nid),
              lock.l_remote_handle.cookie)
    elif(obd == None):
        print "       Node: local"
    else:
        imp = obd.u.cli.cl_import
        print "       Node: NID %s (remote: %#x) import " % \
              (ll.nid2str(imp.imp_connection.c_peer.nid),
              lock.l_remote_handle.cookie)

    res = lock.l_resource
    print "       Resource: %#x [0x%x:0x%x:0x%x].%x" % \
          (Addr(res),
          res.lr_name.name[0],
          res.lr_name.name[1],
          res.lr_name.name[2],
          res.lr_name.name[3])

    print "       Req mode: %s, grant mode: %s, rc: %d, read: %d, \
          write: %d flags: %#x" % (lockmode2str(lock.l_req_mode),
          lockmode2str(lock.l_granted_mode),
          lock.l_refc.counter, lock.l_readers, lock.l_writers,
          lock.l_flags)

    # Per-resource-type policy data.
    lr_type = lock.l_resource.lr_type
    if(lr_type == enum_LDLM_EXTENT):
        print "       Extent: %d -> %d (req %d-%d)" % \
              (lock.l_policy_data.l_extent.start,
              lock.l_policy_data.l_extent.end,
              lock.l_req_extent.start, lock.l_req_extent.end)
    elif(lr_type == enum_LDLM_FLOCK):
        print "       Pid: %d Flock: 0x%x -> 0x%x" % \
              (lock.l_policy_data.l_flock.pid,
              lock.l_policy_data.l_flock.start,
              lock.l_policy_data.l_flock.end)
    elif(lr_type == enum_LDLM_IBITS):
        print "       Bits: %#x" % \
              (lock.l_policy_data.l_inodebits.bits)
+
+def ldlm_dump_resource(res):
+    res_lr_granted = readSU('struct list_head', Addr(res.lr_granted))
+    res_lr_waiting = readSU('struct list_head', Addr(res.lr_waiting))
+    print "-- Resource: (ldlm_resource) %#x [0x%x:0x%x:0x%x].%x (rc: %d)" % \
+          (Addr(res), res.lr_name.name[0], res.lr_name.name[1],
+           res.lr_name.name[2], res.lr_name.name[3], res.lr_refcount.counter)
+    if not ll.list_empty(res_lr_granted):
+        pos = 0
+        print "   Granted locks: "
+        tmp = res_lr_granted.next
+        while(tmp != res_lr_granted):
+            pos += 1
+            lock = readSU('struct ldlm_lock',
+                          Addr(tmp)-member_offset('struct ldlm_lock', 'l_res_link'))
+            ldlm_dump_lock(lock, pos, "grnt")
+            tmp = tmp.next
+    if not ll.list_empty(res_lr_waiting):
+        pos = 0
+        print "   Waiting locks: "
+        tmp = res_lr_waiting.next
+        while(tmp != res_lr_waiting):
+            pos += 1
+            lock = readSU('struct ldlm_lock',
+                          Addr(tmp)-member_offset('struct ldlm_lock', 'l_res_link'))
+            ldlm_dump_lock(lock, pos, "wait")
+            tmp = tmp.next
+
def print_namespace(ns, client_server):
    """Print a one-line summary of an ldlm_namespace; client_server labels
    which side ("server"/"client"/"inactive") the namespace list came from."""
    print "Namespace: (ldlm_namespace) %#x, %s\t(rc: %d, side: %s)\tpoolcnt: %d unused: %d" % \
          (Addr(ns), ll.obd2str(ns.ns_obd), ns.ns_bref.counter,
          client_server, ns.ns_pool.pl_granted.counter, ns.ns_nr_unused)
+
def ldlm_dump_ns_resources(ns):
    """Dump every ldlm_resource hashed into the namespace's ns_rs_hash.

    NOTE(review): reads the module-global 'args' created by the __main__
    block; with -n (nflag) per-resource output is suppressed entirely.
    """
    if args.nflag:
        return
    for hnode in ll.cfs_hash_get_nodes(ns.ns_rs_hash):
        # container_of: back from the lr_hash hlist node to the resource.
        offset = member_offset('struct ldlm_resource', 'lr_hash')
        res = readSU('struct ldlm_resource', Addr(hnode) - offset)
        ldlm_dump_resource(res)
+
def ldlm_dump_all_namespaces(ns_name, client_server):
    """Walk the global namespace list named by ns_name and dump each entry."""
    ns_list = readSymbol(ns_name)
    for ns in readSUListFromHead(ns_list, 'ns_list_chain', 'struct ldlm_namespace'):
        print_namespace(ns, client_server)
        ldlm_dump_ns_resources(ns)
+
def ldlm_dumplocks():
    """Entry point: dump one namespace (args.ns_addr) or all three global
    namespace lists (server, active client, inactive client)."""
    if args.ns_addr:
        ns = readSU('struct ldlm_namespace', args.ns_addr)
        print_namespace(ns, "")
        ldlm_dump_ns_resources(ns)
    else:
        ldlm_dump_all_namespaces('ldlm_srv_namespace_list', "server")
        ldlm_dump_all_namespaces('ldlm_cli_active_namespace_list', "client")
        ldlm_dump_all_namespaces('ldlm_cli_inactive_namespace_list', "inactive")
+
+if __name__ == "__main__":
+    description = "Dumps lists of granted and waiting locks for each namespace. " + \
+                  "Requires Lustre .ko files to be loaded (see mod command)."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("-n", dest="nflag", action='store_true',
+        help="Print only namespace information")
+    parser.add_argument("ns_addr", nargs="?", default=[], type=toint,
+        help="Print only locks under namespace at given address")
+    args = parser.parse_args()
+
+    ldlm_dumplocks()
diff --git a/contrib/debug_tools/epython_scripts/ldlm_lockflags.py b/contrib/debug_tools/epython_scripts/ldlm_lockflags.py
new file mode 100644 (file)
index 0000000..5788e3e
--- /dev/null
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+"""
+Utility to print LDLM lock flags as strings
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+from pykdump.API import *
+from crashlib.input import toint
+import argparse
+
+description_short  = "Prints string identifiers for specified LDLM flags."
# Mask of all defined LDLM flag bits (mirrors lustre_dlm_flags.h).
LDLM_FL_ALL_FLAGS_MASK = 0x00FFFFFFC28F932F

# LDLM_FL_* bit value -> short name; gaps are reserved/unused bits.
ldlm_flags_tbl = {
    0x0000000000000001:  "LOCK_CHANGED",            # bit  0
    0x0000000000000002:  "BLOCK_GRANTED",           # bit  1
    0x0000000000000004:  "BLOCK_CONV",              # bit  2
    0x0000000000000008:  "BLOCK_WAIT",              # bit  3
    0x0000000000000010:  "SPECULATIVE",             # bit  4
    0x0000000000000020:  "AST_SENT",                # bit  5
    0x0000000000000100:  "REPLAY",                  # bit  8
    0x0000000000000200:  "INTENT_ONLY",             # bit  9
    0x0000000000001000:  "HAS_INTENT",              # bit 12
    0x0000000000008000:  "FLOCK_DEADLOCK",          # bit 15
    0x0000000000010000:  "DISCARD_DATA",            # bit 16
    0x0000000000020000:  "NO_TIMEOUT",              # bit 17
    0x0000000000040000:  "BLOCK_NOWAIT",            # bit 18
    0x0000000000080000:  "TEST_LOCK",               # bit 19
    0x0000000000100000:  "MATCH_LOCK",              # bit 20
    0x0000000000800000:  "CANCEL_ON_BLOCK",         # bit 23
    0x0000000001000000:  "COS_INCOMPAT",            # bit 24
    0x0000000002000000:  "CONVERTING",              # bit 25
    0x0000000010000000:  "LOCKAHEAD_OLD_RESERVED",  # bit 28
    0x0000000020000000:  "NO_EXPANSION",            # bit 29
    0x0000000040000000:  "DENY_ON_CONTENTION",      # bit 30
    0x0000000080000000:  "AST_DISCARD_DATA",        # bit 31
    0x0000000100000000:  "FAIL_LOC",                # bit 32
    0x0000000400000000:  "CBPENDING",               # bit 34
    0x0000000800000000:  "WAIT_NOREPROC",           # bit 35
    0x0000001000000000:  "CANCEL",                  # bit 36
    0x0000002000000000:  "LOCAL_ONLY",              # bit 37
    0x0000004000000000:  "FAILED",                  # bit 38
    0x0000008000000000:  "CANCELING",               # bit 39
    0x0000010000000000:  "LOCAL",                   # bit 40
    0x0000020000000000:  "LVB_READY",               # bit 41
    0x0000040000000000:  "KMS_IGNORE",              # bit 42
    0x0000080000000000:  "CP_REQD",                 # bit 43
    0x0000100000000000:  "CLEANED",                 # bit 44
    0x0000200000000000:  "ATOMIC_CB",               # bit 45
    0x0000400000000000:  "BL_AST",                  # bit 46
    0x0000800000000000:  "BL_DONE",                 # bit 47
    0x0001000000000000:  "NO_LRU",                  # bit 48
    0x0002000000000000:  "FAIL_NOTIFIED",           # bit 49
    0x0004000000000000:  "DESTROYED",               # bit 50
    0x0008000000000000:  "SERVER_LOCK",             # bit 51
    0x0010000000000000:  "RES_LOCKED",              # bit 52
    0x0020000000000000:  "WAITED",                  # bit 53
    0x0040000000000000:  "NS_SRV",                  # bit 54
    0x0080000000000000:  "EXCL",                    # bit 55
    0x0100000000000000:  "RESENT",                  # bit 56
    0x0200000000000000:  "COS_ENABLED",             # bit 57
    0x0400000000000000:  "NDELAY"                   # bit 58
}
+
+def print_flags(flag_dict, mask):
+
+    flags = ""
+    tmp = mask
+    for key, value in flag_dict.iteritems():
+            if key & mask:
+                flags = flags + value + " "
+                tmp &= ~key
+    print "mask: 0x%x = %s" % (mask, flags)
+    if tmp != 0:
+        print "unknown bits set in mask: 0x%x" % tmp
+
+if __name__ == "__main__":
+    description = "Prints string identifiers for specified LDLM flags."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("bitmask", type=toint,
+        help="LDLM flag bit mask to be translated")
+    args = parser.parse_args()
+    print_flags(ldlm_flags_tbl, args.bitmask)
diff --git a/contrib/debug_tools/epython_scripts/lu_object.py b/contrib/debug_tools/epython_scripts/lu_object.py
new file mode 100644 (file)
index 0000000..12a669b
--- /dev/null
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to display contents of a Lustre lu_object
+"""
+
+from pykdump.API import *
+from struct import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = "Prints contents of an lu_object"
+
LOHA_EXISTS = 1 << 0  # lu_object_header::loh_attr bit: object exists

# LOV layout magic numbers (v1 / v3 striped layouts).
LOV_MAGIC = 0x0BD10BD0
LOV_MAGIC_V3 = 0x0BD30BD0

# Indent step and ruler string used by the nested layer printers.
DEPTH = 3
RULER = "........................................"

# FID sequence constants used to classify ost_id values — assumed to mirror
# lustre_idl.h; TODO confirm against the matching Lustre release.
FID_SEQ_OST_MDT0 = 0
FID_SEQ_LOV_DEFAULT = 0xffffffffffffffff
FID_SEQ_IDIF = 0x100000000
FID_SEQ_IDIF_MAX = 0x1ffffffff
IDIF_OID_MAX_BITS = 48
IDIF_OID_MASK = ((1 << IDIF_OID_MAX_BITS) -1)
+
def lov_print_empty(obj, depth=0, ruler=RULER):
    """Print a LOV object of layout type 0 (no layout)."""
    print "empty %d" % obj.lo_layout_invalid
+
+def lov_print_raid0(obj, depth=0, ruler=RULER):
+    r0 = None
+    lsm = obj.lo_lsm
+    try:
+        magic = lsm.lsm_magic
+        stripes = lsm.lsm_stripe_count
+        layout_gen = lsm.lsm_layout_gen
+       pattern = lsm.lsm_pattern
+    except Exception, e:
+        magic = lsm.lsm_wire.lw_magic
+        stripes = lsm.lsm_wire.lw_stripe_count
+        layout_gen = lsm.lsm_wire.lw_layout_gen
+       pattern = lsm.lsm_wire.lw_pattern
+    if magic==LOV_MAGIC or magic==LOV_MAGIC_V3:
+        r0 = obj.u.raid0
+    lli = readU32(Addr(obj) + member_offset('struct lov_object', 'lo_layout_invalid'))
+    invalid = "invalid" if lli else "valid"
+    if r0 and r0.lo_nr:
+        print "%*.*sstripes: %d, %s, lsm[0x%x 0x%X %d %d %d %d]:" % \
+             (depth, depth, ruler,
+             r0.lo_nr, invalid, Addr(lsm), magic,
+             lsm.lsm_refc.counter, stripes, layout_gen, pattern)
+        for i in range(r0.lo_nr):
+            los = r0.lo_sub[i]
+            if los:
+                sub = los.lso_cl.co_lu
+                lovsub_object_print(sub, depth+DEPTH, ruler)
+            else:
+                print "sub %d absent" % i
+
def lov_print_released(obj, depth=0, ruler=RULER):
    """Print a LOV object whose layout has been released (HSM archived)."""
    lsm = obj.lo_lsm
    magic = lsm.lsm_magic
    entries = lsm.lsm_entry_count
    layout_gen = lsm.lsm_layout_gen
    lli = readU32(Addr(obj) + member_offset('struct lov_object', 'lo_layout_invalid'))
    invalid = "invalid" if lli else "valid"
    if magic==LOV_MAGIC or magic==LOV_MAGIC_V3:
        print "%*.*sreleased: %s, lov_stripe_md: 0x%x [0x%X %d %u %u]:" % \
             (depth, depth, ruler,
             invalid, Addr(lsm), magic, lsm.lsm_refc.counter,
             entries, layout_gen)
+
# Dispatch on lov_object.lo_type: 0 = empty, 1 = raid0, 2 = released.
LOV_PRINT_TYPE = {
                 0:lov_print_empty,
                 1:lov_print_raid0,
                 2:lov_print_released}
+
def vvp_object_print(o, depth=0, ruler=RULER):
    """Print the vvp_object embedding this lu_object (container_of pattern)."""
    obj = readSU('struct vvp_object', Addr(o) - member_offset('struct vvp_object', 'vob_cl.co_lu'))
    print "%*.*s(trans:%s mmap:%d) inode: 0x%x " % \
         (depth, depth, ruler,
         obj.vob_transient_pages.counter,
         obj.vob_mmap_cnt.counter,
         Addr(obj.vob_inode))
+
+def lod_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct lod_object', Addr(o) - member_offset('struct lod_object', 'ldo_obj.do_lu'))
+    print "%*.*slod_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
def lov_object_print(o, depth=0, ruler=RULER):
    """Dispatch to the printer matching this lov_object's layout type."""
    obj = readSU('struct lov_object', Addr(o) - member_offset('struct lov_object', 'lo_cl.co_lu'))
    # Renamed the local so it no longer shadows the 'type' builtin.
    lo_type = obj.lo_type
    LOV_PRINT_TYPE[lo_type](obj, depth, ruler)
+
+def lovsub_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct lovsub_object', Addr(o) - member_offset('struct lovsub_object', 'lso_cl.co_lu'))
+    print "%*.*slso_index: %d" % (depth, depth, ruler, obj.lso_index)
+
def mdd_object_print(o, depth=0, ruler=RULER):
    """Print summary state of the mdd_object embedding this lu_object."""
    obj = readSU('struct mdd_object', Addr(o) - member_offset('struct mdd_object', 'mod_obj.mo_lu'))
    print "%*.*smdd_object@0x%x(open_count=%d, valid=%x, cltime=%u, flags=%x)" % \
         (depth, depth, ruler, Addr(obj), obj.mod_count, obj.mod_valid,
         obj.mod_cltime, obj.mod_flags)
+
+def mdt_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct mdt_object', Addr(o) - member_offset('struct mdt_object', 'mot_obj'))
+    print "%*.*smdt_object@0x%x(ioepoch=%u, flags=%x, epochcount=%d, writecount-%d" % \
+         (depth, depth, ruler, Addr(obj), obj.mot_ioepoch, obj.mot_flags,
+         obj.mot_ioepoch_count, obj.mot_writecount)
+
+def mgs_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct mgs_object', Addr(o) - member_offset('struct mgs_object', 'mgo_obj.do_lu'))
+    print "%*.*smgs_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
+def echo_object_print(o, depth=0, ruler=RULER):
+    clo = readSU('struct cl_object', Addr(o) - member_offset('struct cl_object', 'co_lu'))
+    obj = readSU('struct echo_object', Addr(clo) - member_offset('struct echo_object', 'eo_cl'))
+    print "%*.*sechocl_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
def ofd_object_print(o, depth=0, ruler=RULER):
    """Print an OFD object's address (printed directly from the lu_object)."""
    print "%*.*sofd_object@0x%x" % (depth, depth, ruler, Addr(o))
+
+def osc_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osc_object', Addr(o) - member_offset('struct osc_object', 'oo_cl.co_lu'))
+    oinfo = obj.oo_oinfo
+    ar = oinfo.loi_ar
+    ostid = oinfo.loi_oi
+    ostid_seq = 0
+    ostid_id = 0
+    if ostid.oi.oi_seq == FID_SEQ_OST_MDT0:
+        ostid_seq = FID_SEQ_OST_MDT0
+        ostid_id = ostid.oi.oi_id & IDIF_OID_MASK
+    elif ostid.oi.oi_seq == FID_SEQ_LOV_DEFAULT:
+        ostid_seq = FID_SEQ_LOV_DEFAULT
+        ostid_id = ostid.oi.oi_id
+    elif ostid.oi_fid.f_seq >= FID_SEQ_IDIF and \
+        ostid.oi_fid.f_seq <= FID_SEQ_IDIF_MAX:
+        ostid_seq = FID_SEQ_OST_MDT0
+        ostid_id = ((0 << 48) | (ostid.oi_fid.f_seq & 0xffff << 32) | (ostid.oi_fid.f_oid))
+    else:
+        ostid_seq = ostid.oi_fid.f_seq
+        ostid_id = ostid.oi_fid.f_oid
+    print "%*.*sid: 0x%x:%u idx: %d gen: %d kms_valid: %u kms: %u rc: %d force_sync: %d min_xid: %u" % \
+         (depth, depth, ruler, ostid_seq, ostid_id,
+         oinfo.loi_ost_idx, oinfo.loi_ost_gen, oinfo.loi_kms_valid,
+         oinfo.loi_kms, ar.ar_rc, ar.ar_force_sync, ar.ar_min_xid)
+
+def osd_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osd_object', Addr(o) - member_offset('struct osd_object', 'oo_dt.do_lu'))
+    print "%*.*sosd_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
+def osp_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osp_object', Addr(o) - member_offset('struct osp_object', 'opo_obj.do_lu'))
+    print "%*.*sosp_object@0x%x" % (depth, depth, ruler, Addr(o))
+
# Device-type name (lu_device_type::ldt_name) -> per-layer print function.
OBJ_PRINT = {
            "vvp":vvp_object_print,
            "lod":lod_object_print,
            "lov":lov_object_print,
            "lovsub":lovsub_object_print,
            "mdd":mdd_object_print,
            "mdt":mdt_object_print,
            "mgs":mgs_object_print,
            "echo":echo_object_print,
            "ofd":ofd_object_print,
            "osc":osc_object_print,
            "osd":osd_object_print,
            "osp":osp_object_print}
+
def print_object_from_name(name, obj, depth=0, ruler=RULER):
    """Invoke the per-layer printer registered for the device-type name.

    Unknown names are ignored (the original 'OBJ_PRINT[name]' raised
    KeyError before its truthiness could ever be tested).
    """
    printer = OBJ_PRINT.get(name)
    if printer:
        printer(obj, depth, ruler)
+
def print_object(pos, depth=0, ruler=RULER):
    """Print one lu_object layer: its device-type name, then layer details."""
    print "%*.*s%s@0x%x" % (depth, depth, ruler, pos.lo_dev.ld_type.ldt_name, Addr(pos))
    # NOTE(review): tests that the layer has a loo_object_print op, but then
    # dispatches by device-type name rather than calling the op — confirm
    # this matches all in-tree layer types.
    if (pos.lo_ops.loo_object_print):
        print_object_from_name(pos.lo_dev.ld_type.ldt_name, pos, depth+DEPTH, ruler)
+
def print_object_from_header(loh, depth=0, ruler=RULER):
    """Print an lu_object_header (flags, refcount, FID, lru/exist markers)
    followed by every layer object linked on loh_layers."""
    head = loh.loh_layers
    # "lru" is shown when the header sits on an LRU list; "exist" when the
    # LOHA_EXISTS attribute bit is set.
    empty = "" if (loh.loh_lru.next == loh.loh_lru) else " lru"
    exists = " exist" if loh.loh_attr & LOHA_EXISTS else ""
    print "%*.*slu_object_header@0x%x[fl:0x%x, rc:%d, [0x%x:0x%x:0x%x]%s%s] {" % \
         (depth, depth, ruler,
         Addr(loh),
         loh.loh_flags,
         loh.loh_ref.counter,
         loh.loh_fid.f_seq,
         loh.loh_fid.f_oid,
         loh.loh_fid.f_ver,
         empty,
         exists)
    for obj in readSUListFromHead(head, 'lo_linkage', 'struct lu_object'):
        print_object(obj, depth+DEPTH, ruler)
    print "%*.*s} header@0x%x\n" % (depth, depth, ruler, Addr(loh))
+
+if __name__ == "__main__":
+    description = "Prints contents of an lu_object"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("lu_object_header", default=False, type=toint,
+        help="address of an lu_object_header")
+
+    args = parser.parse_args()
+    loh = readSU('struct lu_object_header', args.lu_object_header)
+    print_object_from_header(loh)
diff --git a/contrib/debug_tools/epython_scripts/lustrelib.py b/contrib/debug_tools/epython_scripts/lustrelib.py
new file mode 100644 (file)
index 0000000..879282f
--- /dev/null
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+from pykdump.API import *
+
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Library of helper functions for Lustre scripts
+"""
+# hide this file from the output of 'epython scripts'.
+interactive = False
+
+"""Lustre Hash Table Utilities"""
+
CFS_HASH_ADD_TAIL = 0x10    # 1 << 4
CFS_HASH_DEPTH = 0x1000     # 1 << 12
CFS_HASH_TYPE_MASK = CFS_HASH_ADD_TAIL | CFS_HASH_DEPTH

# The four possible head-layout selectors formed from the two bits above.
HH = 0
HD = CFS_HASH_DEPTH
DH = CFS_HASH_ADD_TAIL
DD = CFS_HASH_DEPTH | CFS_HASH_ADD_TAIL

def hs_get_type(hsh):
    """Return the layout-type bits (HH/HD/DH/DD) from hsh.hs_flags."""
    return hsh.hs_flags & CFS_HASH_TYPE_MASK
+
def enum(**enums):
    """Build a lightweight enum: a class whose attributes are the kwargs."""
    return type('Enum', (), enums)

# Indices into the HS_INFO rows: 0 = head struct name, 1 = head list member.
HS_INFO_FLDS = enum(dtfld=0, hdfld=1,)
+
# The type to struct changes and jobid_hash addition were released
# in the same version, so use existence of jobid_hash as a substitute
# for cfs_hash type changes.
# Each row: [head struct/typedef name, name of its hlist_head member].
if symbol_exists('jobid_hash'):    # 2.11 and later
    HS_INFO = {
        HH: ['struct cfs_hash_head', 'hh_head'],
        HD: ['struct cfs_hash_head_dep', 'hd_head'],
        DH: ['struct cfs_hash_dhead', 'dh_head'],
        DD: ['struct cfs_hash_dhead_dep', 'dd_head'],
    }
else:
    HS_INFO = {
        HH: ['cfs_hash_head_t', 'hh_head'],
        HD: ['cfs_hash_head_dep_t', 'hd_head'],
        DH: ['cfs_hash_dhead_t', 'dh_head'],
        DD: ['cfs_hash_dhead_dep_t', 'dd_head'],
    }
+
def CFS_HASH_NBKT(hsh):
    """Number of buckets in the hash: 2^(hs_cur_bits - hs_bkt_bits)."""
    return 1 << (hsh.hs_cur_bits - hsh.hs_bkt_bits)

def CFS_HASH_BKT_NHLIST(hsh):
    """Number of hlist heads per bucket: 2^hs_bkt_bits."""
    return 1 << hsh.hs_bkt_bits
+
def cfs_hash_head_size(hsh):
    """Size in bytes of one hlist head struct for this hash's layout type."""
    size = getSizeOf(HS_INFO[hs_get_type(hsh)][HS_INFO_FLDS.dtfld])
    return size

def cfs_hash_bucket_size(hsh):
    """Total size in bytes of one cfs_hash_bucket: header offset plus all
    per-bucket hlist heads plus hs_extra_bytes."""
    size = member_offset('struct cfs_hash_bucket', 'hsb_head')
    size += cfs_hash_head_size(hsh) * CFS_HASH_BKT_NHLIST(hsh) + \
            hsh.hs_extra_bytes
    return size

def cfs_hash_hhead(hsh, bd_bkt, bd_offset):
    """Return the hlist_head at index bd_offset within bucket bd_bkt."""
    info = HS_INFO[hs_get_type(hsh)]
    bkt = Addr(bd_bkt) + member_offset('struct cfs_hash_bucket', 'hsb_head')
    head = readSU(info[HS_INFO_FLDS.dtfld], bkt)
    offset = member_offset(info[HS_INFO_FLDS.dtfld], info[HS_INFO_FLDS.hdfld])
    return readSU('struct hlist_head', (Addr(head[bd_offset]) + offset))
+
def cfs_hash_get_buckets(hsh):
    """Return the non-NULL cfs_hash_bucket pointers of the hash."""
    return [hsh.hs_buckets[idx]
            for idx in range(CFS_HASH_NBKT(hsh))
            if hsh.hs_buckets[idx]]
+
def cfs_hash_get_hlist_nodes(hsh, bd_bkt, bd_offset):
    """Collect the hlist nodes chained from one hlist head of one bucket.

    NOTE(review): the 'hnode != hlist' guard compares a node against the
    head object itself — looks like a defensive stop for corrupt or
    self-referencing lists; confirm against cfs_hash list layout.
    """
    hlist = readSU('struct hlist_head', cfs_hash_hhead(hsh, bd_bkt, bd_offset))
    hnodes = []
    hnode = hlist.first
    while (hnode and hnode != hlist):
        hnodes.append(hnode)
        hnode = hnode.next
    return hnodes
+
def cfs_hash_get_nodes(hsh):
    """Return every hlist node in the hash, across all buckets and chains."""
    return [node
            for bkt in cfs_hash_get_buckets(hsh)
            for off in range(CFS_HASH_BKT_NHLIST(hsh))
            for node in cfs_hash_get_hlist_nodes(hsh, bkt, off)]
+
+"""nid"""
+
def LNET_NIDADDR(nid):
    """Low 32 bits of an lnet_nid: the network address."""
    return nid & 0xffffffff

def LNET_NIDNET(nid):
    """High 32 bits of an lnet_nid: the network id."""
    return (nid >> 32) & 0xffffffff

def LNET_NETTYP(net):
    """High 16 bits of a network id: the LND type code."""
    return (net >> 16) & 0xffff

def LNET_NETNUM(net):
    """Low 16 bits of a network id: the network number."""
    return net & 0xffff

LNET_NID_ANY = 0xffffffffffffffff
LNET_NIDSTR_SIZE = 32

# LND type codes understood by nid2str().
O2IBLND = 5
PTLLND = 4
GNILND = 13

LP_POISON = 0x5a5a5a5a5a5a5a5a

def nid2str(nid):
    """Format an lnet_nid as e.g. '1.2.3.4@o2ib3'; '' for unknown LNDs."""
    if nid == LNET_NID_ANY:
        return 'LNET_NID_ANY'
    addr = LNET_NIDADDR(nid)
    net = LNET_NIDNET(nid)
    lnd = LNET_NETTYP(net)
    nnum = LNET_NETNUM(net)
    if lnd == O2IBLND:
        octets = ((addr >> 24) & 0xff, (addr >> 16) & 0xff,
                  (addr >> 8) & 0xff, addr & 0xff)
        s = "%d.%d.%d.%d@o2ib" % octets
    elif lnd == PTLLND:
        s = "%d@ptl" % addr
    elif lnd == GNILND:
        s = "%d@gni" % addr
    else:
        # Unknown LND: empty string, and suppress the network-number suffix.
        s = ""
        nnum = 0
    if nnum != 0:
        s = "%s%d" % (s, nnum)
    return s
+
def obd2nidstr(obd):
    """Return the peer NID string of obd's import ('LNET_NID_ANY' if none).

    Guards against NULL, all-ones, and slab-poisoned cl_import pointers
    before dereferencing, and only trusts imports that point back at obd.
    """
    obd_import = readSU('struct obd_import', obd.u.cli.cl_import)
    nid = LNET_NID_ANY
    imp_invalid = 1
    if obd_import and obd_import != 0xffffffffffffffff and \
       obd_import != LP_POISON:
        imp_invalid = obd_import.imp_invalid

    if not imp_invalid and obd_import.imp_connection:
        if Addr(obd_import.imp_obd) == Addr(obd):
            nid = obd_import.imp_connection.c_peer.nid
    return nid2str(nid)
+
+"""Miscellaneous"""
+
def obd2str(obd, partitions=2):
    """Return the first `partitions` dash-separated pieces of obd_name."""
    pieces = obd.obd_name.split('-', partitions)[:partitions]
    return '-'.join(pieces)
+
def list_empty(head):
    """True if a kernel list_head is empty (next points back to the head)."""
    return head.next == head
+
+"""Red-Black"""
+
def rb_first(root):
    """Return the leftmost (first) node of an rbtree, or None if empty."""
    node = root.rb_node
    if not node:
        return None
    while node.rb_left:
        node = node.rb_left
    return node
+
def rb_last(root):
    """Return the rightmost (last) node of an rbtree, or None if empty."""
    node = root.rb_node
    if not node:
        return None
    while node.rb_right:
        node = node.rb_right
    return node
+
def rb_parent_color(node):
    """Read the packed __rb_parent_color word (parent pointer | color bit)."""
    return readU64(Addr(node))

def rb_parent(node):
    """Return the parent rb_node, masking off the low color/flag bits."""
    addr = rb_parent_color(node) & ~3
    return readSU('struct rb_node', addr)

#The color of the rb_node; 0 denotes red, 1 denotes black
def rb_color(node):
    return rb_parent_color(node) & 1
+
def rb_next(node):
    """Return the in-order successor of node, or None at the last node."""
    # rb_parent(node) == node is the kernel convention for an empty/unlinked
    # node (RB_EMPTY_NODE) — presumably; TODO confirm for this kernel.
    if rb_parent(node) == node:
        return None
    #right child exists
    if node.rb_right:
        node = node.rb_right
        while(node.rb_left):
            node = node.rb_left
        return node
    #no right child
    parent = rb_parent(node)
    while(parent and node == parent.rb_right):
        node = parent
        parent = rb_parent(node)
    return parent
+
def rb_prev(node):
    """Return the in-order predecessor of node, or None at the first node."""
    if rb_parent(node) == node:
        return None
    #left child exists: predecessor is rightmost node of the left subtree
    if node.rb_left:
        node = node.rb_left
        while(node.rb_right):
            node = node.rb_right
        return node
    #no left child: climb while we are our parent's left child.
    # Fixed: the guard must test 'parent' itself (mirroring rb_next and the
    # kernel's rb_prev); the old 'parent.rb_left' test crashed with an
    # AttributeError once parent became NULL at the tree root.
    parent = rb_parent(node)
    while(parent and node == parent.rb_left):
        node = parent
        parent = rb_parent(node)
    return parent
+
+"""LNET Globals"""
+the_lnet = readSymbol('the_lnet')
+
+tmpsiz = 256
+
+LNET_CPT_BITS = the_lnet.ln_cpt_bits
+LNET_PROC_CPT_BITS = LNET_CPT_BITS + 1
+LNET_LOFFT_BITS = getSizeOf('loff_t') * 8
+LNET_PROC_VER_BITS = max((min(LNET_LOFFT_BITS, 64) / 4), 8)
+LNET_PROC_HASH_BITS = 9
+LNET_PROC_HOFF_BITS = LNET_LOFFT_BITS - LNET_PROC_CPT_BITS - LNET_PROC_VER_BITS - LNET_PROC_HASH_BITS -1
+LNET_PROC_HPOS_BITS = LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS
+LNET_PROC_VPOS_BITS = LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS
+
+LNET_PROC_CPT_MASK = (1 << LNET_PROC_CPT_BITS) - 1
+LNET_PROC_VER_MASK = (1 << LNET_PROC_VER_BITS) - 1
+LNET_PROC_HASH_MASK = (1 << LNET_PROC_HASH_BITS) - 1
+LNET_PROC_HOFF_MASK = (1 << LNET_PROC_HASH_BITS) - 1
+
+LNET_PING_FEAT_NI_STATUS = 1 << 1
+
+HZ = sys_info.HZ
diff --git a/contrib/debug_tools/epython_scripts/obd_devs.py b/contrib/debug_tools/epython_scripts/obd_devs.py
new file mode 100644 (file)
index 0000000..6d88481
--- /dev/null
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Utility to display obd_devices
+"""
+
+from pykdump.API import *
+import argparse
+
+from crashlib.input import toint
+import lustrelib as ll
+import rpc_stats as rs
+
+description_short = "Displays the contents of global 'obd_devs'"
+
+SEP_SIZE = 152
+def print_separator(count):
+    s=""
+    for idx in xrange(count):
+        s += "="
+    print s
+
def print_header():
    """Print the obd_devs table column headings and a separator rule."""
    print "%-19s %-22s \t%-22s %-19s %-19s %-12s %-10s %-7s %-10s" % \
         ("obd_device",
          "obd_name",
          "ip_address",
          "client_obd",
          "obd_import",
          "imp_state",
          "ish_time",
          "index",
          "conn_cnt")
    print_separator(SEP_SIZE)
+
# obd_import::imp_state values -> display names (enum lustre_imp_state).
IMP_STATE = {
        1:  "CLOSED",
        2:  "NEW",
        3:  "DISCON",
        4:  "CONNECTING",
        5:  "REPLAY",
        6:  "REPLAY_LOCKS",
        7:  "REPLAY_WAIT",
        8:  "RECOVER",
        9:  "FULL",
       10:  "EVICTED",
       11:  "IDLE"
}
+
+
+def print_one_device(obd, stats_flag):
+    try:
+        nid = ll.obd2nidstr(obd)
+    except Exception, e:
+        try:
+            print "0x%-17x %-22s" % (Addr(obd), ll.obd2str(obd))
+        except Exception, e:
+            return 1
+        return 0
+
+    impstate = "--"
+    ish_time = 0
+    index=-1
+    connect_cnt = 0
+    inflight=0
+    if obd.u.cli.cl_import:
+          impstate=IMP_STATE.get(obd.u.cli.cl_import.imp_state)
+          index=obd.u.cli.cl_import.imp_state_hist_idx - 1
+          if index > 0 and index < 16:
+               ish_time=obd.u.cli.cl_import.imp_state_hist[index].ish_time
+         inflight=obd.u.cli.cl_import.imp_inflight.counter
+          connect_cnt = obd.u.cli.cl_import.imp_conn_cnt
+
+    print "0x%-17x %-22s\t%-22s\t 0x%-17x 0x%-17x %-10s %-10d %5d %5d" % \
+          (Addr(obd),
+          ll.obd2str(obd),
+          nid,
+          Addr(obd.u.cli),
+          Addr(obd.u.cli.cl_import),
+          impstate,
+          ish_time,
+          index,
+          connect_cnt)
+    if stats_flag:
+        print
+        rs.osc_rpc_stats_seq_show(Addr(obd.u.cli))
+        print_separator(SEP_SIZE)
+    return 0
+
def print_devices(devices, stats_flag):
    """Print the header then each device; stop at the first NULL table slot."""
    print_header()
    for obd in devices:
        if Addr(obd) == 0:
            break
        print_one_device(obd, stats_flag)
    print_separator(SEP_SIZE)
+
def obd_devs(args):
    """Print the global obd_devs table, or a single device if one was given."""
    if args.obd_device:
        devs = [readSU('struct obd_device', args.obd_device)]
    else:
        devs = readSymbol('obd_devs')
    print_devices(devs, args.stats_flag)
+
+if __name__ == "__main__":
+    description = "Displays the contents of global 'obd_devs'"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("obd_device", nargs="?", default = [], type=toint,
+        help="print obd_device at argument address")
+    parser.add_argument("-r", dest="stats_flag", action="count",
+        help="print the rpc_stats sequence for each client_obd")
+    args = parser.parse_args()
+    obd_devs(args)
diff --git a/contrib/debug_tools/epython_scripts/ptlrpc.py b/contrib/debug_tools/epython_scripts/ptlrpc.py
new file mode 100644 (file)
index 0000000..518ef3e
--- /dev/null
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Dumps the Lustre RPC queues for all ptlrpcd_XX threads.
+"""
+
+from pykdump.API import *
+import sys
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+from traceback import print_exc
+
+description_short = "Displays the RPC queues of the Lustre ptlrpcd daemons"
+
def print_separator(count):
    """Print a horizontal rule made of *count* '=' characters."""
    print("=" * count)
+
def print_title(title):
    """Print the column headings for the RPC listing.

    A non-empty *title* prints the title and header row first; an
    empty title prints only the separator (used as a footer).
    """
    if title:
        print("\n" + title)
        print("%-14s %-6s %-19s %-18s %-19s %-4s %-14s %-4s %-22s %-19s"
               % ("thread", "pid", "ptlrpc_request", "xid", "nid", "opc",
                  "phase:flags", "R:W", "sent/deadline", "ptlrpc_body"))
    print_separator(148)
+
def enum(**enums):
    """Build an ad-hoc enumeration: enum(A=1, B=2) returns a class
    exposing A and B as attributes."""
    members = dict(enums)
    return type('Enum', (), members)
+
# Identifiers for the list_head members that can link a ptlrpc_request
# into a queue; values are keys into RQ_LIST_LNKS and therefore MUST be
# distinct.  Fix: timed_list previously shared the value 5 with
# unreplied_list, which made the two RQ_LIST_LNKS dict entries collide so
# selecting 'unreplied_list' silently used the sr_timed_list offsets.
REQ_Q = enum(rq_list=1, replay_list=2, set_chain=3, ctx_chain=4,
             unreplied_list=5, timed_list=6, exp_list=7, hist_list=8)
+
# Maps each REQ_Q queue id to [containing struct, link member, union member].
# The link field lives inside the named struct, which is embedded in
# ptlrpc_request through the given union member (rq_cli for client-side
# requests, rq_srv for server-side ones).  Correct lookup relies on all
# REQ_Q values being distinct.
RQ_LIST_LNKS = {
    REQ_Q.rq_list:        ['struct ptlrpc_request', 'rq_list', 'rq_type'],
    REQ_Q.replay_list:    ['struct ptlrpc_request', 'rq_replay_list', 'rq_type'],
    REQ_Q.set_chain:      ['struct ptlrpc_cli_req', 'cr_set_chain', 'rq_cli'],
    REQ_Q.ctx_chain:      ['struct ptlrpc_cli_req', 'cr_ctx_chain', 'rq_cli'],
    REQ_Q.unreplied_list: ['struct ptlrpc_cli_req', 'cr_unreplied_list', 'rq_cli'],
    REQ_Q.timed_list:     ['struct ptlrpc_srv_req', 'sr_timed_list', 'rq_srv'],
    REQ_Q.exp_list:       ['struct ptlrpc_srv_req', 'sr_exp_list', 'rq_srv'],
    REQ_Q.hist_list:      ['struct ptlrpc_srv_req', 'sr_hist_list', 'rq_srv']
}

# Indices into the RQ_LIST_LNKS value lists above.
STRUCT_IDX = 0
MEMBER_IDX = 1
UNION_IDX = 2
+
def size_round(val):
    """Round *val* up to the next multiple of 8 (lustre_msg alignment)."""
    return (val + 7) & ~7
+
# Magic value identifying a v2 lustre message header.
LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3

def get_ptlrpc_body(req):
    """Locate the ptlrpc_body_v2 inside a request's rq_reqmsg.

    Returns a struct ptlrpc_body_v2 handle, or None when the message
    is missing, has the wrong magic, or is too small to hold a body.
    (Fixes: removed a leftover debug 'print "addr"' and a redundant
    'msg == None' test.)
    """
    msg = req.rq_reqmsg
    if not msg:
        return None

    if msg.lm_magic != LUSTRE_MSG_MAGIC_V2:
        return None

    bufcount = msg.lm_bufcount
    if bufcount < 1:
        return None

    # The first message buffer always holds the ptlrpc_body.
    buflen = msg.lm_buflens[0]
    if buflen < getSizeOf('struct ptlrpc_body_v2'):
        return None

    # The body starts after the variable-length lm_buflens[] array,
    # rounded up to the message's 8-byte alignment.
    offset = member_offset('struct lustre_msg_v2', 'lm_buflens')
    offset += getSizeOf("unsigned int") * bufcount
    offset = size_round(offset)
    addr = Addr(msg) + offset
    if addr == 0:
        return None
    return readSU('struct ptlrpc_body_v2', addr)
+
# rq_phase magic values.
RQ_PHASE_NEW = 0xebc0de00
RQ_PHASE_RPC = 0xebc0de01
RQ_PHASE_BULK = 0xebc0de02
RQ_PHASE_INTERPRET = 0xebc0de03
RQ_PHASE_COMPLETE = 0xebc0de04
RQ_PHASE_UNREG_RPC = 0xebc0de05
RQ_PHASE_UNREG_BULK = 0xebc0de06
RQ_PHASE_UNDEFINED = 0xebc0de07

# Short display names for each request phase.
PHASES = {
    RQ_PHASE_NEW: "NEW",
    RQ_PHASE_RPC: "RPC",
    RQ_PHASE_BULK: "BULK",
    RQ_PHASE_INTERPRET: "NtrPrt",
    RQ_PHASE_COMPLETE: "COMP",
    RQ_PHASE_UNREG_RPC: "UNREG",
    RQ_PHASE_UNREG_BULK: "UNBULK",
    RQ_PHASE_UNDEFINED: "UNDEF"
}

# Legend printed with --help; letters match get_phase_flags() below.
# Fix: the "C" flag is driven by rq_wait_ctx, not rq_wait.
FLAG_LEGEND = "\nFlag Legend:\n\n" + \
         "I - rq_intr\tR - rq_replied\t\tE - rq_err\te - rq_net_err\tX - rq_timedout\tS - rq_resend\t\tT - rq_restart\n" + \
         "P - rq_replay\tN - rq_no_resend\tW - rq_waiting\tC - rq_wait_ctx\tH - rq_hp\tM - rq_committed\tq - rq_req_unlinked\tu - rq_reply_unlinked\n"

def get_phase_flags(req):
    """Return "<phase>:<flags>" for *req*.

    The phase value is masked to 32 bits before lookup; unknown phases
    render as "?<value>".  One legend letter is appended for every set
    rq_* flag bit.
    """
    phase = req.rq_phase
    phasestr = PHASES.get(phase & 0xffffffff, "?%d" % phase)
    flag_bits = [
        ("I", req.rq_intr), ("R", req.rq_replied), ("E", req.rq_err),
        ("e", req.rq_net_err), ("X", req.rq_timedout), ("S", req.rq_resend),
        ("T", req.rq_restart), ("P", req.rq_replay), ("N", req.rq_no_resend),
        ("W", req.rq_waiting), ("C", req.rq_wait_ctx), ("H", req.rq_hp),
        ("M", req.rq_committed), ("q", req.rq_req_unlinked),
        ("u", req.rq_reply_unlinked),
    ]
    return "%s:%s" % (phasestr, "".join(c for c, bit in flag_bits if bit))
+
# Kernel slab poison pattern; an import pointer with this value is stale.
LP_POISON = 0x5a5a5a5a5a5a5a5a

def print_one_request(sthread, req):
    """Print one formatted line describing ptlrpc_request *req*.

    sthread is the owning ptlrpcd thread name ('' for a standalone
    request).  Fields that cannot be resolved (no ptlrpc body, invalid
    or poisoned import) fall back to -1 / placeholder text.
    NOTE(review): pb_status is printed in the column the header labels
    "pid", and nid is computed below but never printed (obd_name lands
    under the "nid" column) -- confirm which is intended.
    """
    pb = get_ptlrpc_body(req)
    status = -1
    opc = -1
    pbaddr = -1
    if pb:
        status = pb.pb_status
        opc = pb.pb_opc
        pbaddr = Addr(pb)

    # Guard against NULL, all-ones and slab-poisoned import pointers
    # before dereferencing.
    imp_invalid = 1
    nid = "LNET_NID_ANY"
    obd_name = "Invalid Import"
    if req.rq_import and req.rq_import != 0xffffffffffffffff and \
       req.rq_import != LP_POISON:
        imp_invalid = req.rq_import.imp_invalid
        obd_name = ll.obd2str(req.rq_import.imp_obd)

    if not imp_invalid and req.rq_import.imp_connection:
        nid = ll.nid2str(req.rq_import.imp_connection.c_peer.nid)
    brw = "%1d:%1d" % (req.rq_bulk_read, req.rq_bulk_write)
    rq_sent_dl = "%d/%d" % (req.rq_sent, req.rq_deadline)
    print "%-14s %-6s 0x%-17x %-18d %-19s %-4d %-14s %-4s %-22s 0x%-17x" % \
            (sthread,
            status,
            Addr(req),
            req.rq_xid,
            obd_name,
            opc,
            get_phase_flags(req),
            brw,
            rq_sent_dl,
            pbaddr)
+
def print_request_list(sthread, lhdr, loffset):
    """Walk the circular list headed at *lhdr*, printing each request.

    loffset is the byte offset of the link field within ptlrpc_request;
    subtracting it converts a list_head pointer back to its request.
    Returns 0 on success, 1 if the walk raised (traceback is printed).
    """
    try:
        for reqlnk in readStructNext(lhdr, 'next'):
            # Stop when the circular list wraps back to its head.
            if reqlnk.next == Addr(lhdr):
                break
            req = readSU('struct ptlrpc_request', reqlnk.next-loffset)
            print_one_request(sthread, req)

    except Exception, e:
        print_exc()
        return 1
    return 0
+
def get_linkfld_offset(lfld):
    """Offset of list link field *lfld* from the start of ptlrpc_request.

    The link members live inside structs embedded in a union of
    ptlrpc_request, so the result is the union member's offset plus the
    link member's offset within its containing struct.
    """
    entry = RQ_LIST_LNKS[lfld]
    union_off = member_offset('struct ptlrpc_request', entry[UNION_IDX])
    member_off = member_offset(entry[STRUCT_IDX], entry[MEMBER_IDX])
    return union_off + member_off
+
def foreach_ptlrpcd_ctl(callback, *args):
    """Invoke callback(pd, *args) for every ptlrpcd_ctl in the system.

    Visits each thread of every ptlrpcd (global 'ptlrpcds' array of
    'ptlrpcds_num' entries) and finally the dedicated receive daemon.
    """
    pinfo_rpcds = readSymbol('ptlrpcds')
    pinfo_count = readSymbol('ptlrpcds_num')

    for idx in xrange(pinfo_count):
        ptlrpcd = pinfo_rpcds[idx]
        for jdx in xrange(ptlrpcd.pd_nthreads):
            pd = ptlrpcd.pd_threads[jdx]
            callback(pd, *args)
    # ptlrpcd_rcv is a separate symbol, not part of the ptlrpcds array.
    pd = readSymbol('ptlrpcd_rcv')
    callback(pd, *args)
+
def get_daemon_listhdrs(pd, sent_rpcs, pend_rpcs):
    """Append [thread-name, list_head] pairs for pd's sent and pending queues."""
    name = pd.pc_name
    pcset = pd.pc_set
    sent_rpcs.append([name, pcset.set_requests])
    pend_rpcs.append([name, pcset.set_new_requests])
+
def dump_list_of_lists(rpc_list, loffset):
    """Print every request on each (thread-name, list_head) pair."""
    for sthread, lhdr in rpc_list:
        print_request_list(sthread, lhdr, loffset)
+
def dump_daemon_rpclists():
    """Collect and print the sent and pending RPC queues of every ptlrpcd."""
    sent_rpcs = []
    pend_rpcs = []
    foreach_ptlrpcd_ctl(get_daemon_listhdrs, sent_rpcs, pend_rpcs)

    # Requests hang off the pc_set lists via the cr_set_chain link.
    loffset = get_linkfld_offset(REQ_Q.set_chain)

    for title, rpcs in (
            ("Sent RPCS: ptlrpc_request_set.set_requests->", sent_rpcs),
            ("Pending RPCS: ptlrpc_request_set.set_new_requests->", pend_rpcs)):
        print_title(title)
        dump_list_of_lists(rpcs, loffset)
    print_title('')
+
def print_overview_entry(pd):
    """Print one ptlrpcd_ctl address with its request-set pointer."""
    label = "%s:" % pd.pc_name
    print("%-14s  ptlrpcd_ctl 0x%x   ptlrpc_request_set 0x%x" %
        (label, Addr(pd), pd.pc_set))
+
def dump_overview():
    """List every ptlrpcd_ctl together with its ptlrpc_request_set."""
    foreach_ptlrpcd_ctl(print_overview_entry)
+
def print_pcset_stats(pd):
    """Print refcount/new/remaining for pd's request set, skipping idle sets."""
    pcset = pd.pc_set
    # Only show sets that currently have work queued or outstanding.
    if pcset.set_new_count.counter == 0 and \
        pcset.set_remaining.counter == 0:
        return
    label = "%s:" % pd.pc_name
    print("%-13s 0x%-18x %-4d %-4d %-6d" %
        (label, Addr(pcset),
        pcset.set_refcount.counter,
        pcset.set_new_count.counter,
        pcset.set_remaining.counter))
+
def dump_pcsets():
    """Print a table of rpc counts for every ptlrpcd request set."""
    print('%-14s %-19s %-4s %-4s %-6s' %
        ("thread", "ptlrpc_request_set", "ref", "new", "remain"))
    print_separator(52)
    foreach_ptlrpcd_ctl(print_pcset_stats)
+
def dump_one_rpc(addr):
    """Print the single ptlrpc_request located at *addr*."""
    print_title("Request")
    print_one_request('', readSU('struct ptlrpc_request', addr))
+
def dump_one_rpclist(addr, link_fld):
    """Print the request list headed at *addr*, linked by member *link_fld*."""
    lhdr = readSU('struct list_head', addr)
    # Map the link field name to its REQ_Q id, then to a byte offset.
    loffset = get_linkfld_offset(vars(REQ_Q)[link_fld])

    print_title("Request list at %s" % lhdr)
    print_request_list('', lhdr, loffset)
+
def dump_rpcs_cmd(args):
    """Dispatch on the parsed command line.

    Priority: -o overview, then -s set stats, then a specific address
    (a list_head when -l names the link field, else a single request);
    with no arguments, dump all ptlrpcd queues.
    """
    if args.oflag:
        dump_overview()
        return
    if args.sflag:
        dump_pcsets()
        return
    if args.rpc_addr:
        # nargs="?" stores a single toint-converted scalar, not a list,
        # so indexing with [0] (as the old code did) raised TypeError.
        if args.link_fld:
            dump_one_rpclist(args.rpc_addr, args.link_fld)
        else:
            dump_one_rpc(args.rpc_addr)
        return
    dump_daemon_rpclists()
+
# Command-line entry point.
if __name__ == "__main__":
    description = "" +\
        "Displays lists of Lustre RPC requests. If no arguments are \n" +\
        "specified, all rpcs in the sent and pending queues of the \n" +\
        "ptlrpcd daemons are printed. If an address is specified, it \n" +\
        "must be a pointer to either a ptlrpc_request or a list_head \n" +\
        "struct. If the addr is a list_head, then a link_field must \n" +\
        "also be provided. The link_field identifies the member of \n" +\
        "the ptlrpc_request struct used to link the list together."

    # RawDescriptionHelpFormatter preserves the hand-wrapped description
    # and the FLAG_LEGEND epilog verbatim in --help output.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description, epilog=FLAG_LEGEND)
    parser.add_argument("-o", dest="oflag", action="store_true",
        help="print overview of ptlrpcd_XX threads with ptlrpcd_ctl " + \
            "structs and the associated pc_set field")
    parser.add_argument("-s", dest="sflag", action="store_true",
        help="print rpc counts per ptlrpc_request_set")
    # Choices mirror the REQ_Q enum member names.
    parser.add_argument("-l", dest="link_fld", default="",
        choices=['rq_list', 'replay_list', 'set_chain', 'ctx_chain',
                 'unreplied_list', 'timed_list', 'exp_list', 'hist_list'],
        help="name of link field in ptlrpc_request for list headed by addr")
    parser.add_argument("rpc_addr", nargs="?", default=[], type=toint,
        help="address of either single ptlrpc_request or list_head; list_head requires a -l argument")
    args = parser.parse_args()

    dump_rpcs_cmd(args)
diff --git a/contrib/debug_tools/epython_scripts/rpc_opcode.py b/contrib/debug_tools/epython_scripts/rpc_opcode.py
new file mode 100644 (file)
index 0000000..6aa8d58
--- /dev/null
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2019 Cray Inc.  All Rights Reserved
+Utility to maps numeric opcode to string identifier
+"""
+
+from pykdump.API import *
+import argparse
+
+description_short = "Maps Lustre rpc opcodes string identifiers."
+
# Numeric RPC opcode -> symbolic name, mirroring the enums in
# lustre_idl.h (OST, MDS, LDLM, MGS, OBD, LLOG, QUOTA, SEQ, SEC, FLD,
# OUT_UPDATE and LFSCK opcode ranges).
opcodes = {
    0:  'OST_REPLY',
    1:  'OST_GETATTR',
    2:  'OST_SETATTR',
    3:  'OST_READ',
    4:  'OST_WRITE',
    5:  'OST_CREATE',
    6:  'OST_DESTROY',
    7:  'OST_GET_INFO',
    8:  'OST_CONNECT',
    9:  'OST_DISCONNECT',
    10: 'OST_PUNCH',
    11: 'OST_OPEN',
    12: 'OST_CLOSE',
    13: 'OST_STATFS',
    16: 'OST_SYNC',
    17: 'OST_SET_INFO',
    18: 'OST_QUOTACHECK',
    19: 'OST_QUOTACTL',
    20: 'OST_QUOTA_ADJUST_QUNIT',  # not used since 2.4
    21: 'OST_LADVISE',

    33: 'MDS_GETATTR',
    34: 'MDS_GETATTR_NAME',
    35: 'MDS_CLOSE',
    36: 'MDS_REINT',
    37: 'MDS_READPAGE',
    38: 'MDS_CONNECT',
    39: 'MDS_DISCONNECT',
    40: 'MDS_GET_ROOT',
    41: 'MDS_STATFS',
    42: 'MDS_PIN',
    43: 'MDS_UNPIN',          # obsolete, never used in a release
    44: 'MDS_SYNC',
    45: 'MDS_DONE_WRITING',
    46: 'MDS_SET_INFO',
    47: 'MDS_QUOTACHECK',     # not used since 2.4
    48: 'MDS_QUOTACTL',
    49: 'MDS_GETXATTR',
    50: 'MDS_SETXATTR',       # obsolete, now it's MDS_REINT op
    51: 'MDS_WRITEPAGE',
    52: 'MDS_IS_SUBDIR',      # obsolete, never used in a release
    53: 'MDS_GET_INFO',
    54: 'MDS_HSM_STATE_GET',
    55: 'MDS_HSM_STATE_SET',
    56: 'MDS_HSM_ACTION',
    57: 'MDS_HSM_PROGRESS',
    58: 'MDS_HSM_REQUEST',
    59: 'MDS_HSM_CT_REGISTER',
    60: 'MDS_HSM_CT_UNREGISTER',
    61: 'MDS_SWAP_LAYOUTS',

    101: 'LDLM_ENQUEUE',
    102: 'LDLM_CONVERT',
    103: 'LDLM_CANCEL',
    104: 'LDLM_BL_CALLBACK',
    105: 'LDLM_CP_CALLBACK',
    106: 'LDLM_GL_CALLBACK',
    107: 'LDLM_SET_INFO',

    250: 'MGS_CONNECT',
    251: 'MGS_DISCONNECT',
    252: 'MGS_EXCEPTION',           # node died, etc.
    253: 'MGS_TARGET_REG',          # whenever target starts up
    254: 'MGS_TARGET_DEL',
    255: 'MGS_SET_INFO',
    256: 'MGS_CONFIG_READ',

    400: 'OBD_PING',
    401: 'OBD_LOG_CANCEL',          # obsolete since 1.5
    402: 'OBD_QC_CALLBACK',         # obsolete since 2.4
    403: 'OBD_IDX_READ',

    501: 'LLOG_ORIGIN_HANDLE_CREATE',
    502: 'LLOG_ORIGIN_HANDLE_NEXT_BLOCK',
    503: 'LLOG_ORIGIN_HANDLE_READ_HEADER',
    504: 'LLOG_ORIGIN_HANDLE_WRITE_REC',    # Obsolete by 2.1.
    505: 'LLOG_ORIGIN_HANDLE_CLOSE',        # Obsolete by 1.8.
    506: 'LLOG_ORIGIN_CONNECT',             # Obsolete by 2.4.
    507: 'LLOG_CATINFO',                    # Obsolete by 2.3.
    508: 'LLOG_ORIGIN_HANDLE_PREV_BLOCK',
    509: 'LLOG_ORIGIN_HANDLE_DESTROY',      # Obsolete by 2.11.

    601: 'QUOTA_DQACQ',
    602: 'QUOTA_DQREL',

    700: 'SEQ_QUERY',

    801: 'SEC_CTX_INIT',
    802: 'SEC_CTX_INIT_CONT',
    803: 'SEC_CTX_FINI',

    900: 'FLD_QUERY',
    901: 'FLD_READ',

    1000: 'OUT_UPDATE',

    1101: 'LFSCK_NOTIFY',
    1102: 'LFSCK_QUERY'
}
+
def translate_opcodes(opc_list):
    """Print "o<num> \\t= <name>" for each opcode in *opc_list*.

    Unknown opcodes print "unknown".  Only KeyError is caught so real
    programming errors are not masked (the previous bare 'except:' hid
    every failure, including interrupts).
    """
    for opc in opc_list:
        try:
            name = opcodes[opc]
        except KeyError:
            name = "unknown"
        print("o%d \t= %s" % (opc, name))
+
+
# Command-line entry point: translate the listed opcodes.
if __name__ == "__main__":
    description = "Maps one or more Lustre rpc opcodes to its string identifier."
    parser = argparse.ArgumentParser(description=description)
    # One or more numeric opcodes to translate.
    parser.add_argument('opcode', nargs="+", type=int,
        help="list of one or more opcodes")

    args = parser.parse_args()
    translate_opcodes(args.opcode)
diff --git a/contrib/debug_tools/epython_scripts/rpc_stats.py b/contrib/debug_tools/epython_scripts/rpc_stats.py
new file mode 100644 (file)
index 0000000..c271182
--- /dev/null
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2015-2018 Cray Inc. All Rights Reserved.
+Utility to display rpc stats for a client_obd
+"""
+
+from pykdump.API import *
+import argparse
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = 'Dumps the rpc stats for a given client_obd'
+
# Number of buckets in a struct obd_histogram (oh_buckets[]).
OBD_HIST_MAX = 32

def get_cli_obd(client_obd):
    """Resolve the command-line argument to a struct client_obd.

    First treats *client_obd* as the address of a client_obd; if that
    read fails, scans obd_devs for a matching device and returns its
    embedded u.cli.  Returns None when nothing matches.
    """
    cli = None
    try:
        cli = readSU('struct client_obd', client_obd)
    except Exception, e:
        # Fallback: search every obd device.
        # NOTE(review): obd2str() returns a string while client_obd has
        # already been converted by toint(); this equality looks like it
        # can never match a numeric argument -- confirm intended use.
        for dev in readSymbol('obd_devs'):
            try:
                if ll.obd2str(dev, 4) == client_obd:
                    cli = dev.u.cli
                    break
            except Exception, e:
                # Skip devices that cannot be decoded.
                continue
    return cli
+
def pct(a, b):
    """Return *a* as a percentage of *b*; 0 when *b* is zero/falsy."""
    if not b:
        return 0
    return 100 * a / b
+
def lprocfs_oh_sum(oh):
    """Sum of all OBD_HIST_MAX buckets of an obd_histogram."""
    return sum(oh.oh_buckets[i] for i in range(OBD_HIST_MAX))
+
def _show_rpc_hist(read_hist, write_hist, header, fmt, bucket_val):
    """Print one read-vs-write histogram table.

    header is the column-title line, fmt the per-row format string and
    bucket_val maps a bucket index to the value shown in column one.
    Rows stop early once both cumulative totals are reached.
    """
    print("\n\t\t\tread\t\t\twrite")
    print(header)

    read_tot = lprocfs_oh_sum(read_hist)
    write_tot = lprocfs_oh_sum(write_hist)

    read_cum = 0
    write_cum = 0
    for i in range(OBD_HIST_MAX):
        r = read_hist.oh_buckets[i]
        w = write_hist.oh_buckets[i]

        read_cum += r
        write_cum += w
        print(fmt %
              (bucket_val(i), r, pct(r, read_tot),
              pct(read_cum, read_tot), w,
              pct(w, write_tot),
              pct(write_cum, write_tot)))
        if read_cum == read_tot and write_cum == write_tot:
            break

def osc_rpc_stats_seq_show(client_obd):
    """Print the osc rpc_stats proc content for *client_obd*.

    Emits in-flight counts followed by the pages-per-rpc,
    rpcs-in-flight and offset histograms (read vs write columns).
    Returns 0 on success, 1 when client_obd is missing/NULL.
    (Refactor: the three duplicated histogram loops now share
    _show_rpc_hist; output is unchanged.)
    """
    if not client_obd:
        print("invalid input for field 'client_obd'")
        return 1
    cli = readSU('struct client_obd', client_obd)
    print("read RPCs in flight:  %d" % cli.cl_r_in_flight)
    print("write RPCs in flight: %d" % cli.cl_w_in_flight)
    print("pending write pages:  %d" % cli.cl_pending_w_pages.counter)
    print("pending read pages:   %d" % cli.cl_pending_r_pages.counter)

    # Bucket i covers 2**i pages per rpc.
    _show_rpc_hist(cli.cl_read_page_hist, cli.cl_write_page_hist,
                   "pages per rpc         rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:\t\t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: 1 << i)
    # Bucket i covers exactly i rpcs in flight.
    _show_rpc_hist(cli.cl_read_rpc_hist, cli.cl_write_rpc_hist,
                   "rpcs in flight        rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:\t\t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: i)
    # Bucket 0 is offset 0; bucket i>0 covers offset 2**(i-1).
    _show_rpc_hist(cli.cl_read_offset_hist, cli.cl_write_offset_hist,
                   "offset                rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:      \t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: 0 if i == 0 else 1 << (i - 1))
    print("")
    return 0
+
# Command-line entry point: resolve the argument and dump its rpc stats.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=description_short)
    # Address of a client_obd (resolved/validated by get_cli_obd).
    parser.add_argument("client_obd", nargs="?", default=[], type=toint,
        help="address of client_obd structure whose stats will be dumped")
    args = parser.parse_args()
    cli = get_cli_obd(args.client_obd)
    osc_rpc_stats_seq_show(cli)
diff --git a/contrib/debug_tools/epython_scripts/sbi_ptrs.py b/contrib/debug_tools/epython_scripts/sbi_ptrs.py
new file mode 100644 (file)
index 0000000..7872039
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to display Lustre inode related struct pointers
+"""
+
+from pykdump.API import *
+import argparse
+from crashlib.input import toint
+
+description_short = "Prints Lustre structs associated with inode."
+
def dump_inode(inode):
    """Print the Lustre struct pointers associated with one inode.

    The ll_inode_info embeds the VFS inode, so the container is found
    by backing up lli_vfs_inode's offset from the inode address.
    """
    lli_off = member_offset('struct ll_inode_info', 'lli_vfs_inode')
    lli = readSU('struct ll_inode_info', Addr(inode) - lli_off)
    sb = readSU('struct super_block', inode.i_sb)
    lsi = readSU('struct lustre_sb_info', sb.s_fs_info)
    llsbi = readSU('struct ll_sb_info', lsi.lsi_llsbi)
    print("%x %x %x %x %x" % (Addr(inode), lli, sb, lsi, llsbi))
+
def dump_inode_list(inodes):
    """Print the struct-pointer table for each inode address in *inodes*."""
    print("%-16s %-16s %-16s %-16s %-16s" % ("inode", "ll_inode_info",
          "super_block", "lustre_sb_info", "ll_sb_info"))
    for addr in inodes:
        inode = readSU('struct inode', addr)
        dump_inode(inode)
+
# Command-line entry point: print the pointer table for each inode.
if __name__ == "__main__":
    description = "Prints ll_inode_info, super_block, \n" + \
            "lustre_sb_info, and ll_sb_info pointers associated \n" + \
           "with specified inode(s) \n"

    parser = argparse.ArgumentParser(description=description)
    # One or more inode addresses to decode.
    parser.add_argument('inode', nargs="+", type=toint,
        help="list of one or more inodes")
    args = parser.parse_args()

    dump_inode_list(args.inode)
diff --git a/contrib/debug_tools/epython_scripts/uniqueStacktrace.py b/contrib/debug_tools/epython_scripts/uniqueStacktrace.py
new file mode 100644 (file)
index 0000000..415aee1
--- /dev/null
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Utility to print unique stack traces
+"""
+
+import re
+import sys
+import StringIO
+import argparse
+from pykdump.API import exec_crash_command
+
+description_short = 'Print stack traces for each task.'
+
+# The outer loop identifies PIDs; the inner loop collects the
+# '#'-prefixed frame lines of a trace until
+# another PID is found
def sortInput(swapper, input):
    """Group 'foreach ... bt' output by identical stack trace.

    Reads crash backtrace output from *input* (a file-like object) and
    returns a list of (trace, [(pid, task), ...]) tuples sorted by how
    many tasks share each trace, ascending.  Traces of swapper threads
    are skipped unless *swapper* is true.
    """


    ps = re.compile("^PID:\s+(\d+)\s+TASK:\s+([0-9A-Fa-f]+).*")
    n = re.compile("^#")
    swap = re.compile((".*\"swapper/[0-9]+\""))
    info = dict()
    PID = ""
    STK = ""
    tmp = ""

    # Outer to check for PIDs
    # this loop never breaks;
    for line in input:
        line = line.strip()

        # Inner loop to check for # signs indicating lines we want.
        # Having two loops allow for the PID and TSK to be associated
        # with  a particular trace.
        # This loop breaks if a new PID is found (meaning the end of the
        # current trace) or if there are no more lines available
        while True:
            if ps.match(line): break;
            line = line.strip()
            if n.match(line):
                # Keep only the function/module/address columns of the
                # frame line so equal traces compare equal.
                line = line.split()
                tmp += " ".join([line[2], line[3], line[4]])
                if len(line) == 6 : tmp += " " + line[5]
                tmp += '\n\t'
            # Advances the shared position; mixing readline() with
            # iteration is safe on a StringIO object.
            line = input.readline()
            if not line: break

        # Flush the completed trace, keyed by its text.
        if tmp :
            if tmp in info:
                info[tmp].append((PID,STK))
            else:
                info[tmp] = [(PID,STK)]

        m = ps.match(line)
        if m:
            PID, STK = m.group(1), m.group(2)
            tmp = ""

            # if it's swapper line move on
            # this prevents entry into inner loop
            if not swapper and swap.match(line):
                line = input.readline()

    # Least-shared traces first.
    sort = sorted(info.items(), key=lambda info: len(info[1]))
    return sort
+
def printRes(sort, printpid, printptr):
    """Print each unique stack trace, least common first.

    Optionally prefixes each trace with the PIDs and/or task pointers
    of the tasks that share it.
    """
    for trace, tasks in sort:
        if printpid and printptr:
            print("PID, TSK: %s" % (', '.join(p[0] + ': ' + p[1] for p in tasks)))
        elif printpid:
            print("PID: %s" % (', '.join(p[0] for p in tasks)))
        elif printptr:
            print("TSK: %s" % (', '.join(p[1] for p in tasks)))
        print("TASKS: %d" % (len(tasks)))
        print("\t%s" % (trace))
+
+
def main():
    """Parse options, run 'foreach ... bt' in crash and print unique traces."""
    parser = argparse.ArgumentParser()

    parser.add_argument("-p", "--print-pid",
                        action="store_true", dest="printpid", default=False,
                        help="Print PIDS corresponding to each ST")
    parser.add_argument("-q", "--print-taskpntr",
                        action="store_true", dest="printptr", default=False,
                        help="Print the task pointers for each ST")
    parser.add_argument("-s", "--swapper",
                        action="store_true", dest="swapper", default=False,
                        help="Print swapper processes")
    # Remaining arguments are passed through to crash's 'foreach' command.
    parser.add_argument("task_select", metavar="task_selection", nargs="*",
                        help="task selection argument (passed to foreach cmd)")

    args = parser.parse_args()

    # Collect backtraces for the selected tasks from the crash session.
    com = "foreach {ts:s} bt".format(ts=" ".join(args.task_select))

    result = exec_crash_command(com)
    # Wrap the command output so sortInput can iterate/readline it.
    input = StringIO.StringIO(result)
    printRes(sortInput(args.swapper, input), args.printpid, args.printptr)

if __name__ == '__main__':
    main()