Whamcloud - gitweb
LU-12461 contrib: Add epython scripts for crash dump analysis 82/35282/4
author Ann Koehler <amk@cray.com>
Thu, 20 Jun 2019 18:25:02 +0000 (13:25 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 14 Feb 2020 05:50:16 +0000 (05:50 +0000)
This mod creates a new subdirectory, debug_tools/epython_scripts,
in ./contrib to contain PyKdump scripts. These scripts written in
an extended version of Python aid in memory dump analysis by
extracting and formatting the content of Lustre data structures.

The scripts are written using Python 2.7 and tested on Lustre 2.11
client dumps.

Test-Parameters: trivial

Cray-bug-id: LUS-7501
Signed-off-by: Ann Koehler <amk@cray.com>
Change-Id: I0a15eb9025fb604742f4ae99508a080ce04163dc
Reviewed-on: https://review.whamcloud.com/35282
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
31 files changed:
contrib/debug_tools/epython_scripts/README [new file with mode: 0644]
contrib/debug_tools/epython_scripts/cfs_hashes.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/cfs_hnodes.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/addrlib.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/__init__.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/page.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/time.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/crashlib/util.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/debug_flags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/dk.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/jiffies2date.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ldlm_dumplocks.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ldlm_lockflags.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/lu_object.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/lustrelib.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/obd_devs.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/ptlrpc.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/rpc_opcode.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/rpc_stats.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/sbi_ptrs.py [new file with mode: 0644]
contrib/debug_tools/epython_scripts/uniqueStacktrace.py [new file with mode: 0644]

diff --git a/contrib/debug_tools/epython_scripts/README b/contrib/debug_tools/epython_scripts/README
new file mode 100644 (file)
index 0000000..d33af91
--- /dev/null
@@ -0,0 +1,37 @@
+These epython scripts extract the content of various Lustre data
+structures from crash dumps and formats the output for readability.
+They are written for use with PyKdump, a framework for using Python
+as an extension language for the crash tool.
+
+Documentation and source for PyKdump are available at:
+   https://sourceforge.net/p/pykdump/wiki/Home/
+
+These scripts are written in Python 2. The above wiki page has instructions
+for converting them to Python 3. The language extensions are documented in:
+   https://sourceforge.net/p/pykdump/code/ci/master/tree/docs/pykdump.lyx
+
+The scripts were tested with Lustre 2.11, primarily with dumps of client
+nodes. The scripts will work with server dumps as long as the data
+structures match the ones used on clients.
+
+Summary of scripts:
+ * cfs_hashes.py         Displays summary of cfs_hash tables.
+ * cfs_hnodes.py         Displays the specified Lustre hash table.
+ * debug_flags.py        Prints Lustre libcfs_debug flags as strings.
+ * dk.py                 Dumps and sorts the Lustre dk logs.
+ * jiffies2date.py       Prints the date and time of a given jiffies timestamp.
+ * ldlm_dumplocks.py     Lists granted and waiting locks by namespace/resource.
+ * ldlm_lockflags.py     Prints string identifiers for specified LDLM flags.
+ * lu_object.py          Prints contents of an lu_object.
+ * rpc_opcode.py         Maps Lustre rpc opcodes to string identifiers.
+ * obd_devs.py           Displays the contents of global 'obd_devs'.
+ * ptlrpc.py             Displays the RPC queues of the Lustre ptlrpcd daemons.
+ * rpc_stats.py          Dumps the client_obd structure given by client argument.
+ * sbi_ptrs.py           Prints Lustre structs associated with inode.
+ * uniqueStacktrace.py   Prints stack traces for each task.
+
+The scripts require symbols from the Lustre and LNet modules to be loaded
+(mod command in crash). A script is invoked with the command
+"epython <script name>" followed by any parameters. To get usage information
+for a particular script, enter the following at the crash prompt:
+   epython <script_name> -h
diff --git a/contrib/debug_tools/epython_scripts/cfs_hashes.py b/contrib/debug_tools/epython_scripts/cfs_hashes.py
new file mode 100644 (file)
index 0000000..4ca16fb
--- /dev/null
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+
+"""
+Utility to display Lustre cfs_hash tables
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+import argparse
+
+import lustrelib as ll
+
+description_short = 'Displays summary of cfs_hash tables'
+
CFS_HASH_THETA_BITS = 10

def cfs_hash_cur_theta(hs):
    """Return the current load factor (theta) of hash table *hs*.

    Theta is a fixed-point value scaled by 2**CFS_HASH_THETA_BITS,
    i.e. item count per 2**hs_cur_bits bucket slots.
    """
    hs_cnt = readSU('atomic_t', hs.hs_count).counter
    return (hs_cnt << CFS_HASH_THETA_BITS) >> hs.hs_cur_bits

def cfs_hash_theta_int(theta):
    """Return the integer part of a fixed-point theta value."""
    return theta >> CFS_HASH_THETA_BITS

def cfs_hash_theta_frac(theta):
    """Return the fractional part of theta in thousandths (0..999)."""
    return ((theta * 1000) >> CFS_HASH_THETA_BITS) - \
           (cfs_hash_theta_int(theta) * 1000)

def cfs_hash_format_theta(theta):
    """Format a fixed-point theta as 'int.frac' with a 3-digit fraction.

    The fraction is zero padded so that, e.g., 1 + 4/1000 renders as
    '1.004' rather than the misleading '1.4' (matches the kernel's
    "%d.%03d" theta formatting in libcfs cfs_hash debug output).
    """
    return "%d.%03d" % (cfs_hash_theta_int(theta),
                        cfs_hash_theta_frac(theta))
+
def print_theta(hs):
    """Print the current theta of hash table *hs*, raw and formatted."""
    theta = cfs_hash_cur_theta(hs)
    print("Theta: %d %s" % (theta, cfs_hash_format_theta(theta)))

def print_thetas(name, hashtable):
    """Print theta for the cfs_hash at address *hashtable*, if non-NULL."""
    hs = readSU('struct cfs_hash', hashtable)
    if hs:
        print_theta(hs)

def print_separator(count):
    """Print a separator line of *count* '=' characters."""
    print("=" * count)

def print_hash_labels():
    """Print the column header matching print_hash_summary() output."""
    print("%-15s %-17s\t %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
          "%-5s %-5s %-5s %-5s %-11s %-11s %-11s %-5s" %
          ("name", "cfs_hash", "cnt", "rhcnt", "xtr", "cur", "min", "max", "rhash",
           "bkt", "nbkt", "nhlst", "flags", "theta", "minT", "maxT", "bktsz"))
+
def print_hash_summary(name, hashtable):
    """Print a one-line summary of the cfs_hash at address *hashtable*.

    Columns match the header emitted by print_hash_labels().  A NULL
    table prints only the name and address.
    """
    hs = readSU('struct cfs_hash', hashtable)
    if hs:
        # Read the atomic counter once instead of twice (the original
        # also computed an unused hs_refcount value).
        hs_cnt = readSU('atomic_t', hs.hs_count).counter
        print("%-15s %-17x\t %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5d %-5x %-11s %-11s %-11s %-5d" %
              (name, Addr(hs),
               hs_cnt,
               hs.hs_rehash_count,
               hs.hs_extra_bytes,
               hs.hs_cur_bits,
               hs.hs_min_bits,
               hs.hs_max_bits,
               hs.hs_rehash_bits,
               hs.hs_bkt_bits,
               ll.CFS_HASH_NBKT(hs),
               ll.CFS_HASH_BKT_NHLIST(hs),
               hs.hs_flags,
               cfs_hash_format_theta(cfs_hash_cur_theta(hs)),
               cfs_hash_format_theta(hs.hs_min_theta),
               cfs_hash_format_theta(hs.hs_max_theta),
               ll.cfs_hash_bucket_size(hs)))
    else:
        print("%-15s %-17x" % (name, Addr(hs)))
+
def obd_print_export_hashes(obd, exp_list, fld):
    """Print the per-export lock hash tables for every export on *exp_list*."""
    print("\nExport list head %x %s" % (exp_list, fld))
    for exp in readSUListFromHead(exp_list, fld, 'struct obd_export'):
        print_hash_summary('exp_lock', exp.exp_lock_hash)
        print_hash_summary('exp_flock', exp.exp_flock_hash)

def obd_print_one_device_hashes(obd):
    """Print all hash tables owned by one obd_device.

    Returns 0 on success, 1 if the device name cannot be decoded;
    callers use a non-zero return to stop walking the device table.
    """
    try:
        nm = ll.obd2str(obd)
    except Exception:
        # Unreadable/uninitialized obd_device slot: tell caller to stop.
        return 1

    print("obd_device %-17x %-22s" % (Addr(obd), nm))
    print_hash_labels()

    print_hash_summary("uuid", obd.obd_uuid_hash)
    print_hash_summary("nid", obd.obd_nid_hash)
    print_hash_summary("nid_stats", obd.obd_nid_stats_hash)

    if "clilov" in nm:
        print_hash_summary("lov_pools", obd.u.lov.lov_pools_hash_body)
    elif "clilmv" in nm:
        pass        # LMV devices have no extra per-device hash to show
    else:
        print_hash_summary("cl_quota0", obd.u.cli.cl_quota_hash[0])
        print_hash_summary("cl_quota1", obd.u.cli.cl_quota_hash[1])

#    obd_print_export_hashes(obd, obd.obd_exports, 'exp_obd_chain')
#    obd_print_export_hashes(obd, obd.obd_exports_timed, 'exp_obd_chain_timed')
    print("")
    return 0
+
def obd_devs_hash():
    """Walk the global 'obd_devs' table, printing hashes for each device."""
    devices = readSymbol('obd_devs')

    for obd in devices:
        if obd_print_one_device_hashes(obd) != 0:
            break
    print_separator(150)

def ldlm_print_ns_hashes(ns, type):
    """Print the resource hash of every namespace on list symbol *ns*."""
    ns_list = readSymbol(ns)
    print("\n%s namespaces-resources" % type)
    print_hash_labels()
    for ns in readSUListFromHead(ns_list, 'ns_list_chain', 'struct ldlm_namespace'):
        nm = ll.obd2str(ns.ns_obd)[0:20]
        print_hash_summary(nm, ns.ns_rs_hash)

def ldlm_namespaces_hash():
    """Print resource hashes for client, inactive and server namespaces."""
    ldlm_print_ns_hashes('ldlm_cli_active_namespace_list', "Client")
    ldlm_print_ns_hashes('ldlm_cli_inactive_namespace_list', "Inactive")
    ldlm_print_ns_hashes('ldlm_srv_namespace_list', "Server")

def lu_sites_hashes():
    """Print the object hash of every lu_site on the global 'lu_sites' list."""
    lu_sites = readSymbol('lu_sites')
    print_hash_labels()
    for site in readSUListFromHead(lu_sites, 'ls_linkage', 'struct lu_site'):
        print_hash_summary("lu_site_vvp", site.ls_obj_hash)
    print("")
+
+
def global_hashes():
    """Print the global connection, jobid and cl_env hash tables."""
    print_hash_labels()
    print_hash_summary("conn_hash", readSymbol('conn_hash'))
    # These symbols only exist on some Lustre versions; probe first.
    if symbol_exists('jobid_hash'):
        print_hash_summary("jobid_hash", readSymbol('jobid_hash'))
    if symbol_exists('cl_env_hash'):
        print_hash_summary("cl_env_hash", readSymbol('cl_env_hash'))
    print("")

if __name__ == "__main__":
    description = "Displays summary of hash tables in 'obd_devs'"
    parser = argparse.ArgumentParser(description=description)
    args = parser.parse_args()

    global_hashes()
    lu_sites_hashes()
    obd_devs_hash()
    ldlm_namespaces_hash()
diff --git a/contrib/debug_tools/epython_scripts/cfs_hnodes.py b/contrib/debug_tools/epython_scripts/cfs_hnodes.py
new file mode 100644 (file)
index 0000000..b47a81d
--- /dev/null
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+
+"""
+Utility to display a Lustre cfs_hash table
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+#from struct import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = "Displays the specified Lustre hash table "
+
+DEPTH = 3
+RULER = "........................................"
+
# Map a cfs_hash_ops.hs_object callback symbol to the container struct
# type and the member name of its embedded hash node.
hash_objects = {
    'ldlm_res_hop_object': ['struct ldlm_resource', 'lr_hash'],
    'jobid_object':        ['struct jobid_to_pid_map', 'jp_hash'],
    'lu_obj_hop_object':   ['struct lu_object_header', 'loh_hash'],
    'uuid_export_object':  ['struct obd_export', 'export_uuid_hash'],
    'nid_export_object':   ['struct obd_export', 'exp_nid_hash'],
    'nidstats_object':     ['struct nid_stat', 'nid_hash'],
    'gen_export_object':   ['struct obd_export', 'exp_gen_hash'],
    'oqi_object':          ['struct osc_quota_info', 'oqi_hash'],
    'conn_object':         ['struct ptlrpc_connection', 'c_hash']}

def get_hash_object(hs, hnode):
    """Return '<struct type> <address>' of the object containing *hnode*.

    Returns '' when the table's hs_object callback is not recognized.
    """
    entry = hash_objects.get(addr2sym(hs.hs_ops.hs_object))
    if entry is None:
        return ''
    struct_name, member = entry
    return "%s %x" % (struct_name,
                      Addr(hnode) - member_offset(struct_name, member))
+
def dump_hnodes(hs, hlist, hnode, depth=0, ruler=RULER):
    """Print each hlist_node on one hash chain with its container object."""
    # NOTE(review): pykdump appears to overload '&' here so the loop stops
    # when the chain wraps back to the list head — TODO confirm.
    while(hnode != hlist & hnode):
        s = get_hash_object(hs, hnode)
        print("%*.*shlist_node 0x%x  %s" % (depth, depth, ruler, Addr(hnode), s))
        hnode = hnode.next

def dump_hlist(hs, hlist, depth=0, ruler=RULER):
    """Print one non-empty hash chain head followed by its nodes."""
    if hlist.first:
        print("%*.*shlist_head 0x%x" % (depth, depth, ruler, Addr(hlist)))
        dump_hnodes(hs, hlist, hlist.first, depth + DEPTH, ruler)

def dump_hash_bucket(hs, bd_bkt, depth=0, ruler=RULER):
    """Print one cfs_hash bucket and each of its hash chains."""
    print("%*.*scfs_hash_bucket 0x%x" % (depth, depth, ruler, Addr(bd_bkt)))
    for bd_offset in range(ll.CFS_HASH_BKT_NHLIST(hs)):
        hlist = ll.cfs_hash_hhead(hs, bd_bkt, bd_offset)
        if hlist:
            dump_hlist(hs, hlist, depth + DEPTH, ruler)

def dump_hash_table(hs):
    """Print an entire cfs_hash table, bucket by bucket."""
    print("cfs_hash@0x%x" % Addr(hs))

    for bd_bkt in ll.cfs_hash_get_buckets(hs):
        dump_hash_bucket(hs, bd_bkt, DEPTH, RULER)
+
if __name__ == "__main__":
    # Parse the single positional argument: the cfs_hash address.
    parser = argparse.ArgumentParser(
        description="Displays the specified Lustre hash table ")
    parser.add_argument("htable", default=False, type=toint,
                        help="address of a cfs_hash struct")
    args = parser.parse_args()

    dump_hash_table(readSU('struct cfs_hash', args.htable))
diff --git a/contrib/debug_tools/epython_scripts/crashlib/addrlib.py b/contrib/debug_tools/epython_scripts/crashlib/addrlib.py
new file mode 100644 (file)
index 0000000..d06a769
--- /dev/null
@@ -0,0 +1,22 @@
+
+"""
+Set of routines for manipulating addresses.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+import crashlib.cid
+import crashlib.cid.machdep_table
+
+# --------------------------------------------------------------------------
+
def ptov(physaddr):
    """Convert a physical address to a kernel virtual address."""
    mdtbl = crashlib.cid.mdtbl
    return int(physaddr) + mdtbl.kvbase

def phys2pfn(physaddr):
    """Convert a physical address to a page frame number (page offset)."""
    mdtbl = crashlib.cid.mdtbl
    return physaddr >> mdtbl.pageshift

def pfn2phys(pfn):
    """Convert a page frame number (page offset) into a physical address."""
    mdtbl = crashlib.cid.mdtbl
    return pfn << mdtbl.pageshift
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py b/contrib/debug_tools/epython_scripts/crashlib/cid/__init__.py
new file mode 100644 (file)
index 0000000..e199455
--- /dev/null
@@ -0,0 +1,19 @@
+
+"""
+Provide access to internal crash data.
+Copyright 2014 Cray Inc.  All Rights Reserved
+
+Much of the data this package provides is available by reading the dump file,
+but some is information that crash 'knows' about the kernel based on the
+kernel version.
+
+The data is generally extracted by executing various crash commands, parsing
+the output and storing it within a Python object.
+"""
+
+
class ParseError(Exception):
    """Exception indicating an error while parsing crash information."""

    def __init__(self, msg=None):
        # Derive from Exception: raising an old-style class instance is
        # Python-2 only; under Python 3 'raise ParseError(...)' would
        # fail with "exceptions must derive from BaseException".
        super(ParseError, self).__init__(msg)
        self.message = msg
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/kernel_table.py
new file mode 100644 (file)
index 0000000..0dc65e0
--- /dev/null
@@ -0,0 +1,80 @@
+
+"""
+Provide access to kernel_table data.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class KernelInfo:
    """Provide access to the crash kernel_table.

    The data is collected by parsing the output of the 'help -k' command.
    """

    def __init__(self):
        """Extract selected kernel data from crash 'help -k' output.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError if any expected field is missing.
        """

        # 'help -k' prints bare numbers, so the number base of every
        # field we keep must be known in advance.
        base_of = {}
        for field in ('cpus', 'NR_CPUS', 'kernel_NR_CPUS'):
            base_of[field] = 10
        for field in ('flags', 'stext', 'etext', 'stext_init', 'etext_init',
                      'init_begin', 'init_end', 'end', 'module_list',
                      'kernel_module'):
            base_of[field] = 16

        # Typical 'help -k' output:
        #          flags: b02600
        #          stext: ffffffff810001f0
        #           cpus: 48
        #        NR_CPUS: 4096 (compiled-in to this version of crash)
        #    module_list: ffffffffa05c96e0
        #  ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -k').splitlines():
            tokens = line.split()
            if len(tokens) < 2:
                continue
            field = tokens[0].rstrip(':')
            base = base_of.get(field)
            if base is not None:
                self.__dict__[field] = int(tokens[1], base)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != len(base_of):
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    len(base_of), len(self.__dict__)))

# --------------------------------------------------------------------------

# Create a shared instance.

crashlib.cid.krntbl = KernelInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/machdep_table.py
new file mode 100644 (file)
index 0000000..f904a4a
--- /dev/null
@@ -0,0 +1,98 @@
+
+"""
+Provide access to machine-dependent data.
+Copyright 2014, 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class MachDepInfo:
    """Provide access to the crash machdep_table.

    The data is collected by parsing the output of the 'help -m' command.
    """

    def __init__(self):
        """Extract selected machine-dependent data from crash 'help -m'.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError for an unsupported machine type or when
        not every expected field was found.
        """

        # 'help -m' prints bare numbers, so the number base in use for
        # each numeric field must be known in advance.
        fieldBase = {
            'bits' : 10, 'flags' : 16, 'hz' : 10, 'identity_map_base' : 16,
            'kvbase' : 16, 'last_pgd_read' : 16, 'last_pmd_read' : 16,
            'last_ptbl_read' : 16, 'machspec' : 16, 'max_physmem_bits' : 10,
            'memsize' : 10, 'mhz' : 10, 'modules_vaddr' : 16, 'nr_irqs' : 10,
            'page_offset' : 16, 'pagemask' : 16, 'pageshift' : 10,
            'pagesize' : 10, 'pgd' : 16, 'pmd' : 16, 'ptbl' : 16,
            'ptrs_per_pgd' : 10, 'section_size_bits' : 10,
            'sections_per_root' : 10, 'stacksize' : 10, 'userspace_top' : 16,
            'vmalloc_end' : 16, 'vmalloc_start_addr' : 16, 'vmemmap_end' : 16,
            'vmemmap_vaddr' : 16
        }

        if sys_info.machine in ("x86_64", "k1om", "x86"):
            # Additional x86_64 fields.
            # Attic: 'last_pml4_read': 16, 'last_umpl_read': 16,
            #        'umpl': 16, 'pml4': 16
            fieldBase.update({
                'irq_eframe_link' : 10, 'irqstack' : 16,
                'page_protnone' : 16, 'phys_base' : 16,
                'thread_return' : 16, 'vsyscall_page' : 16,
            })
        elif sys_info.machine == 'aarch64':
            pass    # no additional aarch64 fields yet
        else:
            raise crashlib.cid.ParseError(
                    'Invalid machine type {0}.'.format(sys_info.machine))

        expected_key_count = len(fieldBase)

        # Typical 'help -m' output:
        #              kvbase: ffff880000000000
        #            pagesize: 4096
        #           pageshift: 12
        #  ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -m').splitlines():
            tokens = line.split()
            if len(tokens) < 2:
                continue

            field = tokens[0].rstrip(':')
            base = fieldBase.get(field)
            if base is not None:
                self.__dict__[field] = int(tokens[1], base)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != expected_key_count:
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    expected_key_count, len(self.__dict__)))

# --------------------------------------------------------------------------

# Create a shared instance.

crashlib.cid.mdtbl = MachDepInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py b/contrib/debug_tools/epython_scripts/crashlib/cid/page_flags.py
new file mode 100644 (file)
index 0000000..20cb0b7
--- /dev/null
@@ -0,0 +1,61 @@
+
+"""
+Provide access to the page flags known by crash.
+Copyright 2014 Cray Inc.  All Rights Reserved
+
+The data is gathered from the 'kmem -g' command.
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
+
class PageFlag:
    """A single page flag, held as both a bit shift and a bit mask.

    Note: this is really a generic bit/bitmask helper; it lives here
    until a shared abstraction exists elsewhere.
    """

    def __init__(self, name, shift_val):
        self.name = name
        self.shift = int(shift_val)
        self.mask = 1 << self.shift

    def __call__(self):
        """Calling the flag object returns its bit mask."""
        return self.mask
+
+
class MachPageFlags:
    """Extract the machine-specific page flags from crash.

    Instantiating this class yields an object with one PageFlag data
    member per kernel page flag that crash knows about for the running
    kernel version.  Example:

        page = readSU('struct page', page_addr)
        kpf = MachPageFlags()
        if page.flags & kpf.PG_slab.mask:
            ...
    """

    def __init__(self):
        """Parse the output of the crash 'kmem -g' command.

        Expected output shape:
            PAGE-FLAG       BIT  VALUE
            PG_locked         0  0000001
            PG_waiters        1  0000002
        """
        for line in exec_crash_command('kmem -g').splitlines():
            fields = line.split()
            # Keep only 'PG_*' rows; skip the header and short lines.
            if len(fields) >= 3 and fields[0].startswith('PG_'):
                flag_name = fields[0]
                self.__dict__[flag_name] = PageFlag(flag_name, int(fields[1]))

# --------------------------------------------------------------------------

# Create a shared instance of the above class.

crashlib.cid.pgflags = MachPageFlags()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py b/contrib/debug_tools/epython_scripts/crashlib/cid/phys_mem_map.py
new file mode 100644 (file)
index 0000000..9eff1dc
--- /dev/null
@@ -0,0 +1,111 @@
+
+"""
+Provide access to physical memory information.
+Copyright 2014, 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class Resource:
    """Wrap one 'struct resource'; walking the tree reproduces /proc/iomem."""

    def __init__(self, resource):
        self.resource = resource
        self.lvl = 0    # tree depth; filled in during _walk()

    def resource_start(self):
        return self.resource.start
    start = property(resource_start)

    def resource_end(self):
        return self.resource.end
    end = property(resource_end)

    def resource_name(self):
        return self.resource.name
    name = property(resource_name)

    def addr(self):
        """Address of the wrapped struct resource."""
        return Addr(self.resource)

    def __str__(self):
        # Same "start-end : name" layout as /proc/iomem.
        return '{0:08x}-{1:08x} : {2}'.format(self.start, self.end, self.name)

    def get_child(self):
        """First child resource wrapped in Resource, or None."""
        child = self.resource.child
        return Resource(child) if child else None

    def get_sibling(self):
        """Next sibling resource wrapped in Resource, or None."""
        sibling = self.resource.sibling
        return Resource(sibling) if sibling else None

    def _walk(self, lvl=0):
        """Yield self, then children depth-first, then siblings."""
        self.lvl = lvl
        yield self
        child = self.get_child()
        if child is not None:
            for res in child._walk(lvl + 1):
                yield res
        sibling = self.get_sibling()
        if sibling is not None:
            for res in sibling._walk(lvl):
                yield res

    def iomem(self):
        """Return a /proc/iomem tree generator (root itself is skipped)."""
        return self.get_child()._walk()

    def is_System_RAM(self):
        """True if this resource describes a "System RAM" range."""
        return self.name == "System RAM"

def get_iomem():
    """Generator wrapper: walk the kernel's global iomem resource tree."""
    return Resource(readSymbol('iomem_resource')).iomem()
+
class MemMapEntry:
    """Define a single entry for a memory map.

    A MemMapEntry consists of three attributes:

        start - first address within the range
        end   - first address past the end of the range
        name  - name of address space type
    """
    # Class-level defaults, kept for compatibility with existing callers.
    start = None
    end = None
    name = None

    def __init__(self, start_addr, end_addr, name_str):
        self.start = int(start_addr)
        self.end = int(end_addr)
        self.name = name_str
+
+
def GetPhysMemMap():
    """Define a physical memory map.

    Returns the physical address map as a list of MemMapEntry objects,
    one for every "System RAM" range found in the iomem resource tree.
    """
    # iomem end addresses are inclusive; MemMapEntry.end is exclusive,
    # hence the +1.
    return [MemMapEntry(ent.start, ent.end + 1, ent.name)
            for ent in get_iomem() if ent.is_System_RAM()]

# --------------------------------------------------------------------------

# Create shared objects.

crashlib.cid.physmap = GetPhysMemMap()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py b/contrib/debug_tools/epython_scripts/crashlib/cid/vm_table.py
new file mode 100644 (file)
index 0000000..7760723
--- /dev/null
@@ -0,0 +1,73 @@
+
+"""
+Provide access to the crash's vm table.
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+
class VmInfo:
    """Make data from the crash vmtbl easily available."""

    def __init__(self):
        """Extract selected VM table data from crash 'help -v' output.

        Each extracted item becomes an instance attribute.  Raises
        crashlib.cid.ParseError if any expected field is missing.
        """

        # crash 'help -v' doesn't use prefixes on numbers, so we must
        # know what number base is in use for each numeric field.
        decFields = ('total_pages', 'max_mapnr', 'totalram_pages',
            'totalhigh_pages', 'num_physpages',
            'page_hash_table_len', 'kmem_max_c_num',
            'kmem_max_limit', 'kmem_max_cpus', 'kmem_cache_count',
            'kmem_cache_namelen', 'kmem_cache_len_nodes', 'PG_slab',
            'paddr_prlen', 'numnodes', 'nr_zones', 'nr_free_areas',
            'cpu_slab_type', 'nr_swapfiles', 'ZONE_HIGHMEM',
            'node_online_map_len', 'nr_vm_stat_items',
            'nr_vm_event_items')

        hexFields = ('flags', 'high_memory', 'vmalloc_start',
            'mem_map', 'page_hash_table', 'PG_reserved',
            'PG_head_tail_mask', 'slab_data', 'last_swap_read',
            'swap_info_struct', 'mem_sec', 'mem_section')

        expected_key_count = len(decFields) + len(hexFields)

        # Typical 'help -v' output:
        #         high_memory: ffff880880000000
        #       vmalloc_start: ffffc90000000000
        #             mem_map: 0
        # ...
        # Only the first value after each selected field name is used.
        for line in exec_crash_command('help -v').splitlines():
            parts = line.split()
            if len(parts) < 2:
                continue
            key = parts[0].rstrip(':')
            if key in decFields:
                self.__dict__[key] = int(parts[1], 10)
            elif key in hexFields:
                self.__dict__[key] = int(parts[1], 16)

        # If some versions of crash or the kernel don't have all the
        # fields, this check code may need to be removed or modified.
        if len(self.__dict__) != expected_key_count:
            # Bug fix: this previously raised via 'crashlib.ida', a module
            # that does not exist (cf. kernel_table.py / machdep_table.py
            # which both use crashlib.cid.ParseError); the typo would have
            # produced an AttributeError instead of the intended error.
            raise crashlib.cid.ParseError(
                'Expected {:d}, but parsed {:d} entries.'.format(
                    expected_key_count, len(self.__dict__)))

# --------------------------------------------------------------------------

# Declare a shared instance.

crashlib.cid.vmtbl = VmInfo()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/__init__.py b/contrib/debug_tools/epython_scripts/crashlib/input/__init__.py
new file mode 100644 (file)
index 0000000..3e4f0d7
--- /dev/null
@@ -0,0 +1,241 @@
+
+"""
+Input handling routines
+Copyright 2014 Cray Inc.  All Rights Reserved
+"""
+
+
+import itertools
+
+
+# Define some common integer multiplier suffixes
+
+# Powers of two; both upper and lower case are accepted.
+binary_suffixes={
+    'k': 2**10, 'K': 2**10,
+    'm': 2**20, 'M': 2**20,
+    'g': 2**30, 'G': 2**30,
+    't': 2**40, 'T': 2**40,
+    'p': 2**50, 'P': 2**50
+}
+# Memory sizes conventionally use binary multipliers.
+memory_suffixes = binary_suffixes
+
+# Powers of ten; both upper and lower case are accepted.
+decimal_suffixes={
+    'k': 10**3,  'K': 10**3,
+    'm': 10**6,  'M': 10**6,
+    'g': 10**9,  'G': 10**9,
+    't': 10**12, 'T': 10**12,
+    'p': 10**15, 'P': 10**15
+}
+# Disk sizes conventionally use decimal multipliers.
+disk_suffixes = decimal_suffixes
+
+# Bases tried by toint() by default: 0 (auto-detect from a '0b'/'0o'/'0x'
+# prefix), then plain base 16.
+default_bases = [0, 16]
+
+def toint(string, base=default_bases, suffixes=binary_suffixes):
+    """Convert to integer with flexible base and multiplier support.
+
+    Provide a way to handle input that may be in any of several number
+    bases but may not use the appropriate prefix, e.g. 'deadbeef' rather
+    than the more pedantic '0xdeadbeef'. Also provide support for
+    multiplier suffixes, such as 'K' for kilo.
+
+    Arguments:
+
+        string      - string to convert to integer
+        base        - a single number, as used in int() or an iterable
+                      of base values to try
+        suffixes    - dictionary keyed by the string suffix with a value
+                      to be used as a multiplier
+
+    The default base of [0, 16] allows the automatic recognition of numbers
+    with the standard prefixes and if that fails, tries a base 16 conversion.
+
+    Raises ValueError if no base/suffix combination yields an integer,
+    or if any requested base is out of range for int().
+    """
+    try:
+        bases = list(base)
+    except TypeError:
+        # Object isn't iterable, so create one that is
+        bases = [base]
+
+    # Validate up front so a bad base isn't masked by the ValueError
+    # retry logic below.
+    for b in bases:
+        if not (b == 0 or 2 <= b <= 36):
+            raise ValueError(
+                "toint() base {!s:s} must be >= 2 and <= 36".format(b))
+
+    multiplier = 1
+    try:
+        # Second iteration is after removing any suffix.  This way, if
+        # a suffix happens to contain valid numeric characters, we'll
+        # try the numeric interpretation before we try their multiplier
+        # meaning, e.g. 'g' is a valid numeric value in base 17.
+        for i in xrange(2):
+            for b in bases:
+                try:
+                    return int(string, b) * multiplier
+                except ValueError:
+                    pass
+
+            if i != 0:
+                # The suffix was already stripped on the first pass and
+                # the conversion still failed; give up.
+                raise ValueError
+
+            # Find a suffix that matches the end of the string and use it
+            for k, v in suffixes.iteritems():
+                if string.endswith(k):
+                    multiplier = v
+                    string = string[0:-len(k)]
+                    break
+            else:
+                # No suffix matched; nothing more to try.
+                raise ValueError
+
+    except ValueError:
+        # Sort the suffixes so the error message is deterministic.
+        suffix_list = suffixes.keys()
+        suffix_list.sort()
+        raise ValueError(
+            "invalid literal '{:s}' for toint() with base {!s:s} "
+            "and suffixes {!s:s}".format(string, list(bases), suffix_list))
+
+
+def hex2int(string):
+    """Wrapper for toint() which prefers base 16 input
+
+    This function is useful in situations where a callable must be passed,
+    such as with argparse.add_argument(type=hex2int, ...
+
+    Note that unprefixed digits are read as hex ('10' -> 16), while
+    explicit prefixes such as '0o10' are still honored via base 0.
+    """
+    return toint(string, base=[16, 0])
+
+
+def to_rangelist(args, default=xrange(0), base=[0,16],
+                  suffixes=binary_suffixes):
+    """Convert a bunch of range list strings into a list of ranges
+
+    The arguments are:
+
+        args     - iterable containing rangelist strings
+        default  - iterator to return if args is empty
+        base     - number base to use for integer conversion
+        suffixes - integer multiplier suffixes
+
+    Each arg is taken to be a range list, where a range list may be:
+
+        rangelist ::= range[,range]...
+        range     ::= <first>-<last> | <first>#<count> | <value>
+
+    where the range first-last is inclusive.
+
+    Returns a list of xrange objects, or 'default' when args is empty.
+    Malformed range strings raise ValueError via toint().
+
+    NOTE(review): 'base' has a mutable default list; toint() only reads
+    it, so this is safe as written.
+    """
+    if len(args) == 0:
+        return default
+
+    ranges = []
+    for range_list_str in args:
+        # Each argument may itself be a comma-separated list of ranges.
+        range_strs = range_list_str.split(',')
+        for range_str in range_strs:
+            if "-" in range_str:
+                # <first>-<last> is inclusive, so extend the end by one.
+                fields = range_str.split('-', 1)
+                start = toint(fields[0], base, suffixes=suffixes)
+                end = toint(fields[1], base, suffixes=suffixes) + 1
+                ranges.append(xrange(start, end))
+            elif "#" in range_str:
+                # <first>#<count>
+                fields = range_str.split('#', 1)
+                start = toint(fields[0], base, suffixes=suffixes)
+                end = start + toint(fields[1], base, suffixes=suffixes)
+                ranges.append(xrange(start, end))
+            else:
+                # A lone <value> becomes a one-element range.
+                start = toint(range_str, base, suffixes=suffixes)
+                end = start + 1
+                ranges.append(xrange(start, end))
+
+    return ranges
+
+
+def iter_rangestr(*args, **kwargs):
+    """Convert a bunch of range list strings into a single iterator
+
+    The arguments are the same as for to_rangelist().  The individual
+    ranges are chained into one flat iterator over all values.
+    """
+    return itertools.chain(*to_rangelist(*args, **kwargs))
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    # toint()
+    class Test_toint(unittest.TestCase):
+        """Verify the toint() conversion function"""
+        def test_base_zero(self):
+            self.assertEqual(toint('0b10', 0), 2)
+            self.assertEqual(toint('0o10', 0), 8)
+            self.assertEqual(toint('10', 0), 10)
+            self.assertEqual(toint('0x10', 0), 16)
+
+        def test_base_out_of_range(self):
+            self.assertRaises(ValueError, toint, '10', -1)
+            self.assertRaises(ValueError, toint, '10',  1)
+            self.assertRaises(ValueError, toint, '10', 37)
+
+        def test_base_search(self):
+            bases = [0, 16]
+            self.assertEqual(toint('10', bases), 10)
+            self.assertEqual(toint('f', bases), 15)
+
+            self.assertEqual(toint('0b10', bases), 2)
+            self.assertEqual(toint('0o10', bases), 8)
+            self.assertEqual(toint('10', bases), 10)
+            self.assertEqual(toint('0x10', bases), 16)
+
+        def test_suffixes(self):
+            for k, v in binary_suffixes.iteritems():
+                self.assertEqual(toint('0b10'+k), 0b10*v)
+                self.assertEqual(toint('0o10'+k), 0o10*v)
+                self.assertEqual(toint('10'+k), 10*v)
+                self.assertEqual(toint('0x10'+k), 0x10*v)
+
+        def test_suffix_number_overlap(self):
+            # Verify a valid numeric isn't used as a suffix
+            # ('g' is a digit in base 17, so '1g' == 17 + 16 == 33).
+            self.assertEqual(toint('1g', 17), 33)
+            self.assertEqual(toint('1gk', 17), 33*binary_suffixes['k'])
+
+
+    # hex2int()
+    class Test_hex2int(unittest.TestCase):
+        """Verify the hex2int() function"""
+        def test_explicit_base(self):
+            """Verify that explicit base syntax is honored"""
+            self.assertEqual(hex2int('0x10'), 16)
+            self.assertEqual(hex2int('0o10'), 8)
+
+        def test_default_base(self):
+            """Verify that base 16 is preferred"""
+            # '0b10' parses as hex 0xb10 == 2832, not binary 2.
+            self.assertEqual(hex2int('10'), 16)
+            self.assertEqual(hex2int('0b10'), 2832)
+
+
+    # iter_rangelist()
+    class Test_iter_rangelist(unittest.TestCase):
+        """Test both iter_rangestr and the underlying to_rangelist."""
+        def test_good_single_ranges(self):
+            self.assertEqual(list(iter_rangestr([])), [])
+            self.assertEqual(list(iter_rangestr(['1-2'])), list(xrange(1,3)))
+            self.assertEqual(list(iter_rangestr(['1#2'])), list(xrange(1,3)))
+            self.assertEqual(list(iter_rangestr(['1'])), list(xrange(1,2)))
+
+        def test_good_multiple_ranges(self):
+            test_rangestrs = [
+                # Test params,        Expected result
+                (['1', '3-5', '1#2'], [1, 3, 4, 5, 1, 2]),
+                ]
+
+            for ranges, expected in test_rangestrs:
+                # Test the ranges as separate list elements
+                self.assertEqual(list(iter_rangestr(ranges)), expected)
+
+                # Test the ranges joined by commas
+                joined = [','.join(ranges)]
+                self.assertEqual(list(iter_rangestr(joined)), expected)
+
+        def test_bad_single_ranges(self):
+            self.assertRaises(ValueError, iter_rangestr, ['1#2#3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1#2-3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1-2#3'])
+            self.assertRaises(ValueError, iter_rangestr, ['1-2-3'])
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py b/contrib/debug_tools/epython_scripts/crashlib/input/argparse_ext.py
new file mode 100644 (file)
index 0000000..a052f13
--- /dev/null
@@ -0,0 +1,231 @@
+
+"""
+Module which provides extensions for the standard Python argparse module.
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+import argparse
+import copy
+
+from argparse import _ensure_value, Action, ArgumentTypeError
+
+
+class ExtendAction(Action):
+    """Action to extend a list of argument values
+
+    This action is similar to the standard AppendAction, but uses the
+    extend() attribute of lists rather than the append() attribute.  As
+    such, it also has an additional requirement:
+
+    -   This action must receive an iterable 'values' argument from the
+        parser.  There are two ways to make this happen:
+
+        1.  Use type= to produce an iterable, e.g. type=str or type=list
+        2.  Use nargs= to cause the parser to produce a list, which it
+            does for any nargs= setting that is not None (default) and
+            is not '?'
+    """
+
+    def __call__(self, parser, namespace, values, option_string):
+        """Extend the list at namespace.<dest> with each element of values.
+
+        Raises ArgumentTypeError if 'values' is not iterable.
+        """
+        # Copy so that a list shared as a default value is never
+        # mutated in place.
+        items = copy.copy(_ensure_value(namespace, self.dest, []))
+
+        try:
+            items.extend(values)
+        except TypeError:
+            # Assume the TypeError is because values is not iterable
+            raise ArgumentTypeError(
+                "argument type '{:s}' is not iterable".format(
+                    type(values).__name__))
+
+        setattr(namespace, self.dest, items)
+
+
+def str2list(string, sep=',', totype=None, choices=None):
+    """Split a string into a list with conversion and validation
+
+    Split a string into a list, optionally convert each element to a
+    given type and optionally validate that all resulting values are
+    in a collection of valid values.
+
+    Arguments:
+        string   - string to split; anything with a split() method works
+        sep      - separator to split on
+        totype   - optional callable applied to each element
+        choices  - optional container of permitted (converted) values
+
+    Raises ArgumentTypeError on any split, conversion, or choice failure.
+    """
+
+    # For pluralizing messages that may cover several bad values.
+    plural = {False: '', True: 's'}
+
+    # Values should be string or an iterable container of strings.
+    # Split values on the separator into a list
+    try:
+        lst = string.split(sep)
+    except AttributeError:
+        raise ArgumentTypeError(
+            "argument type '{:s}' does not have split() attribute".format(
+                type(string).__name__))
+
+    # Perform type conversion
+    if totype is not None:
+        # Collect all failures so they can be reported together.
+        errs = []
+        for i, v in enumerate(lst):
+            try:
+                lst[i] = totype(v)
+            except (TypeError, ValueError):
+                errs.append(v)
+        if errs:
+            msg = "invalid {:s} value{:s}: {!r:s}".format(
+                totype.__name__, plural[len(errs) > 1], errs)
+            raise ArgumentTypeError(msg)
+
+    # Verify each separate value
+    if choices is not None:
+        errs = filter(lambda x:x not in choices, lst)
+        if errs:
+            msg = "invalid choice{:s}: {!r:s} (choose from {!s:s})".format(
+                plural[len(errs) > 1], errs, choices)
+            raise ArgumentTypeError(msg)
+
+    return lst
+
+
+def tolist(sep=',', totype=None, choices=None):
+    """Returns a parameterized callable for argument parser type conversion
+
+    This function returns a function which accepts a single argument at
+    call time and which uses the supplied arguments to modify its conversion
+    behavior, e.g. parser.add_argument(..., type=tolist(totype=int)).
+    """
+    # Bind the configuration into a one-argument callable for argparse.
+    return lambda x:str2list(x, sep=sep, totype=totype, choices=choices)
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    class Test_Action_Base(unittest.TestCase):
+        """Create a base class for testing argparse Action classes"""
+
+        def setUp(self):
+            """Create the ExtendAction object and args Namespace"""
+            self.action = ExtendAction([], dest='dest')
+            self.args   = argparse.Namespace()
+
+        def actionRun(self, values):
+            """Run the Action instance using values"""
+            self.action(None, self.args, values, '')
+
+        def actionEqual(self, values, expected):
+            """Run the Action and check the expected results"""
+            self.actionRun(values)
+            self.assertEqual(self.args.dest, expected)
+
+        def actionArgTypeErr(self, values):
+            """Run the Action and verify it raises ArgumentTypeError"""
+            self.assertRaises(
+                ArgumentTypeError, self.action, None, self.args, values, '')
+
+
+    class Test_ExtendAction(Test_Action_Base):
+        """Test the ExtendAction class"""
+
+        def test_non_iterable(self):
+            """Test ExtendAction with a non-iterable type
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None, type=int ...)
+
+                parser.parse_args(['-z', '0'])
+            """
+            self.actionArgTypeErr(0)
+
+        def test_single_value(self):
+            """Test ExtendAction with a single value
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None ...)
+
+                parser.parse_args(['-z', 'a'])
+            """
+            self.actionEqual('a', ['a'])
+
+        def test_single_string(self):
+            """Test ExtendAction with a single multi-character string
+
+            A bare string is iterable, so it is extended char-by-char.
+            This is similar to:
+                parser.add_argument('-z', nargs=None ...)
+
+                parser.parse_args(['-z', 'abc'])
+            """
+            self.actionEqual('abc', ['a', 'b', 'c'])
+
+        def test_single_value_multiple_calls(self):
+            """Test ExtendAction with a single value and multiple calls
+
+            This is similar to:
+                parser.add_argument('-z', nargs=None, type=int ...)
+
+                parser.parse_args(['-z', 'a', '-z', 'b'])
+            """
+            self.actionEqual('a', ['a'])
+            self.actionEqual('b', ['a', 'b'])
+
+        def test_value_list(self):
+            """Test ExtendAction with a value list
+
+            This is similar to:
+                parser.add_argument('-z', nargs=1 ...)
+
+                parser.parse_args(['-z', 'abc'])
+            """
+            self.actionEqual(['abc'], ['abc'])
+
+        def test_value_list_multiple_calls(self):
+            """Test ExtendAction with a single value and multiple calls
+
+            This is similar to:
+                parser.add_argument('-z', nargs=1 ...)
+
+                parser.parse_args(['-z', 'abc', '-z', 'def'])
+            """
+            self.actionRun(['abc'])
+            self.actionEqual(['def'], ['abc', 'def'])
+
+        def test_value_list_multiple_values(self):
+            """Test ExtendAction with a value list of length > 1
+
+            This is similar to:
+                parser.add_argument('-z', nargs=2 ...)
+                -or-
+                parser.add_argument('-z', nargs='+' ...)
+                -or-
+                parser.add_argument('-z', nargs='*' ...)
+
+                parser.parse_args(['-z', 'abc', 'def'])
+            """
+            self.actionEqual(['abc', 'def'], ['abc', 'def'])
+
+
+    class Test_tolist_str2list(unittest.TestCase):
+        """Test the str2list and tolist conversion functions"""
+
+        def test_sep(self):
+            """Verify default and non-default separators work"""
+            f = tolist()
+            self.assertEqual(f('a,b,c'), ['a','b','c'])
+            f = tolist(sep=':')
+            self.assertEqual(f('a:b:c'), ['a','b','c'])
+
+        def test_non_iterable(self):
+            """Verify a non-iterable string is caught"""
+            f = tolist()
+            self.assertRaises(ArgumentTypeError, f, 0)
+
+        def test_type_conversion(self):
+            """Verify type conversion works properly"""
+            f = tolist(totype=int)
+            self.assertEqual(f('0,1,2'), [0, 1, 2])
+            self.assertRaises(
+                ArgumentTypeError, f, '1,z,2,q')
+
+        def test_choices(self):
+            """Verify the choices validation works properly"""
+            f = tolist(totype=int, choices=[0, 1, 2, 3])
+            self.assertEqual(f('0,1,2'), [0, 1, 2])
+            self.assertRaises(ArgumentTypeError, f, '0,5,2')
+
+
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py b/contrib/debug_tools/epython_scripts/crashlib/input/enumtools.py
new file mode 100644 (file)
index 0000000..f9373c9
--- /dev/null
@@ -0,0 +1,211 @@
+
+"""
+Routines for handling enums (or other symbolic names)
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+import uflookup
+
+class NameSet:
+    """Two-way translation between int values (enums, #defines) and strings.
+    Also provides access to value by e.g.:
+    vms = NameSet()  # vmstat nameset
+    vms.AddName("NR_FILE_MAPPED", 8)
+    vms.NR_FILE_MAPPED == 8
+
+    The advantages over just using a dict include:
+
+    * Define the values once, and get value->string, string->value,
+      and python identifier ns.<name> as above.
+    * The auto-incrementing _next_value
+      """
+
+    def __init__(self, mapping=None):
+        """Create and initialize a NameSet object
+
+        Arguments:
+            mapping:    if specified, provides a mapping object, e.g. dict,
+                        that supplies the initial key(name)/value pairs.
+        """
+        self.value_to_name = {}
+        self.name_to_value = {}
+
+        self._next_value = 0
+#        self._sorted_values = []
+#        self._sorted_names = []
+
+        if mapping is not None:
+            self.addMap(mapping)
+
+    def addName(self, name, value=None):
+        """Add a single name, by default using the next value.
+
+        If two names end up with the same value, the value will map
+        only to the first of them.
+        """
+
+        if name in self.name_to_value.keys():
+            raise ValueError("Name {0} already defined (value {1})".format(
+                name, self.name_to_value[name]))
+        try:
+            getattr(self, name)
+        except AttributeError:
+            pass
+        else:
+            raise ValueError("Value {0} already used by NameSet object!".
+                             format(value))
+
+        if value is None:
+            value = self._next_value
+        self._next_value = value + 1
+
+        self.name_to_value[name] = value
+        if value not in self.value_to_name:
+            self.value_to_name[value] = name
+ #       self._sorted_values = []
+ #       self._sorted_names = []
+
+        setattr(self, name, value)
+
+    def addNames(self, *namelist):
+        """Add a list of names, each using the respective next value"""
+        map(self.addName, namelist)
+
+    def addMap(self, mapping):
+        """Add the key/value pairs from a mapping type"""
+        for k, v in mapping.items():
+            self.addName(k, v)
+
+    def UFLookup(self, key, **kwargs):
+        return uflookup.UFLookup(self.name_to_value, key, **kwargs)
+
+#    def somethingUsingSortedArrays:
+#        if not self._sorted_values:
+#            self._sorted_values = sorted(self.value_to_name.keys())
+#            self._sorted_names = sorted(self.name_to_value.keys())
+
+
+
+if __name__ == '__main__':
+    import unittest
+
+    # Unit tests; executed when this module is run directly.
+
+    class Test_NameSet(unittest.TestCase):
+        """Test the NameSet class"""
+
+        def VerifyName(self, name, value):
+            """Verify that self.ns has name <-> value"""
+            self.assertEqual(value, self.ns.name_to_value[name])
+            self.assertEqual(value, getattr(self.ns, name))
+            self.assertEqual(name, self.ns.value_to_name[value])
+
+    class Test_Empty(Test_NameSet):
+        """Test an empty NameSet"""
+        def setUp(self):
+            self.ns = NameSet()
+
+        def test_empty_vtn(self):
+            self.assertEqual(0, len(self.ns.value_to_name))
+        def test_empty_ntv(self):
+            self.assertEqual(0, len(self.ns.name_to_value))
+
+
+    class Test_addName(Test_NameSet):
+        """Test addName"""
+        def setUp(self):
+            self.ns = NameSet()
+
+        def test_add_one_name(self):
+            self.ns.addName("FOO")
+
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.FOO)
+
+        def test_add_two_names(self):
+            self.ns.addName("BAR")
+            self.ns.addName("BAZ")
+
+            self.VerifyName("BAR", 0)
+            self.VerifyName("BAZ", 1)
+            self.assertEqual(0, self.ns.BAR)
+            self.assertEqual(1, self.ns.BAZ)
+
+
+        def test_add_namevalue(self):
+            self.ns.addName("FOO", 87)
+            self.VerifyName("FOO", 87)
+            self.assertEqual(87, self.ns.FOO)
+
+        def test_reuse_existing_value(self):
+            self.ns.addName("FOO", 2)
+            self.ns.addName("B0",0)
+            self.ns.addName("B1")
+            self.ns.addName("B2")
+            self.ns.addName("B3")
+
+            self.VerifyName("FOO", 2)
+            self.VerifyName("B0", 0)
+            self.VerifyName("B1", 1)
+            # Value 2 already maps back to FOO, so only the forward
+            # (name -> value) direction can be checked for B2.
+            self.assertEqual(2, self.ns.name_to_value["B2"])
+            self.VerifyName("B3", 3)
+
+            self.assertEqual(2, self.ns.FOO)
+            self.assertEqual(0, self.ns.B0)
+            self.assertEqual(1, self.ns.B1)
+            self.assertEqual(3, self.ns.B3)
+
+        def test_addNames(self):
+            self.ns.addNames("FOO", "BAR", "BAZ")
+            self.VerifyName("FOO", 0)
+            self.VerifyName("BAR", 1)
+            self.VerifyName("BAZ", 2)
+
+            self.assertEqual(0, self.ns.FOO)
+            self.assertEqual(1, self.ns.BAR)
+            self.assertEqual(2, self.ns.BAZ)
+
+        def test_addDupName(self):
+            self.ns.addName("FOO", 1)
+            self.assertRaises(ValueError, self.ns.addName, "FOO", 2)
+
+        def test_addDupValue(self):
+            self.ns.addName("FOO")
+            self.ns.addName("BAR", 0)
+
+            # The reverse mapping keeps the first name added for value 0.
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.name_to_value["BAR"])
+
+        def test_addMoreDupValues(self):
+            self.ns.addName("FOO")
+            self.ns.addName("BAR", 0)
+            self.ns.addName("BAZ", 0)
+
+            self.VerifyName("FOO", 0)
+            self.assertEqual(0, self.ns.name_to_value["BAR"])
+            self.assertEqual(0, self.ns.name_to_value["BAZ"])
+
+
+        def test_addConflicting(self):
+            # "addName" collides with the NameSet method attribute.
+            self.assertRaises(ValueError, self.ns.addName, "addName")
+
+
+    class Test_mapping(Test_NameSet):
+        """Test map handling"""
+        def setUp(self):
+            self.ns = NameSet(mapping={"SLEEPY":1, "GRUMPY": 0})
+
+        def test_constructor(self):
+            self.VerifyName("SLEEPY", 1)
+            self.VerifyName("GRUMPY", 0)
+
+        def test_addMap(self):
+            self.ns.addMap({"DOC": 9, "BASHFUL": 3})
+
+            self.VerifyName("SLEEPY", 1)
+            self.VerifyName("GRUMPY", 0)
+            self.VerifyName("DOC", 9)
+            self.VerifyName("BASHFUL", 3)
+
+
+    # Run all unit tests
+    unittest.main()
+
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py b/contrib/debug_tools/epython_scripts/crashlib/input/flagtools.py
new file mode 100644 (file)
index 0000000..e2c767b
--- /dev/null
@@ -0,0 +1,355 @@
+
+"""
+Flag handling routines
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+
+### TBD: The "Simple" in the addSimple* interfaces refers to a flag
+### that's a single bit.  It's meant to distinguish from flags that
+### have multibit fields, such as the node/zone indices stuck in the
+### high end of struct page.flags; or a field that's mostly a pointer
+### but with some flags in the low bits.
+#
+### To add cases like that will mean redoing most of the
+### implementation, but all the current interfaces should be ok, with
+### new interfaces added to let users define the non-simple flags.
+
+import uflookup
+
+
+class FlagSet:
+    """A collection of flags and values, with routines for translating
+
+    For decoding a flag int to a string, encoding a flag string to an
+    int, and providing python identifiers for testing by name, e.g.,
+
+    jafs = FlagSet() # job_attach flagset
+    jafs.addSimpleFlag("disable_affinity_apply")
+    if job_attach.flags & jafs.disable_affinity_apply: ...
+
+    The advantages over just using a dict include:
+    * Define the values once, and get value->string, string->value,
+      and python identifiers ns.<name> and ns.<name>_shift as above.
+    * The auto-incrementing _next_bit
+    """
+    def __init__(self, mapping=None):
+        """Create and initialize a FlagSet object
+
+        Arguments:
+            mapping:    if specified, provides a mapping object, e.g. dict,
+                        that supplies the initial key(name)/value pairs.
+        """
+        # Public dict of flag names to flag values (not the bit number)
+        self.str_to_value = {}
+        # Public dict of flag values to flag names
+        self.value_to_str = {}
+
+        self._next_bit = 0
+
+        # sorted_values is so that translating a value to a string
+        # will report the strings in the same order every time.  That
+        # order is by numerically increasing value.
+        self._sorted_values = []
+        self._sorted_strs = []
+
+        if mapping is not None:
+            self.addMap(mapping)
+
+    def addSimpleFlag(self, s, bit=None):
+        """Add a single-bit flag.
+
+        If bit is not specified, uses the bit one greater than the
+        previously defined bit.  If multiple flags are defined to use
+        the same bit, value_to_str will remember only the first."""
+
+        if s in self.str_to_value.keys():
+            raise ValueError("Flag {0} already defined (value {1:x})".format(
+                s, self.str_to_value[s]))
+        if s + "_shift" in self.str_to_value.keys():
+            raise ValueError("Flag {0} conflicts with another "
+                             "flag ({1})".format(s, s + "_shift"))
+
+        try:
+            getattr(self, s)
+        except AttributeError:
+            pass
+        else:
+            raise ValueError("Value {0} already used by FlagSet object!".
+                             format(s))
+
+        try:
+            getattr(self, s + "_shift")
+        except AttributeError:
+            pass
+        else:
+            raise valueError("{0}_shift already used by FlagSet object!".
+                             format(s))
+
+
+        if bit is None:
+            bit = self._next_bit;
+        self._next_bit = bit + 1
+
+        value = 1 << bit
+        if value not in self.value_to_str:
+            self.value_to_str[value] = s
+        self.str_to_value[s] = value
+
+        self._sorted_values = []
+
+        setattr(self, s, value)
+        setattr(self, s+"_shift", bit)
+
+    def addSimpleFlags(self, *l):
+        """Adds a list of single-bit flags."""
+        map(self.addSimpleFlag, l)
+
+    def addMap(self, mapping):
+        """Add the key/value pairs from a mapping type"""
+        for k, v in mapping.items():
+            self.addSimpleFlag(k, v)
+
+    def _EnsureSorted(self):
+        if self._sorted_values:
+            return
+        self._sorted_values = sorted(self.value_to_str.keys())
+#        self._sorted_strs = sorted(self.str_to_value.keys())
+
+
+    def flagsToStringList(self, flagint):
+        """Translate a given flag int to a list of flag strings."""
+        self._EnsureSorted()
+        strs = []
+        for v in self._sorted_values:
+            if flagint & v != 0:
+                strs.append(self.value_to_str[v])
+                flagint &= ~v
+        if flagint != 0:
+            strs.append("{0:#x}".format(flagint))
+        return strs
+
+    def UFLookup(self, key, **kwargs):
+        return uflookup.UFLookup(self.str_to_value, key, **kwargs)
+
+    # TBD: interface to enable a script --dump-flag-translations argument?
+
+
+
+def join_flaglist(fl, sep = "|", empty = "0"):
+    """Helper function to join a list of flag strings.
+
+    Returns the strings joined by 'sep', or the 'empty' placeholder
+    (default "0") when the list is empty.
+    """
+    if fl:
+        return sep.join(fl)
+    else:
+        return empty
+
+
+### Tests
+
+# I'm trying to follow the convention of
+
+#   assertEquals(expectedvalue, function_under_test(args))
+
+# I didn't discover that (on some unittest page) until I was halfway
+# through, so I may not have gotten them all in the right order.
+
+if __name__ == '__main__':
+    import unittest
+
+    class Test_join_flaglist(unittest.TestCase):
+        """Test the join_flaglist function"""
+
+        def assertJoinFlaglistEqual(self, expectedstring, flaglist):
+            self.assertEqual(expectedstring, join_flaglist(flaglist))
+
+        def test_single_value(self):
+            """Test join_flaglist() with a single value"""
+            self.assertJoinFlaglistEqual("aflag", ["aflag"])
+
+        def test_two_values(self):
+            """Test join_flaglist() with two values"""
+            self.assertJoinFlaglistEqual("aflag|bflag",["aflag", "bflag"])
+
+        def test_three_values(self):
+            """Test join_flaglist() with three values"""
+            self.assertJoinFlaglistEqual("af|bf|cf", ["af", "bf", "cf"])
+
+        def test_comma_sep(self):
+            """Test join_flaglist() with a non-default sep"""
+            self.assertEqual("af,bf,cf",
+                             join_flaglist(["af", "bf", "cf"], sep=','))
+
+        def test_join_empty(self):
+            """Test join_flaglist() with an empty list"""
+            self.assertEqual("0", join_flaglist([]))
+
+        def test_join_empty_nondefault(self):
+            """Test join_flaglist() with a non-default value of empty"""
+            self.assertEqual(" ", join_flaglist([], empty=" "))
+
+
+    class Test_FlagSet(unittest.TestCase):
+        """Test the FlagSet class"""
+
+        def setUp(self):
+            self.fs = FlagSet()
+
+        def VerifyFlag(self, string, value):
+            """Test string->value and value->string"""
+            self.assertEqual(value, self.fs.str_to_value[string])
+            self.assertEqual(string, self.fs.value_to_str[value])
+            self.assertEqual(value, getattr(self.fs, string))
+            self.assertEqual(value, 1<<getattr(self.fs, string+"_shift"))
+
+    class Test_FlagSet_Constructor(Test_FlagSet):
+        def test_constructor(self):
+            """Too much?"""
+            self.assertEqual(self.fs._next_bit, 0)
+            self.assertFalse(self.fs.value_to_str)
+            # etc.
+
+    class Test_Add_Simple_Flag(Test_FlagSet):
+        def test_add_simple_flag(self):
+            """Test that adding a simple flag to an empty FlagSet works"""
+            self.fs.addSimpleFlag("FOO")
+            self.VerifyFlag("FOO", 1)
+
+        def test_3_add_simple_flag(self):
+            """Test multiple addSimpleFlag calls"""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR")
+            self.fs.addSimpleFlag("BAZ")
+
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+
+            self.assertEqual(1, self.fs.FOO)
+            self.assertEqual(2, self.fs.BAR)
+            self.assertEqual(4, self.fs.BAZ)
+
+            self.assertEqual(0, self.fs.FOO_shift)
+            self.assertEqual(1, self.fs.BAR_shift)
+            self.assertEqual(2, self.fs.BAZ_shift)
+
+            self.fs._EnsureSorted()
+#            self.assertEqual(self.fs._sorted_strs, ["BAR", "BAZ", "FOO"])
+            self.assertEqual(self.fs._sorted_values, [1, 2, 4])
+
+        def test_add_simple_flag_with_value(self):
+            """Test addSimpleFlag calls with explicit bit="""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR", bit=1)
+            self.fs.addSimpleFlag("BAZ")
+            self.fs.addSimpleFlag("BLAT", bit=17)
+            self.fs.addSimpleFlag("FROB")
+            self.fs.addSimpleFlag("SNARF", bit=5)
+
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+            self.VerifyFlag("SNARF", 32)
+            self.VerifyFlag("BLAT", 1<<17)
+            self.VerifyFlag("FROB", 1<<18)
+
+            self.fs._EnsureSorted()
+#            self.assertEqual(self.fs._sorted_strs,
+#                             ["BAR", "BAZ", "BLAT", "FOO", "FROB"])
+            self.assertEqual(self.fs._sorted_values,
+                             [1, 2, 4, 32, 1<<17, 1<<18])
+
+
+        def test_add_simple_flag_dup_name(self):
+            """Test exception on duplicate flag name"""
+            self.fs.addSimpleFlag("FOO")
+            self.assertRaises(ValueError, self.fs.addSimpleFlag, "FOO")
+
+        def test_add_simple_flag_dup_value(self):
+            """Test exception on duplicate flag value"""
+            self.fs.addSimpleFlag("FOO")
+            self.fs.addSimpleFlag("BAR", bit=0)
+
+            self.VerifyFlag("FOO", 1)
+            self.assertEqual(1, self.fs.str_to_value["BAR"])
+
+        def test_add_shift_duplicated_name(self):
+            """Test that name and name_shift can't both be added"""
+            self.fs.addSimpleFlag("FOO_shift")
+            self.assertRaises(ValueError, self.fs.addSimpleFlag, "FOO")
+            self.assertRaises(ValueError,
+                              self.fs.addSimpleFlag, "FOO_shift_shift")
+
+        def test_attr_name_conflict(self):
+            """Test that adding a flag won't clobber an object attribute"""
+            self.assertRaises(ValueError,
+                              self.fs.addSimpleFlag, "addSimpleFlag")
+
+    class Test_Add_Simple_Flags(Test_FlagSet):
+        def test_add_simple_flags(self):
+            """Test that addSimpleFlags() can add several flags"""
+
+            self.fs.addSimpleFlags("FOO", "BAR", "BAZ")
+            self.VerifyFlag("FOO", 1)
+            self.VerifyFlag("BAR", 2)
+            self.VerifyFlag("BAZ", 4)
+
+    class Test_FlagSet_mapping(Test_FlagSet):
+        def setUp(self):
+            self.fs = FlagSet(mapping={"FOO": 9, "BAR": 1})
+
+        def test_constructor(self):
+            self.VerifyFlag("FOO", 1<<9)
+            self.VerifyFlag("BAR", 1<<1)
+
+        def test_addMap(self):
+            self.fs.addMap({"BAZ": 3, "ZING": 7})
+
+            self.VerifyFlag("FOO", 1<<9)
+            self.VerifyFlag("BAR", 1<<1)
+            self.VerifyFlag("BAZ", 1<<3)
+            self.VerifyFlag("ZING", 1<<7)
+
+    class Test_FlagSet_FBBZZ(Test_FlagSet):
+        """FlagSet with certain set of flags"""
+        def setUp(self):
+            self.fs = FlagSet()
+            self.fs.addSimpleFlags("FOO", "BAR", "BAZ")
+            self.fs.addSimpleFlag("ZING", bit=13)
+            self.fs.addSimpleFlag("ZOING", bit=42)
+
+        def Verify_F2SL(self, expectedstrlist, flags):
+            self.assertEqual(expectedstrlist, self.fs.flagsToStringList(flags))
+
+    class Test_FlagSet_FBBZZ_flagsToStringList(Test_FlagSet_FBBZZ):
+        def test_F(self):
+            self.Verify_F2SL(["FOO"], 1)
+        def test_B(self):
+            self.Verify_F2SL(["BAR"], 2)
+        def test_B2(self):
+            self.Verify_F2SL(["BAZ"], 4)
+        def test_Z(self):
+            self.Verify_F2SL(["ZING"], 1<<13)
+        def test_Z2(self):
+            self.Verify_F2SL(["ZOING"], 1<<42)
+
+        def test_FB(self):
+            self.Verify_F2SL(["FOO", "BAR"], 3)
+        def test_FBB(self):
+            self.Verify_F2SL(["FOO", "BAR", "BAZ"], 7)
+        def test_FB2(self):
+            self.Verify_F2SL(["BAR", "BAZ"], 6)
+
+        def test_FBBZZ(self):
+            self.Verify_F2SL(["FOO", "BAR", "BAZ", "ZING", "ZOING"],
+                             7|1<<13|1<<42)
+
+        def test_unknownflag(self):
+            self.Verify_F2SL(["0x10"], 0x10)
+        def test_unknownflags(self):
+            self.Verify_F2SL(["0x30"], 0x30)
+        def test_knownandunknownflags(self):
+            self.Verify_F2SL(["FOO", "0x30"], 0x31)
+
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py b/contrib/debug_tools/epython_scripts/crashlib/input/uflookup.py
new file mode 100644 (file)
index 0000000..8f9d4b6
--- /dev/null
@@ -0,0 +1,127 @@
+
+"""
+User Friendly Lookup routine
+Copyright 2015 Cray Inc.  All Rights Reserved
+"""
+
+# TBD: Maybe it would be more useful to replace prefixok with
+# substringok, for cases with lots of common prefix, like
+# CAP_SYS_PTRACE and CAP_SYS_TTY_CONFIG
+#
+# Wait until there's a user for it.
+
+def UFLookup(d, key, casesensitive=False, prefixok=True):
+    """User Friendly Lookup
+
+    By default, case-insensitive, unique-prefix-accepting lookups on
+    dict d"""
+
+    def _casesensitive_prefixok(d, key):
+        """case sensitive, prefixes ok"""
+        matches = []
+        for s in d.keys():
+            if s == key:
+                return d[s]
+            if s.startswith(key):
+                matches.append(s)
+        if len(matches) == 1:
+            return d[matches[0]]
+        raise KeyError("{0} matches multiple keys: {1}".format(
+            key, ", ".join(matches)))
+
+    def _caseinsensitive_prefixok(d, key):
+        """case insensitive, prefixes ok"""
+        matches = []
+        lkey = key.lower()
+        for s in d.keys():
+            if s.lower() == lkey:
+                return d[s]
+            if s.lower().startswith(lkey):
+                matches.append(s)
+        if len(matches) == 0:
+            raise KeyError("No match for {0}".format(key))
+        if len(matches) == 1:
+            return d[matches[0]]
+        raise KeyError("{0} matches multiple keys: {1}".format(
+            key, ", ".join(matches)))
+
+    def _caseinsensitive_noprefix(d, key):
+        """case insensitive, prefixes not ok"""
+        lkey = key.lower()
+        for s in d.keys():
+            if s.lower() == lkey:
+                return d[s]
+        raise KeyError("No match for {0}".format(key))
+
+    def _casesensitive_noprefix(d, key):
+        """case sensitive, prefixes not ok"""
+        return d[key]
+
+    if casesensitive and not prefixok:
+        return _casesensitive_noprefix(d, key)
+    if casesensitive:
+        return _casesensitive_prefixok(d, key)
+    if prefixok:
+        return _caseinsensitive_prefixok(d, key)
+    return _caseinsensitive_noprefix(d, key)
+
+
+
+if __name__ == '__main__':
+    import unittest
+
+    class Test_UFLookup_FBBZZ(unittest.TestCase):
+        def setUp(self):
+            self.d = { "FOO": 1,
+                       "BAR": 2,
+                       "baz": 3,
+                       "zing": 4,
+                       "zinGlinG": 5 }
+
+        def checkall(self, expectedlist, key):
+            """Test UFLookup(self.d, key) for all four flags combinations.
+
+            expectedlist[] contains the four expected results,
+            [0]: casesensitive = False, prefixok = False
+            [1]: casesensitive = False, prefixok = True
+            [2]: casesensitive = True,  prefixok = False
+            [3]: casesensitive = True,  prefixok = True
+
+            If expectedlist[i] is None, then UFLookup should raise
+            KeyError for that case.  Otherwise, it's the value that
+            should be returned."""
+
+            kdicts = [{"casesensitive": False, "prefixok": False},
+                      {"casesensitive": False, "prefixok": True},
+                      {"casesensitive": True,  "prefixok": False},
+                      {"casesensitive": True,  "prefixok": True}]
+            for i in xrange(len(expectedlist)):
+                e = expectedlist[i]
+                if e is None:
+                    self.assertRaises(KeyError,
+                                      UFLookup, self.d, key, **kdicts[i])
+                else:
+                    self.assertEqual(e, UFLookup(self.d, key, **kdicts[i]))
+
+        def test_FOO(self):
+            self.checkall([1, 1, 1, 1], "FOO")
+        def test_foo(self):
+            self.checkall([1, 1, None, None], "foo")
+        def test_F(self):
+            self.checkall([None, 1, None, 1], "F")
+        def test_f(self):
+            self.checkall([None, 1, None, None], "f")
+
+
+        def test_ambig_prefix_zin(self):
+            self.checkall([None, None, None, None], "zin")
+        def test_semiambig_prefix_ba(self):
+            self.checkall([None, None, None, 3], "ba")
+        def test_prefix_exactmatch_zing(self):
+            self.checkall([4, 4, 4, 4], "zing")
+        def test_prefix_semiexact_zinG(self):
+            self.checkall([4, 4, None, 5], "zinG")
+
+
+    # Run all unit tests
+    unittest.main()
diff --git a/contrib/debug_tools/epython_scripts/crashlib/page.py b/contrib/debug_tools/epython_scripts/crashlib/page.py
new file mode 100644 (file)
index 0000000..ef87d05
--- /dev/null
@@ -0,0 +1,127 @@
+"""
+Constants and routines for manipulating kernel page struct.
+Copyright 2014-2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import *
+
+import crashlib.cid
+import crashlib.cid.machdep_table
+import crashlib.cid.page_flags
+import crashlib.cid.phys_mem_map
+import crashlib.memarray
+
+# --------------------------------------------------------------------------
+
+page_struct_size = getSizeOf('struct page')
+
+# --------------------------------------------------------------------------
+
+# Create a function for determining whether a page is controlled by the
+# buddy allocator.  Note that earlier kernels (< 3.0) have a page flag, while
+# later kernels use the _mapcount field.
+
+if hasattr(crashlib.cid.pgflags, 'PG_buddy'):
+    def is_buddy_page(page):
+        return page.flags & crashlib.cid.pgflags.PG_buddy.mask;
+else:
+    def is_buddy_page(page):
+        # Early implementations used -2, later use -128
+        return page._mapcount.counter == -128 or page._mapcount.counter == -2
+
+if hasattr(crashlib.cid.pgflags, 'PG_compound'):
+    def is_compound_page_head(page):
+        return (page.flags & (crashlib.cid.pgflags.PG_reclaim.mask |
+                              crashlib.cid.pgflags.PG_compound.mask)
+               ) == crashlib.cid.pgflags.PG_compound
+
+    def is_compound_page_tail(page):
+        return (page.flags & (crashlib.cid.pgflags.PG_reclaim.mask |
+                              crashlib.cid.pgflags.PG_compound.mask)
+               ) == (crashlib.cid.pgflags.PG_reclaim.mask |
+                     crashlib.cid.pgflags.PG_compound.mask)
+
+    def is_compound_page(page):
+        return page.flags & crashlib.cid.pgflags.PG_compound.mask
+
+elif hasattr(crashlib.cid.pgflags, 'PG_tail'):
+    # PG_head and PG_tail defined
+    def is_compound_page_head(page):
+        return page.flags & (crashlib.cid.pgflags.PG_head.mask)
+
+    def is_compound_page_tail(page):
+        return page.flags & (crashlib.cid.pgflags.PG_tail.mask)
+
+    def is_compound_page(page):
+        return is_compound_page_head(page) or is_compound_page_tail(page)
+
+else:
+    # Only PG_head is defined
+    def is_compound_page_head(page):
+        return page.flags & (crashlib.cid.pgflags.PG_head.mask)
+
+    def is_compound_page_tail(page):
+        return page.compound_head & 1
+
+    def is_compound_page(page):
+        return is_compound_page_head(page) or is_compound_page_tail(page)
+
+# --------------------------------------------------------------------------
+
+# Find the page order of a buddy page
+
+def buddy_order(page):
+    """Retrieve the order of a page in the buddy allocator"""
+    return page.private
+
+# --------------------------------------------------------------------------
+
+# Create a function to determine the page order of a compound page
+
+if member_offset('struct page', 'compound_order') > -1:
+    def compound_order(page):
+        """Retrieve the page order for a compound page."""
+        # A compound page is a series of contiguous pages, thus there are
+        # at least two page structs.  The second page struct (first tail page)
+        # contains the page order; the head page uses the space for a
+        # different purpose.
+        return page[1].compound_order
+
+else:
+    def compound_order(page):
+        """Retrieve the page order for a compound page."""
+        # A compound page is a series of contiguous pages, thus there are
+        # at least two page structs.  The second page struct (first tail page)
+        # contains the page order stored in the lru.prev field; the head page
+        # uses the space for a different purpose.
+        return page[1].lru.prev
+
+# --------------------------------------------------------------------------
+
+def pfn(page):
+    """Returns the pfn for the supplied page struct or page struct address."""
+    vmemmap_vaddr = crashlib.cid.mdtbl.vmemmap_vaddr
+    return (page - vmemmap_vaddr) / page_struct_size
+
+# --------------------------------------------------------------------------
+
+def page_list():
+    """Return a list-like class of page structs indexed by pfn.
+
+    This implementation assumes the kernel is configured with a virtually
+    contiguous mem_map.
+    """
+    # If the kernel doesn't have a virtually contiguous mem_map, this could
+    # be changed to return a chained list of MemCArray objects.
+
+    PAGE_SHIFT = crashlib.cid.mdtbl.pageshift
+    pfn_start  = crashlib.cid.physmap[0].start >> PAGE_SHIFT
+    pfn_end    = crashlib.cid.physmap[-1].end >> PAGE_SHIFT
+
+    # Find page map and create an array of page_struct
+    vmemmap_addr = crashlib.cid.mdtbl.vmemmap_vaddr
+
+    return crashlib.memarray.MemCArray(vmemmap_addr,
+                                        lambda a:readSU('struct page',a),
+                                        getSizeOf('struct page'),
+                                        pfn_end-pfn_start)
diff --git a/contrib/debug_tools/epython_scripts/crashlib/time.py b/contrib/debug_tools/epython_scripts/crashlib/time.py
new file mode 100644 (file)
index 0000000..e276591
--- /dev/null
@@ -0,0 +1,42 @@
+"""
+Routines for retrieving and manipulating kernel time
+Copyright 2017 Cray Inc.  All Rights Reserved
+"""
+
+from pykdump.API import readSymbol, symbol_exists
+from crashlib.exceptions import *
+
+# --------------------------------------------------------------------------
+# get_wallclock_seconds()
+#
+# There are multiple variants, depending on kernel version.  Attempt to
+# discern the proper method for retrieving the current wall clock time.
+#
+
+if symbol_exists('xtime'):
+    # SLES 11 uses struct timespec xtime to hold the wall time.
+    _wallclock_xtime = readSymbol('xtime')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_xtime.tv_sec
+
+elif symbol_exists('timekeeper'):
+    # SLES 12 has a new timekeeper struct for that purpose
+    _wallclock_timekeeper = readSymbol('timekeeper')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_timekeeper.xtime_sec
+
+elif symbol_exists('tk_core'):
+    # SLES 12 SP2 embeds the timekeeper struct in tk_core
+    _wallclock_tk_core = readSymbol('tk_core')
+    def get_wallclock_seconds():
+        '''Return current time in seconds'''
+        return _wallclock_tk_core.timekeeper.xtime_sec
+
+else:
+    # Unknown how to read wallclock time in this kernel
+    def get_wallclock_seconds():
+        raise CompatibilityError('Could not find wallclock time in the kernel')
+
+# --------------------------------------------------------------------------
diff --git a/contrib/debug_tools/epython_scripts/crashlib/util.py b/contrib/debug_tools/epython_scripts/crashlib/util.py
new file mode 100644 (file)
index 0000000..aa6a2e1
--- /dev/null
@@ -0,0 +1,210 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2017-2018 Cray Inc. All Rights Reserved.
+
+from collections import namedtuple
+from math import ceil
+
+from crash import addr2sym, PAGESIZE
+from pykdump.API import (Addr, exec_crash_command, getSizeOf, member_offset,
+                         readmem, readU8, readULong, sys_info)
+
+from crashlib.cl import (cl_err, cl_warn, cl_info, cl_trace)
+
+BYTES_1K = 1024
+BYTES_1M = BYTES_1K * 1024
+BYTES_1G = BYTES_1M * 1024
+BYTES_1T = BYTES_1G * 1024
+
+def bytes2size(bytes):
+    '''Return a string representation of bytes, including order,
+    ie '15.0M' or '2.1G'.'''
+    suffix = ""
+    if bytes >= BYTES_1T:
+        suffix = "T"
+        size = BYTES_1T
+    elif bytes >= BYTES_1G:
+        suffix = "G"
+        size = BYTES_1G
+    elif bytes >= BYTES_1M:
+        suffix = "M"
+        size = BYTES_1M
+    elif bytes >= BYTES_1K:
+        suffix = "K"
+        size = BYTES_1K
+    else:
+        size = 1
+    full = bytes / size
+    rem = ((bytes % size) * 10) / size
+    return "%d.%d%s" % (full, rem, suffix)
+
+def pages2size(npages):
+    '''Return a string representation of the number of bytes contained
+    in npages.'''
+    return bytes2size(npages * PAGESIZE)
+
+def page_to_virt(page):
+    # run kmem -p to get the pys addr for the page
+    cmd = "kmem -p %#x" % page
+    kmemp = exec_crash_command(cmd)
+    paddr = kmemp.splitlines()[1].split()[1]
+    cl_trace("*>>> page_to_virt #### phys_addr = %s" % paddr)
+    # find vaddr from crash ptov command
+    res = exec_crash_command("ptov " + paddr)
+    vaddr = res.splitlines()[1].split()[0]
+    cl_trace("*>>> page_to_virt #### vaddr = %s" % vaddr)
+    return long(vaddr, 16)
+
+def get_config(name):
+    cl_trace(">>> get_config: searching system config for %s" % name)
+    res = exec_crash_command("sys config")
+    for line in res.splitlines():
+        if not "=" in line:
+            continue
+        (key, value) = line.split("=", 1)
+        if key == name:
+            cl_trace(">>> get_config: %s has a value of '%s'" % (name, value))
+            return value
+    raise ValueError("Name %s not found in system config" % name)
+
+def atoi(arg):
+    # See if the user specified the format.
+    try:
+        val = int(arg, 0)
+    except:
+        # Nope, be generous and try again as hex.
+        try:
+            val = int(arg, 16)
+        except:
+            # No luck. Return an error.
+            print("Invalid number: %s" % arg)
+            val = None
+    return val
+
+def is_kernel_address(addr):
+    # The top 17 bits should all be ones.
+    val = (1 << 17) - 1
+    if (addr >> 47) != val:
+        return False
+    return True
+
+def is_kernel_text_address(addr):
+    # The top 33 bits should all be ones.
+    val = (1 << 33) - 1
+    if (addr >> 31) != val:
+        return False
+    return True
+
+def is_valid_address(addr):
+    if addr < 0x10000:
+        return False
+    if addr & 7:
+        return False
+    return True
+
+def readString(addr):
+    res = readmem(addr, 64)
+    return res.split('\0')[0]
+
+def symbol_name(addr):
+    if not is_kernel_text_address(addr):
+        return ""
+    (name, offset) = addr2sym(addr, True)
+    if name == None:
+        return ""
+    if offset != 0:
+        name += "+" + hex(offset)
+    return name
+
+def read_bool(addr):
+    '''pykdump can't read bools on its own.'''
+    return bool(readU8(addr))
+
+def read_bool_member(struct, member_name):
+    '''struct must be a pykdump object, member name is the string name
+    of the bool member in the struct.'''
+    struct_type = struct.PYT_symbol
+    return read_bool(Addr(struct) + member_offset(struct_type, member_name))
+
+def read_bitmap(addr, num_bits):
+    '''Return an integer representation of the 'num_bits' sized bitmap
+    at 'addr'. Note Python has arbitrary precision ints so the return
+    value may be very large.'''
+    bits_per_long = 8 * getSizeOf('long')
+    num_longs = int(ceil(float(num_bits) / bits_per_long))
+    total = 0
+    for i in range(num_longs):
+        total |= (readULong(addr + i * getSizeOf('long'))
+                 << ((num_longs - i - 1) * bits_per_long))
+    # Mask off unused bits when num_bits not a multiple of bits/long.
+    mask = 2 ** num_bits - 1
+    return total & mask
+
+def read_cpumask(cpumask_addr):
+    '''Return an integer representation of the cpumask bitmap.'''
+    return read_bitmap(cpumask_addr, sys_info.CPUS)
+
+def read_cpumask_var_t(container_struct, member_name):
+    '''Return an integer representation of the cpumask_var_t bitmap.
+    'container_struct' is the struct object which has a cpumask_var_t
+    as a member. 'member_name' is the name of the cpumask_var_t field
+    within the container struct.
+
+    Pykdump crashes when trying to read a cpumask_var_t. This function
+    provides a workaround which does not read a cpumask_var_t directly.'''
+    container_type = container_struct.PYT_symbol
+    offset = member_offset(container_type, member_name)
+    cpumask_addr = Addr(container_struct) + offset
+    return read_cpumask(cpumask_addr)
+
+
+# Bit offsets and masks for read_qspinlock
+# Copied from linux/include/asm-generic/qspinlock_types.h.
+#
+# Bitfields in the atomic value:
+#
+# When NR_CPUS < 16K
+# 0- 7: locked byte
+#    8: pending
+# 9-15: not used
+# 16-17: tail index
+# 18-31: tail cpu (+1)
+#
+# When NR_CPUS >= 16K
+# 0- 7: locked byte
+#    8: pending
+# 9-10: tail index
+# 11-31: tail cpu (+1)'''
+
+if sys_info.CPUS < 2 ** 14:
+    _q_pending_bits = 8
+else:
+    _q_pending_bits = 1
+_q_tail_index_offset = 9
+_q_tail_index_bits = 2
+_q_tail_index_mask = (2 ** _q_tail_index_bits - 1) << _q_tail_index_offset
+_q_tail_cpu_offset = _q_tail_index_offset + _q_tail_index_bits
+_q_tail_cpu_bits = 32 - _q_tail_cpu_offset
+_q_tail_cpu_mask = (2 ** _q_tail_cpu_bits - 1) << _q_tail_cpu_offset
+
+qspinlock_tuple = namedtuple('qspinlock',
+                             ['locked', 'pending', 'tail_index', 'tail_cpu'])
+
+def read_qspinlock(qspinlock):
+    '''Given a struct qspinlock, which consists of a single 32 bit atomic
+    value, return a namedtuple of ints (locked, pending, tail_index, tail_cpu),
+    representing the bit fields of the qspinlock.'''
+
+    val = qspinlock.val.counter
+    locked_byte = val & 0xff
+    pending = (val & 0x100) >> 8
+
+    tail_index = (val & _q_tail_index_mask) >> _q_tail_index_offset
+
+    _q_tail_cpu_offset = _q_tail_index_offset + _q_tail_index_bits
+    _q_tail_cpu_bits = 32 - _q_tail_cpu_offset
+    _q_tail_cpu_mask = (2 ** _q_tail_cpu_bits - 1) << _q_tail_cpu_offset
+    tail_cpu = ((val & _q_tail_cpu_mask) >> _q_tail_cpu_offset) - 1
+
+    return qspinlock_tuple(locked=locked_byte, pending=pending,
+                           tail_index=tail_index, tail_cpu=tail_cpu)
diff --git a/contrib/debug_tools/epython_scripts/debug_flags.py b/contrib/debug_tools/epython_scripts/debug_flags.py
new file mode 100644 (file)
index 0000000..ddfb376
--- /dev/null
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+"""
+Utility to print Lustre libcfs_debug flags
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+
+from pykdump.API import *
+from crashlib.input import toint
+import argparse
+
+description_short = "Prints Lustre libcfs_debug flags as strings"
+
+debug_flags_tbl = {
+    0x00000001: 'trace',      #define D_TRACE
+    0x00000002: 'inode',      #define D_INODE
+    0x00000004: 'super',      #define D_SUPER
+    0x00000008: 'ext2',       #define D_EXT2
+    0x00000010: 'malloc',     #define D_MALLOC
+    0x00000020: 'cache',      #define D_CACHE
+    0x00000040: 'info',       #define D_INFO
+    0x00000080: 'ioctl',      #define D_IOCTL
+    0x00000100: 'neterror',   #define D_NETERROR
+    0x00000200: 'net',        #define D_NET
+    0x00000400: 'warning',    #define D_WARNING
+    0x00000800: 'buffs',      #define D_BUFFS
+    0x00001000: 'other',      #define D_OTHER
+    0x00002000: 'dentry',     #define D_DENTRY
+    0x00004000: 'nettrace',   #define D_NETTRACE
+    0x00008000: 'page',       #define D_PAGE
+    0x00010000: 'dlmtrace',   #define D_DLMTRACE
+    0x00020000: 'error',      #define D_ERROR
+    0x00040000: 'emerg',      #define D_EMERG
+    0x00080000: 'ha',         #define D_HA
+    0x00100000: 'rpctrace',   #define D_RPCTRACE
+    0x00200000: 'vfstrace',   #define D_VFSTRACE
+    0x00400000: 'reada',      #define D_READA
+    0x00800000: 'mmap',       #define D_MMAP
+    0x01000000: 'config',     #define D_CONFIG
+    0x02000000: 'console',    #define D_CONSOLE
+    0x04000000: 'quota',      #define D_QUOTA
+    0x08000000: 'sec',        #define D_SEC
+    0x10000000: 'lfsck',      #define D_LFSCK
+    0x20000000: 'hsm',        #define D_HSM
+    0x40000000: 'snapshot',   #define D_SNAPSHOT
+    0x80000000: 'layout'      #define D_LAYOUT
+}
+
+def print_flags(flag_tbl, mask):
+    flags = ""
+    tmp = mask
+    for key, value in flag_tbl.iteritems():
+            if key & mask:
+               flags = flags + value + " "
+               tmp &= ~key
+    print "mask: 0x%x = %s" % (mask, flags)
+    if tmp != 0:
+        print "unknown bits set in mask: 0x%x" % tmp
+
+def dump_debug_flags(bitmask):
+    print bitmask
+    if not bitmask:
+        bitmask = readSymbol('libcfs_debug')
+    print_flags(debug_flags_tbl, bitmask)
+
+if __name__ == "__main__":
+    description = "Prints libcfs_debug flags as strings"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("bitmask", nargs="?", type=toint, default=[],
+        help="debug bit mask to be translated; default is current libcfs_debug value")
+    args = parser.parse_args()
+    dump_debug_flags(args.bitmask)
diff --git a/contrib/debug_tools/epython_scripts/dk.py b/contrib/debug_tools/epython_scripts/dk.py
new file mode 100644 (file)
index 0000000..2d9c135
--- /dev/null
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Utility to dump the Lustre dk logs.
+Based on dump_cfs_trace_data.py
+"""
+
+import sys
+import crash
+import argparse
+from time import localtime
+from operator import itemgetter
+from pykdump.API import getSizeOf, readSU, readmem, readSUListFromHead, readSymbol, sys_info
+from crashlib import page, addrlib
+import os
+
+description_short = 'Dump and sort the Lustre dk logs.'
+
+def do_shell_cmd(cmd):
+    return os.popen(cmd).read()
+
+# ---------------------------------------------------------------------------
+# pfn: 2582e8c, physaddr: 2582e8c000, vaddr: ffff002582e8c000
+def dump_dk_line(tmpfd, options, pfn, used):
+    """Dump the cfs debug messages in the dk format."""
+    physaddr = addrlib.pfn2phys(pfn)
+    vaddr = addrlib.ptov(physaddr)
+    hdr_size = getSizeOf("struct ptldebug_header")
+
+    while (used):
+        hdr = readSU('struct ptldebug_header', vaddr)
+        laddr = vaddr + hdr_size
+        try:
+            line = readmem(laddr, hdr.ph_len - hdr_size)
+       except:
+            print "Skipping pfn: %x, physaddr: %x, vaddr: %x, laddr: %x" % \
+                (pfn, physaddr, vaddr, laddr)
+            return
+
+        (filename,function,text) = line.split('\0')
+        text = text.rstrip()
+
+        used -= hdr.ph_len
+        vaddr += hdr.ph_len
+
+        type = hdr.ph_type
+        prefix = "%08x:%08x:%u.%u%s:%u.%u" % \
+            (hdr.ph_subsys, hdr.ph_mask, hdr.ph_cpu_id, hdr.ph_type,
+            "F" if (hdr.ph_flags & 1) else "", hdr.ph_sec, hdr.ph_usec)
+
+        buf = "%s:%06u:%u:%u:(%s:%d:%s()) %s" % \
+            (prefix, hdr.ph_stack, hdr.ph_pid, hdr.ph_extern_pid, filename,
+            hdr.ph_line_num, function, text)
+
+        tmpfd.write(buf + '\n')
+
+# ---------------------------------------------------------------------------
+def walk_pages(tmpfd, options, cfs_page_head, trace_page_struct):
+
+    cfs_pages = readSUListFromHead(cfs_page_head, 'linkage',
+                                   trace_page_struct,
+                                   maxel=100000, inchead=False)
+
+    for p in cfs_pages:
+        dump_dk_line(tmpfd, options, page.pfn(p.page), p.used)
+
+# ---------------------------------------------------------------------------
+def walk_array(options):
+    """Walk the cfs_trace_data array of array pointers."""
+
+    fname = do_shell_cmd('mktemp .dklogXXXX').rstrip()
+    tmpfd = file(fname, 'w')
+
+    try:
+        cfs_trace_data = readSymbol('cfs_trace_data')
+        trace_page_struct = 'struct cfs_trace_page'
+    except TypeError:
+        try:
+            cfs_trace_data = readSymbol('trace_data')
+            trace_page_struct = 'struct trace_page'
+        except:
+            print "Ensure you have loaded the Lustre modules"
+            return 1
+
+    for cfstd_array in cfs_trace_data:
+        if not cfstd_array: continue
+
+        for i in xrange(sys_info.CPUS):
+            u = cfstd_array[i]
+            walk_pages(tmpfd, options, u.tcd.tcd_pages, trace_page_struct)
+            walk_pages(tmpfd, options, u.tcd.tcd_daemon_pages, trace_page_struct)
+            walk_pages(tmpfd, options, u.tcd.tcd_stock_pages, trace_page_struct)
+
+    tmpfd.close()
+    print do_shell_cmd('sort -n -s -t: -k4,4 ' + fname)
+    print do_shell_cmd('rm ' + fname)
+
+# ---------------------------------------------------------------------------
+def dump_dk_log():
+    parser = argparse.ArgumentParser(
+        description= "Dump and sort the Lustre dk logs.",
+        epilog= "NOTE: the Lustre kernel modules must be loaded.")
+    args = parser.parse_args()
+    return walk_array(args)
+
+if __name__ == '__main__':
+    dump_dk_log()
diff --git a/contrib/debug_tools/epython_scripts/jiffies2date.py b/contrib/debug_tools/epython_scripts/jiffies2date.py
new file mode 100644 (file)
index 0000000..152c00f
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+from pykdump.API import *
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to print jiffies as date and time
+"""
+
+import argparse
+import time
+import crashlib.time
+
+description_short = "Print the date and time for a jiffies timestamp."
+
+# Get current time in jiffies and in seconds. Compute the offset of
+# the timestamp in jiffies from current time and convert to seconds.
+# Subtract the offset from current time in seconds and convert result
+# to a datetime string.
+def jiffies2date(jts):
+    scur = crashlib.time.get_wallclock_seconds()
+
+    jcur = readSymbol('jiffies')
+    if jts == 0:
+        jts = jcur
+    soffset = (jcur - int(jts)) / sys_info.HZ
+
+    stime = scur - soffset
+    date = time.asctime(time.localtime(stime))
+    print '%s (epoch: %d)' % (date, stime)
+
+if __name__ == "__main__":
+    description = "Print the date and time of a given jiffies timestamp. " + \
+                  "Also includes seconds since epoch."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("timestamp", nargs="?", default=0, type=int,
+        help="the timestamp in jiffies to be converted to date/time")
+    args = parser.parse_args()
+    jiffies2date(args.timestamp)
diff --git a/contrib/debug_tools/epython_scripts/ldlm_dumplocks.py b/contrib/debug_tools/epython_scripts/ldlm_dumplocks.py
new file mode 100644 (file)
index 0000000..2f695d2
--- /dev/null
@@ -0,0 +1,157 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Utility to list granted and waiting ldlm locks.
+"""
+
+from pykdump.API import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+from traceback import print_exc
+
+description = "Dumps lists of granted and waiting ldlm locks for each namespace."
+
+''' Lock Types '''
+enum_LDLM_PLAIN = 10
+enum_LDLM_EXTENT = 11
+enum_LDLM_FLOCK = 12
+enum_LDLM_IBITS = 13
+
# ldlm lock-mode bit values mapped to their display names.
LOCKMODES = {
    0: "--",
    1: "EX",
    2: "PW",
    4: "PR",
    8: "CW",
    16: "CR",
    32: "NL",
    64: "GROUP",
}

def lockmode2str(mode):
    """Return the symbolic name of an LDLM lock mode ("??" if unknown)."""
    return LOCKMODES.get(mode, "??")
+
def ldlm_dump_lock(lock, pos, lstname):
    """Print one ldlm_lock: handle, node (export/import/local), resource,
    modes/flags, and per-type (extent/flock/ibits) policy data.

    pos: 1-based position within its list; lstname: list tag ("grnt"/"wait").
    """
    obd = None
    imp = None
    if(lock == None):
        print "   NULL LDLM lock"
        return
    print "   -- Lock: (ldlm_lock) %#x/%#x (rc: %d) (pos: %d/%s) (pid: %d)" % \
          (Addr(lock), lock.l_handle.h_cookie, lock.l_refc.counter,
          pos, lstname, lock.l_pid)
    if(lock.l_conn_export):
        obd = lock.l_conn_export.exp_obd
    # A lock with an export+connection belongs to a remote client; with a
    # connected obd it is held via an import; otherwise it is local.
    if(lock.l_export and lock.l_export.exp_connection):
        print "       Node: NID %s (remote: %#x) export" % \
              (ll.nid2str(lock.l_export.exp_connection.c_peer.nid),
              lock.l_remote_handle.cookie)
    elif(obd == None):
        print "       Node: local"
    else:
        imp = obd.u.cli.cl_import
        print "       Node: NID %s (remote: %#x) import " % \
              (ll.nid2str(imp.imp_connection.c_peer.nid),
              lock.l_remote_handle.cookie)

    res = lock.l_resource
    print "       Resource: %#x [0x%x:0x%x:0x%x].%x" % \
          (Addr(res),
          res.lr_name.name[0],
          res.lr_name.name[1],
          res.lr_name.name[2],
          res.lr_name.name[3])

    print "       Req mode: %s, grant mode: %s, rc: %d, read: %d, \
          write: %d flags: %#x" % (lockmode2str(lock.l_req_mode),
          lockmode2str(lock.l_granted_mode),
          lock.l_refc.counter, lock.l_readers, lock.l_writers,
          lock.l_flags)

    # Per-resource-type policy data.
    lr_type = lock.l_resource.lr_type
    if(lr_type == enum_LDLM_EXTENT):
        print "       Extent: %d -> %d (req %d-%d)" % \
              (lock.l_policy_data.l_extent.start,
              lock.l_policy_data.l_extent.end,
              lock.l_req_extent.start, lock.l_req_extent.end)
    elif(lr_type == enum_LDLM_FLOCK):
        print "       Pid: %d Flock: 0x%x -> 0x%x" % \
              (lock.l_policy_data.l_flock.pid,
              lock.l_policy_data.l_flock.start,
              lock.l_policy_data.l_flock.end)
    elif(lr_type == enum_LDLM_IBITS):
        print "       Bits: %#x" % \
              (lock.l_policy_data.l_inodebits.bits)
+
+def ldlm_dump_resource(res):
+    res_lr_granted = readSU('struct list_head', Addr(res.lr_granted))
+    res_lr_waiting = readSU('struct list_head', Addr(res.lr_waiting))
+    print "-- Resource: (ldlm_resource) %#x [0x%x:0x%x:0x%x].%x (rc: %d)" % \
+          (Addr(res), res.lr_name.name[0], res.lr_name.name[1],
+           res.lr_name.name[2], res.lr_name.name[3], res.lr_refcount.counter)
+    if not ll.list_empty(res_lr_granted):
+        pos = 0
+        print "   Granted locks: "
+        tmp = res_lr_granted.next
+        while(tmp != res_lr_granted):
+            pos += 1
+            lock = readSU('struct ldlm_lock',
+                          Addr(tmp)-member_offset('struct ldlm_lock', 'l_res_link'))
+            ldlm_dump_lock(lock, pos, "grnt")
+            tmp = tmp.next
+    if not ll.list_empty(res_lr_waiting):
+        pos = 0
+        print "   Waiting locks: "
+        tmp = res_lr_waiting.next
+        while(tmp != res_lr_waiting):
+            pos += 1
+            lock = readSU('struct ldlm_lock',
+                          Addr(tmp)-member_offset('struct ldlm_lock', 'l_res_link'))
+            ldlm_dump_lock(lock, pos, "wait")
+            tmp = tmp.next
+
def print_namespace(ns, client_server):
    """Print a one-line summary of an ldlm_namespace; client_server labels
    which side ("server"/"client"/"inactive") the namespace list came from."""
    print "Namespace: (ldlm_namespace) %#x, %s\t(rc: %d, side: %s)\tpoolcnt: %d unused: %d" % \
          (Addr(ns), ll.obd2str(ns.ns_obd), ns.ns_bref.counter,
          client_server, ns.ns_pool.pl_granted.counter, ns.ns_nr_unused)
+
def ldlm_dump_ns_resources(ns):
    """Dump every ldlm_resource hashed into the namespace's ns_rs_hash.

    NOTE(review): reads the module-global 'args' created by the __main__
    block; with -n (nflag) per-resource output is suppressed entirely.
    """
    if args.nflag:
        return
    for hnode in ll.cfs_hash_get_nodes(ns.ns_rs_hash):
        # container_of: back from the lr_hash hlist node to the resource.
        offset = member_offset('struct ldlm_resource', 'lr_hash')
        res = readSU('struct ldlm_resource', Addr(hnode) - offset)
        ldlm_dump_resource(res)
+
def ldlm_dump_all_namespaces(ns_name, client_server):
    """Walk the global namespace list named by ns_name and dump each entry."""
    ns_list = readSymbol(ns_name)
    for ns in readSUListFromHead(ns_list, 'ns_list_chain', 'struct ldlm_namespace'):
        print_namespace(ns, client_server)
        ldlm_dump_ns_resources(ns)
+
def ldlm_dumplocks():
    """Entry point: dump one namespace (args.ns_addr) or all three global
    namespace lists (server, active client, inactive client)."""
    if args.ns_addr:
        ns = readSU('struct ldlm_namespace', args.ns_addr)
        print_namespace(ns, "")
        ldlm_dump_ns_resources(ns)
    else:
        ldlm_dump_all_namespaces('ldlm_srv_namespace_list', "server")
        ldlm_dump_all_namespaces('ldlm_cli_active_namespace_list', "client")
        ldlm_dump_all_namespaces('ldlm_cli_inactive_namespace_list', "inactive")
+
+if __name__ == "__main__":
+    description = "Dumps lists of granted and waiting locks for each namespace. " + \
+                  "Requires Lustre .ko files to be loaded (see mod command)."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("-n", dest="nflag", action='store_true',
+        help="Print only namespace information")
+    parser.add_argument("ns_addr", nargs="?", default=[], type=toint,
+        help="Print only locks under namespace at given address")
+    args = parser.parse_args()
+
+    ldlm_dumplocks()
diff --git a/contrib/debug_tools/epython_scripts/ldlm_lockflags.py b/contrib/debug_tools/epython_scripts/ldlm_lockflags.py
new file mode 100644 (file)
index 0000000..5788e3e
--- /dev/null
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+"""
+Utility to print LDLM lock flags as strings
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+"""
+from pykdump.API import *
+from crashlib.input import toint
+import argparse
+
+description_short  = "Prints string identifiers for specified LDLM flags."
# Mask of all defined LDLM flag bits (mirrors lustre_dlm_flags.h).
LDLM_FL_ALL_FLAGS_MASK = 0x00FFFFFFC28F932F

# LDLM_FL_* bit value -> short name; gaps are reserved/unused bits.
ldlm_flags_tbl = {
    0x0000000000000001:  "LOCK_CHANGED",            # bit  0
    0x0000000000000002:  "BLOCK_GRANTED",           # bit  1
    0x0000000000000004:  "BLOCK_CONV",              # bit  2
    0x0000000000000008:  "BLOCK_WAIT",              # bit  3
    0x0000000000000010:  "SPECULATIVE",             # bit  4
    0x0000000000000020:  "AST_SENT",                # bit  5
    0x0000000000000100:  "REPLAY",                  # bit  8
    0x0000000000000200:  "INTENT_ONLY",             # bit  9
    0x0000000000001000:  "HAS_INTENT",              # bit 12
    0x0000000000008000:  "FLOCK_DEADLOCK",          # bit 15
    0x0000000000010000:  "DISCARD_DATA",            # bit 16
    0x0000000000020000:  "NO_TIMEOUT",              # bit 17
    0x0000000000040000:  "BLOCK_NOWAIT",            # bit 18
    0x0000000000080000:  "TEST_LOCK",               # bit 19
    0x0000000000100000:  "MATCH_LOCK",              # bit 20
    0x0000000000800000:  "CANCEL_ON_BLOCK",         # bit 23
    0x0000000001000000:  "COS_INCOMPAT",            # bit 24
    0x0000000002000000:  "CONVERTING",              # bit 25
    0x0000000010000000:  "LOCKAHEAD_OLD_RESERVED",  # bit 28
    0x0000000020000000:  "NO_EXPANSION",            # bit 29
    0x0000000040000000:  "DENY_ON_CONTENTION",      # bit 30
    0x0000000080000000:  "AST_DISCARD_DATA",        # bit 31
    0x0000000100000000:  "FAIL_LOC",                # bit 32
    0x0000000400000000:  "CBPENDING",               # bit 34
    0x0000000800000000:  "WAIT_NOREPROC",           # bit 35
    0x0000001000000000:  "CANCEL",                  # bit 36
    0x0000002000000000:  "LOCAL_ONLY",              # bit 37
    0x0000004000000000:  "FAILED",                  # bit 38
    0x0000008000000000:  "CANCELING",               # bit 39
    0x0000010000000000:  "LOCAL",                   # bit 40
    0x0000020000000000:  "LVB_READY",               # bit 41
    0x0000040000000000:  "KMS_IGNORE",              # bit 42
    0x0000080000000000:  "CP_REQD",                 # bit 43
    0x0000100000000000:  "CLEANED",                 # bit 44
    0x0000200000000000:  "ATOMIC_CB",               # bit 45
    0x0000400000000000:  "BL_AST",                  # bit 46
    0x0000800000000000:  "BL_DONE",                 # bit 47
    0x0001000000000000:  "NO_LRU",                  # bit 48
    0x0002000000000000:  "FAIL_NOTIFIED",           # bit 49
    0x0004000000000000:  "DESTROYED",               # bit 50
    0x0008000000000000:  "SERVER_LOCK",             # bit 51
    0x0010000000000000:  "RES_LOCKED",              # bit 52
    0x0020000000000000:  "WAITED",                  # bit 53
    0x0040000000000000:  "NS_SRV",                  # bit 54
    0x0080000000000000:  "EXCL",                    # bit 55
    0x0100000000000000:  "RESENT",                  # bit 56
    0x0200000000000000:  "COS_ENABLED",             # bit 57
    0x0400000000000000:  "NDELAY"                   # bit 58
}
+
+def print_flags(flag_dict, mask):
+
+    flags = ""
+    tmp = mask
+    for key, value in flag_dict.iteritems():
+            if key & mask:
+                flags = flags + value + " "
+                tmp &= ~key
+    print "mask: 0x%x = %s" % (mask, flags)
+    if tmp != 0:
+        print "unknown bits set in mask: 0x%x" % tmp
+
+if __name__ == "__main__":
+    description = "Prints string identifiers for specified LDLM flags."
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("bitmask", type=toint,
+        help="LDLM flag bit mask to be translated")
+    args = parser.parse_args()
+    print_flags(ldlm_flags_tbl, args.bitmask)
diff --git a/contrib/debug_tools/epython_scripts/lu_object.py b/contrib/debug_tools/epython_scripts/lu_object.py
new file mode 100644 (file)
index 0000000..12a669b
--- /dev/null
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to display contents of a Lustre lu_object
+"""
+
+from pykdump.API import *
+from struct import *
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = "Prints contents of an lu_object"
+
LOHA_EXISTS = 1 << 0  # lu_object_header::loh_attr bit: object exists

# LOV layout magic numbers (v1 / v3 striped layouts).
LOV_MAGIC = 0x0BD10BD0
LOV_MAGIC_V3 = 0x0BD30BD0

# Indent step and ruler string used by the nested layer printers.
DEPTH = 3
RULER = "........................................"

# FID sequence constants used to classify ost_id values — assumed to mirror
# lustre_idl.h; TODO confirm against the matching Lustre release.
FID_SEQ_OST_MDT0 = 0
FID_SEQ_LOV_DEFAULT = 0xffffffffffffffff
FID_SEQ_IDIF = 0x100000000
FID_SEQ_IDIF_MAX = 0x1ffffffff
IDIF_OID_MAX_BITS = 48
IDIF_OID_MASK = ((1 << IDIF_OID_MAX_BITS) -1)
+
def lov_print_empty(obj, depth=0, ruler=RULER):
    """Print a LOV object of layout type 0 (no layout)."""
    print "empty %d" % obj.lo_layout_invalid
+
+def lov_print_raid0(obj, depth=0, ruler=RULER):
+    r0 = None
+    lsm = obj.lo_lsm
+    try:
+        magic = lsm.lsm_magic
+        stripes = lsm.lsm_stripe_count
+        layout_gen = lsm.lsm_layout_gen
+       pattern = lsm.lsm_pattern
+    except Exception, e:
+        magic = lsm.lsm_wire.lw_magic
+        stripes = lsm.lsm_wire.lw_stripe_count
+        layout_gen = lsm.lsm_wire.lw_layout_gen
+       pattern = lsm.lsm_wire.lw_pattern
+    if magic==LOV_MAGIC or magic==LOV_MAGIC_V3:
+        r0 = obj.u.raid0
+    lli = readU32(Addr(obj) + member_offset('struct lov_object', 'lo_layout_invalid'))
+    invalid = "invalid" if lli else "valid"
+    if r0 and r0.lo_nr:
+        print "%*.*sstripes: %d, %s, lsm[0x%x 0x%X %d %d %d %d]:" % \
+             (depth, depth, ruler,
+             r0.lo_nr, invalid, Addr(lsm), magic,
+             lsm.lsm_refc.counter, stripes, layout_gen, pattern)
+        for i in range(r0.lo_nr):
+            los = r0.lo_sub[i]
+            if los:
+                sub = los.lso_cl.co_lu
+                lovsub_object_print(sub, depth+DEPTH, ruler)
+            else:
+                print "sub %d absent" % i
+
def lov_print_released(obj, depth=0, ruler=RULER):
    """Print a LOV object whose layout has been released (HSM archived)."""
    lsm = obj.lo_lsm
    magic = lsm.lsm_magic
    entries = lsm.lsm_entry_count
    layout_gen = lsm.lsm_layout_gen
    lli = readU32(Addr(obj) + member_offset('struct lov_object', 'lo_layout_invalid'))
    invalid = "invalid" if lli else "valid"
    if magic==LOV_MAGIC or magic==LOV_MAGIC_V3:
        print "%*.*sreleased: %s, lov_stripe_md: 0x%x [0x%X %d %u %u]:" % \
             (depth, depth, ruler,
             invalid, Addr(lsm), magic, lsm.lsm_refc.counter,
             entries, layout_gen)
+
# Dispatch on lov_object.lo_type: 0 = empty, 1 = raid0, 2 = released.
LOV_PRINT_TYPE = {
                 0:lov_print_empty,
                 1:lov_print_raid0,
                 2:lov_print_released}
+
def vvp_object_print(o, depth=0, ruler=RULER):
    """Print the vvp_object embedding this lu_object (container_of pattern)."""
    obj = readSU('struct vvp_object', Addr(o) - member_offset('struct vvp_object', 'vob_cl.co_lu'))
    print "%*.*s(trans:%s mmap:%d) inode: 0x%x " % \
         (depth, depth, ruler,
         obj.vob_transient_pages.counter,
         obj.vob_mmap_cnt.counter,
         Addr(obj.vob_inode))
+
+def lod_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct lod_object', Addr(o) - member_offset('struct lod_object', 'ldo_obj.do_lu'))
+    print "%*.*slod_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
def lov_object_print(o, depth=0, ruler=RULER):
    """Dispatch to the printer matching this lov_object's layout type."""
    obj = readSU('struct lov_object', Addr(o) - member_offset('struct lov_object', 'lo_cl.co_lu'))
    # Renamed the local so it no longer shadows the 'type' builtin.
    lo_type = obj.lo_type
    LOV_PRINT_TYPE[lo_type](obj, depth, ruler)
+
+def lovsub_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct lovsub_object', Addr(o) - member_offset('struct lovsub_object', 'lso_cl.co_lu'))
+    print "%*.*slso_index: %d" % (depth, depth, ruler, obj.lso_index)
+
def mdd_object_print(o, depth=0, ruler=RULER):
    """Print summary state of the mdd_object embedding this lu_object."""
    obj = readSU('struct mdd_object', Addr(o) - member_offset('struct mdd_object', 'mod_obj.mo_lu'))
    print "%*.*smdd_object@0x%x(open_count=%d, valid=%x, cltime=%u, flags=%x)" % \
         (depth, depth, ruler, Addr(obj), obj.mod_count, obj.mod_valid,
         obj.mod_cltime, obj.mod_flags)
+
+def mdt_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct mdt_object', Addr(o) - member_offset('struct mdt_object', 'mot_obj'))
+    print "%*.*smdt_object@0x%x(ioepoch=%u, flags=%x, epochcount=%d, writecount-%d" % \
+         (depth, depth, ruler, Addr(obj), obj.mot_ioepoch, obj.mot_flags,
+         obj.mot_ioepoch_count, obj.mot_writecount)
+
+def mgs_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct mgs_object', Addr(o) - member_offset('struct mgs_object', 'mgo_obj.do_lu'))
+    print "%*.*smgs_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
+def echo_object_print(o, depth=0, ruler=RULER):
+    clo = readSU('struct cl_object', Addr(o) - member_offset('struct cl_object', 'co_lu'))
+    obj = readSU('struct echo_object', Addr(clo) - member_offset('struct echo_object', 'eo_cl'))
+    print "%*.*sechocl_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
def ofd_object_print(o, depth=0, ruler=RULER):
    """Print an OFD object's address (printed directly from the lu_object)."""
    print "%*.*sofd_object@0x%x" % (depth, depth, ruler, Addr(o))
+
+def osc_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osc_object', Addr(o) - member_offset('struct osc_object', 'oo_cl.co_lu'))
+    oinfo = obj.oo_oinfo
+    ar = oinfo.loi_ar
+    ostid = oinfo.loi_oi
+    ostid_seq = 0
+    ostid_id = 0
+    if ostid.oi.oi_seq == FID_SEQ_OST_MDT0:
+        ostid_seq = FID_SEQ_OST_MDT0
+        ostid_id = ostid.oi.oi_id & IDIF_OID_MASK
+    elif ostid.oi.oi_seq == FID_SEQ_LOV_DEFAULT:
+        ostid_seq = FID_SEQ_LOV_DEFAULT
+        ostid_id = ostid.oi.oi_id
+    elif ostid.oi_fid.f_seq >= FID_SEQ_IDIF and \
+        ostid.oi_fid.f_seq <= FID_SEQ_IDIF_MAX:
+        ostid_seq = FID_SEQ_OST_MDT0
+        ostid_id = ((0 << 48) | (ostid.oi_fid.f_seq & 0xffff << 32) | (ostid.oi_fid.f_oid))
+    else:
+        ostid_seq = ostid.oi_fid.f_seq
+        ostid_id = ostid.oi_fid.f_oid
+    print "%*.*sid: 0x%x:%u idx: %d gen: %d kms_valid: %u kms: %u rc: %d force_sync: %d min_xid: %u" % \
+         (depth, depth, ruler, ostid_seq, ostid_id,
+         oinfo.loi_ost_idx, oinfo.loi_ost_gen, oinfo.loi_kms_valid,
+         oinfo.loi_kms, ar.ar_rc, ar.ar_force_sync, ar.ar_min_xid)
+
+def osd_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osd_object', Addr(o) - member_offset('struct osd_object', 'oo_dt.do_lu'))
+    print "%*.*sosd_object@0x%x" % (depth, depth, ruler, Addr(obj))
+
+def osp_object_print(o, depth=0, ruler=RULER):
+    obj = readSU('struct osp_object', Addr(o) - member_offset('struct osp_object', 'opo_obj.do_lu'))
+    print "%*.*sosp_object@0x%x" % (depth, depth, ruler, Addr(o))
+
# Device-type name (lu_device_type::ldt_name) -> per-layer print function.
OBJ_PRINT = {
            "vvp":vvp_object_print,
            "lod":lod_object_print,
            "lov":lov_object_print,
            "lovsub":lovsub_object_print,
            "mdd":mdd_object_print,
            "mdt":mdt_object_print,
            "mgs":mgs_object_print,
            "echo":echo_object_print,
            "ofd":ofd_object_print,
            "osc":osc_object_print,
            "osd":osd_object_print,
            "osp":osp_object_print}
+
def print_object_from_name(name, obj, depth=0, ruler=RULER):
    """Invoke the per-layer printer registered for the device-type name.

    Unknown names are ignored (the original 'OBJ_PRINT[name]' raised
    KeyError before its truthiness could ever be tested).
    """
    printer = OBJ_PRINT.get(name)
    if printer:
        printer(obj, depth, ruler)
+
def print_object(pos, depth=0, ruler=RULER):
    """Print one lu_object layer: its device-type name, then layer details."""
    print "%*.*s%s@0x%x" % (depth, depth, ruler, pos.lo_dev.ld_type.ldt_name, Addr(pos))
    # NOTE(review): tests that the layer has a loo_object_print op, but then
    # dispatches by device-type name rather than calling the op — confirm
    # this matches all in-tree layer types.
    if (pos.lo_ops.loo_object_print):
        print_object_from_name(pos.lo_dev.ld_type.ldt_name, pos, depth+DEPTH, ruler)
+
def print_object_from_header(loh, depth=0, ruler=RULER):
    """Print an lu_object_header (flags, refcount, FID, lru/exist markers)
    followed by every layer object linked on loh_layers."""
    head = loh.loh_layers
    # "lru" is shown when the header sits on an LRU list; "exist" when the
    # LOHA_EXISTS attribute bit is set.
    empty = "" if (loh.loh_lru.next == loh.loh_lru) else " lru"
    exists = " exist" if loh.loh_attr & LOHA_EXISTS else ""
    print "%*.*slu_object_header@0x%x[fl:0x%x, rc:%d, [0x%x:0x%x:0x%x]%s%s] {" % \
         (depth, depth, ruler,
         Addr(loh),
         loh.loh_flags,
         loh.loh_ref.counter,
         loh.loh_fid.f_seq,
         loh.loh_fid.f_oid,
         loh.loh_fid.f_ver,
         empty,
         exists)
    for obj in readSUListFromHead(head, 'lo_linkage', 'struct lu_object'):
        print_object(obj, depth+DEPTH, ruler)
    print "%*.*s} header@0x%x\n" % (depth, depth, ruler, Addr(loh))
+
+if __name__ == "__main__":
+    description = "Prints contents of an lu_object"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("lu_object_header", default=False, type=toint,
+        help="address of an lu_object_header")
+
+    args = parser.parse_args()
+    loh = readSU('struct lu_object_header', args.lu_object_header)
+    print_object_from_header(loh)
diff --git a/contrib/debug_tools/epython_scripts/lustrelib.py b/contrib/debug_tools/epython_scripts/lustrelib.py
new file mode 100644 (file)
index 0000000..879282f
--- /dev/null
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+from pykdump.API import *
+
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Library of helper functions for Lustre scripts
+"""
+# hide this file from the output of 'epython scripts'.
+interactive = False
+
+"""Lustre Hash Table Utilities"""
+
CFS_HASH_ADD_TAIL = 0x10    # 1 << 4
CFS_HASH_DEPTH = 0x1000     # 1 << 12
CFS_HASH_TYPE_MASK = CFS_HASH_ADD_TAIL | CFS_HASH_DEPTH

# The four possible head-layout selectors formed from the two bits above.
HH = 0
HD = CFS_HASH_DEPTH
DH = CFS_HASH_ADD_TAIL
DD = CFS_HASH_DEPTH | CFS_HASH_ADD_TAIL

def hs_get_type(hsh):
    """Return the layout-type bits (HH/HD/DH/DD) from hsh.hs_flags."""
    return hsh.hs_flags & CFS_HASH_TYPE_MASK
+
def enum(**enums):
    """Build a lightweight enum: a class whose attributes are the kwargs."""
    return type('Enum', (), enums)

# Indices into the HS_INFO rows: 0 = head struct name, 1 = head list member.
HS_INFO_FLDS = enum(dtfld=0, hdfld=1,)
+
# The type to struct changes and jobid_hash addition were released
# in the same version, so use existence of jobid_hash as a substitute
# for cfs_hash type changes.
# Each row: [head struct/typedef name, name of its hlist_head member].
if symbol_exists('jobid_hash'):    # 2.11 and later
    HS_INFO = {
        HH: ['struct cfs_hash_head', 'hh_head'],
        HD: ['struct cfs_hash_head_dep', 'hd_head'],
        DH: ['struct cfs_hash_dhead', 'dh_head'],
        DD: ['struct cfs_hash_dhead_dep', 'dd_head'],
    }
else:
    HS_INFO = {
        HH: ['cfs_hash_head_t', 'hh_head'],
        HD: ['cfs_hash_head_dep_t', 'hd_head'],
        DH: ['cfs_hash_dhead_t', 'dh_head'],
        DD: ['cfs_hash_dhead_dep_t', 'dd_head'],
    }
+
def CFS_HASH_NBKT(hsh):
    """Number of buckets in the hash: 2^(hs_cur_bits - hs_bkt_bits)."""
    return 1 << (hsh.hs_cur_bits - hsh.hs_bkt_bits)

def CFS_HASH_BKT_NHLIST(hsh):
    """Number of hlist heads per bucket: 2^hs_bkt_bits."""
    return 1 << hsh.hs_bkt_bits
+
def cfs_hash_head_size(hsh):
    """Size in bytes of one hlist head struct for this hash's layout type."""
    size = getSizeOf(HS_INFO[hs_get_type(hsh)][HS_INFO_FLDS.dtfld])
    return size

def cfs_hash_bucket_size(hsh):
    """Total size in bytes of one cfs_hash_bucket: header offset plus all
    per-bucket hlist heads plus hs_extra_bytes."""
    size = member_offset('struct cfs_hash_bucket', 'hsb_head')
    size += cfs_hash_head_size(hsh) * CFS_HASH_BKT_NHLIST(hsh) + \
            hsh.hs_extra_bytes
    return size

def cfs_hash_hhead(hsh, bd_bkt, bd_offset):
    """Return the hlist_head at index bd_offset within bucket bd_bkt."""
    info = HS_INFO[hs_get_type(hsh)]
    bkt = Addr(bd_bkt) + member_offset('struct cfs_hash_bucket', 'hsb_head')
    head = readSU(info[HS_INFO_FLDS.dtfld], bkt)
    offset = member_offset(info[HS_INFO_FLDS.dtfld], info[HS_INFO_FLDS.hdfld])
    return readSU('struct hlist_head', (Addr(head[bd_offset]) + offset))
+
def cfs_hash_get_buckets(hsh):
    """Return the non-NULL cfs_hash_bucket pointers of the hash."""
    return [hsh.hs_buckets[idx]
            for idx in range(CFS_HASH_NBKT(hsh))
            if hsh.hs_buckets[idx]]
+
def cfs_hash_get_hlist_nodes(hsh, bd_bkt, bd_offset):
    """Collect the hlist nodes chained from one hlist head of one bucket.

    NOTE(review): the 'hnode != hlist' guard compares a node against the
    head object itself — looks like a defensive stop for corrupt or
    self-referencing lists; confirm against cfs_hash list layout.
    """
    hlist = readSU('struct hlist_head', cfs_hash_hhead(hsh, bd_bkt, bd_offset))
    hnodes = []
    hnode = hlist.first
    while (hnode and hnode != hlist):
        hnodes.append(hnode)
        hnode = hnode.next
    return hnodes
+
def cfs_hash_get_nodes(hsh):
    """Return every hlist node in the hash, across all buckets and chains."""
    return [node
            for bkt in cfs_hash_get_buckets(hsh)
            for off in range(CFS_HASH_BKT_NHLIST(hsh))
            for node in cfs_hash_get_hlist_nodes(hsh, bkt, off)]
+
+"""nid"""
+
def LNET_NIDADDR(nid):
    """Low 32 bits of an lnet_nid: the network address."""
    return nid & 0xffffffff

def LNET_NIDNET(nid):
    """High 32 bits of an lnet_nid: the network id."""
    return (nid >> 32) & 0xffffffff

def LNET_NETTYP(net):
    """High 16 bits of a network id: the LND type code."""
    return (net >> 16) & 0xffff

def LNET_NETNUM(net):
    """Low 16 bits of a network id: the network number."""
    return net & 0xffff

LNET_NID_ANY = 0xffffffffffffffff
LNET_NIDSTR_SIZE = 32

# LND type codes understood by nid2str().
O2IBLND = 5
PTLLND = 4
GNILND = 13

LP_POISON = 0x5a5a5a5a5a5a5a5a

def nid2str(nid):
    """Format an lnet_nid as e.g. '1.2.3.4@o2ib3'; '' for unknown LNDs."""
    if nid == LNET_NID_ANY:
        return 'LNET_NID_ANY'
    addr = LNET_NIDADDR(nid)
    net = LNET_NIDNET(nid)
    lnd = LNET_NETTYP(net)
    nnum = LNET_NETNUM(net)
    if lnd == O2IBLND:
        octets = ((addr >> 24) & 0xff, (addr >> 16) & 0xff,
                  (addr >> 8) & 0xff, addr & 0xff)
        s = "%d.%d.%d.%d@o2ib" % octets
    elif lnd == PTLLND:
        s = "%d@ptl" % addr
    elif lnd == GNILND:
        s = "%d@gni" % addr
    else:
        # Unknown LND: empty string, and suppress the network-number suffix.
        s = ""
        nnum = 0
    if nnum != 0:
        s = "%s%d" % (s, nnum)
    return s
+
def obd2nidstr(obd):
    """Return the peer NID string of obd's import ('LNET_NID_ANY' if none).

    Guards against NULL, all-ones, and slab-poisoned cl_import pointers
    before dereferencing, and only trusts imports that point back at obd.
    """
    obd_import = readSU('struct obd_import', obd.u.cli.cl_import)
    nid = LNET_NID_ANY
    imp_invalid = 1
    if obd_import and obd_import != 0xffffffffffffffff and \
       obd_import != LP_POISON:
        imp_invalid = obd_import.imp_invalid

    if not imp_invalid and obd_import.imp_connection:
        if Addr(obd_import.imp_obd) == Addr(obd):
            nid = obd_import.imp_connection.c_peer.nid
    return nid2str(nid)
+
+"""Miscellaneous"""
+
def obd2str(obd, partitions=2):
    """Return the first `partitions` dash-separated pieces of obd_name."""
    pieces = obd.obd_name.split('-', partitions)[:partitions]
    return '-'.join(pieces)
+
def list_empty(head):
    """True if a kernel list_head is empty (next points back to the head)."""
    return head.next == head
+
+"""Red-Black"""
+
def rb_first(root):
    """Return the leftmost (first) node of an rbtree, or None if empty."""
    node = root.rb_node
    if not node:
        return None
    while node.rb_left:
        node = node.rb_left
    return node
+
def rb_last(root):
    """Return the rightmost (last) node of an rbtree, or None if empty."""
    node = root.rb_node
    if not node:
        return None
    while node.rb_right:
        node = node.rb_right
    return node
+
def rb_parent_color(node):
    """Read the packed __rb_parent_color word (parent pointer | color bit)."""
    return readU64(Addr(node))

def rb_parent(node):
    """Return the parent rb_node, masking off the low color/flag bits."""
    addr = rb_parent_color(node) & ~3
    return readSU('struct rb_node', addr)

#The color of the rb_node; 0 denotes red, 1 denotes black
def rb_color(node):
    return rb_parent_color(node) & 1
+
def rb_next(node):
    """Return the in-order successor of node, or None at the last node."""
    # rb_parent(node) == node is the kernel convention for an empty/unlinked
    # node (RB_EMPTY_NODE) — presumably; TODO confirm for this kernel.
    if rb_parent(node) == node:
        return None
    #right child exists
    if node.rb_right:
        node = node.rb_right
        while(node.rb_left):
            node = node.rb_left
        return node
    #no right child
    parent = rb_parent(node)
    while(parent and node == parent.rb_right):
        node = parent
        parent = rb_parent(node)
    return parent
+
def rb_prev(node):
    """Return the in-order predecessor of node, or None at the first node."""
    if rb_parent(node) == node:
        return None
    #left child exists: predecessor is rightmost node of the left subtree
    if node.rb_left:
        node = node.rb_left
        while(node.rb_right):
            node = node.rb_right
        return node
    #no left child: climb while we are our parent's left child.
    # Fixed: the guard must test 'parent' itself (mirroring rb_next and the
    # kernel's rb_prev); the old 'parent.rb_left' test crashed with an
    # AttributeError once parent became NULL at the tree root.
    parent = rb_parent(node)
    while(parent and node == parent.rb_left):
        node = parent
        parent = rb_parent(node)
    return parent
+
+"""LNET Globals"""
+the_lnet = readSymbol('the_lnet')
+
+tmpsiz = 256
+
+LNET_CPT_BITS = the_lnet.ln_cpt_bits
+LNET_PROC_CPT_BITS = LNET_CPT_BITS + 1
+LNET_LOFFT_BITS = getSizeOf('loff_t') * 8
+LNET_PROC_VER_BITS = max((min(LNET_LOFFT_BITS, 64) / 4), 8)
+LNET_PROC_HASH_BITS = 9
+LNET_PROC_HOFF_BITS = LNET_LOFFT_BITS - LNET_PROC_CPT_BITS - LNET_PROC_VER_BITS - LNET_PROC_HASH_BITS -1
+LNET_PROC_HPOS_BITS = LNET_PROC_HASH_BITS + LNET_PROC_HOFF_BITS
+LNET_PROC_VPOS_BITS = LNET_PROC_HPOS_BITS + LNET_PROC_VER_BITS
+
+LNET_PROC_CPT_MASK = (1 << LNET_PROC_CPT_BITS) - 1
+LNET_PROC_VER_MASK = (1 << LNET_PROC_VER_BITS) - 1
+LNET_PROC_HASH_MASK = (1 << LNET_PROC_HASH_BITS) - 1
+LNET_PROC_HOFF_MASK = (1 << LNET_PROC_HASH_BITS) - 1
+
+LNET_PING_FEAT_NI_STATUS = 1 << 1
+
+HZ = sys_info.HZ
diff --git a/contrib/debug_tools/epython_scripts/obd_devs.py b/contrib/debug_tools/epython_scripts/obd_devs.py
new file mode 100644 (file)
index 0000000..6d88481
--- /dev/null
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Utility to display obd_devices
+"""
+
+from pykdump.API import *
+import argparse
+
+from crashlib.input import toint
+import lustrelib as ll
+import rpc_stats as rs
+
+description_short = "Displays the contents of global 'obd_devs'"
+
+SEP_SIZE = 152
+def print_separator(count):
+    s=""
+    for idx in xrange(count):
+        s += "="
+    print s
+
def print_header():
    """Print the obd_devs table column headings and a separator rule."""
    print "%-19s %-22s \t%-22s %-19s %-19s %-12s %-10s %-7s %-10s" % \
         ("obd_device",
          "obd_name",
          "ip_address",
          "client_obd",
          "obd_import",
          "imp_state",
          "ish_time",
          "index",
          "conn_cnt")
    print_separator(SEP_SIZE)
+
# obd_import::imp_state values -> display names (enum lustre_imp_state).
IMP_STATE = {
        1:  "CLOSED",
        2:  "NEW",
        3:  "DISCON",
        4:  "CONNECTING",
        5:  "REPLAY",
        6:  "REPLAY_LOCKS",
        7:  "REPLAY_WAIT",
        8:  "RECOVER",
        9:  "FULL",
       10:  "EVICTED",
       11:  "IDLE"
}
+
+
+def print_one_device(obd, stats_flag):
+    try:
+        nid = ll.obd2nidstr(obd)
+    except Exception, e:
+        try:
+            print "0x%-17x %-22s" % (Addr(obd), ll.obd2str(obd))
+        except Exception, e:
+            return 1
+        return 0
+
+    impstate = "--"
+    ish_time = 0
+    index=-1
+    connect_cnt = 0
+    inflight=0
+    if obd.u.cli.cl_import:
+          impstate=IMP_STATE.get(obd.u.cli.cl_import.imp_state)
+          index=obd.u.cli.cl_import.imp_state_hist_idx - 1
+          if index > 0 and index < 16:
+               ish_time=obd.u.cli.cl_import.imp_state_hist[index].ish_time
+         inflight=obd.u.cli.cl_import.imp_inflight.counter
+          connect_cnt = obd.u.cli.cl_import.imp_conn_cnt
+
+    print "0x%-17x %-22s\t%-22s\t 0x%-17x 0x%-17x %-10s %-10d %5d %5d" % \
+          (Addr(obd),
+          ll.obd2str(obd),
+          nid,
+          Addr(obd.u.cli),
+          Addr(obd.u.cli.cl_import),
+          impstate,
+          ish_time,
+          index,
+          connect_cnt)
+    if stats_flag:
+        print
+        rs.osc_rpc_stats_seq_show(Addr(obd.u.cli))
+        print_separator(SEP_SIZE)
+    return 0
+
def print_devices(devices, stats_flag):
    """Print the header then each device; stop at the first NULL table slot."""
    print_header()
    for obd in devices:
        if Addr(obd) == 0:
            break
        print_one_device(obd, stats_flag)
    print_separator(SEP_SIZE)
+
def obd_devs(args):
    """Print the global obd_devs table, or a single device if one was given."""
    if args.obd_device:
        devs = [readSU('struct obd_device', args.obd_device)]
    else:
        devs = readSymbol('obd_devs')
    print_devices(devs, args.stats_flag)
+
+if __name__ == "__main__":
+    description = "Displays the contents of global 'obd_devs'"
+    parser = argparse.ArgumentParser(description=description)
+    parser.add_argument("obd_device", nargs="?", default = [], type=toint,
+        help="print obd_device at argument address")
+    parser.add_argument("-r", dest="stats_flag", action="count",
+        help="print the rpc_stats sequence for each client_obd")
+    args = parser.parse_args()
+    obd_devs(args)
diff --git a/contrib/debug_tools/epython_scripts/ptlrpc.py b/contrib/debug_tools/epython_scripts/ptlrpc.py
new file mode 100644 (file)
index 0000000..518ef3e
--- /dev/null
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2015-2019 Cray Inc.  All Rights Reserved
+Dumps the Lustre RPC queues for all ptlrpcd_XX threads.
+"""
+
+from pykdump.API import *
+import sys
+import argparse
+import os
+
+import lustrelib as ll
+from crashlib.input import toint
+
+from traceback import print_exc
+
+description_short = "Displays the RPC queues of the Lustre ptlrpcd daemons"
+
def print_separator(count):
    """Print a horizontal rule made of *count* '=' characters."""
    print("=" * count)
+
def print_title(title):
    """Print the column headings for the RPC listing.

    A non-empty *title* prints the title and header row first; an
    empty title prints only the separator (used as a footer).
    """
    if title:
        print("\n" + title)
        print("%-14s %-6s %-19s %-18s %-19s %-4s %-14s %-4s %-22s %-19s"
               % ("thread", "pid", "ptlrpc_request", "xid", "nid", "opc",
                  "phase:flags", "R:W", "sent/deadline", "ptlrpc_body"))
    print_separator(148)
+
def enum(**enums):
    """Build an ad-hoc enumeration: enum(A=1, B=2) returns a class
    exposing A and B as attributes."""
    members = dict(enums)
    return type('Enum', (), members)
+
# Identifiers for the list_head members that can link a ptlrpc_request
# into a queue; values are keys into RQ_LIST_LNKS and therefore MUST be
# distinct.  Fix: timed_list previously shared the value 5 with
# unreplied_list, which made the two RQ_LIST_LNKS dict entries collide so
# selecting 'unreplied_list' silently used the sr_timed_list offsets.
REQ_Q = enum(rq_list=1, replay_list=2, set_chain=3, ctx_chain=4,
             unreplied_list=5, timed_list=6, exp_list=7, hist_list=8)
+
# Maps each REQ_Q queue id to [containing struct, link member, union member].
# The link field lives inside the named struct, which is embedded in
# ptlrpc_request through the given union member (rq_cli for client-side
# requests, rq_srv for server-side ones).  Correct lookup relies on all
# REQ_Q values being distinct.
RQ_LIST_LNKS = {
    REQ_Q.rq_list:        ['struct ptlrpc_request', 'rq_list', 'rq_type'],
    REQ_Q.replay_list:    ['struct ptlrpc_request', 'rq_replay_list', 'rq_type'],
    REQ_Q.set_chain:      ['struct ptlrpc_cli_req', 'cr_set_chain', 'rq_cli'],
    REQ_Q.ctx_chain:      ['struct ptlrpc_cli_req', 'cr_ctx_chain', 'rq_cli'],
    REQ_Q.unreplied_list: ['struct ptlrpc_cli_req', 'cr_unreplied_list', 'rq_cli'],
    REQ_Q.timed_list:     ['struct ptlrpc_srv_req', 'sr_timed_list', 'rq_srv'],
    REQ_Q.exp_list:       ['struct ptlrpc_srv_req', 'sr_exp_list', 'rq_srv'],
    REQ_Q.hist_list:      ['struct ptlrpc_srv_req', 'sr_hist_list', 'rq_srv']
}

# Indices into the RQ_LIST_LNKS value lists above.
STRUCT_IDX = 0
MEMBER_IDX = 1
UNION_IDX = 2
+
def size_round(val):
    """Round *val* up to the next multiple of 8 (lustre_msg alignment)."""
    return (val + 7) & ~7
+
# Magic value identifying a v2 lustre message header.
LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3

def get_ptlrpc_body(req):
    """Locate the ptlrpc_body_v2 inside a request's rq_reqmsg.

    Returns a struct ptlrpc_body_v2 handle, or None when the message
    is missing, has the wrong magic, or is too small to hold a body.
    (Fixes: removed a leftover debug 'print "addr"' and a redundant
    'msg == None' test.)
    """
    msg = req.rq_reqmsg
    if not msg:
        return None

    if msg.lm_magic != LUSTRE_MSG_MAGIC_V2:
        return None

    bufcount = msg.lm_bufcount
    if bufcount < 1:
        return None

    # The first message buffer always holds the ptlrpc_body.
    buflen = msg.lm_buflens[0]
    if buflen < getSizeOf('struct ptlrpc_body_v2'):
        return None

    # The body starts after the variable-length lm_buflens[] array,
    # rounded up to the message's 8-byte alignment.
    offset = member_offset('struct lustre_msg_v2', 'lm_buflens')
    offset += getSizeOf("unsigned int") * bufcount
    offset = size_round(offset)
    addr = Addr(msg) + offset
    if addr == 0:
        return None
    return readSU('struct ptlrpc_body_v2', addr)
+
# rq_phase magic values.
RQ_PHASE_NEW = 0xebc0de00
RQ_PHASE_RPC = 0xebc0de01
RQ_PHASE_BULK = 0xebc0de02
RQ_PHASE_INTERPRET = 0xebc0de03
RQ_PHASE_COMPLETE = 0xebc0de04
RQ_PHASE_UNREG_RPC = 0xebc0de05
RQ_PHASE_UNREG_BULK = 0xebc0de06
RQ_PHASE_UNDEFINED = 0xebc0de07

# Short display names for each request phase.
PHASES = {
    RQ_PHASE_NEW: "NEW",
    RQ_PHASE_RPC: "RPC",
    RQ_PHASE_BULK: "BULK",
    RQ_PHASE_INTERPRET: "NtrPrt",
    RQ_PHASE_COMPLETE: "COMP",
    RQ_PHASE_UNREG_RPC: "UNREG",
    RQ_PHASE_UNREG_BULK: "UNBULK",
    RQ_PHASE_UNDEFINED: "UNDEF"
}

# Legend printed with --help; letters match get_phase_flags() below.
# Fix: the "C" flag is driven by rq_wait_ctx, not rq_wait.
FLAG_LEGEND = "\nFlag Legend:\n\n" + \
         "I - rq_intr\tR - rq_replied\t\tE - rq_err\te - rq_net_err\tX - rq_timedout\tS - rq_resend\t\tT - rq_restart\n" + \
         "P - rq_replay\tN - rq_no_resend\tW - rq_waiting\tC - rq_wait_ctx\tH - rq_hp\tM - rq_committed\tq - rq_req_unlinked\tu - rq_reply_unlinked\n"

def get_phase_flags(req):
    """Return "<phase>:<flags>" for *req*.

    The phase value is masked to 32 bits before lookup; unknown phases
    render as "?<value>".  One legend letter is appended for every set
    rq_* flag bit.
    """
    phase = req.rq_phase
    phasestr = PHASES.get(phase & 0xffffffff, "?%d" % phase)
    flag_bits = [
        ("I", req.rq_intr), ("R", req.rq_replied), ("E", req.rq_err),
        ("e", req.rq_net_err), ("X", req.rq_timedout), ("S", req.rq_resend),
        ("T", req.rq_restart), ("P", req.rq_replay), ("N", req.rq_no_resend),
        ("W", req.rq_waiting), ("C", req.rq_wait_ctx), ("H", req.rq_hp),
        ("M", req.rq_committed), ("q", req.rq_req_unlinked),
        ("u", req.rq_reply_unlinked),
    ]
    return "%s:%s" % (phasestr, "".join(c for c, bit in flag_bits if bit))
+
# Kernel slab poison pattern; an import pointer with this value is stale.
LP_POISON = 0x5a5a5a5a5a5a5a5a

def print_one_request(sthread, req):
    """Print one formatted line describing ptlrpc_request *req*.

    sthread is the owning ptlrpcd thread name ('' for a standalone
    request).  Fields that cannot be resolved (no ptlrpc body, invalid
    or poisoned import) fall back to -1 / placeholder text.
    NOTE(review): pb_status is printed in the column the header labels
    "pid", and nid is computed below but never printed (obd_name lands
    under the "nid" column) -- confirm which is intended.
    """
    pb = get_ptlrpc_body(req)
    status = -1
    opc = -1
    pbaddr = -1
    if pb:
        status = pb.pb_status
        opc = pb.pb_opc
        pbaddr = Addr(pb)

    # Guard against NULL, all-ones and slab-poisoned import pointers
    # before dereferencing.
    imp_invalid = 1
    nid = "LNET_NID_ANY"
    obd_name = "Invalid Import"
    if req.rq_import and req.rq_import != 0xffffffffffffffff and \
       req.rq_import != LP_POISON:
        imp_invalid = req.rq_import.imp_invalid
        obd_name = ll.obd2str(req.rq_import.imp_obd)

    if not imp_invalid and req.rq_import.imp_connection:
        nid = ll.nid2str(req.rq_import.imp_connection.c_peer.nid)
    brw = "%1d:%1d" % (req.rq_bulk_read, req.rq_bulk_write)
    rq_sent_dl = "%d/%d" % (req.rq_sent, req.rq_deadline)
    print "%-14s %-6s 0x%-17x %-18d %-19s %-4d %-14s %-4s %-22s 0x%-17x" % \
            (sthread,
            status,
            Addr(req),
            req.rq_xid,
            obd_name,
            opc,
            get_phase_flags(req),
            brw,
            rq_sent_dl,
            pbaddr)
+
def print_request_list(sthread, lhdr, loffset):
    """Walk the circular list headed at *lhdr*, printing each request.

    loffset is the byte offset of the link field within ptlrpc_request;
    subtracting it converts a list_head pointer back to its request.
    Returns 0 on success, 1 if the walk raised (traceback is printed).
    """
    try:
        for reqlnk in readStructNext(lhdr, 'next'):
            # Stop when the circular list wraps back to its head.
            if reqlnk.next == Addr(lhdr):
                break
            req = readSU('struct ptlrpc_request', reqlnk.next-loffset)
            print_one_request(sthread, req)

    except Exception, e:
        print_exc()
        return 1
    return 0
+
def get_linkfld_offset(lfld):
    """Offset of list link field *lfld* from the start of ptlrpc_request.

    The link members live inside structs embedded in a union of
    ptlrpc_request, so the result is the union member's offset plus the
    link member's offset within its containing struct.
    """
    entry = RQ_LIST_LNKS[lfld]
    union_off = member_offset('struct ptlrpc_request', entry[UNION_IDX])
    member_off = member_offset(entry[STRUCT_IDX], entry[MEMBER_IDX])
    return union_off + member_off
+
def foreach_ptlrpcd_ctl(callback, *args):
    """Invoke callback(pd, *args) for every ptlrpcd_ctl in the system.

    Visits each thread of every ptlrpcd (global 'ptlrpcds' array of
    'ptlrpcds_num' entries) and finally the dedicated receive daemon.
    """
    pinfo_rpcds = readSymbol('ptlrpcds')
    pinfo_count = readSymbol('ptlrpcds_num')

    for idx in xrange(pinfo_count):
        ptlrpcd = pinfo_rpcds[idx]
        for jdx in xrange(ptlrpcd.pd_nthreads):
            pd = ptlrpcd.pd_threads[jdx]
            callback(pd, *args)
    # ptlrpcd_rcv is a separate symbol, not part of the ptlrpcds array.
    pd = readSymbol('ptlrpcd_rcv')
    callback(pd, *args)
+
def get_daemon_listhdrs(pd, sent_rpcs, pend_rpcs):
    """Append [thread-name, list_head] pairs for pd's sent and pending queues."""
    name = pd.pc_name
    pcset = pd.pc_set
    sent_rpcs.append([name, pcset.set_requests])
    pend_rpcs.append([name, pcset.set_new_requests])
+
def dump_list_of_lists(rpc_list, loffset):
    """Print every request on each (thread-name, list_head) pair."""
    for sthread, lhdr in rpc_list:
        print_request_list(sthread, lhdr, loffset)
+
def dump_daemon_rpclists():
    """Collect and print the sent and pending RPC queues of every ptlrpcd."""
    sent_rpcs = []
    pend_rpcs = []
    foreach_ptlrpcd_ctl(get_daemon_listhdrs, sent_rpcs, pend_rpcs)

    # Requests hang off the pc_set lists via the cr_set_chain link.
    loffset = get_linkfld_offset(REQ_Q.set_chain)

    for title, rpcs in (
            ("Sent RPCS: ptlrpc_request_set.set_requests->", sent_rpcs),
            ("Pending RPCS: ptlrpc_request_set.set_new_requests->", pend_rpcs)):
        print_title(title)
        dump_list_of_lists(rpcs, loffset)
    print_title('')
+
def print_overview_entry(pd):
    """Print one ptlrpcd_ctl address with its request-set pointer."""
    label = "%s:" % pd.pc_name
    print("%-14s  ptlrpcd_ctl 0x%x   ptlrpc_request_set 0x%x" %
        (label, Addr(pd), pd.pc_set))
+
def dump_overview():
    """List every ptlrpcd_ctl together with its ptlrpc_request_set."""
    foreach_ptlrpcd_ctl(print_overview_entry)
+
def print_pcset_stats(pd):
    """Print refcount/new/remaining for pd's request set, skipping idle sets."""
    pcset = pd.pc_set
    # Only show sets that currently have work queued or outstanding.
    if pcset.set_new_count.counter == 0 and \
        pcset.set_remaining.counter == 0:
        return
    label = "%s:" % pd.pc_name
    print("%-13s 0x%-18x %-4d %-4d %-6d" %
        (label, Addr(pcset),
        pcset.set_refcount.counter,
        pcset.set_new_count.counter,
        pcset.set_remaining.counter))
+
def dump_pcsets():
    """Print a table of rpc counts for every ptlrpcd request set."""
    print('%-14s %-19s %-4s %-4s %-6s' %
        ("thread", "ptlrpc_request_set", "ref", "new", "remain"))
    print_separator(52)
    foreach_ptlrpcd_ctl(print_pcset_stats)
+
def dump_one_rpc(addr):
    """Print the single ptlrpc_request located at *addr*."""
    print_title("Request")
    print_one_request('', readSU('struct ptlrpc_request', addr))
+
def dump_one_rpclist(addr, link_fld):
    """Print the request list headed at *addr*, linked by member *link_fld*."""
    lhdr = readSU('struct list_head', addr)
    # Map the link field name to its REQ_Q id, then to a byte offset.
    loffset = get_linkfld_offset(vars(REQ_Q)[link_fld])

    print_title("Request list at %s" % lhdr)
    print_request_list('', lhdr, loffset)
+
def dump_rpcs_cmd(args):
    """Dispatch on the parsed command line.

    Priority: -o overview, then -s set stats, then a specific address
    (a list_head when -l names the link field, else a single request);
    with no arguments, dump all ptlrpcd queues.
    """
    if args.oflag:
        dump_overview()
        return
    if args.sflag:
        dump_pcsets()
        return
    if args.rpc_addr:
        # nargs="?" stores a single toint-converted scalar, not a list,
        # so indexing with [0] (as the old code did) raised TypeError.
        if args.link_fld:
            dump_one_rpclist(args.rpc_addr, args.link_fld)
        else:
            dump_one_rpc(args.rpc_addr)
        return
    dump_daemon_rpclists()
+
# Command-line entry point.
if __name__ == "__main__":
    description = "" +\
        "Displays lists of Lustre RPC requests. If no arguments are \n" +\
        "specified, all rpcs in the sent and pending queues of the \n" +\
        "ptlrpcd daemons are printed. If an address is specified, it \n" +\
        "must be a pointer to either a ptlrpc_request or a list_head \n" +\
        "struct. If the addr is a list_head, then a link_field must \n" +\
        "also be provided. The link_field identifies the member of \n" +\
        "the ptlrpc_request struct used to link the list together."

    # RawDescriptionHelpFormatter preserves the hand-wrapped description
    # and the FLAG_LEGEND epilog verbatim in --help output.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description, epilog=FLAG_LEGEND)
    parser.add_argument("-o", dest="oflag", action="store_true",
        help="print overview of ptlrpcd_XX threads with ptlrpcd_ctl " + \
            "structs and the associated pc_set field")
    parser.add_argument("-s", dest="sflag", action="store_true",
        help="print rpc counts per ptlrpc_request_set")
    # Choices mirror the REQ_Q enum member names.
    parser.add_argument("-l", dest="link_fld", default="",
        choices=['rq_list', 'replay_list', 'set_chain', 'ctx_chain',
                 'unreplied_list', 'timed_list', 'exp_list', 'hist_list'],
        help="name of link field in ptlrpc_request for list headed by addr")
    parser.add_argument("rpc_addr", nargs="?", default=[], type=toint,
        help="address of either single ptlrpc_request or list_head; list_head requires a -l argument")
    args = parser.parse_args()

    dump_rpcs_cmd(args)
diff --git a/contrib/debug_tools/epython_scripts/rpc_opcode.py b/contrib/debug_tools/epython_scripts/rpc_opcode.py
new file mode 100644 (file)
index 0000000..6aa8d58
--- /dev/null
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+"""
+Copyright 2019 Cray Inc.  All Rights Reserved
+Utility to maps numeric opcode to string identifier
+"""
+
+from pykdump.API import *
+import argparse
+
+description_short = "Maps Lustre rpc opcodes string identifiers."
+
# Numeric RPC opcode -> symbolic name, mirroring the enums in
# lustre_idl.h (OST, MDS, LDLM, MGS, OBD, LLOG, QUOTA, SEQ, SEC, FLD,
# OUT_UPDATE and LFSCK opcode ranges).
opcodes = {
    0:  'OST_REPLY',
    1:  'OST_GETATTR',
    2:  'OST_SETATTR',
    3:  'OST_READ',
    4:  'OST_WRITE',
    5:  'OST_CREATE',
    6:  'OST_DESTROY',
    7:  'OST_GET_INFO',
    8:  'OST_CONNECT',
    9:  'OST_DISCONNECT',
    10: 'OST_PUNCH',
    11: 'OST_OPEN',
    12: 'OST_CLOSE',
    13: 'OST_STATFS',
    16: 'OST_SYNC',
    17: 'OST_SET_INFO',
    18: 'OST_QUOTACHECK',
    19: 'OST_QUOTACTL',
    20: 'OST_QUOTA_ADJUST_QUNIT',  # not used since 2.4
    21: 'OST_LADVISE',

    33: 'MDS_GETATTR',
    34: 'MDS_GETATTR_NAME',
    35: 'MDS_CLOSE',
    36: 'MDS_REINT',
    37: 'MDS_READPAGE',
    38: 'MDS_CONNECT',
    39: 'MDS_DISCONNECT',
    40: 'MDS_GET_ROOT',
    41: 'MDS_STATFS',
    42: 'MDS_PIN',
    43: 'MDS_UNPIN',          # obsolete, never used in a release
    44: 'MDS_SYNC',
    45: 'MDS_DONE_WRITING',
    46: 'MDS_SET_INFO',
    47: 'MDS_QUOTACHECK',     # not used since 2.4
    48: 'MDS_QUOTACTL',
    49: 'MDS_GETXATTR',
    50: 'MDS_SETXATTR',       # obsolete, now it's MDS_REINT op
    51: 'MDS_WRITEPAGE',
    52: 'MDS_IS_SUBDIR',      # obsolete, never used in a release
    53: 'MDS_GET_INFO',
    54: 'MDS_HSM_STATE_GET',
    55: 'MDS_HSM_STATE_SET',
    56: 'MDS_HSM_ACTION',
    57: 'MDS_HSM_PROGRESS',
    58: 'MDS_HSM_REQUEST',
    59: 'MDS_HSM_CT_REGISTER',
    60: 'MDS_HSM_CT_UNREGISTER',
    61: 'MDS_SWAP_LAYOUTS',

    101: 'LDLM_ENQUEUE',
    102: 'LDLM_CONVERT',
    103: 'LDLM_CANCEL',
    104: 'LDLM_BL_CALLBACK',
    105: 'LDLM_CP_CALLBACK',
    106: 'LDLM_GL_CALLBACK',
    107: 'LDLM_SET_INFO',

    250: 'MGS_CONNECT',
    251: 'MGS_DISCONNECT',
    252: 'MGS_EXCEPTION',           # node died, etc.
    253: 'MGS_TARGET_REG',          # whenever target starts up
    254: 'MGS_TARGET_DEL',
    255: 'MGS_SET_INFO',
    256: 'MGS_CONFIG_READ',

    400: 'OBD_PING',
    401: 'OBD_LOG_CANCEL',          # obsolete since 1.5
    402: 'OBD_QC_CALLBACK',         # obsolete since 2.4
    403: 'OBD_IDX_READ',

    501: 'LLOG_ORIGIN_HANDLE_CREATE',
    502: 'LLOG_ORIGIN_HANDLE_NEXT_BLOCK',
    503: 'LLOG_ORIGIN_HANDLE_READ_HEADER',
    504: 'LLOG_ORIGIN_HANDLE_WRITE_REC',    # Obsolete by 2.1.
    505: 'LLOG_ORIGIN_HANDLE_CLOSE',        # Obsolete by 1.8.
    506: 'LLOG_ORIGIN_CONNECT',             # Obsolete by 2.4.
    507: 'LLOG_CATINFO',                    # Obsolete by 2.3.
    508: 'LLOG_ORIGIN_HANDLE_PREV_BLOCK',
    509: 'LLOG_ORIGIN_HANDLE_DESTROY',      # Obsolete by 2.11.

    601: 'QUOTA_DQACQ',
    602: 'QUOTA_DQREL',

    700: 'SEQ_QUERY',

    801: 'SEC_CTX_INIT',
    802: 'SEC_CTX_INIT_CONT',
    803: 'SEC_CTX_FINI',

    900: 'FLD_QUERY',
    901: 'FLD_READ',

    1000: 'OUT_UPDATE',

    1101: 'LFSCK_NOTIFY',
    1102: 'LFSCK_QUERY'
}
+
def translate_opcodes(opc_list):
    """Print "o<num> \\t= <name>" for each opcode in *opc_list*.

    Unknown opcodes print "unknown".  Only KeyError is caught so real
    programming errors are not masked (the previous bare 'except:' hid
    every failure, including interrupts).
    """
    for opc in opc_list:
        try:
            name = opcodes[opc]
        except KeyError:
            name = "unknown"
        print("o%d \t= %s" % (opc, name))
+
+
# Command-line entry point: translate the listed opcodes.
if __name__ == "__main__":
    description = "Maps one or more Lustre rpc opcodes to its string identifier."
    parser = argparse.ArgumentParser(description=description)
    # One or more numeric opcodes to translate.
    parser.add_argument('opcode', nargs="+", type=int,
        help="list of one or more opcodes")

    args = parser.parse_args()
    translate_opcodes(args.opcode)
diff --git a/contrib/debug_tools/epython_scripts/rpc_stats.py b/contrib/debug_tools/epython_scripts/rpc_stats.py
new file mode 100644 (file)
index 0000000..c271182
--- /dev/null
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2015-2018 Cray Inc. All Rights Reserved.
+Utility to display rpc stats for a client_obd
+"""
+
+from pykdump.API import *
+import argparse
+
+import lustrelib as ll
+from crashlib.input import toint
+
+description_short = 'Dumps the rpc stats for a given client_obd'
+
# Number of buckets in a struct obd_histogram (oh_buckets[]).
OBD_HIST_MAX = 32

def get_cli_obd(client_obd):
    """Resolve the command-line argument to a struct client_obd.

    First treats *client_obd* as the address of a client_obd; if that
    read fails, scans obd_devs for a matching device and returns its
    embedded u.cli.  Returns None when nothing matches.
    """
    cli = None
    try:
        cli = readSU('struct client_obd', client_obd)
    except Exception, e:
        # Fallback: search every obd device.
        # NOTE(review): obd2str() returns a string while client_obd has
        # already been converted by toint(); this equality looks like it
        # can never match a numeric argument -- confirm intended use.
        for dev in readSymbol('obd_devs'):
            try:
                if ll.obd2str(dev, 4) == client_obd:
                    cli = dev.u.cli
                    break
            except Exception, e:
                # Skip devices that cannot be decoded.
                continue
    return cli
+
def pct(a, b):
    """Return *a* as a percentage of *b*; 0 when *b* is zero/falsy."""
    if not b:
        return 0
    return 100 * a / b
+
def lprocfs_oh_sum(oh):
    """Sum of all OBD_HIST_MAX buckets of an obd_histogram."""
    return sum(oh.oh_buckets[i] for i in range(OBD_HIST_MAX))
+
def _show_rpc_hist(read_hist, write_hist, header, fmt, bucket_val):
    """Print one read-vs-write histogram table.

    header is the column-title line, fmt the per-row format string and
    bucket_val maps a bucket index to the value shown in column one.
    Rows stop early once both cumulative totals are reached.
    """
    print("\n\t\t\tread\t\t\twrite")
    print(header)

    read_tot = lprocfs_oh_sum(read_hist)
    write_tot = lprocfs_oh_sum(write_hist)

    read_cum = 0
    write_cum = 0
    for i in range(OBD_HIST_MAX):
        r = read_hist.oh_buckets[i]
        w = write_hist.oh_buckets[i]

        read_cum += r
        write_cum += w
        print(fmt %
              (bucket_val(i), r, pct(r, read_tot),
              pct(read_cum, read_tot), w,
              pct(w, write_tot),
              pct(write_cum, write_tot)))
        if read_cum == read_tot and write_cum == write_tot:
            break

def osc_rpc_stats_seq_show(client_obd):
    """Print the osc rpc_stats proc content for *client_obd*.

    Emits in-flight counts followed by the pages-per-rpc,
    rpcs-in-flight and offset histograms (read vs write columns).
    Returns 0 on success, 1 when client_obd is missing/NULL.
    (Refactor: the three duplicated histogram loops now share
    _show_rpc_hist; output is unchanged.)
    """
    if not client_obd:
        print("invalid input for field 'client_obd'")
        return 1
    cli = readSU('struct client_obd', client_obd)
    print("read RPCs in flight:  %d" % cli.cl_r_in_flight)
    print("write RPCs in flight: %d" % cli.cl_w_in_flight)
    print("pending write pages:  %d" % cli.cl_pending_w_pages.counter)
    print("pending read pages:   %d" % cli.cl_pending_r_pages.counter)

    # Bucket i covers 2**i pages per rpc.
    _show_rpc_hist(cli.cl_read_page_hist, cli.cl_write_page_hist,
                   "pages per rpc         rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:\t\t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: 1 << i)
    # Bucket i covers exactly i rpcs in flight.
    _show_rpc_hist(cli.cl_read_rpc_hist, cli.cl_write_rpc_hist,
                   "rpcs in flight        rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:\t\t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: i)
    # Bucket 0 is offset 0; bucket i>0 covers offset 2**(i-1).
    _show_rpc_hist(cli.cl_read_offset_hist, cli.cl_write_offset_hist,
                   "offset                rpcs   % cum % |       rpcs   % cum %\n",
                   "%d:      \t%10d %3d %3d   | %10d %3d %3d",
                   lambda i: 0 if i == 0 else 1 << (i - 1))
    print("")
    return 0
+
# Command-line entry point: resolve the argument and dump its rpc stats.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=description_short)
    # Address of a client_obd (resolved/validated by get_cli_obd).
    parser.add_argument("client_obd", nargs="?", default=[], type=toint,
        help="address of client_obd structure whose stats will be dumped")
    args = parser.parse_args()
    cli = get_cli_obd(args.client_obd)
    osc_rpc_stats_seq_show(cli)
diff --git a/contrib/debug_tools/epython_scripts/sbi_ptrs.py b/contrib/debug_tools/epython_scripts/sbi_ptrs.py
new file mode 100644 (file)
index 0000000..7872039
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+"""
+Copyright (c) 2019 Cray Inc. All Rights Reserved.
+Utility to display Lustre inode related struct pointers
+"""
+
+from pykdump.API import *
+import argparse
+from crashlib.input import toint
+
+description_short = "Prints Lustre structs associated with inode."
+
def dump_inode(inode):
    """Print the Lustre struct pointers associated with one inode.

    The ll_inode_info embeds the VFS inode, so the container is found
    by backing up lli_vfs_inode's offset from the inode address.
    """
    lli_off = member_offset('struct ll_inode_info', 'lli_vfs_inode')
    lli = readSU('struct ll_inode_info', Addr(inode) - lli_off)
    sb = readSU('struct super_block', inode.i_sb)
    lsi = readSU('struct lustre_sb_info', sb.s_fs_info)
    llsbi = readSU('struct ll_sb_info', lsi.lsi_llsbi)
    print("%x %x %x %x %x" % (Addr(inode), lli, sb, lsi, llsbi))
+
def dump_inode_list(inodes):
    """Print the struct-pointer table for each inode address in *inodes*."""
    print("%-16s %-16s %-16s %-16s %-16s" % ("inode", "ll_inode_info",
          "super_block", "lustre_sb_info", "ll_sb_info"))
    for addr in inodes:
        inode = readSU('struct inode', addr)
        dump_inode(inode)
+
# Command-line entry point: print the pointer table for each inode.
if __name__ == "__main__":
    description = "Prints ll_inode_info, super_block, \n" + \
            "lustre_sb_info, and ll_sb_info pointers associated \n" + \
           "with specified inode(s) \n"

    parser = argparse.ArgumentParser(description=description)
    # One or more inode addresses to decode.
    parser.add_argument('inode', nargs="+", type=toint,
        help="list of one or more inodes")
    args = parser.parse_args()

    dump_inode_list(args.inode)
diff --git a/contrib/debug_tools/epython_scripts/uniqueStacktrace.py b/contrib/debug_tools/epython_scripts/uniqueStacktrace.py
new file mode 100644 (file)
index 0000000..415aee1
--- /dev/null
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+"""
+Copyright (c) 2015-2019 Cray Inc. All Rights Reserved.
+Utility to print unique stack traces
+"""
+
+import re
+import sys
+import StringIO
+import argparse
+from pykdump.API import exec_crash_command
+
+description_short = 'Print stack traces for each task.'
+
+# The outer loop identifies PIDs; the inner loop collects the
+# '#'-prefixed frame lines of a trace until
+# another PID is found
def sortInput(swapper, input):
    """Group 'foreach ... bt' output by identical stack trace.

    Reads crash backtrace output from *input* (a file-like object) and
    returns a list of (trace, [(pid, task), ...]) tuples sorted by how
    many tasks share each trace, ascending.  Traces of swapper threads
    are skipped unless *swapper* is true.
    """


    ps = re.compile("^PID:\s+(\d+)\s+TASK:\s+([0-9A-Fa-f]+).*")
    n = re.compile("^#")
    swap = re.compile((".*\"swapper/[0-9]+\""))
    info = dict()
    PID = ""
    STK = ""
    tmp = ""

    # Outer to check for PIDs
    # this loop never breaks;
    for line in input:
        line = line.strip()

        # Inner loop to check for # signs indicating lines we want.
        # Having two loops allow for the PID and TSK to be associated
        # with  a particular trace.
        # This loop breaks if a new PID is found (meaning the end of the
        # current trace) or if there are no more lines available
        while True:
            if ps.match(line): break;
            line = line.strip()
            if n.match(line):
                # Keep only the function/module/address columns of the
                # frame line so equal traces compare equal.
                line = line.split()
                tmp += " ".join([line[2], line[3], line[4]])
                if len(line) == 6 : tmp += " " + line[5]
                tmp += '\n\t'
            # Advances the shared position; mixing readline() with
            # iteration is safe on a StringIO object.
            line = input.readline()
            if not line: break

        # Flush the completed trace, keyed by its text.
        if tmp :
            if tmp in info:
                info[tmp].append((PID,STK))
            else:
                info[tmp] = [(PID,STK)]

        m = ps.match(line)
        if m:
            PID, STK = m.group(1), m.group(2)
            tmp = ""

            # if it's swapper line move on
            # this prevents entry into inner loop
            if not swapper and swap.match(line):
                line = input.readline()

    # Least-shared traces first.
    sort = sorted(info.items(), key=lambda info: len(info[1]))
    return sort
+
def printRes(sort, printpid, printptr):
    """Print each unique stack trace, least common first.

    Optionally prefixes each trace with the PIDs and/or task pointers
    of the tasks that share it.
    """
    for trace, tasks in sort:
        if printpid and printptr:
            print("PID, TSK: %s" % (', '.join(p[0] + ': ' + p[1] for p in tasks)))
        elif printpid:
            print("PID: %s" % (', '.join(p[0] for p in tasks)))
        elif printptr:
            print("TSK: %s" % (', '.join(p[1] for p in tasks)))
        print("TASKS: %d" % (len(tasks)))
        print("\t%s" % (trace))
+
+
def main():
    """Parse options, run 'foreach ... bt' in crash and print unique traces."""
    parser = argparse.ArgumentParser()

    parser.add_argument("-p", "--print-pid",
                        action="store_true", dest="printpid", default=False,
                        help="Print PIDS corresponding to each ST")
    parser.add_argument("-q", "--print-taskpntr",
                        action="store_true", dest="printptr", default=False,
                        help="Print the task pointers for each ST")
    parser.add_argument("-s", "--swapper",
                        action="store_true", dest="swapper", default=False,
                        help="Print swapper processes")
    # Remaining arguments are passed through to crash's 'foreach' command.
    parser.add_argument("task_select", metavar="task_selection", nargs="*",
                        help="task selection argument (passed to foreach cmd)")

    args = parser.parse_args()

    # Collect backtraces for the selected tasks from the crash session.
    com = "foreach {ts:s} bt".format(ts=" ".join(args.task_select))

    result = exec_crash_command(com)
    # Wrap the command output so sortInput can iterate/readline it.
    input = StringIO.StringIO(result)
    printRes(sortInput(args.swapper, input), args.printpid, args.printptr)

if __name__ == '__main__':
    main()