Whamcloud - gitweb
LU-17744 ldiskfs: mballoc stats fixes
[fs/lustre-release.git] / contrib / debug_tools / epython_scripts / ptlrpc.py
1 #!/usr/bin/env python
2
3 """
4 Copyright 2015-2019 Cray Inc.  All Rights Reserved
5 Dumps the Lustre RPC queues for all ptlrpcd_XX threads.
6 """
7
8 from pykdump.API import *
9 import sys
10 import argparse
11 import os
12
13 import lustrelib as ll
14 from crashlib.input import toint
15
16 from traceback import print_exc
17
# Short, one-line description of what this script displays.
description_short = 'Displays the RPC queues of the Lustre ptlrpcd daemons'
19
def print_separator(count):
    """Print one line consisting of `count` '=' characters.

    A count of zero (or less) prints an empty line.
    """
    print("=" * count)
25
def print_title(title):
    """Print a table heading followed by a 148-character separator line.

    When `title` is non-empty, a blank line, the title and the column
    header row are printed first; an empty title prints only the
    separator, which is how the table is closed.
    """
    if title:
        header_fmt = "%-14s %-6s %-19s %-18s %-19s %-4s %-14s %-4s %-22s %-19s"
        columns = ("thread", "pid", "ptlrpc_request", "xid", "nid", "opc",
                   "phase:flags", "R:W", "sent/deadline", "ptlrpc_body")
        print("\n" + title)
        print(header_fmt % columns)
    print_separator(148)
33
def enum(**enums):
    """Return a new class named 'Enum' whose class attributes are the given
    keyword arguments (attribute name -> value)."""
    return type('Enum', (), enums)

# Identifiers for the list_head members that can chain ptlrpc_request
# structs together.  Each value must be unique: the values are the keys of
# RQ_LIST_LNKS below, and two names sharing a value would silently collapse
# into a single table entry (the later one winning), so a lookup for one
# field would return the description of the other.
REQ_Q = enum(rq_list=1, replay_list=2, set_chain=3, ctx_chain=4,
             unreplied_list=5, timed_list=6, exp_list=7, hist_list=8)

# One entry per REQ_Q identifier:
#   [struct that declares the link member,
#    name of the link member inside that struct,
#    member of struct ptlrpc_request whose offset is added on top]
# get_linkfld_offset() sums the two member offsets to locate the link
# relative to the start of the ptlrpc_request.
RQ_LIST_LNKS = {
    REQ_Q.rq_list:        ['struct ptlrpc_request', 'rq_list', 'rq_type'],
    REQ_Q.replay_list:    ['struct ptlrpc_request', 'rq_replay_list', 'rq_type'],
    REQ_Q.set_chain:      ['struct ptlrpc_cli_req', 'cr_set_chain', 'rq_cli'],
    REQ_Q.ctx_chain:      ['struct ptlrpc_cli_req', 'cr_ctx_chain', 'rq_cli'],
    REQ_Q.unreplied_list: ['struct ptlrpc_cli_req', 'cr_unreplied_list', 'rq_cli'],
    REQ_Q.timed_list:     ['struct ptlrpc_srv_req', 'sr_timed_list', 'rq_srv'],
    REQ_Q.exp_list:       ['struct ptlrpc_srv_req', 'sr_exp_list', 'rq_srv'],
    REQ_Q.hist_list:      ['struct ptlrpc_srv_req', 'sr_hist_list', 'rq_srv']
}
50
# Positions of the three fields inside each RQ_LIST_LNKS entry.
STRUCT_IDX, MEMBER_IDX, UNION_IDX = 0, 1, 2
54
def size_round(val):
    """Round `val` up to the next multiple of 8 (unchanged if it already
    is one)."""
    align_mask = 0x7
    return (val + align_mask) & ~align_mask
57
# Value lm_magic must hold for get_ptlrpc_body() to parse the message.
LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3

def get_ptlrpc_body(req):
    """Return the ptlrpc_body_v2 found in buffer 0 of req.rq_reqmsg.

    None is returned when the request has no message, the message magic is
    not LUSTRE_MSG_MAGIC_V2, the message carries no buffers, or the first
    buffer is smaller than a struct ptlrpc_body_v2.
    """
    msg = req.rq_reqmsg
    if not msg or msg == None:
        return None
    if msg.lm_magic != LUSTRE_MSG_MAGIC_V2:
        return None

    nbufs = msg.lm_bufcount
    if nbufs < 1:
        return None
    if msg.lm_buflens[0] < getSizeOf('struct ptlrpc_body_v2'):
        return None

    # Skip the header: the lm_buflens array holds one unsigned int per
    # buffer, and the total is rounded up to a multiple of 8 bytes.
    hdr_len = member_offset('struct lustre_msg_v2', 'lm_buflens')
    hdr_len += getSizeOf("unsigned int") * nbufs
    body_off = size_round(hdr_len)

    body_addr = Addr(msg) + body_off
    if body_addr == 0:
        print("addr")
        return None
    return readSU('struct ptlrpc_body_v2', body_addr)
87
# Values compared against the low 32 bits of ptlrpc_request.rq_phase
# (see get_phase_flags).
RQ_PHASE_NEW = 0xebc0de00
RQ_PHASE_RPC = 0xebc0de01
RQ_PHASE_BULK = 0xebc0de02
RQ_PHASE_INTERPRET = 0xebc0de03
RQ_PHASE_COMPLETE = 0xebc0de04
RQ_PHASE_UNREG_RPC = 0xebc0de05
RQ_PHASE_UNREG_BULK = 0xebc0de06
RQ_PHASE_UNDEFINED = 0xebc0de07

# Short label printed for each phase value.
PHASES = dict([
    (RQ_PHASE_NEW, "NEW"),
    (RQ_PHASE_RPC, "RPC"),
    (RQ_PHASE_BULK, "BULK"),
    (RQ_PHASE_INTERPRET, "NtrPrt"),
    (RQ_PHASE_COMPLETE, "COMP"),
    (RQ_PHASE_UNREG_RPC, "UNREG"),
    (RQ_PHASE_UNREG_BULK, "UNBULK"),
    (RQ_PHASE_UNDEFINED, "UNDEF"),
])
107
# Help epilog explaining the one-letter flags emitted by get_phase_flags().
# Each entry names the ptlrpc_request field that get_phase_flags() tests;
# "C" is rq_wait_ctx (the legend previously said "rq_wait", which is not
# the field that is read).
FLAG_LEGEND = "\nFlag Legend:\n\n" + \
         "I - rq_intr\tR - rq_replied\t\tE - rq_err\te - rq_net_err\tX - rq_timedout\tS - rq_resend\t\tT - rq_restart\n" + \
         "P - rq_replay\tN - rq_no_resend\tW - rq_waiting\tC - rq_wait_ctx\tH - rq_hp\tM - rq_committed\tq - rq_req_unlinked\tu - rq_reply_unlinked\n"
111
def get_phase_flags(req):
    """Return "<phase>:<flags>" for a request.

    <phase> is the PHASES label for the low 32 bits of req.rq_phase, or
    "?<decimal phase>" when the value is not in PHASES.  <flags> contains
    one letter for every flag field of `req` that is set, always in the
    order listed below.
    """
    flag_fields = (
        ("I", "rq_intr"),
        ("R", "rq_replied"),
        ("E", "rq_err"),
        ("e", "rq_net_err"),
        ("X", "rq_timedout"),
        ("S", "rq_resend"),
        ("T", "rq_restart"),
        ("P", "rq_replay"),
        ("N", "rq_no_resend"),
        ("W", "rq_waiting"),
        ("C", "rq_wait_ctx"),
        ("H", "rq_hp"),
        ("M", "rq_committed"),
        ("q", "rq_req_unlinked"),
        ("u", "rq_reply_unlinked"),
    )
    phase = req.rq_phase
    label = PHASES.get(phase & 0xffffffff, "?%d" % phase)
    letters = [letter for letter, field in flag_fields if getattr(req, field)]
    return "%s:%s" % (label, "".join(letters))
132
# Pointer value that print_one_request() treats as "no usable import".
LP_POISON = 0x5a5a5a5a5a5a5a5a

def print_one_request(sthread, req):
    """Print one table row for the ptlrpc_request `req`.

    `sthread` is printed in the first column.  Status, opcode and body
    address come from get_ptlrpc_body() and are shown as -1 when no body
    could be located.  The import's obd name is shown, or
    "Invalid Import" when rq_import is NULL, all-ones or LP_POISON.
    """
    status = opc = pbaddr = -1
    pb = get_ptlrpc_body(req)
    if pb:
        status, opc, pbaddr = pb.pb_status, pb.pb_opc, Addr(pb)

    # NOTE(review): nid is computed but never printed; the row shows
    # status under the "pid" heading and obd_name under the "nid" heading
    # of print_title().  Left as-is -- confirm which columns are intended.
    nid = "LNET_NID_ANY"
    obd_name = "Invalid Import"
    imp = req.rq_import
    if imp and imp != 0xffffffffffffffff and imp != LP_POISON:
        obd_name = ll.obd2str(imp.imp_obd)
        if not imp.imp_invalid and req.rq_import.imp_connection:
            nid = ll.nid2str(req.rq_import.imp_connection.c_peer.nid)

    brw = "%1d:%1d" % (req.rq_bulk_read, req.rq_bulk_write)
    rq_sent_dl = "%d/%d" % (req.rq_sent, req.rq_deadline)
    row_fmt = "%-14s %-6s 0x%-17x %-18d %-19s %-4d %-14s %-4s %-22s 0x%-17x"
    fields = (sthread, status, Addr(req), req.rq_xid, obd_name, opc,
              get_phase_flags(req), brw, rq_sent_dl, pbaddr)
    print(row_fmt % fields)
168
def print_request_list(sthread, lhdr, loffset):
    """Print every request linked on the list headed by `lhdr`.

    The nodes produced by readStructNext(lhdr, 'next') are visited in
    order; for each one, its `next` pointer minus `loffset` is read as a
    struct ptlrpc_request and printed.  The walk stops once `next` points
    back at the list head.

    Returns 0 on success, or 1 after printing a traceback if anything
    raised while walking or printing.
    """
    try:
        for node in readStructNext(lhdr, 'next'):
            next_addr = node.next
            if next_addr == Addr(lhdr):
                break
            print_one_request(
                sthread, readSU('struct ptlrpc_request', next_addr-loffset))
    except Exception:
        print_exc()
        return 1
    else:
        return 0
181
def get_linkfld_offset(lfld):
    """Return the offset of a link field from the start of ptlrpc_request.

    `lfld` is a REQ_Q identifier.  The result is the offset, within
    struct ptlrpc_request, of the member named by the table entry's
    UNION_IDX field, plus the offset of the link member inside its
    declaring struct -- which accounts for links that live in a struct
    embedded in the request.
    """
    entry = RQ_LIST_LNKS[lfld]
    outer_off = member_offset('struct ptlrpc_request', entry[UNION_IDX])
    inner_off = member_offset(entry[STRUCT_IDX], entry[MEMBER_IDX])
    return outer_off + inner_off
192
def foreach_ptlrpcd_ctl(callback, *args):
    """Call callback(pd, *args) for every ptlrpcd control entry.

    Visits pd_threads[0 .. pd_nthreads-1] of each of the first
    `ptlrpcds_num` elements of the `ptlrpcds` symbol, and finally the
    `ptlrpcd_rcv` symbol.
    """
    groups = readSymbol('ptlrpcds')
    ngroups = readSymbol('ptlrpcds_num')

    for group_idx in range(ngroups):
        group = groups[group_idx]
        for thread_idx in range(group.pd_nthreads):
            callback(group.pd_threads[thread_idx], *args)

    callback(readSymbol('ptlrpcd_rcv'), *args)
204
def get_daemon_listhdrs(pd, sent_rpcs, pend_rpcs):
    """Record the request-list heads of one ptlrpcd control entry.

    Appends [pd.pc_name, pd.pc_set.set_requests] to `sent_rpcs` and
    [pd.pc_name, pd.pc_set.set_new_requests] to `pend_rpcs`.
    """
    targets = ((sent_rpcs, 'set_requests'), (pend_rpcs, 'set_new_requests'))
    for bucket, member in targets:
        bucket.append([pd.pc_name, getattr(pd.pc_set, member)])
208
def dump_list_of_lists(rpc_list, loffset):
    """Print the requests of every (thread name, list head) pair in
    `rpc_list`, using `loffset` as the link-field offset."""
    for sthread, lhdr in rpc_list:
        print_request_list(sthread, lhdr, loffset)
213
def dump_daemon_rpclists():
    """Print the sent and pending request queues of every ptlrpcd entry.

    The list heads are gathered first, then each group is printed under
    its own title; requests are located through the set_chain link.
    """
    sent_rpcs, pend_rpcs = [], []
    foreach_ptlrpcd_ctl(get_daemon_listhdrs, sent_rpcs, pend_rpcs)
    offset = get_linkfld_offset(REQ_Q.set_chain)

    sections = (
        ("Sent RPCS: ptlrpc_request_set.set_requests->", sent_rpcs),
        ("Pending RPCS: ptlrpc_request_set.set_new_requests->", pend_rpcs),
    )
    for heading, queues in sections:
        print_title(heading)
        dump_list_of_lists(queues, offset)

    # An empty title prints only the closing separator line.
    print_title('')
227
def print_overview_entry(pd):
    """Print one overview line: the entry's name, its own address and the
    value of its pc_set field, both in hex."""
    label = "%s:" % pd.pc_name
    line_fmt = "%-14s  ptlrpcd_ctl 0x%x   ptlrpc_request_set 0x%x"
    print(line_fmt % (label, Addr(pd), pd.pc_set))
232
def dump_overview():
    """Print one overview line for every ptlrpcd control entry."""
    foreach_ptlrpcd_ctl(print_overview_entry)
235
def print_pcset_stats(pd):
    """Print the counters of pd.pc_set, unless the set is idle.

    Nothing is printed when both set_new_count and set_remaining are
    zero.  Otherwise the row shows the entry name, the set's address and
    its refcount, new-count and remaining counters.
    """
    pcset = pd.pc_set
    if pcset.set_new_count.counter == 0 and pcset.set_remaining.counter == 0:
        return

    label = "%s:" % pd.pc_name
    row = (label, Addr(pd.pc_set),
           pd.pc_set.set_refcount.counter,
           pd.pc_set.set_new_count.counter,
           pd.pc_set.set_remaining.counter)
    print("%-13s 0x%-18x %-4d %-4d %-6d" % row)
245
def dump_pcsets():
    """Print a header, a separator, and then the request-set counters of
    every ptlrpcd entry whose set is not idle."""
    header = ("thread", "ptlrpc_request_set", "ref", "new", "remain")
    print('%-14s %-19s %-4s %-4s %-6s' % header)
    print_separator(52)
    foreach_ptlrpcd_ctl(print_pcset_stats)
251
def dump_one_rpc(addr):
    """Print the table header and a single row for the ptlrpc_request
    located at `addr`; the thread column is left empty."""
    print_title("Request")
    print_one_request('', readSU('struct ptlrpc_request', addr))
256
def dump_one_rpclist(addr, link_fld):
    """Print every request on the list whose list_head is at `addr`.

    `link_fld` is the name of a REQ_Q attribute (e.g. 'set_chain') that
    identifies which link member chains the requests together.
    """
    lhdr = readSU('struct list_head', addr)
    field_id = vars(REQ_Q)[link_fld]
    loffset = get_linkfld_offset(field_id)

    print_title("Request list at %s" % lhdr)
    print_request_list('', lhdr, loffset)
264
def dump_rpcs_cmd(args):
    """Run the display selected by the parsed command-line arguments.

    `args` is the argparse namespace built in the __main__ block; the
    fields read are oflag, sflag, rpc_addr and link_fld.  Exactly one
    action runs, chosen in this order of precedence:

      1. oflag     -> overview of the ptlrpcd entries
      2. sflag     -> per-request-set counters
      3. rpc_addr  -> the list headed at that address when link_fld is
                      set, otherwise the single request at that address
      4. otherwise -> the sent/pending queues of every ptlrpcd entry
    """
    if args.oflag:
        dump_overview()
    elif args.sflag:
        dump_pcsets()
    elif args.rpc_addr:
        # rpc_addr is declared with nargs="?" and type=toint, so argparse
        # stores one converted value rather than a list.  Indexing it with
        # [0] (as the list branch used to) raises TypeError on an int.
        if args.link_fld:
            dump_one_rpclist(args.rpc_addr, args.link_fld)
        else:
            dump_one_rpc(args.rpc_addr)
    else:
        dump_daemon_rpclists()
280
if __name__ == "__main__":
    # Long description shown by -h; the line breaks are part of the text
    # because RawDescriptionHelpFormatter prints it verbatim.
    description = (
        "Displays lists of Lustre RPC requests. If no arguments are \n"
        "specified, all rpcs in the sent and pending queues of the \n"
        "ptlrpcd daemons are printed. If an address is specified, it \n"
        "must be a pointer to either a ptlrpc_request or a list_head \n"
        "struct. If the addr is a list_head, then a link_field must \n"
        "also be provided. The link_field identifies the member of \n"
        "the ptlrpc_request struct used to link the list together."
    )

    parser = argparse.ArgumentParser(
        description=description,
        epilog=FLAG_LEGEND,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-o", dest="oflag", action="store_true",
        help=("print overview of ptlrpcd_XX threads with ptlrpcd_ctl "
              "structs and the associated pc_set field"))
    parser.add_argument(
        "-s", dest="sflag", action="store_true",
        help="print rpc counts per ptlrpc_request_set")
    parser.add_argument(
        "-l", dest="link_fld", default="",
        choices=['rq_list', 'replay_list', 'set_chain', 'ctx_chain',
                 'unreplied_list', 'timed_list', 'exp_list', 'hist_list'],
        help="name of link field in ptlrpc_request for list headed by addr")
    # Optional positional: a single address converted by toint.
    parser.add_argument(
        "rpc_addr", nargs="?", default=[], type=toint,
        help="address of either single ptlrpc_request or list_head; list_head requires a -l argument")

    args = parser.parse_args()
    dump_rpcs_cmd(args)