Whamcloud - gitweb
LU-13676 tools: find unique stacks from sysrq-t
[fs/lustre-release.git] / contrib / debug_tools / crash-find-unique-traces.awk
1 #!/usr/bin/awk -f
2 # parse stack traces and identify interesting threads, avoiding the common
3 # threads that are just sleeping and not doing anything interesting.
4 #
5 # works with stack traces from "crash foreach bt", and kernel stacks from
6 # "sysrq-t" or "echo t > /proc/sysrq-trigger" with timestamps/kernel:
7 # stripped via "sed -e 's/.*kernel: //'"
# Start with an empty table of unique call chains.  unique_nr counts the
# entries held in unique[], unique_num[] and unique_pids[].
BEGIN { unique_nr = 0 }
# Record one finished call chain in the table of unique chains.
#
# chain: comma-separated function names as built by add_to_chain();
#        an empty chain is ignored.
#
# The thread is labelled with the globals "command" and "current_pid".
# For a chain that was seen before, its counter in unique_num[] is bumped,
# "command:pid " is appended to its unique_pids[] entry, and 0 is returned.
# Otherwise the chain is stored as a new entry at index unique_nr, and
# unique_nr is incremented.
#
# unique_idx[] maps the chain text to its index, so the lookup is a single
# array probe rather than a scan over unique[].  "idx" is declared as an
# extra parameter to make it local: awk variables are global otherwise, and
# a global loop index here would overwrite the "i" used by the callers.
function add_unique_chain(chain,    idx) {
        if (chain == "")
                return

        if (chain in unique_idx) {
                idx = unique_idx[chain]
                #print "appending["idx"]: "command":"current_pid
                unique_num[idx]++
                unique_pids[idx] = unique_pids[idx] command ":" current_pid " "
                return 0
        }

        #print "adding["unique_nr"]: "command":"current_pid":: "chain
        idx = unique_nr++
        unique_idx[chain] = idx
        unique_num[idx] = 1
        unique_pids[idx] = command ":" current_pid " "
        unique[idx] = chain
}
27
# Append one stack frame's function name to the chain being collected.
#
# tmp: the frame field taken from the input line, e.g. "schedule+0x29/0x70"
#      from sysrq-t output or "__schedule" from crash output.
#
# Nothing happens unless a stack is being collected (collect_chain set).
# Frames that appear in nearly every stack are dropped so that equivalent
# stacks compare equal; a "_cond_resched" frame is recorded as "schedule".
# The "+0xOFF/0xLEN" suffix is removed before the name is appended to the
# global "chain", using "," as the separator.
function add_to_chain(tmp) {
        # not currently processing a stack
        if (!collect_chain)
                return

        # skip duplicate/common parts of every stack; a lone "?" is what the
        # frame field holds on lines like "[<addr>] ? func+0x1b9/0x1f0"
        if (tmp == "?" || index(tmp, "__cond_resched"))
                return

        if (index(tmp, "_cond_resched"))
                tmp = "schedule"
        else if (tmp ~ /do_syscall_64|entry_SYSCALL|ret_from_fork|system_call_fastpath/ ||
                 tmp ~ /__schedule|schedule_timeout|schedule_hrtimeout_range/ ||
                 tmp ~ /SyS_select|SyS_pselect|sys_select/)
                return

        # strip off function offset/length
        sub("[+]0x[0-9a-f]+[/].*", "", tmp)

        # NOTE(review): this is a substring test, so any name containing
        # "kthread" (kthreadd, kthread_worker_fn, rcu_gp_kthread, ...) is
        # dropped too, which makes the not_interesting entries for those
        # names unreachable -- confirm whether an exact match was intended.
        if (index(tmp, "kthread"))
                return

        chain = (chain == "") ? tmp : chain "," tmp
}
71
# Finish the stack currently being collected, if there is one: hand the
# accumulated chain to add_unique_chain() and reset the collection state
# (chain, collect_chain, skip_unknown).  Does nothing when no stack is
# being collected.
#
# why: short tag naming the reason for the flush; it is only referenced by
#      the disabled debug print below.
function dump_chain(why)
{
        if (!collect_chain)
                return

        #printf "dumping(%s): %s\n", why, command_name
        add_unique_chain(chain)
        chain = ""
        collect_chain = 0
        skip_unknown = 0
}
82
# A line reporting missed kernel messages while a stack is being collected
# means that stack may be truncated: remember this for the warning printed
# by the END block and flush what was collected so far.
/Missed [0-9]* kernel messages/ && collect_chain {
        incomplete = 1
        dump_chain("incomplete")
}
89
# Task header from "crash foreach bt":
# > crash -s
# PID: 0      TASK: ffffffff82013480  CPU: 0   COMMAND: "swapper/0"
#  #0 [ffffffff82003e28] __schedule at ffffffff81610f2e
#  #1 [ffffffff82003ec8] schedule_idle at ffffffff8161181e
#
# Flushes the previous stack and starts a new one.  start_offset becomes the
# number of fields in front of "PID:" (only the first 11 fields are checked;
# the previous value is kept if none of them is "PID:").  The PID is the
# field after "PID:" and the command is the 8th field of the header proper.
/PID: .*TASK: / {
        dump_chain("PID")
        collect_chain = 1

        i = 1
        while (i < 12 && $i != "PID:")
                i++
        if (i < 12)
                start_offset = i - 1

        current_pid = $(2 + start_offset)
        command = $(8 + start_offset)
        #printf "P: offset=%u, pid=%u, command=%s\n",
        #       start_offset, current_pid, command
}
109
# Backtrace frame from crash:
#  #2 [ffffffff82003ed0] do_idle at ffffffff810cddaf
#  #3 [ffffffff82003ef0] cpu_startup_entry at ffffffff810cdfef
# The function name is the third field, counted after the start_offset
# fields found by the header rule.
/#[0-9]* / {
        frame_func = $(3 + start_offset)
        add_to_chain(frame_func)
}
115
116
# Header of a running task in sysrq-t output:
# SysRq : Show State
#   task                        PC stack   pid father
# mdt04_084       R  running task        0 141145      2 0x00000080
# Call Trace:
#  [<ffffffffbc0d66a6>] __cond_resched+0x26/0x30
#  [<ffffffffbc77f4ca>] _cond_resched+0x3a/0x50
#  [<ffffffffbc223495>] kmem_cache_alloc+0x35/0x1f0
#  [<ffffffffc0dc363c>] LNetMDBind+0x7c/0x5e0 [lnet]
#
# Flushes the previous stack and starts a new one.  The "R" state field is
# looked for in the first 11 fields; the command name is the field just
# before it and the PID the fourth field after it.  start_offset keeps its
# previous value if no field is exactly "R".
/ R  running task / {
        dump_chain("running")
        collect_chain = 1
        #skip_unknown=1

        i = 1
        while (i < 12 && $i != "R")
                i++
        if (i < 12)
                start_offset = i - 2

        current_pid = $(6 + start_offset)
        command = $(1 + start_offset)
        #printf "%s: offset=%u, pid=%u, command=%s\n", $(2+start_offset),
        #       start_offset, current_pid, command
}
141
# Header of a sleeping (S) or uninterruptible (D) task in sysrq-t output:
# SysRq : Show State
#   task                        PC stack   pid father
# bash            S ffff8e3295fdb150     0 227559 227404 0x00000080
# worker          D ffff8abbbfb1ac80     0  4090      1 0x00000000
# Call Trace:
#  [<ffffffffbc77f229>] schedule+0x29/0x70
#  [<ffffffffbc0a07a6>] do_wait+0x1f6/0x260
# this regexp is x86_64-specific
#
# Flushes the previous stack and starts a new one.  The state field is
# looked for in the first 11 fields; the command name is the field just
# before it and the PID the third field after it.  start_offset keeps its
# previous value if no field is exactly "S" or "D".
/ [SD] ffff[0-9a-f]* / {
        dump_chain("sleeping")
        collect_chain = 1

        i = 1
        while (i < 12 && $i != "S" && $i != "D")
                i++
        if (i < 12)
                start_offset = i - 2

        current_pid = $(5 + start_offset)
        command = $(1 + start_offset)
        #printf "%s: offset=%u, pid=%s, command=%s\n", $(2+start_offset),
        #       start_offset, current_pid, command
}
165
# Header of a stack dumped by the kernel itself (dump_stack), e.g.:
# mdt01_001: page allocation failure: order:4, mode:0x10c050
# CPU: 1 PID: 9374 Comm: mdt01_001 Kdump: loaded Tainted: G
# Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
# Call Trace:
#  [<ffffffffbc563021>] dump_stack+0x19/0x1b
#  [<ffffffffbbfbcbf0>] warn_alloc_failed+0x110/0x180
#
# Flushes the previous stack and starts a new one.  start_offset becomes the
# number of fields in front of "CPU:" (only the first 7 fields are checked;
# the previous value is kept if none of them is "CPU:").  The PID is the
# 4th and the command the 6th field counted from "CPU:".
/CPU: [0-9]* PID: [0-9]* Comm: / {
        dump_chain("dump")
        collect_chain = 1
        for (i = 1; i < 8; i++) {
                if ($i == "CPU:") {
                        start_offset = i - 1
                        # stop at the first match, as the other task-header
                        # rules do, instead of letting a later field win
                        break
                }
        }
        current_pid = $(4 + start_offset)
        command = $(6 + start_offset)
}
182
# Stack frame in sysrq-t / dump_stack output:
#  [<ffffffffbc296ba9>] ? ep_scan_ready_list.isra.7+0x1b9/0x1f0
#  [<ffffffffbc77e363>] schedule_hrtimeout_range+0x13/0x20
# this regexp is x86_64-specific
#
# The frame text is the second field after the start_offset fields found by
# the header rule.  When skip_unknown is 1 and that field is the "?" marker,
# the following field (the function name) is used instead; otherwise the
# "?" is passed on and add_to_chain() drops the frame.
/ \[<ffff[0-9a-f]*>\] / {
        frame_field = start_offset + 2
        if (skip_unknown == 1 && $frame_field == "?")
                frame_field++
        #print "adding @"frame_field": "$frame_field
        add_to_chain($frame_field)
}
195
# A line mentioning ret_from_fork or entry_SYSCALL ends the stack being
# collected: flush it right away.  add_to_chain() never records these two
# frames themselves.
$0 ~ /ret_from_fork/ {
        dump_chain("ret")
}

$0 ~ /entry_SYSCALL/ {
        dump_chain("syscall")
}
203
# Print every unique call chain that is not on the list of well-known idle
# stacks, together with how many threads had it and their "command:pid".
END {
        # Flush the stack still being collected when the input ended.
        # Without this the last thread in the input is silently dropped
        # unless its trace happened to finish on a ret_from_fork or
        # entry_SYSCALL line.  Must run before "i" is used below.
        dump_chain("END")

        if (incomplete)
                print "**** messages lost, stack traces may be incomplete ****"
        i=0
        # this list should be kept sorted to avoid duplicates
        # "!}sort" on next line in Vim
        not_interesting[i++]="default_idle,do_idle,cpu_startup_entry,secondary_startup_64"
        not_interesting[i++]="default_idle,do_idle,cpu_startup_entry,start_kernel,secondary_startup_64"
        not_interesting[i++]="sched_show_task,show_state_filter,sysrq_handle_showstate,__handle_sysrq,write_sysrq_trigger,proc_reg_write,vfs_write,SyS_write"
        not_interesting[i++]="schedule"
        not_interesting[i++]="schedule,__se_sys_rt_sigtimedwait"
        not_interesting[i++]="schedule,cfs_wi_scheduler"
        not_interesting[i++]="schedule,devtmpfsd"
        not_interesting[i++]="schedule,distribute_txn_commit_thread"
        not_interesting[i++]="schedule,do_nanosleep,hrtimer_nanosleep,SyS_nanosleep"
        not_interesting[i++]="schedule,do_select"
        not_interesting[i++]="schedule,do_select,kern_select"
        not_interesting[i++]="schedule,do_sys_poll,SyS_poll"
        not_interesting[i++]="schedule,do_sys_poll,SyS_ppoll"
        not_interesting[i++]="schedule,do_sys_poll,__se_sys_poll"
        not_interesting[i++]="schedule,do_wait,SyS_wait4"
        not_interesting[i++]="schedule,do_wait,kernel_wait4,__se_sys_wait4"
        not_interesting[i++]="schedule,ep_poll,SyS_epoll_wait"
        not_interesting[i++]="schedule,ep_poll,SyS_epoll_wait,SyS_epoll_pwait"
        not_interesting[i++]="schedule,ep_poll,do_epoll_wait,__x64_sys_epoll_wait"
        not_interesting[i++]="schedule,expired_lock_main"
        not_interesting[i++]="schedule,futex_wait_queue_me,futex_wait,do_futex,SyS_futex"
        not_interesting[i++]="schedule,ib_fmr_cleanup_thread"
        not_interesting[i++]="schedule,ipmi_thread"
        not_interesting[i++]="schedule,kauditd_thread"
        not_interesting[i++]="schedule,kcompactd"
        not_interesting[i++]="schedule,khugepaged"
        not_interesting[i++]="schedule,khvcd"
        not_interesting[i++]="schedule,kiblnd_connd"
        not_interesting[i++]="schedule,kiblnd_scheduler"
        not_interesting[i++]="schedule,kjournald2"
        not_interesting[i++]="schedule,ksm_scan_thread"
        not_interesting[i++]="schedule,ksocknal_connd"
        not_interesting[i++]="schedule,ksocknal_reaper"
        not_interesting[i++]="schedule,ksocknal_scheduler"
        not_interesting[i++]="schedule,kswapd"
        not_interesting[i++]="schedule,kthread_worker_fn"
        not_interesting[i++]="schedule,kthreadd"
        not_interesting[i++]="schedule,lcw_dispatch_main"
        not_interesting[i++]="schedule,ldlm_bl_thread_main"
        not_interesting[i++]="schedule,ll_agl_thread"
        not_interesting[i++]="schedule,ll_statahead_thread"
        not_interesting[i++]="schedule,lnet_acceptor"
        not_interesting[i++]="schedule,lnet_monitor_thread"
        not_interesting[i++]="schedule,lnet_peer_discovery"
        not_interesting[i++]="schedule,lnet_sock_accept,lnet_acceptor"
        not_interesting[i++]="schedule,mdt_coordinator"
        not_interesting[i++]="schedule,mgc_requeue_thread"
        not_interesting[i++]="schedule,mgs_ir_notify"
        not_interesting[i++]="schedule,n_tty_read,tty_read,vfs_read,SyS_read"
        not_interesting[i++]="schedule,ofd_inconsistency_verification_main"
        not_interesting[i++]="schedule,oom_reaper"
        not_interesting[i++]="schedule,osp_precreate_thread"
        not_interesting[i++]="schedule,osp_send_update_thread"
        not_interesting[i++]="schedule,osp_sync_process_queues,llog_process_thread,llog_process_or_fork,llog_cat_process_cb,llog_process_thread,llog_process_or_fork,llog_cat_process_or_fork,llog_cat_process,osp_sync_thread"
        not_interesting[i++]="schedule,ping_evictor_main"
        not_interesting[i++]="schedule,pipe_wait,pipe_read,__vfs_read,vfs_read,ksys_read"
        not_interesting[i++]="schedule,poll_do_select"
        not_interesting[i++]="schedule,ptlrpc_hr_main"
        not_interesting[i++]="schedule,ptlrpc_wait_event,ptlrpc_main"
        not_interesting[i++]="schedule,ptlrpcd"
        not_interesting[i++]="schedule,qmt_reba_thread"
        not_interesting[i++]="schedule,qsd_upd_thread"
        not_interesting[i++]="schedule,rcu_gp_kthread"
        not_interesting[i++]="schedule,rescuer_thread"
        not_interesting[i++]="schedule,sched_show_task,show_state_filter,sysrq_handle_showstate,__handle_sysrq,write_sysrq_trigger,proc_reg_write,vfs_write,SyS_write"
        not_interesting[i++]="schedule,scsi_error_handler"
        not_interesting[i++]="schedule,smpboot_thread_fn"
        not_interesting[i++]="schedule,sys_pause"
        not_interesting[i++]="schedule,wait_for_common,lnet_monitor_thread"
        not_interesting[i++]="schedule,watchdog"
        not_interesting[i++]="schedule,worker_thread"

        # Index the list by chain text so each unique chain needs a single
        # membership test.  A chain is filtered only when it equals a list
        # entry exactly; a chain that merely contains one is still printed.
        for (j in not_interesting)
                boring[not_interesting[j]] = 1

        for (i = 0; i < unique_nr; i++) {
                if (unique[i] in boring)
                        continue
                printf("%s\n\tPIDs(%d): %s\n\n",
                       unique[i], unique_num[i], unique_pids[i])
        }

}
300