Whamcloud - gitweb
04e4da7cd2ad3d845e9ac039436f28145846e797
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2014, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * libcfs/libcfs/watchdog.c
33  *
34  * Author: Jacob Berkman <jacob@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38
39 #include <linux/kthread.h>
40 #include <libcfs/libcfs.h>
41 #include "tracefile.h"
42
43 struct lc_watchdog {
44         spinlock_t              lcw_lock;       /* check or change lcw_list */
45         int                     lcw_refcount;   /* must hold lcw_pending_timers_lock */
46         struct timer_list       lcw_timer;      /* kernel timer */
47         struct list_head        lcw_list;       /* chain on pending list */
48         cfs_time_t              lcw_last_touched;/* last touched stamp */
49         struct task_struct     *lcw_task;       /* owner task */
50         void                    (*lcw_callback)(pid_t, void *);
51         void                    *lcw_data;
52
53         pid_t                   lcw_pid;
54
55         enum {
56                 LC_WATCHDOG_DISABLED,
57                 LC_WATCHDOG_ENABLED,
58                 LC_WATCHDOG_EXPIRED
59         } lcw_state;
60 };
61
62 #ifdef WITH_WATCHDOG
63 /*
64  * The dispatcher will complete lcw_start_completion when it starts,
65  * and lcw_stop_completion when it exits.
66  * Wake lcw_event_waitq to signal timer callback dispatches.
67  */
68 static struct completion lcw_start_completion;
69 static struct completion  lcw_stop_completion;
70 static wait_queue_head_t lcw_event_waitq;
71
72 /*
73  * Set this and wake lcw_event_waitq to stop the dispatcher.
74  */
75 enum {
76         LCW_FLAG_STOP = 0
77 };
78 static unsigned long lcw_flags = 0;
79
80 /*
81  * Number of outstanding watchdogs.
82  * When it hits 1, we start the dispatcher.
83  * When it hits 0, we stop the dispatcher.
84  */
85 static __u32         lcw_refcount = 0;
86 static DEFINE_MUTEX(lcw_refcount_mutex);
87
88 /*
89  * List of timers that have fired that need their callbacks run by the
90  * dispatcher.
91  */
92 /* BH lock! */
93 static DEFINE_SPINLOCK(lcw_pending_timers_lock);
94 static struct list_head lcw_pending_timers = LIST_HEAD_INIT(lcw_pending_timers);
95
96 /* Last time a watchdog expired */
97 static cfs_time_t lcw_last_watchdog_time;
98 static int lcw_recent_watchdog_count;
99
100 static void
101 lcw_dump(struct lc_watchdog *lcw)
102 {
103         ENTRY;
104         rcu_read_lock();
105        if (lcw->lcw_task == NULL) {
106                 LCONSOLE_WARN("Process %d was not found in the task "
107                               "list; watchdog callback may be incomplete\n",
108                               (int)lcw->lcw_pid);
109         } else {
110                 libcfs_debug_dumpstack(lcw->lcw_task);
111         }
112
113         rcu_read_unlock();
114         EXIT;
115 }
116
117 static void lcw_cb(uintptr_t data)
118 {
119         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
120         ENTRY;
121
122         if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
123                 EXIT;
124                 return;
125         }
126
127         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
128
129         spin_lock_bh(&lcw->lcw_lock);
130         LASSERT(list_empty(&lcw->lcw_list));
131
132         spin_lock_bh(&lcw_pending_timers_lock);
133         lcw->lcw_refcount++; /* +1 for pending list */
134         list_add(&lcw->lcw_list, &lcw_pending_timers);
135         wake_up(&lcw_event_waitq);
136
137         spin_unlock_bh(&lcw_pending_timers_lock);
138         spin_unlock_bh(&lcw->lcw_lock);
139         EXIT;
140 }
141
142 static int is_watchdog_fired(void)
143 {
144         int rc;
145
146         if (test_bit(LCW_FLAG_STOP, &lcw_flags))
147                 return 1;
148
149         spin_lock_bh(&lcw_pending_timers_lock);
150         rc = !list_empty(&lcw_pending_timers);
151         spin_unlock_bh(&lcw_pending_timers_lock);
152         return rc;
153 }
154
155 static void lcw_dump_stack(struct lc_watchdog *lcw)
156 {
157         cfs_time_t      current_time;
158         cfs_duration_t  delta_time;
159         struct timeval  timediff;
160
161         current_time = cfs_time_current();
162         delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
163         cfs_duration_usec(delta_time, &timediff);
164
165         /*
166          * Check to see if we should throttle the watchdog timer to avoid
167          * too many dumps going to the console thus triggering an NMI.
168          */
169         delta_time = cfs_duration_sec(cfs_time_sub(current_time,
170                                                    lcw_last_watchdog_time));
171
172         if (delta_time < libcfs_watchdog_ratelimit &&
173             lcw_recent_watchdog_count > 3) {
174                 LCONSOLE_WARN("Service thread pid %u was inactive for "
175                               "%lu.%.02lus. Watchdog stack traces are limited "
176                               "to 3 per %d seconds, skipping this one.\n",
177                               (int)lcw->lcw_pid,
178                               timediff.tv_sec,
179                               timediff.tv_usec / 10000,
180                               libcfs_watchdog_ratelimit);
181         } else {
182                 if (delta_time < libcfs_watchdog_ratelimit) {
183                         lcw_recent_watchdog_count++;
184                 } else {
185                         memcpy(&lcw_last_watchdog_time, &current_time,
186                                sizeof(current_time));
187                         lcw_recent_watchdog_count = 0;
188                 }
189
190                 LCONSOLE_WARN("Service thread pid %u was inactive for "
191                               "%lu.%.02lus. The thread might be hung, or it "
192                               "might only be slow and will resume later. "
193                               "Dumping the stack trace for debugging purposes:"
194                               "\n",
195                               (int)lcw->lcw_pid,
196                               timediff.tv_sec,
197                               timediff.tv_usec / 10000);
198                 lcw_dump(lcw);
199         }
200 }
201
202 /*
203  * Provided watchdog handlers
204  */
205
206 static void lc_watchdog_dumplog(pid_t pid, void *data)
207 {
208         libcfs_debug_dumplog_internal((void *)((uintptr_t)pid));
209 }
210
211 static int lcw_dispatch_main(void *data)
212 {
213         int                 rc = 0;
214         struct lc_watchdog *lcw;
215         struct list_head zombies = LIST_HEAD_INIT(zombies);
216
217         ENTRY;
218
219         complete(&lcw_start_completion);
220
221         while (1) {
222                 int dumplog = 1;
223
224                 rc = wait_event_interruptible(lcw_event_waitq,
225                                               is_watchdog_fired());
226                 CDEBUG(D_INFO, "Watchdog got woken up...\n");
227                 if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
228                         CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n");
229
230                         spin_lock_bh(&lcw_pending_timers_lock);
231                         rc = !list_empty(&lcw_pending_timers);
232                         spin_unlock_bh(&lcw_pending_timers_lock);
233                         if (rc) {
234                                 CERROR("pending timers list was not empty at "
235                                        "time of watchdog dispatch shutdown\n");
236                         }
237                         break;
238                 }
239
240                 spin_lock_bh(&lcw_pending_timers_lock);
241                 while (!list_empty(&lcw_pending_timers)) {
242                         int is_dumplog;
243
244                         lcw = list_entry(lcw_pending_timers.next,
245                                          struct lc_watchdog, lcw_list);
246                         /* +1 ref for callback to make sure lwc wouldn't be
247                          * deleted after releasing lcw_pending_timers_lock */
248                         lcw->lcw_refcount++;
249                         spin_unlock_bh(&lcw_pending_timers_lock);
250
251                         /* lock ordering */
252                         spin_lock_bh(&lcw->lcw_lock);
253                         spin_lock_bh(&lcw_pending_timers_lock);
254
255                         if (list_empty(&lcw->lcw_list)) {
256                                 /* already removed from pending list */
257                                 lcw->lcw_refcount--; /* -1 ref for callback */
258                                 if (lcw->lcw_refcount == 0)
259                                         list_add(&lcw->lcw_list, &zombies);
260                                 spin_unlock_bh(&lcw->lcw_lock);
261                                 /* still hold lcw_pending_timers_lock */
262                                 continue;
263                         }
264
265                         list_del_init(&lcw->lcw_list);
266                         lcw->lcw_refcount--; /* -1 ref for pending list */
267
268                         spin_unlock_bh(&lcw_pending_timers_lock);
269                         spin_unlock_bh(&lcw->lcw_lock);
270
271                         CDEBUG(D_INFO, "found lcw for pid %d\n",
272                                lcw->lcw_pid);
273                         lcw_dump_stack(lcw);
274
275                         is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
276                         if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
277                             (dumplog || !is_dumplog)) {
278                                 lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
279                                 if (dumplog && is_dumplog)
280                                         dumplog = 0;
281                         }
282
283                         spin_lock_bh(&lcw_pending_timers_lock);
284                         lcw->lcw_refcount--; /* -1 ref for callback */
285                         if (lcw->lcw_refcount == 0)
286                                 list_add(&lcw->lcw_list, &zombies);
287                 }
288                 spin_unlock_bh(&lcw_pending_timers_lock);
289
290                 while (!list_empty(&zombies)) {
291                         lcw = list_entry(zombies.next,
292                                              struct lc_watchdog, lcw_list);
293                         list_del_init(&lcw->lcw_list);
294                         LIBCFS_FREE(lcw, sizeof(*lcw));
295                 }
296         }
297
298         complete(&lcw_stop_completion);
299
300         RETURN(rc);
301 }
302
303 static void lcw_dispatch_start(void)
304 {
305         struct task_struct *task;
306
307         ENTRY;
308         LASSERT(lcw_refcount == 1);
309
310         init_completion(&lcw_stop_completion);
311         init_completion(&lcw_start_completion);
312         init_waitqueue_head(&lcw_event_waitq);
313
314         CDEBUG(D_INFO, "starting dispatch thread\n");
315         task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd");
316         if (IS_ERR(task)) {
317                 CERROR("error spawning watchdog dispatch thread: %ld\n",
318                         PTR_ERR(task));
319                 EXIT;
320                 return;
321         }
322         wait_for_completion(&lcw_start_completion);
323         CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
324
325         EXIT;
326 }
327
328 static void lcw_dispatch_stop(void)
329 {
330         ENTRY;
331         LASSERT(lcw_refcount == 0);
332
333         CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
334
335         set_bit(LCW_FLAG_STOP, &lcw_flags);
336         wake_up(&lcw_event_waitq);
337
338         wait_for_completion(&lcw_stop_completion);
339
340         CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
341
342         EXIT;
343 }
344
345 struct lc_watchdog *lc_watchdog_add(int timeout,
346                                     void (*callback)(pid_t, void *),
347                                     void *data)
348 {
349         struct lc_watchdog *lcw = NULL;
350         ENTRY;
351
352         LIBCFS_ALLOC(lcw, sizeof(*lcw));
353         if (lcw == NULL) {
354                 CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
355                 RETURN(ERR_PTR(-ENOMEM));
356         }
357
358         spin_lock_init(&lcw->lcw_lock);
359         lcw->lcw_refcount = 1; /* refcount for owner */
360         lcw->lcw_task     = current;
361         lcw->lcw_pid      = current_pid();
362         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
363         lcw->lcw_data     = data;
364         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
365
366         INIT_LIST_HEAD(&lcw->lcw_list);
367         cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
368
369         mutex_lock(&lcw_refcount_mutex);
370         if (++lcw_refcount == 1)
371                 lcw_dispatch_start();
372         mutex_unlock(&lcw_refcount_mutex);
373
374         /* Keep this working in case we enable them by default */
375         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
376                 lcw->lcw_last_touched = cfs_time_current();
377                 cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
378                               cfs_time_current());
379         }
380
381         RETURN(lcw);
382 }
383 EXPORT_SYMBOL(lc_watchdog_add);
384
385 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
386 {
387         cfs_time_t newtime = cfs_time_current();
388
389         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
390                 struct timeval timediff;
391                 cfs_time_t delta_time = cfs_time_sub(newtime,
392                                                      lcw->lcw_last_touched);
393                 cfs_duration_usec(delta_time, &timediff);
394
395                 LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
396                               "This indicates the system was overloaded (too "
397                               "many service threads, or there were not enough "
398                               "hardware resources).\n",
399                               lcw->lcw_pid,
400                               message,
401                               timediff.tv_sec,
402                               timediff.tv_usec / 10000);
403         }
404         lcw->lcw_last_touched = newtime;
405 }
406
407 static void lc_watchdog_del_pending(struct lc_watchdog *lcw)
408 {
409         spin_lock_bh(&lcw->lcw_lock);
410         if (unlikely(!list_empty(&lcw->lcw_list))) {
411                 spin_lock_bh(&lcw_pending_timers_lock);
412                 list_del_init(&lcw->lcw_list);
413                 lcw->lcw_refcount--; /* -1 ref for pending list */
414                 spin_unlock_bh(&lcw_pending_timers_lock);
415         }
416
417         spin_unlock_bh(&lcw->lcw_lock);
418 }
419
420 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
421 {
422         ENTRY;
423         LASSERT(lcw != NULL);
424
425         lc_watchdog_del_pending(lcw);
426
427         lcw_update_time(lcw, "resumed");
428
429         cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
430                       cfs_time_seconds(timeout));
431         lcw->lcw_state = LC_WATCHDOG_ENABLED;
432
433         EXIT;
434 }
435 EXPORT_SYMBOL(lc_watchdog_touch);
436
437 void lc_watchdog_disable(struct lc_watchdog *lcw)
438 {
439         ENTRY;
440         LASSERT(lcw != NULL);
441
442         lc_watchdog_del_pending(lcw);
443
444         lcw_update_time(lcw, "completed");
445         lcw->lcw_state = LC_WATCHDOG_DISABLED;
446
447         EXIT;
448 }
449 EXPORT_SYMBOL(lc_watchdog_disable);
450
451 void lc_watchdog_delete(struct lc_watchdog *lcw)
452 {
453         int dead;
454
455         ENTRY;
456         LASSERT(lcw != NULL);
457
458         cfs_timer_disarm(&lcw->lcw_timer);
459
460         lcw_update_time(lcw, "stopped");
461
462         spin_lock_bh(&lcw->lcw_lock);
463         spin_lock_bh(&lcw_pending_timers_lock);
464         if (unlikely(!list_empty(&lcw->lcw_list))) {
465                 list_del_init(&lcw->lcw_list);
466                 lcw->lcw_refcount--; /* -1 ref for pending list */
467         }
468
469         lcw->lcw_refcount--; /* -1 ref for owner */
470         dead = lcw->lcw_refcount == 0;
471         spin_unlock_bh(&lcw_pending_timers_lock);
472         spin_unlock_bh(&lcw->lcw_lock);
473
474         if (dead)
475                 LIBCFS_FREE(lcw, sizeof(*lcw));
476
477         mutex_lock(&lcw_refcount_mutex);
478         if (--lcw_refcount == 0)
479                 lcw_dispatch_stop();
480         mutex_unlock(&lcw_refcount_mutex);
481
482         EXIT;
483 }
484 EXPORT_SYMBOL(lc_watchdog_delete);
485
486 #else   /* !defined(WITH_WATCHDOG) */
487
488 struct lc_watchdog *lc_watchdog_add(int timeout,
489                                     void (*callback)(pid_t pid, void *),
490                                     void *data)
491 {
492         static struct lc_watchdog      watchdog;
493         return &watchdog;
494 }
495 EXPORT_SYMBOL(lc_watchdog_add);
496
/* Watchdog support compiled out: touching a watchdog is a no-op. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        return;
}
EXPORT_SYMBOL(lc_watchdog_touch);
501
/* Watchdog support compiled out: disabling a watchdog is a no-op. */
void lc_watchdog_disable(struct lc_watchdog *lcw)
{
        return;
}
EXPORT_SYMBOL(lc_watchdog_disable);
506
/*
 * Watchdog support compiled out: deleting a watchdog is a no-op (the object
 * returned by lc_watchdog_add() is static and must not be freed).
 */
void lc_watchdog_delete(struct lc_watchdog *lcw)
{
        return;
}
EXPORT_SYMBOL(lc_watchdog_delete);
511
512 #endif