Whamcloud - gitweb
b9f488ff59102a46bffd55d71e8baffbcc1866da
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/watchdog.c
37  *
38  * Author: Jacob Berkman <jacob@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_LNET
42
43 #include <libcfs/libcfs.h>
44 #include "tracefile.h"
45
46 struct lc_watchdog {
47         cfs_spinlock_t  lcw_lock;     /* check or change lcw_list */
48         int             lcw_refcount; /* must hold lcw_pending_timers_lock */
49         cfs_timer_t     lcw_timer;    /* kernel timer */
50         cfs_list_t      lcw_list;     /* chain on pending list */
51         cfs_time_t      lcw_last_touched; /* last touched stamp */
52         cfs_task_t     *lcw_task;     /* owner task */
53         void          (*lcw_callback)(pid_t, void *);
54         void           *lcw_data;
55
56         pid_t           lcw_pid;
57
58         enum {
59                 LC_WATCHDOG_DISABLED,
60                 LC_WATCHDOG_ENABLED,
61                 LC_WATCHDOG_EXPIRED
62         } lcw_state;
63 };
64
65 #ifdef WITH_WATCHDOG
66 /*
67  * The dispatcher will complete lcw_start_completion when it starts,
68  * and lcw_stop_completion when it exits.
69  * Wake lcw_event_waitq to signal timer callback dispatches.
70  */
71 static cfs_completion_t lcw_start_completion;
72 static cfs_completion_t  lcw_stop_completion;
73 static cfs_waitq_t lcw_event_waitq;
74
75 /*
76  * Set this and wake lcw_event_waitq to stop the dispatcher.
77  */
78 enum {
79         LCW_FLAG_STOP = 0
80 };
81 static unsigned long lcw_flags = 0;
82
83 /*
84  * Number of outstanding watchdogs.
85  * When it hits 1, we start the dispatcher.
86  * When it hits 0, we stop the dispatcher.
87  */
88 static __u32         lcw_refcount = 0;
89 static CFS_DECLARE_MUTEX(lcw_refcount_sem);
90
91 /*
92  * List of timers that have fired that need their callbacks run by the
93  * dispatcher.
94  */
95 /* BH lock! */
96 static cfs_spinlock_t lcw_pending_timers_lock = CFS_SPIN_LOCK_UNLOCKED;
97 static cfs_list_t lcw_pending_timers = \
98         CFS_LIST_HEAD_INIT(lcw_pending_timers);
99
100 /* Last time a watchdog expired */
101 static cfs_time_t lcw_last_watchdog_time;
102 static int lcw_recent_watchdog_count;
103
104 static void
105 lcw_dump(struct lc_watchdog *lcw)
106 {
107         ENTRY;
108 #if defined(HAVE_TASKLIST_LOCK)
109         cfs_read_lock(&tasklist_lock);
110 #elif defined(HAVE_TASK_RCU)
111         rcu_read_lock();
112 #else
113         CERROR("unable to dump stack because of missing export\n");
114         RETURN_EXIT;
115 #endif
116        if (lcw->lcw_task == NULL) {
117                 LCONSOLE_WARN("Process " LPPID " was not found in the task "
118                               "list; watchdog callback may be incomplete\n",
119                               (int)lcw->lcw_pid);
120         } else {
121                 libcfs_debug_dumpstack(lcw->lcw_task);
122         }
123
124 #if defined(HAVE_TASKLIST_LOCK)
125         cfs_read_unlock(&tasklist_lock);
126 #elif defined(HAVE_TASK_RCU)
127         rcu_read_unlock();
128 #endif
129         EXIT;
130 }
131
132 static void lcw_cb(ulong_ptr_t data)
133 {
134         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
135         ENTRY;
136
137         if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
138                 EXIT;
139                 return;
140         }
141
142         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
143
144         cfs_spin_lock_bh(&lcw->lcw_lock);
145         LASSERT(cfs_list_empty(&lcw->lcw_list));
146
147         cfs_spin_lock_bh(&lcw_pending_timers_lock);
148         lcw->lcw_refcount++; /* +1 for pending list */
149         cfs_list_add(&lcw->lcw_list, &lcw_pending_timers);
150         cfs_waitq_signal(&lcw_event_waitq);
151
152         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
153         cfs_spin_unlock_bh(&lcw->lcw_lock);
154         EXIT;
155 }
156
157 static int is_watchdog_fired(void)
158 {
159         int rc;
160
161         if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags))
162                 return 1;
163
164         cfs_spin_lock_bh(&lcw_pending_timers_lock);
165         rc = !cfs_list_empty(&lcw_pending_timers);
166         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
167         return rc;
168 }
169
170 static void lcw_dump_stack(struct lc_watchdog *lcw)
171 {
172         cfs_time_t      current_time;
173         cfs_duration_t  delta_time;
174         struct timeval  timediff;
175
176         current_time = cfs_time_current();
177         delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
178         cfs_duration_usec(delta_time, &timediff);
179
180         /*
181          * Check to see if we should throttle the watchdog timer to avoid
182          * too many dumps going to the console thus triggering an NMI.
183          */
184         delta_time = cfs_duration_sec(cfs_time_sub(current_time,
185                                                    lcw_last_watchdog_time));
186
187         if (delta_time < libcfs_watchdog_ratelimit &&
188             lcw_recent_watchdog_count > 3) {
189                 LCONSOLE_WARN("Service thread pid %u was inactive for "
190                               "%lu.%.02lus. Watchdog stack traces are limited "
191                               "to 3 per %d seconds, skipping this one.\n",
192                               (int)lcw->lcw_pid,
193                               timediff.tv_sec,
194                               timediff.tv_usec / 10000,
195                               libcfs_watchdog_ratelimit);
196         } else {
197                 if (delta_time < libcfs_watchdog_ratelimit) {
198                         lcw_recent_watchdog_count++;
199                 } else {
200                         memcpy(&lcw_last_watchdog_time, &current_time,
201                                sizeof(current_time));
202                         lcw_recent_watchdog_count = 0;
203                 }
204
205                 LCONSOLE_WARN("Service thread pid %u was inactive for "
206                               "%lu.%.02lus. The thread might be hung, or it "
207                               "might only be slow and will resume later. "
208                               "Dumping the stack trace for debugging purposes:"
209                               "\n",
210                               (int)lcw->lcw_pid,
211                               timediff.tv_sec,
212                               timediff.tv_usec / 10000);
213                 lcw_dump(lcw);
214         }
215 }
216
217 static int lcw_dispatch_main(void *data)
218 {
219         int                 rc = 0;
220         unsigned long       flags;
221         struct lc_watchdog *lcw;
222         CFS_LIST_HEAD      (zombies);
223
224         ENTRY;
225
226         cfs_daemonize("lc_watchdogd");
227
228         SIGNAL_MASK_LOCK(current, flags);
229         sigfillset(&current->blocked);
230         RECALC_SIGPENDING;
231         SIGNAL_MASK_UNLOCK(current, flags);
232
233         cfs_complete(&lcw_start_completion);
234
235         while (1) {
236                 int dumplog = 1;
237
238                 cfs_wait_event_interruptible(lcw_event_waitq,
239                                              is_watchdog_fired(), rc);
240                 CDEBUG(D_INFO, "Watchdog got woken up...\n");
241                 if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
242                         CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
243
244                         cfs_spin_lock_bh(&lcw_pending_timers_lock);
245                         rc = !cfs_list_empty(&lcw_pending_timers);
246                         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
247                         if (rc) {
248                                 CERROR("pending timers list was not empty at "
249                                        "time of watchdog dispatch shutdown\n");
250                         }
251                         break;
252                 }
253
254                 cfs_spin_lock_bh(&lcw_pending_timers_lock);
255                 while (!cfs_list_empty(&lcw_pending_timers)) {
256                         int is_dumplog;
257
258                         lcw = cfs_list_entry(lcw_pending_timers.next,
259                                              struct lc_watchdog, lcw_list);
260                         /* +1 ref for callback to make sure lwc wouldn't be
261                          * deleted after releasing lcw_pending_timers_lock */
262                         lcw->lcw_refcount++;
263                         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
264
265                         /* lock ordering */
266                         cfs_spin_lock_bh(&lcw->lcw_lock);
267                         cfs_spin_lock_bh(&lcw_pending_timers_lock);
268
269                         if (cfs_list_empty(&lcw->lcw_list)) {
270                                 /* already removed from pending list */
271                                 lcw->lcw_refcount--; /* -1 ref for callback */
272                                 if (lcw->lcw_refcount == 0)
273                                         cfs_list_add(&lcw->lcw_list, &zombies);
274                                 cfs_spin_unlock_bh(&lcw->lcw_lock);
275                                 /* still hold lcw_pending_timers_lock */
276                                 continue;
277                         }
278
279                         cfs_list_del_init(&lcw->lcw_list);
280                         lcw->lcw_refcount--; /* -1 ref for pending list */
281
282                         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
283                         cfs_spin_unlock_bh(&lcw->lcw_lock);
284
285                         CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
286                                lcw->lcw_pid);
287                         lcw_dump_stack(lcw);
288
289                         is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
290                         if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
291                             (dumplog || !is_dumplog)) {
292                                 lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
293                                 if (dumplog && is_dumplog)
294                                         dumplog = 0;
295                         }
296
297                         cfs_spin_lock_bh(&lcw_pending_timers_lock);
298                         lcw->lcw_refcount--; /* -1 ref for callback */
299                         if (lcw->lcw_refcount == 0)
300                                 cfs_list_add(&lcw->lcw_list, &zombies);
301                 }
302                 cfs_spin_unlock_bh(&lcw_pending_timers_lock);
303
304                 while (!cfs_list_empty(&zombies)) {
305                         lcw = cfs_list_entry(lcw_pending_timers.next,
306                                          struct lc_watchdog, lcw_list);
307                         cfs_list_del(&lcw->lcw_list);
308                         LIBCFS_FREE(lcw, sizeof(*lcw));
309                 }
310         }
311
312         cfs_complete(&lcw_stop_completion);
313
314         RETURN(rc);
315 }
316
317 static void lcw_dispatch_start(void)
318 {
319         int rc;
320
321         ENTRY;
322         LASSERT(lcw_refcount == 1);
323
324         cfs_init_completion(&lcw_stop_completion);
325         cfs_init_completion(&lcw_start_completion);
326         cfs_waitq_init(&lcw_event_waitq);
327
328         CDEBUG(D_INFO, "starting dispatch thread\n");
329         rc = cfs_create_thread(lcw_dispatch_main, NULL, 0);
330         if (rc < 0) {
331                 CERROR("error spawning watchdog dispatch thread: %d\n", rc);
332                 EXIT;
333                 return;
334         }
335         cfs_wait_for_completion(&lcw_start_completion);
336         CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
337
338         EXIT;
339 }
340
341 static void lcw_dispatch_stop(void)
342 {
343         ENTRY;
344         LASSERT(lcw_refcount == 0);
345
346         CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
347
348         cfs_set_bit(LCW_FLAG_STOP, &lcw_flags);
349         cfs_waitq_signal(&lcw_event_waitq);
350
351         cfs_wait_for_completion(&lcw_stop_completion);
352
353         CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
354
355         EXIT;
356 }
357
358 struct lc_watchdog *lc_watchdog_add(int timeout,
359                                     void (*callback)(pid_t, void *),
360                                     void *data)
361 {
362         struct lc_watchdog *lcw = NULL;
363         ENTRY;
364
365         LIBCFS_ALLOC(lcw, sizeof(*lcw));
366         if (lcw == NULL) {
367                 CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
368                 RETURN(ERR_PTR(-ENOMEM));
369         }
370
371         cfs_spin_lock_init(&lcw->lcw_lock);
372         lcw->lcw_refcount = 1; /* refcount for owner */
373         lcw->lcw_task     = cfs_current();
374         lcw->lcw_pid      = cfs_curproc_pid();
375         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
376         lcw->lcw_data     = data;
377         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
378
379         CFS_INIT_LIST_HEAD(&lcw->lcw_list);
380         cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
381
382         cfs_down(&lcw_refcount_sem);
383         if (++lcw_refcount == 1)
384                 lcw_dispatch_start();
385         cfs_up(&lcw_refcount_sem);
386
387         /* Keep this working in case we enable them by default */
388         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
389                 lcw->lcw_last_touched = cfs_time_current();
390                 cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
391                               cfs_time_current());
392         }
393
394         RETURN(lcw);
395 }
396 EXPORT_SYMBOL(lc_watchdog_add);
397
398 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
399 {
400         cfs_time_t newtime = cfs_time_current();;
401
402         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
403                 struct timeval timediff;
404                 cfs_time_t delta_time = cfs_time_sub(newtime,
405                                                      lcw->lcw_last_touched);
406                 cfs_duration_usec(delta_time, &timediff);
407
408                 LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
409                               "This indicates the system was overloaded (too "
410                               "many service threads, or there were not enough "
411                               "hardware resources).\n",
412                               lcw->lcw_pid,
413                               message,
414                               timediff.tv_sec,
415                               timediff.tv_usec / 10000);
416         }
417         lcw->lcw_last_touched = newtime;
418 }
419
420 static void lc_watchdog_del_pending(struct lc_watchdog *lcw)
421 {
422         cfs_spin_lock_bh(&lcw->lcw_lock);
423         if (unlikely(!cfs_list_empty(&lcw->lcw_list))) {
424                 cfs_spin_lock_bh(&lcw_pending_timers_lock);
425                 cfs_list_del_init(&lcw->lcw_list);
426                 lcw->lcw_refcount--; /* -1 ref for pending list */
427                 cfs_spin_unlock_bh(&lcw_pending_timers_lock);
428         }
429
430         cfs_spin_unlock_bh(&lcw->lcw_lock);
431 }
432
433 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
434 {
435         ENTRY;
436         LASSERT(lcw != NULL);
437
438         lc_watchdog_del_pending(lcw);
439
440         lcw_update_time(lcw, "resumed");
441         lcw->lcw_state = LC_WATCHDOG_ENABLED;
442
443         cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
444                       cfs_time_seconds(timeout));
445
446         EXIT;
447 }
448 EXPORT_SYMBOL(lc_watchdog_touch);
449
450 void lc_watchdog_disable(struct lc_watchdog *lcw)
451 {
452         ENTRY;
453         LASSERT(lcw != NULL);
454
455         lc_watchdog_del_pending(lcw);
456
457         lcw_update_time(lcw, "completed");
458         lcw->lcw_state = LC_WATCHDOG_DISABLED;
459
460         EXIT;
461 }
462 EXPORT_SYMBOL(lc_watchdog_disable);
463
464 void lc_watchdog_delete(struct lc_watchdog *lcw)
465 {
466         int dead;
467
468         ENTRY;
469         LASSERT(lcw != NULL);
470
471         cfs_timer_disarm(&lcw->lcw_timer);
472
473         lcw_update_time(lcw, "stopped");
474
475         cfs_spin_lock_bh(&lcw->lcw_lock);
476         cfs_spin_lock_bh(&lcw_pending_timers_lock);
477         if (unlikely(!cfs_list_empty(&lcw->lcw_list))) {
478                 cfs_list_del_init(&lcw->lcw_list);
479                 lcw->lcw_refcount--; /* -1 ref for pending list */
480         }
481
482         lcw->lcw_refcount--; /* -1 ref for owner */
483         dead = lcw->lcw_refcount == 0;
484         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
485         cfs_spin_unlock_bh(&lcw->lcw_lock);
486
487         if (dead)
488                 LIBCFS_FREE(lcw, sizeof(*lcw));
489
490         cfs_down(&lcw_refcount_sem);
491         if (--lcw_refcount == 0)
492                 lcw_dispatch_stop();
493         cfs_up(&lcw_refcount_sem);
494
495         EXIT;
496 }
497 EXPORT_SYMBOL(lc_watchdog_delete);
498
499 /*
500  * Provided watchdog handlers
501  */
502
503 void lc_watchdog_dumplog(pid_t pid, void *data)
504 {
505         libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid));
506 }
507 EXPORT_SYMBOL(lc_watchdog_dumplog);
508
509 #else   /* !defined(WITH_WATCHDOG) */
510
511 struct lc_watchdog *lc_watchdog_add(int timeout,
512                                     void (*callback)(pid_t pid, void *),
513                                     void *data)
514 {
515         static struct lc_watchdog      watchdog;
516         return &watchdog;
517 }
518 EXPORT_SYMBOL(lc_watchdog_add);
519
/* Stub used when watchdogs are compiled out: does nothing. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_touch);
524
/* Stub used when watchdogs are compiled out: does nothing. */
void lc_watchdog_disable(struct lc_watchdog *lcw)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_disable);
529
/* Stub used when watchdogs are compiled out: does nothing. */
void lc_watchdog_delete(struct lc_watchdog *lcw)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_delete);
534
535 #endif