Whamcloud - gitweb
Update copyrights on source files changed since 2010-02-15.
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/watchdog.c
37  *
38  * Author: Jacob Berkman <jacob@clusterfs.com>
39  */
40
41 #define DEBUG_SUBSYSTEM S_LNET
42
43 #include <libcfs/libcfs.h>
44 #include "tracefile.h"
45
46 struct lc_watchdog {
47         cfs_timer_t           lcw_timer; /* kernel timer */
48         cfs_list_t            lcw_list;
49         cfs_time_t            lcw_last_touched;
50         cfs_task_t           *lcw_task;
51         cfs_atomic_t          lcw_refcount;
52
53         void                (*lcw_callback)(pid_t, void *);
54         void                 *lcw_data;
55
56         pid_t                 lcw_pid;
57
58         enum {
59                 LC_WATCHDOG_DISABLED,
60                 LC_WATCHDOG_ENABLED,
61                 LC_WATCHDOG_EXPIRED
62         } lcw_state;
63 };
64
65 #ifdef WITH_WATCHDOG
66 /*
67  * The dispatcher will complete lcw_start_completion when it starts,
68  * and lcw_stop_completion when it exits.
69  * Wake lcw_event_waitq to signal timer callback dispatches.
70  */
71 static cfs_completion_t lcw_start_completion;
72 static cfs_completion_t  lcw_stop_completion;
73 static cfs_waitq_t lcw_event_waitq;
74
75 /*
76  * Set this and wake lcw_event_waitq to stop the dispatcher.
77  */
78 enum {
79         LCW_FLAG_STOP = 0
80 };
81 static unsigned long lcw_flags = 0;
82
83 /*
84  * Number of outstanding watchdogs.
85  * When it hits 1, we start the dispatcher.
86  * When it hits 0, we stop the distpatcher.
87  */
88 static __u32         lcw_refcount = 0;
89 static CFS_DECLARE_MUTEX(lcw_refcount_sem);
90
91 /*
92  * List of timers that have fired that need their callbacks run by the
93  * dispatcher.
94  */
95 /* BH lock! */
96 static cfs_spinlock_t lcw_pending_timers_lock = CFS_SPIN_LOCK_UNLOCKED;
97 static cfs_list_t lcw_pending_timers = \
98         CFS_LIST_HEAD_INIT(lcw_pending_timers);
99
100 /* Last time a watchdog expired */
101 static cfs_time_t lcw_last_watchdog_time;
102 static int lcw_recent_watchdog_count;
103
104 static void
105 lcw_dump(struct lc_watchdog *lcw)
106 {
107         ENTRY;
108 #if defined(HAVE_TASKLIST_LOCK)
109         cfs_read_lock(&tasklist_lock);
110 #elif defined(HAVE_TASK_RCU)
111         rcu_read_lock();
112 #else
113         CERROR("unable to dump stack because of missing export\n"); 
114         RETURN_EXIT;
115 #endif
116        if (lcw->lcw_task == NULL) { 
117                 LCONSOLE_WARN("Process " LPPID " was not found in the task "
118                               "list; watchdog callback may be incomplete\n",
119                               (int)lcw->lcw_pid);
120         } else {
121                 libcfs_debug_dumpstack(lcw->lcw_task);
122         }
123
124 #if defined(HAVE_TASKLIST_LOCK)
125         cfs_read_unlock(&tasklist_lock);
126 #elif defined(HAVE_TASK_RCU)
127         rcu_read_unlock();
128 #endif
129         EXIT;
130 }
131
132 static void lcw_cb(ulong_ptr_t data)
133 {
134         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
135         ENTRY;
136
137         if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
138                 EXIT;
139                 return;
140         }
141
142         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
143
144         cfs_spin_lock_bh(&lcw_pending_timers_lock);
145         cfs_list_add(&lcw->lcw_list, &lcw_pending_timers);
146         cfs_waitq_signal(&lcw_event_waitq);
147         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
148
149         EXIT;
150 }
151
152 static inline void lcw_get(struct lc_watchdog *lcw)
153 {
154         cfs_atomic_inc(&lcw->lcw_refcount);
155 }
156
157 static inline void lcw_put(struct lc_watchdog *lcw)
158 {
159         if (cfs_atomic_dec_and_test(&lcw->lcw_refcount)) {
160                 LASSERT(cfs_list_empty(&lcw->lcw_list));
161                 LIBCFS_FREE(lcw, sizeof(*lcw));
162         }
163 }
164
165 static int is_watchdog_fired(void)
166 {
167         int rc;
168
169         if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags))
170                 return 1;
171
172         cfs_spin_lock_bh(&lcw_pending_timers_lock);
173         rc = !cfs_list_empty(&lcw_pending_timers);
174         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
175         return rc;
176 }
177
178 static void lcw_dump_stack(struct lc_watchdog *lcw)
179 {
180         cfs_time_t      current_time;
181         cfs_duration_t  delta_time;
182         struct timeval  timediff;
183
184         current_time = cfs_time_current();
185         delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
186         cfs_duration_usec(delta_time, &timediff);
187
188         /*
189          * Check to see if we should throttle the watchdog timer to avoid
190          * too many dumps going to the console thus triggering an NMI.
191          */
192         delta_time = cfs_duration_sec(cfs_time_sub(current_time,
193                                                    lcw_last_watchdog_time));
194
195         if (delta_time < libcfs_watchdog_ratelimit &&
196             lcw_recent_watchdog_count > 3) {
197                 LCONSOLE_WARN("Service thread pid %u was inactive for "
198                               "%lu.%.02lus. Watchdog stack traces are limited "
199                               "to 3 per %d seconds, skipping this one.\n",
200                               (int)lcw->lcw_pid,
201                               timediff.tv_sec,
202                               timediff.tv_usec / 10000,
203                               libcfs_watchdog_ratelimit);
204         } else {
205                 if (delta_time < libcfs_watchdog_ratelimit) {
206                         lcw_recent_watchdog_count++;
207                 } else {
208                         memcpy(&lcw_last_watchdog_time, &current_time,
209                                sizeof(current_time));
210                         lcw_recent_watchdog_count = 0;
211                 }
212
213                 LCONSOLE_WARN("Service thread pid %u was inactive for "
214                               "%lu.%.02lus. The thread might be hung, or it "
215                               "might only be slow and will resume later. "
216                               "Dumping the stack trace for debugging purposes:"
217                               "\n",
218                               (int)lcw->lcw_pid,
219                               timediff.tv_sec,
220                               timediff.tv_usec / 10000);
221                 lcw_dump(lcw);
222         }
223 }
224
225 static int lcw_dispatch_main(void *data)
226 {
227         int                 rc = 0;
228         unsigned long       flags;
229         struct lc_watchdog *lcw, *lcwcb;
230
231         ENTRY;
232
233         cfs_daemonize("lc_watchdogd");
234
235         SIGNAL_MASK_LOCK(current, flags);
236         sigfillset(&current->blocked);
237         RECALC_SIGPENDING;
238         SIGNAL_MASK_UNLOCK(current, flags);
239
240         cfs_complete(&lcw_start_completion);
241
242         while (1) {
243                 cfs_wait_event_interruptible(lcw_event_waitq,
244                                              is_watchdog_fired(), rc);
245                 CDEBUG(D_INFO, "Watchdog got woken up...\n");
246                 if (cfs_test_bit(LCW_FLAG_STOP, &lcw_flags)) {
247                         CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
248
249                         cfs_spin_lock_bh(&lcw_pending_timers_lock);
250                         rc = !cfs_list_empty(&lcw_pending_timers);
251                         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
252                         if (rc) {
253                                 CERROR("pending timers list was not empty at "
254                                        "time of watchdog dispatch shutdown\n");
255                         }
256                         break;
257                 }
258
259                 lcwcb = NULL;
260                 cfs_spin_lock_bh(&lcw_pending_timers_lock);
261                 while (!cfs_list_empty(&lcw_pending_timers)) {
262
263                         lcw = cfs_list_entry(lcw_pending_timers.next,
264                                          struct lc_watchdog,
265                                          lcw_list);
266                         lcw_get(lcw);
267                         cfs_list_del_init(&lcw->lcw_list);
268                         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
269
270                         CDEBUG(D_INFO, "found lcw for pid " LPPID "\n",
271                                lcw->lcw_pid);
272                         lcw_dump_stack(lcw);
273
274                         if (lcwcb == NULL &&
275                             lcw->lcw_state != LC_WATCHDOG_DISABLED)
276                                 lcwcb = lcw;
277                         else
278                                 lcw_put(lcw);
279                         cfs_spin_lock_bh(&lcw_pending_timers_lock);
280                 }
281                 cfs_spin_unlock_bh(&lcw_pending_timers_lock);
282
283                 /* only do callback once for this batch of lcws */
284                 if (lcwcb != NULL) {
285                         lcwcb->lcw_callback(lcwcb->lcw_pid, lcwcb->lcw_data);
286                         lcw_put(lcwcb);
287                 }
288         }
289
290         cfs_complete(&lcw_stop_completion);
291
292         RETURN(rc);
293 }
294
295 static void lcw_dispatch_start(void)
296 {
297         int rc;
298
299         ENTRY;
300         LASSERT(lcw_refcount == 1);
301
302         cfs_init_completion(&lcw_stop_completion);
303         cfs_init_completion(&lcw_start_completion);
304         cfs_waitq_init(&lcw_event_waitq);
305
306         CDEBUG(D_INFO, "starting dispatch thread\n");
307         rc = cfs_kernel_thread(lcw_dispatch_main, NULL, 0);
308         if (rc < 0) {
309                 CERROR("error spawning watchdog dispatch thread: %d\n", rc);
310                 EXIT;
311                 return;
312         }
313         cfs_wait_for_completion(&lcw_start_completion);
314         CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
315
316         EXIT;
317 }
318
319 static void lcw_dispatch_stop(void)
320 {
321         ENTRY;
322         LASSERT(lcw_refcount == 0);
323
324         CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
325
326         cfs_set_bit(LCW_FLAG_STOP, &lcw_flags);
327         cfs_waitq_signal(&lcw_event_waitq);
328
329         cfs_wait_for_completion(&lcw_stop_completion);
330
331         CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
332
333         EXIT;
334 }
335
336 struct lc_watchdog *lc_watchdog_add(int timeout,
337                                     void (*callback)(pid_t, void *),
338                                     void *data)
339 {
340         struct lc_watchdog *lcw = NULL;
341         ENTRY;
342
343         LIBCFS_ALLOC(lcw, sizeof(*lcw));
344         if (lcw == NULL) {
345                 CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
346                 RETURN(ERR_PTR(-ENOMEM));
347         }
348
349         lcw->lcw_task     = cfs_current();
350         lcw->lcw_pid      = cfs_curproc_pid();
351         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
352         lcw->lcw_data     = data;
353         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
354
355         CFS_INIT_LIST_HEAD(&lcw->lcw_list);
356         cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
357         cfs_atomic_set(&lcw->lcw_refcount, 1);
358
359         cfs_down(&lcw_refcount_sem);
360         if (++lcw_refcount == 1)
361                 lcw_dispatch_start();
362         cfs_up(&lcw_refcount_sem);
363
364         /* Keep this working in case we enable them by default */
365         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
366                 lcw->lcw_last_touched = cfs_time_current();
367                 cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
368                               cfs_time_current());
369         }
370
371         RETURN(lcw);
372 }
373 EXPORT_SYMBOL(lc_watchdog_add);
374
375 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
376 {
377         cfs_time_t newtime = cfs_time_current();;
378
379         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
380                 struct timeval timediff;
381                 cfs_time_t delta_time = cfs_time_sub(newtime,
382                                                      lcw->lcw_last_touched);
383                 cfs_duration_usec(delta_time, &timediff);
384
385                 LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. "
386                               "This indicates the system was overloaded (too "
387                               "many service threads, or there were not enough "
388                               "hardware resources).\n",
389                               lcw->lcw_pid,
390                               message,
391                               timediff.tv_sec,
392                               timediff.tv_usec / 10000);
393         }
394         lcw->lcw_last_touched = newtime;
395 }
396
397 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
398 {
399         ENTRY;
400         LASSERT(lcw != NULL);
401         LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
402
403         cfs_spin_lock_bh(&lcw_pending_timers_lock);
404         cfs_list_del_init(&lcw->lcw_list);
405         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
406
407         lcw_update_time(lcw, "resumed");
408         lcw->lcw_state = LC_WATCHDOG_ENABLED;
409
410         cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
411                       cfs_time_seconds(timeout));
412
413         EXIT;
414 }
415 EXPORT_SYMBOL(lc_watchdog_touch);
416
417 void lc_watchdog_disable(struct lc_watchdog *lcw)
418 {
419         ENTRY;
420         LASSERT(lcw != NULL);
421         LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
422
423         cfs_spin_lock_bh(&lcw_pending_timers_lock);
424         if (!cfs_list_empty(&lcw->lcw_list))
425                 cfs_list_del_init(&lcw->lcw_list);
426         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
427
428         lcw_update_time(lcw, "completed");
429         lcw->lcw_state = LC_WATCHDOG_DISABLED;
430
431         EXIT;
432 }
433 EXPORT_SYMBOL(lc_watchdog_disable);
434
435 void lc_watchdog_delete(struct lc_watchdog *lcw)
436 {
437         ENTRY;
438         LASSERT(lcw != NULL);
439         LASSERT(cfs_atomic_read(&lcw->lcw_refcount) > 0);
440
441         cfs_timer_disarm(&lcw->lcw_timer);
442
443         lcw_update_time(lcw, "stopped");
444
445         cfs_spin_lock_bh(&lcw_pending_timers_lock);
446         if (!cfs_list_empty(&lcw->lcw_list))
447                 cfs_list_del_init(&lcw->lcw_list);
448         cfs_spin_unlock_bh(&lcw_pending_timers_lock);
449         lcw_put(lcw);
450
451         cfs_down(&lcw_refcount_sem);
452         if (--lcw_refcount == 0)
453                 lcw_dispatch_stop();
454         cfs_up(&lcw_refcount_sem);
455
456         EXIT;
457 }
458 EXPORT_SYMBOL(lc_watchdog_delete);
459
460 /*
461  * Provided watchdog handlers
462  */
463
464 void lc_watchdog_dumplog(pid_t pid, void *data)
465 {
466         libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid));
467 }
468 EXPORT_SYMBOL(lc_watchdog_dumplog);
469
470 #else   /* !defined(WITH_WATCHDOG) */
471
472 struct lc_watchdog *lc_watchdog_add(int timeout,
473                                     void (*callback)(pid_t pid, void *),
474                                     void *data)
475 {
476         static struct lc_watchdog      watchdog;
477         return &watchdog;
478 }
479 EXPORT_SYMBOL(lc_watchdog_add);
480
/* Watchdog support compiled out: touching a watchdog does nothing. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_touch);
485
/* Watchdog support compiled out: disabling a watchdog does nothing. */
void lc_watchdog_disable(struct lc_watchdog *lcw)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_disable);
490
/*
 * Watchdog support compiled out: nothing to release, since
 * lc_watchdog_add() hands out a static object in this configuration.
 */
void lc_watchdog_delete(struct lc_watchdog *lcw)
{
        /* intentionally empty */
}
EXPORT_SYMBOL(lc_watchdog_delete);
495
496 #endif