Whamcloud - gitweb
4c437ab1fd027daecfc7d357a5cbb696b4a64493
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * libcfs/libcfs/watchdog.c
33  *
34  * Author: Jacob Berkman <jacob@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38
39 #include <linux/kthread.h>
40 #include <libcfs/libcfs.h>
41 #include "tracefile.h"
42
43 #ifndef WITH_WATCHDOG
44 #define WITH_WATCHDOG
45 #endif
46
/*
 * One watchdog per monitored service thread.  Lifetime is reference
 * counted: the owner holds one ref, and the pending list / dispatcher
 * callback each take a transient ref; the struct is freed when
 * lcw_refcount reaches zero (see lc_watchdog_delete / dispatcher).
 */
struct lc_watchdog {
        spinlock_t              lcw_lock;       /* check or change lcw_list */
        int                     lcw_refcount;   /* must hold lcw_pending_timers_lock */
        struct timer_list       lcw_timer;      /* kernel timer */
        struct list_head        lcw_list;       /* chain on pending list */
        ktime_t                 lcw_last_touched;/* last touched stamp */
        struct task_struct     *lcw_task;       /* owner task */
        void                    (*lcw_callback)(pid_t, void *); /* run by dispatcher on expiry */
        void                    *lcw_data;      /* opaque argument for lcw_callback */

        pid_t                   lcw_pid;        /* pid of lcw_task at add time */

        enum {
                LC_WATCHDOG_DISABLED,
                LC_WATCHDOG_ENABLED,
                LC_WATCHDOG_EXPIRED
        } lcw_state;
};
65
66 #ifdef WITH_WATCHDOG
67 /*
68  * The dispatcher will complete lcw_start_completion when it starts,
69  * and lcw_stop_completion when it exits.
70  * Wake lcw_event_waitq to signal timer callback dispatches.
71  */
/*
 * The dispatcher will complete lcw_start_completion when it starts,
 * and lcw_stop_completion when it exits.
 * Wake lcw_event_waitq to signal timer callback dispatches.
 */
static struct completion lcw_start_completion;
static struct completion  lcw_stop_completion;
static wait_queue_head_t lcw_event_waitq;

/*
 * Set this and wake lcw_event_waitq to stop the dispatcher.
 */
enum {
        LCW_FLAG_STOP = 0       /* bit number within lcw_flags */
};
static unsigned long lcw_flags = 0;

/*
 * Number of outstanding watchdogs.
 * When it hits 1, we start the dispatcher.
 * When it hits 0, we stop the dispatcher.
 */
static __u32         lcw_refcount = 0;
static DEFINE_MUTEX(lcw_refcount_mutex);        /* serializes lcw_refcount */

/*
 * List of timers that have fired that need their callbacks run by the
 * dispatcher.
 */
/* BH lock! */
static DEFINE_SPINLOCK(lcw_pending_timers_lock);
static struct list_head lcw_pending_timers = LIST_HEAD_INIT(lcw_pending_timers);

/* Last time a watchdog expired */
static time64_t lcw_last_watchdog_time;
/* Dumps issued since lcw_last_watchdog_time; used for console rate limiting */
static int lcw_recent_watchdog_count;
103
104 static void
105 lcw_dump(struct lc_watchdog *lcw)
106 {
107         ENTRY;
108         rcu_read_lock();
109        if (lcw->lcw_task == NULL) {
110                 LCONSOLE_WARN("Process %d was not found in the task "
111                               "list; watchdog callback may be incomplete\n",
112                               (int)lcw->lcw_pid);
113         } else {
114                 libcfs_debug_dumpstack(lcw->lcw_task);
115         }
116
117         rcu_read_unlock();
118         EXIT;
119 }
120
/*
 * Timer callback, runs in softirq context when a watchdog expires.
 * Marks the watchdog EXPIRED and queues it on lcw_pending_timers so the
 * dispatcher thread can dump the stack and invoke the owner's callback.
 */
static void lcw_cb(cfs_timer_cb_arg_t data)
{
        struct lc_watchdog *lcw = cfs_from_timer(lcw, data, lcw_timer);
        ENTRY;

        /* Touched or deleted since the timer was armed: nothing to do. */
        if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
                EXIT;
                return;
        }

        lcw->lcw_state = LC_WATCHDOG_EXPIRED;

        /* Lock ordering: lcw_lock before lcw_pending_timers_lock, same
         * order as lcw_dispatch_main() and lc_watchdog_delete(). */
        spin_lock_bh(&lcw->lcw_lock);
        LASSERT(list_empty(&lcw->lcw_list));

        spin_lock_bh(&lcw_pending_timers_lock);
        lcw->lcw_refcount++; /* +1 for pending list */
        list_add(&lcw->lcw_list, &lcw_pending_timers);
        wake_up(&lcw_event_waitq);

        spin_unlock_bh(&lcw_pending_timers_lock);
        spin_unlock_bh(&lcw->lcw_lock);
        EXIT;
}
145
146 static int is_watchdog_fired(void)
147 {
148         int rc;
149
150         if (test_bit(LCW_FLAG_STOP, &lcw_flags))
151                 return 1;
152
153         spin_lock_bh(&lcw_pending_timers_lock);
154         rc = !list_empty(&lcw_pending_timers);
155         spin_unlock_bh(&lcw_pending_timers_lock);
156         return rc;
157 }
158
159 static void lcw_dump_stack(struct lc_watchdog *lcw)
160 {
161         time64_t current_time = ktime_get_seconds();
162         struct timespec64 timediff;
163         time64_t delta_time;
164
165         timediff = ktime_to_timespec64(ktime_sub(ktime_get(),
166                                        lcw->lcw_last_touched));
167
168         /* LU-9235: Don't dump stack if the thread is just touched. */
169         if (timediff.tv_sec == 0)
170                 return;
171
172         /*
173          * Check to see if we should throttle the watchdog timer to avoid
174          * too many dumps going to the console thus triggering an NMI.
175          */
176         delta_time = current_time - lcw_last_watchdog_time;
177         if (delta_time < libcfs_watchdog_ratelimit &&
178             lcw_recent_watchdog_count > 3) {
179                 LCONSOLE_WARN("Service thread pid %u was inactive for %lu.%.02lus. Watchdog stack traces are limited to 3 per %d seconds, skipping this one.\n",
180                               (int)lcw->lcw_pid,
181                               timediff.tv_sec,
182                               timediff.tv_nsec / (NSEC_PER_SEC / 100),
183                               libcfs_watchdog_ratelimit);
184         } else {
185                 if (delta_time < libcfs_watchdog_ratelimit) {
186                         lcw_recent_watchdog_count++;
187                 } else {
188                         memcpy(&lcw_last_watchdog_time, &current_time,
189                                sizeof(current_time));
190                         lcw_recent_watchdog_count = 0;
191                 }
192
193                 LCONSOLE_WARN("Service thread pid %u was inactive for %lu.%.02lus. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:\n",
194                               (int)lcw->lcw_pid,
195                               timediff.tv_sec,
196                               timediff.tv_nsec / (NSEC_PER_SEC / 100));
197                 lcw_dump(lcw);
198         }
199 }
200
201 /*
202  * Provided watchdog handlers
203  */
204
205 static void lc_watchdog_dumplog(pid_t pid, void *data)
206 {
207         libcfs_debug_dumplog_internal((void *)((uintptr_t)pid));
208 }
209
/*
 * Dispatcher thread body ("lc_watchdogd").  Sleeps until a timer
 * callback queues an expired watchdog (or LCW_FLAG_STOP is set), then
 * drains lcw_pending_timers: dumps each hung thread's stack and runs
 * its callback.  Watchdogs whose last reference is dropped while we
 * work are parked on a local zombies list and freed here.
 * Returns the last wait_event_interruptible() result.
 */
static int lcw_dispatch_main(void *data)
{
        int                 rc = 0;
        struct lc_watchdog *lcw;
        struct list_head zombies = LIST_HEAD_INIT(zombies);

        ENTRY;

        complete(&lcw_start_completion);

        while (1) {
                /* at most one dumplog callback per wakeup (see below) */
                int dumplog = 1;

                rc = wait_event_interruptible(lcw_event_waitq,
                                              is_watchdog_fired());
                CDEBUG(D_INFO, "Watchdog got woken up...\n");
                if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
                        CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n");

                        spin_lock_bh(&lcw_pending_timers_lock);
                        rc = !list_empty(&lcw_pending_timers);
                        spin_unlock_bh(&lcw_pending_timers_lock);
                        if (rc) {
                                CERROR("pending timers list was not empty at "
                                       "time of watchdog dispatch shutdown\n");
                        }
                        break;
                }

                spin_lock_bh(&lcw_pending_timers_lock);
                while (!list_empty(&lcw_pending_timers)) {
                        int is_dumplog;

                        lcw = list_entry(lcw_pending_timers.next,
                                         struct lc_watchdog, lcw_list);
                        /* +1 ref for callback to make sure lwc wouldn't be
                         * deleted after releasing lcw_pending_timers_lock */
                        lcw->lcw_refcount++;
                        spin_unlock_bh(&lcw_pending_timers_lock);

                        /* lock ordering: lcw_lock must be taken before
                         * lcw_pending_timers_lock, so drop and re-take */
                        spin_lock_bh(&lcw->lcw_lock);
                        spin_lock_bh(&lcw_pending_timers_lock);

                        if (list_empty(&lcw->lcw_list)) {
                                /* already removed from pending list (e.g. by
                                 * lc_watchdog_touch) while we re-locked */
                                lcw->lcw_refcount--; /* -1 ref for callback */
                                if (lcw->lcw_refcount == 0)
                                        list_add(&lcw->lcw_list, &zombies);
                                spin_unlock_bh(&lcw->lcw_lock);
                                /* still hold lcw_pending_timers_lock */
                                continue;
                        }

                        list_del_init(&lcw->lcw_list);
                        lcw->lcw_refcount--; /* -1 ref for pending list */

                        spin_unlock_bh(&lcw_pending_timers_lock);
                        spin_unlock_bh(&lcw->lcw_lock);

                        CDEBUG(D_INFO, "found lcw for pid %d\n",
                               lcw->lcw_pid);
                        lcw_dump_stack(lcw);

                        /* run the callback unless disabled meanwhile; the
                         * default dumplog callback fires at most once per
                         * wakeup so one log dump covers all expirations */
                        is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
                            (dumplog || !is_dumplog)) {
                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
                                if (dumplog && is_dumplog)
                                        dumplog = 0;
                        }

                        spin_lock_bh(&lcw_pending_timers_lock);
                        lcw->lcw_refcount--; /* -1 ref for callback */
                        if (lcw->lcw_refcount == 0)
                                list_add(&lcw->lcw_list, &zombies);
                }
                spin_unlock_bh(&lcw_pending_timers_lock);

                /* free watchdogs whose last reference was dropped above */
                while (!list_empty(&zombies)) {
                        lcw = list_entry(zombies.next,
                                             struct lc_watchdog, lcw_list);
                        list_del_init(&lcw->lcw_list);
                        LIBCFS_FREE(lcw, sizeof(*lcw));
                }
        }

        complete(&lcw_stop_completion);

        RETURN(rc);
}
301
302 static void lcw_dispatch_start(void)
303 {
304         struct task_struct *task;
305
306         ENTRY;
307         LASSERT(lcw_refcount == 1);
308
309         init_completion(&lcw_stop_completion);
310         init_completion(&lcw_start_completion);
311         init_waitqueue_head(&lcw_event_waitq);
312
313         CDEBUG(D_INFO, "starting dispatch thread\n");
314         task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd");
315         if (IS_ERR(task)) {
316                 CERROR("error spawning watchdog dispatch thread: %ld\n",
317                         PTR_ERR(task));
318                 EXIT;
319                 return;
320         }
321         wait_for_completion(&lcw_start_completion);
322         CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
323
324         EXIT;
325 }
326
/*
 * Stop the dispatcher thread and wait for it to exit.  Called (under
 * lcw_refcount_mutex, from lc_watchdog_delete) once the last watchdog
 * has been removed.
 */
static void lcw_dispatch_stop(void)
{
        ENTRY;
        LASSERT(lcw_refcount == 0);

        CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");

        /* set the flag before waking so the dispatcher sees it */
        set_bit(LCW_FLAG_STOP, &lcw_flags);
        wake_up(&lcw_event_waitq);

        wait_for_completion(&lcw_stop_completion);
        /* clear it so a later lcw_dispatch_start() works again */
        clear_bit(LCW_FLAG_STOP, &lcw_flags);

        CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");

        EXIT;
}
344
345 struct lc_watchdog *lc_watchdog_add(int timeout,
346                                     void (*callback)(pid_t, void *),
347                                     void *data)
348 {
349         struct lc_watchdog *lcw = NULL;
350         ENTRY;
351
352         LIBCFS_ALLOC(lcw, sizeof(*lcw));
353         if (lcw == NULL) {
354                 CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
355                 RETURN(ERR_PTR(-ENOMEM));
356         }
357
358         spin_lock_init(&lcw->lcw_lock);
359         lcw->lcw_refcount = 1; /* refcount for owner */
360         lcw->lcw_task     = current;
361         lcw->lcw_pid      = current_pid();
362         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
363         lcw->lcw_data     = data;
364         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
365
366         INIT_LIST_HEAD(&lcw->lcw_list);
367         cfs_timer_setup(&lcw->lcw_timer, lcw_cb, (unsigned long)lcw, 0);
368
369         mutex_lock(&lcw_refcount_mutex);
370         if (++lcw_refcount == 1)
371                 lcw_dispatch_start();
372         mutex_unlock(&lcw_refcount_mutex);
373
374         /* Keep this working in case we enable them by default */
375         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
376                 lcw->lcw_last_touched = ktime_get();
377                 mod_timer(&lcw->lcw_timer, cfs_time_seconds(timeout) +
378                           jiffies);
379         }
380
381         RETURN(lcw);
382 }
383 EXPORT_SYMBOL(lc_watchdog_add);
384
385 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
386 {
387         ktime_t newtime = ktime_get();
388
389         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
390                 ktime_t lapse = ktime_sub(newtime, lcw->lcw_last_touched);
391                 struct timespec64 timediff;
392
393                 timediff = ktime_to_timespec64(lapse);
394                 LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).\n",
395                               lcw->lcw_pid, message,
396                               timediff.tv_sec,
397                               timediff.tv_nsec / (NSEC_PER_SEC / 100));
398         }
399         lcw->lcw_last_touched = newtime;
400 }
401
402 static void lc_watchdog_del_pending(struct lc_watchdog *lcw)
403 {
404         spin_lock_bh(&lcw->lcw_lock);
405         if (unlikely(!list_empty(&lcw->lcw_list))) {
406                 spin_lock_bh(&lcw_pending_timers_lock);
407                 list_del_init(&lcw->lcw_list);
408                 lcw->lcw_refcount--; /* -1 ref for pending list */
409                 spin_unlock_bh(&lcw_pending_timers_lock);
410         }
411
412         spin_unlock_bh(&lcw->lcw_lock);
413 }
414
/*
 * Feed the watchdog: cancel any pending expiry dispatch, report
 * "resumed" if the thread had been flagged as hung, and re-arm the
 * timer for another @timeout seconds.
 */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
        ENTRY;
        LASSERT(lcw != NULL);

        lc_watchdog_del_pending(lcw);

        lcw_update_time(lcw, "resumed");

        mod_timer(&lcw->lcw_timer, jiffies + cfs_time_seconds(timeout));
        lcw->lcw_state = LC_WATCHDOG_ENABLED;

        EXIT;
}
EXPORT_SYMBOL(lc_watchdog_touch);
430
/*
 * Mark @lcw inactive without freeing it: cancel any pending dispatch
 * and report "completed" if it had expired.  The timer is left armed;
 * lcw_cb() ignores a firing while state is not ENABLED.
 */
void lc_watchdog_disable(struct lc_watchdog *lcw)
{
        ENTRY;
        LASSERT(lcw != NULL);

        lc_watchdog_del_pending(lcw);

        lcw_update_time(lcw, "completed");
        lcw->lcw_state = LC_WATCHDOG_DISABLED;

        EXIT;
}
EXPORT_SYMBOL(lc_watchdog_disable);
444
/*
 * Drop the owner's reference on @lcw and disarm its timer.  The struct
 * is freed here unless the dispatcher holds a transient callback ref,
 * in which case the dispatcher frees it via its zombies list.  Stops
 * the dispatcher thread when the last watchdog goes away.
 */
void lc_watchdog_delete(struct lc_watchdog *lcw)
{
        int dead;

        ENTRY;
        LASSERT(lcw != NULL);

        del_timer(&lcw->lcw_timer);

        lcw_update_time(lcw, "stopped");

        spin_lock_bh(&lcw->lcw_lock);
        spin_lock_bh(&lcw_pending_timers_lock);
        if (unlikely(!list_empty(&lcw->lcw_list))) {
                /* expired but not yet picked up by the dispatcher */
                list_del_init(&lcw->lcw_list);
                lcw->lcw_refcount--; /* -1 ref for pending list */
        }

        lcw->lcw_refcount--; /* -1 ref for owner */
        dead = lcw->lcw_refcount == 0;
        spin_unlock_bh(&lcw_pending_timers_lock);
        spin_unlock_bh(&lcw->lcw_lock);

        if (dead)
                LIBCFS_FREE(lcw, sizeof(*lcw));

        mutex_lock(&lcw_refcount_mutex);
        if (--lcw_refcount == 0)
                lcw_dispatch_stop();
        mutex_unlock(&lcw_refcount_mutex);

        EXIT;
}
EXPORT_SYMBOL(lc_watchdog_delete);
479
480 #else   /* !defined(WITH_WATCHDOG) */
481
/*
 * Watchdog support compiled out: hand every caller the same inert
 * instance so the API still works; @timeout/@callback/@data are unused.
 */
struct lc_watchdog *lc_watchdog_add(int timeout,
                                    void (*callback)(pid_t pid, void *),
                                    void *data)
{
        static struct lc_watchdog      watchdog;
        return &watchdog;
}
EXPORT_SYMBOL(lc_watchdog_add);
490
/* No-op stubs for the rest of the API when watchdogs are compiled out. */
void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
{
}
EXPORT_SYMBOL(lc_watchdog_touch);

void lc_watchdog_disable(struct lc_watchdog *lcw)
{
}
EXPORT_SYMBOL(lc_watchdog_disable);

void lc_watchdog_delete(struct lc_watchdog *lcw)
{
}
EXPORT_SYMBOL(lc_watchdog_delete);
505
506 #endif