Whamcloud - gitweb
LU-9859 libcfs: delete libcfs/linux/libcfs.h
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * libcfs/libcfs/watchdog.c
33  *
34  * Author: Jacob Berkman <jacob@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38
39 #include <linux/kthread.h>
40 #include <libcfs/libcfs.h>
41 #include "tracefile.h"
42
43 #ifndef WITH_WATCHDOG
44 #define WITH_WATCHDOG
45 #endif
46
47 struct lc_watchdog {
48         spinlock_t              lcw_lock;       /* check or change lcw_list */
49         int                     lcw_refcount;   /* must hold lcw_pending_timers_lock */
50         struct timer_list       lcw_timer;      /* kernel timer */
51         struct list_head        lcw_list;       /* chain on pending list */
52         ktime_t                 lcw_last_touched;/* last touched stamp */
53         struct task_struct     *lcw_task;       /* owner task */
54         void                    (*lcw_callback)(pid_t, void *);
55         void                    *lcw_data;
56
57         pid_t                   lcw_pid;
58
59         enum {
60                 LC_WATCHDOG_DISABLED,
61                 LC_WATCHDOG_ENABLED,
62                 LC_WATCHDOG_EXPIRED
63         } lcw_state;
64 };
65
66 #ifdef WITH_WATCHDOG
67 /*
68  * The dispatcher will complete lcw_start_completion when it starts,
69  * and lcw_stop_completion when it exits.
70  * Wake lcw_event_waitq to signal timer callback dispatches.
71  */
72 static struct completion lcw_start_completion;
73 static struct completion  lcw_stop_completion;
74 static wait_queue_head_t lcw_event_waitq;
75
76 /*
77  * Set this and wake lcw_event_waitq to stop the dispatcher.
78  */
79 enum {
80         LCW_FLAG_STOP = 0
81 };
82 static unsigned long lcw_flags = 0;
83
84 /*
85  * Number of outstanding watchdogs.
86  * When it hits 1, we start the dispatcher.
87  * When it hits 0, we stop the dispatcher.
88  */
89 static __u32         lcw_refcount = 0;
90 static DEFINE_MUTEX(lcw_refcount_mutex);
91
92 /*
93  * List of timers that have fired that need their callbacks run by the
94  * dispatcher.
95  */
96 /* BH lock! */
97 static DEFINE_SPINLOCK(lcw_pending_timers_lock);
98 static struct list_head lcw_pending_timers = LIST_HEAD_INIT(lcw_pending_timers);
99
100 /* Last time a watchdog expired */
101 static time64_t lcw_last_watchdog_time;
102 static int lcw_recent_watchdog_count;
103
104 static void
105 lcw_dump(struct lc_watchdog *lcw)
106 {
107         ENTRY;
108         rcu_read_lock();
109        if (lcw->lcw_task == NULL) {
110                 LCONSOLE_WARN("Process %d was not found in the task "
111                               "list; watchdog callback may be incomplete\n",
112                               (int)lcw->lcw_pid);
113         } else {
114                 libcfs_debug_dumpstack(lcw->lcw_task);
115         }
116
117         rcu_read_unlock();
118         EXIT;
119 }
120
121 static void lcw_cb(uintptr_t data)
122 {
123         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
124         ENTRY;
125
126         if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
127                 EXIT;
128                 return;
129         }
130
131         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
132
133         spin_lock_bh(&lcw->lcw_lock);
134         LASSERT(list_empty(&lcw->lcw_list));
135
136         spin_lock_bh(&lcw_pending_timers_lock);
137         lcw->lcw_refcount++; /* +1 for pending list */
138         list_add(&lcw->lcw_list, &lcw_pending_timers);
139         wake_up(&lcw_event_waitq);
140
141         spin_unlock_bh(&lcw_pending_timers_lock);
142         spin_unlock_bh(&lcw->lcw_lock);
143         EXIT;
144 }
145
146 static int is_watchdog_fired(void)
147 {
148         int rc;
149
150         if (test_bit(LCW_FLAG_STOP, &lcw_flags))
151                 return 1;
152
153         spin_lock_bh(&lcw_pending_timers_lock);
154         rc = !list_empty(&lcw_pending_timers);
155         spin_unlock_bh(&lcw_pending_timers_lock);
156         return rc;
157 }
158
159 static void lcw_dump_stack(struct lc_watchdog *lcw)
160 {
161         time64_t current_time = ktime_get_seconds();
162         struct timespec64 timediff;
163         time64_t delta_time;
164
165         timediff = ktime_to_timespec64(ktime_sub(ktime_get(),
166                                        lcw->lcw_last_touched));
167
168         /* LU-9235: Don't dump stack if the thread is just touched. */
169         if (timediff.tv_sec == 0)
170                 return;
171
172         /*
173          * Check to see if we should throttle the watchdog timer to avoid
174          * too many dumps going to the console thus triggering an NMI.
175          */
176         delta_time = current_time - lcw_last_watchdog_time;
177         if (delta_time < libcfs_watchdog_ratelimit &&
178             lcw_recent_watchdog_count > 3) {
179                 LCONSOLE_WARN("Service thread pid %u was inactive for %lu.%.02lus. Watchdog stack traces are limited to 3 per %d seconds, skipping this one.\n",
180                               (int)lcw->lcw_pid,
181                               timediff.tv_sec,
182                               timediff.tv_nsec / (NSEC_PER_SEC / 100),
183                               libcfs_watchdog_ratelimit);
184         } else {
185                 if (delta_time < libcfs_watchdog_ratelimit) {
186                         lcw_recent_watchdog_count++;
187                 } else {
188                         memcpy(&lcw_last_watchdog_time, &current_time,
189                                sizeof(current_time));
190                         lcw_recent_watchdog_count = 0;
191                 }
192
193                 LCONSOLE_WARN("Service thread pid %u was inactive for %lu.%.02lus. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:\n",
194                               (int)lcw->lcw_pid,
195                               timediff.tv_sec,
196                               timediff.tv_nsec / (NSEC_PER_SEC / 100));
197                 lcw_dump(lcw);
198         }
199 }
200
201 /*
202  * Provided watchdog handlers
203  */
204
205 static void lc_watchdog_dumplog(pid_t pid, void *data)
206 {
207         libcfs_debug_dumplog_internal((void *)((uintptr_t)pid));
208 }
209
210 static int lcw_dispatch_main(void *data)
211 {
212         int                 rc = 0;
213         struct lc_watchdog *lcw;
214         struct list_head zombies = LIST_HEAD_INIT(zombies);
215
216         ENTRY;
217
218         complete(&lcw_start_completion);
219
220         while (1) {
221                 int dumplog = 1;
222
223                 rc = wait_event_interruptible(lcw_event_waitq,
224                                               is_watchdog_fired());
225                 CDEBUG(D_INFO, "Watchdog got woken up...\n");
226                 if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
227                         CDEBUG(D_INFO, "LCW_FLAG_STOP set, shutting down...\n");
228
229                         spin_lock_bh(&lcw_pending_timers_lock);
230                         rc = !list_empty(&lcw_pending_timers);
231                         spin_unlock_bh(&lcw_pending_timers_lock);
232                         if (rc) {
233                                 CERROR("pending timers list was not empty at "
234                                        "time of watchdog dispatch shutdown\n");
235                         }
236                         break;
237                 }
238
239                 spin_lock_bh(&lcw_pending_timers_lock);
240                 while (!list_empty(&lcw_pending_timers)) {
241                         int is_dumplog;
242
243                         lcw = list_entry(lcw_pending_timers.next,
244                                          struct lc_watchdog, lcw_list);
245                         /* +1 ref for callback to make sure lwc wouldn't be
246                          * deleted after releasing lcw_pending_timers_lock */
247                         lcw->lcw_refcount++;
248                         spin_unlock_bh(&lcw_pending_timers_lock);
249
250                         /* lock ordering */
251                         spin_lock_bh(&lcw->lcw_lock);
252                         spin_lock_bh(&lcw_pending_timers_lock);
253
254                         if (list_empty(&lcw->lcw_list)) {
255                                 /* already removed from pending list */
256                                 lcw->lcw_refcount--; /* -1 ref for callback */
257                                 if (lcw->lcw_refcount == 0)
258                                         list_add(&lcw->lcw_list, &zombies);
259                                 spin_unlock_bh(&lcw->lcw_lock);
260                                 /* still hold lcw_pending_timers_lock */
261                                 continue;
262                         }
263
264                         list_del_init(&lcw->lcw_list);
265                         lcw->lcw_refcount--; /* -1 ref for pending list */
266
267                         spin_unlock_bh(&lcw_pending_timers_lock);
268                         spin_unlock_bh(&lcw->lcw_lock);
269
270                         CDEBUG(D_INFO, "found lcw for pid %d\n",
271                                lcw->lcw_pid);
272                         lcw_dump_stack(lcw);
273
274                         is_dumplog = lcw->lcw_callback == lc_watchdog_dumplog;
275                         if (lcw->lcw_state != LC_WATCHDOG_DISABLED &&
276                             (dumplog || !is_dumplog)) {
277                                 lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
278                                 if (dumplog && is_dumplog)
279                                         dumplog = 0;
280                         }
281
282                         spin_lock_bh(&lcw_pending_timers_lock);
283                         lcw->lcw_refcount--; /* -1 ref for callback */
284                         if (lcw->lcw_refcount == 0)
285                                 list_add(&lcw->lcw_list, &zombies);
286                 }
287                 spin_unlock_bh(&lcw_pending_timers_lock);
288
289                 while (!list_empty(&zombies)) {
290                         lcw = list_entry(zombies.next,
291                                              struct lc_watchdog, lcw_list);
292                         list_del_init(&lcw->lcw_list);
293                         LIBCFS_FREE(lcw, sizeof(*lcw));
294                 }
295         }
296
297         complete(&lcw_stop_completion);
298
299         RETURN(rc);
300 }
301
302 static void lcw_dispatch_start(void)
303 {
304         struct task_struct *task;
305
306         ENTRY;
307         LASSERT(lcw_refcount == 1);
308
309         init_completion(&lcw_stop_completion);
310         init_completion(&lcw_start_completion);
311         init_waitqueue_head(&lcw_event_waitq);
312
313         CDEBUG(D_INFO, "starting dispatch thread\n");
314         task = kthread_run(lcw_dispatch_main, NULL, "lc_watchdogd");
315         if (IS_ERR(task)) {
316                 CERROR("error spawning watchdog dispatch thread: %ld\n",
317                         PTR_ERR(task));
318                 EXIT;
319                 return;
320         }
321         wait_for_completion(&lcw_start_completion);
322         CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
323
324         EXIT;
325 }
326
327 static void lcw_dispatch_stop(void)
328 {
329         ENTRY;
330         LASSERT(lcw_refcount == 0);
331
332         CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
333
334         set_bit(LCW_FLAG_STOP, &lcw_flags);
335         wake_up(&lcw_event_waitq);
336
337         wait_for_completion(&lcw_stop_completion);
338
339         CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
340
341         EXIT;
342 }
343
344 struct lc_watchdog *lc_watchdog_add(int timeout,
345                                     void (*callback)(pid_t, void *),
346                                     void *data)
347 {
348         struct lc_watchdog *lcw = NULL;
349         ENTRY;
350
351         LIBCFS_ALLOC(lcw, sizeof(*lcw));
352         if (lcw == NULL) {
353                 CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
354                 RETURN(ERR_PTR(-ENOMEM));
355         }
356
357         spin_lock_init(&lcw->lcw_lock);
358         lcw->lcw_refcount = 1; /* refcount for owner */
359         lcw->lcw_task     = current;
360         lcw->lcw_pid      = current_pid();
361         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
362         lcw->lcw_data     = data;
363         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
364
365         INIT_LIST_HEAD(&lcw->lcw_list);
366         setup_timer(&lcw->lcw_timer, lcw_cb, (unsigned long)lcw);
367
368         mutex_lock(&lcw_refcount_mutex);
369         if (++lcw_refcount == 1)
370                 lcw_dispatch_start();
371         mutex_unlock(&lcw_refcount_mutex);
372
373         /* Keep this working in case we enable them by default */
374         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
375                 lcw->lcw_last_touched = ktime_get();
376                 mod_timer(&lcw->lcw_timer, cfs_time_seconds(timeout) +
377                           jiffies);
378         }
379
380         RETURN(lcw);
381 }
382 EXPORT_SYMBOL(lc_watchdog_add);
383
384 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
385 {
386         ktime_t newtime = ktime_get();
387
388         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
389                 ktime_t lapse = ktime_sub(newtime, lcw->lcw_last_touched);
390                 struct timespec64 timediff;
391
392                 timediff = ktime_to_timespec64(lapse);
393                 LCONSOLE_WARN("Service thread pid %u %s after %lu.%.02lus. This indicates the system was overloaded (too many service threads, or there were not enough hardware resources).\n",
394                               lcw->lcw_pid, message,
395                               timediff.tv_sec,
396                               timediff.tv_nsec / (NSEC_PER_SEC / 100));
397         }
398         lcw->lcw_last_touched = newtime;
399 }
400
401 static void lc_watchdog_del_pending(struct lc_watchdog *lcw)
402 {
403         spin_lock_bh(&lcw->lcw_lock);
404         if (unlikely(!list_empty(&lcw->lcw_list))) {
405                 spin_lock_bh(&lcw_pending_timers_lock);
406                 list_del_init(&lcw->lcw_list);
407                 lcw->lcw_refcount--; /* -1 ref for pending list */
408                 spin_unlock_bh(&lcw_pending_timers_lock);
409         }
410
411         spin_unlock_bh(&lcw->lcw_lock);
412 }
413
414 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
415 {
416         ENTRY;
417         LASSERT(lcw != NULL);
418
419         lc_watchdog_del_pending(lcw);
420
421         lcw_update_time(lcw, "resumed");
422
423         mod_timer(&lcw->lcw_timer, jiffies + cfs_time_seconds(timeout));
424         lcw->lcw_state = LC_WATCHDOG_ENABLED;
425
426         EXIT;
427 }
428 EXPORT_SYMBOL(lc_watchdog_touch);
429
430 void lc_watchdog_disable(struct lc_watchdog *lcw)
431 {
432         ENTRY;
433         LASSERT(lcw != NULL);
434
435         lc_watchdog_del_pending(lcw);
436
437         lcw_update_time(lcw, "completed");
438         lcw->lcw_state = LC_WATCHDOG_DISABLED;
439
440         EXIT;
441 }
442 EXPORT_SYMBOL(lc_watchdog_disable);
443
444 void lc_watchdog_delete(struct lc_watchdog *lcw)
445 {
446         int dead;
447
448         ENTRY;
449         LASSERT(lcw != NULL);
450
451         del_timer(&lcw->lcw_timer);
452
453         lcw_update_time(lcw, "stopped");
454
455         spin_lock_bh(&lcw->lcw_lock);
456         spin_lock_bh(&lcw_pending_timers_lock);
457         if (unlikely(!list_empty(&lcw->lcw_list))) {
458                 list_del_init(&lcw->lcw_list);
459                 lcw->lcw_refcount--; /* -1 ref for pending list */
460         }
461
462         lcw->lcw_refcount--; /* -1 ref for owner */
463         dead = lcw->lcw_refcount == 0;
464         spin_unlock_bh(&lcw_pending_timers_lock);
465         spin_unlock_bh(&lcw->lcw_lock);
466
467         if (dead)
468                 LIBCFS_FREE(lcw, sizeof(*lcw));
469
470         mutex_lock(&lcw_refcount_mutex);
471         if (--lcw_refcount == 0)
472                 lcw_dispatch_stop();
473         mutex_unlock(&lcw_refcount_mutex);
474
475         EXIT;
476 }
477 EXPORT_SYMBOL(lc_watchdog_delete);
478
479 #else   /* !defined(WITH_WATCHDOG) */
480
481 struct lc_watchdog *lc_watchdog_add(int timeout,
482                                     void (*callback)(pid_t pid, void *),
483                                     void *data)
484 {
485         static struct lc_watchdog      watchdog;
486         return &watchdog;
487 }
488 EXPORT_SYMBOL(lc_watchdog_add);
489
490 void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
491 {
492 }
493 EXPORT_SYMBOL(lc_watchdog_touch);
494
495 void lc_watchdog_disable(struct lc_watchdog *lcw)
496 {
497 }
498 EXPORT_SYMBOL(lc_watchdog_disable);
499
500 void lc_watchdog_delete(struct lc_watchdog *lcw)
501 {
502 }
503 EXPORT_SYMBOL(lc_watchdog_delete);
504
505 #endif