static struct list_head lcw_pending_timers = \
CFS_LIST_HEAD_INIT(lcw_pending_timers);
-#ifdef HAVE_TASKLIST_LOCK
+/* Start of the current rate-limit window: time of the last watchdog expiry that reset lcw_recent_watchdog_count */
+static cfs_time_t lcw_last_watchdog_time;
+static int lcw_recent_watchdog_count;
+static spinlock_t lcw_last_watchdog_lock = SPIN_LOCK_UNLOCKED;
+
static void
lcw_dump(struct lc_watchdog *lcw)
{
cfs_task_t *tsk;
+#if defined(HAVE_TASKLIST_LOCK)
+ read_lock(&tasklist_lock);
+#elif defined(HAVE_TASK_RCU)
+ rcu_read_lock();
+#else
+ CERROR("unable to dump stack because of missing export\n");
+ return;
+#endif
ENTRY;
- read_lock(&tasklist_lock);
tsk = find_task_by_pid(lcw->lcw_pid);
if (tsk == NULL) {
} else {
libcfs_debug_dumpstack(tsk);
}
-
+
+#if defined(HAVE_TASKLIST_LOCK)
read_unlock(&tasklist_lock);
+#elif defined(HAVE_TASK_RCU)
+ rcu_read_unlock();
+#endif
EXIT;
}
-#else
-static void
-lcw_dump(struct lc_watchdog *lcw)
-{
- CERROR("unable to dump stack because of missing export\n");
-}
-#endif
static void lcw_cb(ulong_ptr_t data)
{
struct lc_watchdog *lcw = (struct lc_watchdog *)data;
+ cfs_time_t current_time;
+ cfs_duration_t delta_time;
ENTRY;
}
lcw->lcw_state = LC_WATCHDOG_EXPIRED;
+ current_time = cfs_time_current();
+
+ /* Check to see if we should throttle the watchdog timer to avoid
+ * too many dumps going to the console thus triggering an NMI.
+ * Normally we would not hold the spin lock over the CWARN but in
+ * this case we hold it to ensure non ratelimited lcw_dumps are not
+ * interleaved on the console making them hard to read. */
+ spin_lock_bh(&lcw_last_watchdog_lock);
+ delta_time = cfs_duration_sec(current_time - lcw_last_watchdog_time);
+
+ if (delta_time < libcfs_watchdog_ratelimit && lcw_recent_watchdog_count > 3) {
+ CWARN("Refusing to fire watchdog for pid %d: it was inactive "
+ "for %ldms. Rate limiting 1 per %d seconds.\n",
+ (int)lcw->lcw_pid,cfs_duration_sec(lcw->lcw_time) * 1000,
+ libcfs_watchdog_ratelimit);
+ } else {
+ if (delta_time < libcfs_watchdog_ratelimit) {
+ lcw_recent_watchdog_count++;
+ } else {
+ memcpy(&lcw_last_watchdog_time, &current_time,
+ sizeof(current_time));
+ lcw_recent_watchdog_count = 0;
+ }
- /* NB this warning should appear on the console, but may not get into
- * the logs since we're running in a softirq handler */
-
- CWARN("Watchdog triggered for pid: " LPPID " it was inactive for %lds\n",
- lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
- lcw_dump(lcw);
+ /* This warning should appear on the console, but may not get
+ * into the logs since we're running in a softirq handler */
+ CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n",
+ (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
+ lcw_dump(lcw);
+ }
+ spin_unlock_bh(&lcw_last_watchdog_lock);
spin_lock_bh(&lcw_pending_timers_lock);
if (list_empty(&lcw->lcw_list)) {