+/*
+ * If a thread runs too long or spends too much time on a single request,
+ * we want to know about it, so we set up a delayed work item as a watchdog.
+ * If it fires, we display a stack trace of the delayed thread,
+ * provided we aren't rate-limited.
+ *
+ * Watchdog stack traces are limited to 3 per 'libcfs_watchdog_ratelimit'
+ * seconds; watchdog_limit is the rate-limit state checked each time the
+ * watchdog fires.
+ */
+static struct ratelimit_state watchdog_limit;
+
+/*
+ * Delayed-work handler invoked when a service thread's watchdog expires.
+ *
+ * @w is the work_struct embedded in the owning thread's t_watchdog; the
+ * thread is recovered from it with container_of().  The time elapsed since
+ * the thread's t_touched stamp is printed on the console as whole seconds
+ * plus a millisecond fraction.  The thread's stack is dumped as well,
+ * unless the shared rate limit says this report must be skipped.
+ */
+static void ptlrpc_watchdog_fire(struct work_struct *w)
+{
+	struct ptlrpc_thread *svc_thread;
+	u64 idle;
+	u32 idle_ms;
+
+	svc_thread = container_of(w, struct ptlrpc_thread, t_watchdog.work);
+
+	/* do_div() leaves the quotient in 'idle' and returns the remainder */
+	idle = ktime_ms_delta(ktime_get(), svc_thread->t_touched);
+	idle_ms = do_div(idle, MSEC_PER_SEC);
+
+	/* a false return from __ratelimit() means we ARE rate-limited */
+	if (!__ratelimit(&watchdog_limit)) {
+		/* below message is checked in sanity-quota.sh test_6,18 */
+		LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. Watchdog stack traces are limited to 3 per %u seconds, skipping this one.\n",
+			      svc_thread->t_task->comm,
+			      svc_thread->t_task->pid,
+			      idle, idle_ms, libcfs_watchdog_ratelimit);
+		return;
+	}
+
+	/* below message is checked in sanity-quota.sh test_6,18 */
+	LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:\n",
+		      svc_thread->t_task->comm,
+		      svc_thread->t_task->pid,
+		      idle, idle_ms);
+
+	libcfs_debug_dumpstack(svc_thread->t_task);
+}
+
+/*
+ * Set up and arm a watchdog.
+ *
+ * Binds @work to ptlrpc_watchdog_fire() and schedules it to run after a
+ * delay of cfs_time_seconds(@timeout).
+ */
+static void ptlrpc_watchdog_init(struct delayed_work *work, timeout_t timeout)
+{
+	unsigned long delay = cfs_time_seconds(timeout);
+
+	INIT_DELAYED_WORK(work, ptlrpc_watchdog_fire);
+	schedule_delayed_work(work, delay);
+}
+
+/*
+ * Stop the watchdog on @work.
+ *
+ * Delegates to cancel_delayed_work_sync(); its return value is not
+ * needed here and is deliberately discarded.
+ */
+static void ptlrpc_watchdog_disable(struct delayed_work *work)
+{
+	(void)cancel_delayed_work_sync(work);
+}
+
+/*
+ * Note activity on the thread owning @work and push its watchdog out.
+ *
+ * Stamps the thread's t_touched with the current time, then re-queues
+ * @work on system_wq with a delay of cfs_time_seconds(@timeout).
+ */
+static void ptlrpc_watchdog_touch(struct delayed_work *work, timeout_t timeout)
+{
+	struct ptlrpc_thread *owner;
+
+	owner = container_of(&work->work, struct ptlrpc_thread,
+			     t_watchdog.work);
+	owner->t_touched = ktime_get();
+
+	mod_delayed_work(system_wq, work, cfs_time_seconds(timeout));
+}
+