Whamcloud - gitweb
b=20339
[fs/lustre-release.git] / libcfs / libcfs / watchdog.c
index 4e5cc42..565c71c 100644 (file)
 struct lc_watchdog {
         cfs_timer_t       lcw_timer; /* kernel timer */
         struct list_head  lcw_list;
-        struct timeval    lcw_last_touched;
+        cfs_time_t        lcw_last_touched;
         cfs_task_t       *lcw_task;
 
         void            (*lcw_callback)(pid_t, void *);
         void             *lcw_data;
 
         pid_t             lcw_pid;
-        cfs_duration_t    lcw_time; /* time until watchdog fires, jiffies */
 
         enum {
                 LC_WATCHDOG_DISABLED,
@@ -104,27 +103,21 @@ static spinlock_t lcw_last_watchdog_lock = SPIN_LOCK_UNLOCKED;
 static void
 lcw_dump(struct lc_watchdog *lcw)
 {
-        cfs_task_t *tsk;
+        ENTRY;
 #if defined(HAVE_TASKLIST_LOCK)
         read_lock(&tasklist_lock);
 #elif defined(HAVE_TASK_RCU)
         rcu_read_lock();
 #else
         CERROR("unable to dump stack because of missing export\n"); 
-        return;
+        RETURN_EXIT;
 #endif
-        ENTRY;
-
-        tsk = find_task_by_pid(lcw->lcw_pid);
-
-        if (tsk == NULL) {
+        if (lcw->lcw_task == NULL) {
                 CWARN("Process " LPPID " was not found in the task list; "
-                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
-        } else if (tsk != lcw->lcw_task) {
-                CWARN("The current process " LPPID " did not set the watchdog; "
-                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
+                      "watchdog callback may be incomplete\n", 
+                      (int)lcw->lcw_pid);
         } else {
-                libcfs_debug_dumpstack(tsk);
+                libcfs_debug_dumpstack(lcw->lcw_task);
         }
 
 #if defined(HAVE_TASKLIST_LOCK)
@@ -140,6 +133,7 @@ static void lcw_cb(ulong_ptr_t data)
         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
         cfs_time_t current_time;
         cfs_duration_t delta_time;
+        struct timeval timediff;
 
         ENTRY;
 
@@ -151,19 +145,24 @@ static void lcw_cb(ulong_ptr_t data)
         lcw->lcw_state = LC_WATCHDOG_EXPIRED;
         current_time = cfs_time_current();
 
+        delta_time = cfs_time_sub(current_time, lcw->lcw_last_touched);
+        cfs_duration_usec(delta_time, &timediff);
+
         /* Check to see if we should throttle the watchdog timer to avoid
          * too many dumps going to the console thus triggering an NMI.
          * Normally we would not hold the spin lock over the CWARN but in
          * this case we hold it to ensure non ratelimited lcw_dumps are not
          * interleaved on the console making them hard to read. */
         spin_lock_bh(&lcw_last_watchdog_lock);
-        delta_time = cfs_duration_sec(current_time - lcw_last_watchdog_time);
+        delta_time = cfs_duration_sec(cfs_time_sub(current_time,
+                                                   lcw_last_watchdog_time));
 
-        if (delta_time < libcfs_watchdog_ratelimit && lcw_recent_watchdog_count > 3) {
+        if (delta_time < libcfs_watchdog_ratelimit &&
+            lcw_recent_watchdog_count > 3) {
                 CWARN("Refusing to fire watchdog for pid %d: it was inactive "
-                      "for %ldms. Rate limiting 1 per %d seconds.\n",
-                      (int)lcw->lcw_pid,cfs_duration_sec(lcw->lcw_time) * 1000,
-                      libcfs_watchdog_ratelimit);
+                      "for %lu.%.02lus. Rate limiting 3 per %d seconds.\n",
+                      (int)lcw->lcw_pid, timediff.tv_sec,
+                      timediff.tv_usec / 10000, libcfs_watchdog_ratelimit);
         } else {
                 if (delta_time < libcfs_watchdog_ratelimit) {
                         lcw_recent_watchdog_count++;
@@ -175,8 +174,9 @@ static void lcw_cb(ulong_ptr_t data)
 
                /* This warning should appear on the console, but may not get
                 * into the logs since we're running in a softirq handler */
-                CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n",
-                      (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
+                CWARN("Watchdog triggered for pid %d: it was inactive for "
+                      "%lu.%.02lus\n", (int)lcw->lcw_pid, timediff.tv_sec,
+                      timediff.tv_usec / 10000);
                 lcw_dump(lcw);
        }
 
@@ -248,8 +248,7 @@ static int lcw_dispatch_main(void *data)
                         list_del_init(&lcw->lcw_list);
                         spin_unlock_bh(&lcw_pending_timers_lock);
 
-                        CDEBUG(D_INFO, "found lcw for pid " LPPID ": inactive for "
-                               "%lds\n", lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
+                        CDEBUG(D_INFO, "found lcw for pid " LPPID "\n", lcw->lcw_pid);
 
                         if (lcw->lcw_state != LC_WATCHDOG_DISABLED)
                                 lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
@@ -305,7 +304,7 @@ static void lcw_dispatch_stop(void)
         EXIT;
 }
 
-struct lc_watchdog *lc_watchdog_add(int timeout_ms,
+struct lc_watchdog *lc_watchdog_add(int timeout,
                                     void (*callback)(pid_t, void *),
                                     void *data)
 {
@@ -320,7 +319,6 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms,
 
         lcw->lcw_task     = cfs_current();
         lcw->lcw_pid      = cfs_curproc_pid();
-        lcw->lcw_time     = cfs_time_seconds(timeout_ms) / 1000;
         lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
         lcw->lcw_data     = data;
         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
@@ -335,8 +333,8 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms,
 
         /* Keep this working in case we enable them by default */
         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
-                do_gettimeofday(&lcw->lcw_last_touched);
-                cfs_timer_arm(&lcw->lcw_timer, lcw->lcw_time + 
+                lcw->lcw_last_touched = cfs_time_current();
+                cfs_timer_arm(&lcw->lcw_timer, cfs_time_seconds(timeout) +
                               cfs_time_current());
         }
 
@@ -346,22 +344,22 @@ EXPORT_SYMBOL(lc_watchdog_add);
 
 static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
 {
-        struct timeval newtime;
-        struct timeval timediff;
+        cfs_time_t newtime = cfs_time_current();
 
-        do_gettimeofday(&newtime);
         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
-                cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff);
-                CWARN("Expired watchdog for pid " LPPID " %s after %lu.%.4lus\n",
-                      lcw->lcw_pid,
-                      message,
-                      timediff.tv_sec,
-                      timediff.tv_usec / 100);
+                struct timeval timediff;
+                cfs_time_t delta_time = cfs_time_sub(newtime,
+                                                     lcw->lcw_last_touched);
+                cfs_duration_usec(delta_time, &timediff);
+
+                CWARN("Expired watchdog for pid " LPPID " %s after %lu.%.02lus\n",
+                      lcw->lcw_pid, message, timediff.tv_sec,
+                      timediff.tv_usec / 10000);
         }
         lcw->lcw_last_touched = newtime;
 }
 
-void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
 {
         ENTRY;
         LASSERT(lcw != NULL);
@@ -374,17 +372,10 @@ void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
         lcw->lcw_state = LC_WATCHDOG_ENABLED;
 
         cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
-                      cfs_time_seconds(timeout_ms) / 1000);
+                      cfs_time_seconds(timeout));
 
         EXIT;
 }
-EXPORT_SYMBOL(lc_watchdog_touch_ms);
-
-/* deprecated - use above instead */
-void lc_watchdog_touch(struct lc_watchdog *lcw)
-{
-        lc_watchdog_touch_ms(lcw, (int)cfs_duration_sec(lcw->lcw_time) * 1000);
-}
 EXPORT_SYMBOL(lc_watchdog_touch);
 
 void lc_watchdog_disable(struct lc_watchdog *lcw)
@@ -441,7 +432,7 @@ EXPORT_SYMBOL(lc_watchdog_dumplog);
 
 #else   /* !defined(WITH_WATCHDOG) */
 
-struct lc_watchdog *lc_watchdog_add(int timeout_ms,
+struct lc_watchdog *lc_watchdog_add(int timeout,
                                     void (*callback)(pid_t pid, void *),
                                     void *data)
 {
@@ -450,12 +441,7 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms,
 }
 EXPORT_SYMBOL(lc_watchdog_add);
 
-void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
-{
-}
-EXPORT_SYMBOL(lc_watchdog_touch_ms);
-
-void lc_watchdog_touch(struct lc_watchdog *lcw)
+void lc_watchdog_touch(struct lc_watchdog *lcw, int timeout)
 {
 }
 EXPORT_SYMBOL(lc_watchdog_touch);