Whamcloud - gitweb
LU-17788 ptlrpc: restore watchdog revival message 42/54942/12
authorAndreas Dilger <adilger@whamcloud.com>
Sat, 27 Apr 2024 02:48:15 +0000 (20:48 -0600)
committerOleg Drokin <green@whamcloud.com>
Tue, 21 May 2024 18:46:24 +0000 (18:46 +0000)
Restore the "Service thread pid NNN completed after SSS.mmm
seconds.  This likely indicates the system was overloaded"
message that was lost during ptlrpc watchdog restructuring.

Do not rate limit this message, so that it is possible to see
when all threads are restored, even if their corresponding
"Service thread pid NNN was inactive" message was throttled.

Update recovery-small test_10a to check for these messages,
so that they are not removed again in the future.

Test-Parameters: testlist=recovery-small env=ONLY=10a
Test-Parameters: testlist=recovery-small env=ONLY=10a
Test-Parameters: testlist=recovery-small env=ONLY=10a
Test-Parameters: testlist=recovery-small env=ONLY=10a
Test-Parameters: testlist=recovery-small env=ONLY=10a
Fixes: fc9de679a4 ("LU-9859 libcfs: add watchdog for ptlrpc service threads.")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I0c7e96fb7f73ca5562a6f5ad780a79ffc83ebbe5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54942
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
lustre/include/lustre_net.h
lustre/ptlrpc/service.c
lustre/tests/recovery-small.sh

index 81f6b97..2491f0d 100644 (file)
@@ -1440,12 +1440,13 @@ struct ptlrpc_bulk_desc {
        struct bio_vec *bd_vec;
 };
 
-enum {
+enum ptlrpc_thread_state {
        SVC_INIT        = 0,
        SVC_STOPPED     = BIT(0),
        SVC_STOPPING    = BIT(1),
        SVC_STARTING    = BIT(2),
        SVC_RUNNING     = BIT(3),
+       SVC_WATCHDOG    = BIT(4),
 };
 
 #define PTLRPC_THR_NAME_LEN            32
@@ -1453,36 +1454,18 @@ enum {
  * Definition of server service thread structure
  */
 struct ptlrpc_thread {
-       /**
-        * List of active threads in svcpt->scp_threads
-        */
-       struct list_head t_link;
-       /**
-        * thread-private data (preallocated vmalloc'd memory)
-        */
-       void *t_data;
-       __u32 t_flags;
-       /**
-        * service thread index, from ptlrpc_start_threads
-        */
-       unsigned int t_id;
-       /**
-        * service thread
-        */
-       struct task_struct *t_task;
-       pid_t t_pid;
-       ktime_t t_touched;
-       /**
-        * put watchdog in the structure per thread b=14840
-        */
-       struct delayed_work t_watchdog;
-       /**
-        * the svc this thread belonged to b=18582
-        */
-       struct ptlrpc_service_part      *t_svcpt;
-       wait_queue_head_t               t_ctl_waitq;
-       struct lu_env                   *t_env;
-       char                            t_name[PTLRPC_THR_NAME_LEN];
+       struct list_head         t_link; /* active threads svcpt->scp_threads */
+       void                    *t_data; /* thread-private memory (vmalloc) */
+       enum ptlrpc_thread_state t_flags;
+       unsigned int             t_id;   /* index, from ptlrpc_start_threads */
+       struct task_struct      *t_task; /* service thread */
+       pid_t                    t_pid;      /* kernel process ID */
+       ktime_t                  t_touched;  /* last active time */
+       struct delayed_work      t_watchdog; /* inactivity watchdog timer */
+       struct ptlrpc_service_part *t_svcpt; /* service the thread belongs to */
+       wait_queue_head_t        t_ctl_waitq;/* waiter for thread starter */
+       struct lu_env           *t_env;
+       char                     t_name[PTLRPC_THR_NAME_LEN];
 };
 
 static inline int thread_is_init(struct ptlrpc_thread *thread)
@@ -2256,6 +2239,7 @@ void ptlrpc_hr_fini(void);
 void ptlrpc_watchdog_init(struct delayed_work *work, timeout_t timeout);
 void ptlrpc_watchdog_disable(struct delayed_work *work);
 void ptlrpc_watchdog_touch(struct delayed_work *work, timeout_t timeout);
+void ptlrpc_watchdog_delete(struct delayed_work *work);
 
 /** @} */
 
index c1bdf7b..77f8f87 100644 (file)
@@ -2625,49 +2625,86 @@ ptlrpc_at_check(struct ptlrpc_service_part *svcpt)
  */
 static struct ratelimit_state watchdog_limit;
 
-static void ptlrpc_watchdog_fire(struct work_struct *w)
+static void ptlrpc_watchdog_fire(struct work_struct *work)
 {
-       struct ptlrpc_thread *thread = container_of(w, struct ptlrpc_thread,
+       struct ptlrpc_thread *thread = container_of(work, struct ptlrpc_thread,
                                                    t_watchdog.work);
-       u64 ms_lapse = ktime_ms_delta(ktime_get(), thread->t_touched);
-       u32 ms_frac = do_div(ms_lapse, MSEC_PER_SEC);
+       u64 ms_elapsed = ktime_ms_delta(ktime_get(), thread->t_touched);
+       u32 ms_frac = do_div(ms_elapsed, MSEC_PER_SEC);
+
+       thread->t_flags |= SVC_WATCHDOG;
 
        /* ___ratelimit() returns true if the action is NOT ratelimited */
        if (__ratelimit(&watchdog_limit)) {
                /* below message is checked in sanity-quota.sh test_6,18 */
+               /* below message is checked in recovery-small test 10a. */
                LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. The thread might be hung, or it might only be slow and will resume later. Dumping the stack trace for debugging purposes:\n",
                              thread->t_task->comm, thread->t_task->pid,
-                             ms_lapse, ms_frac);
+                             ms_elapsed, ms_frac);
 
                libcfs_debug_dumpstack(thread->t_task);
        } else {
-               /* below message is checked in sanity-quota.sh test_6,18 */
                LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. Watchdog stack traces are limited to 3 per %u seconds, skipping this one.\n",
                              thread->t_task->comm, thread->t_task->pid,
-                             ms_lapse, ms_frac, libcfs_watchdog_ratelimit);
+                             ms_elapsed, ms_frac, libcfs_watchdog_ratelimit);
        }
 }
 
 void ptlrpc_watchdog_init(struct delayed_work *work, timeout_t timeout)
 {
+       struct ptlrpc_thread *thread = container_of(&work->work,
+                                                   struct ptlrpc_thread,
+                                                   t_watchdog.work);
+
+       thread->t_touched = ktime_get();
        INIT_DELAYED_WORK(work, ptlrpc_watchdog_fire);
        schedule_delayed_work(work, cfs_time_seconds(timeout));
 }
 
-void ptlrpc_watchdog_disable(struct delayed_work *work)
+static void ptlrpc_watchdog_update(struct delayed_work *work, const char *msg)
 {
-       cancel_delayed_work_sync(work);
+       struct ptlrpc_thread *thread = container_of(&work->work,
+                                                   struct ptlrpc_thread,
+                                                   t_watchdog.work);
+       ktime_t now = ktime_get();
+
+       if (unlikely(thread->t_flags & SVC_WATCHDOG)) {
+               u64 ms_elapsed = ktime_ms_delta(now, thread->t_touched);
+               u32 ms_frac = do_div(ms_elapsed, MSEC_PER_SEC);
+
+               /* Don't ratelimit this message, since it is already limited
+                * by the watchdog (obd_timeout) and it is important to know
+                * if/when a service thread has revived after being hung.
+                * below message is checked in recovery-small test 10a.
+                */
+               LCONSOLE(D_WARNING,
+                        "%s: service thread pid %u %s after %llu.%03us. This likely indicates the system was overloaded (too many service threads, or not enough hardware resources).\n",
+                        thread->t_task->comm, thread->t_pid, msg,
+                        ms_elapsed, ms_frac);
+               thread->t_flags &= ~SVC_WATCHDOG;
+       }
+
+       thread->t_touched = now;
 }
 
 void ptlrpc_watchdog_touch(struct delayed_work *work, timeout_t timeout)
 {
-       struct ptlrpc_thread *thread = container_of(&work->work,
-                                                   struct ptlrpc_thread,
-                                                   t_watchdog.work);
-       thread->t_touched = ktime_get();
+       ptlrpc_watchdog_update(work, "resumed");
        mod_delayed_work(system_wq, work, cfs_time_seconds(timeout));
 }
 
+void ptlrpc_watchdog_disable(struct delayed_work *work)
+{
+       ptlrpc_watchdog_update(work, "completed");
+       cancel_delayed_work_sync(work);
+}
+
+void ptlrpc_watchdog_delete(struct delayed_work *work)
+{
+       ptlrpc_watchdog_update(work, "stopped");
+       cancel_delayed_work_sync(work);
+}
+
 /**
  * requests wait on preprocessing
  * user can call it w/o any lock but need to hold
@@ -2819,9 +2856,8 @@ static int ptlrpc_main(void *arg)
        /* wake up our creator in case he's still waiting. */
        wake_up(&thread->t_ctl_waitq);
 
-       thread->t_touched = ktime_get();
        ptlrpc_watchdog_init(&thread->t_watchdog,
-                        ptlrpc_server_get_timeout(svcpt));
+                            ptlrpc_server_get_timeout(svcpt));
 
        spin_lock(&svcpt->scp_rep_lock);
        list_add(&rs->rs_list, &svcpt->scp_rep_idle);
@@ -2910,7 +2946,7 @@ static int ptlrpc_main(void *arg)
                        ptlrpc_thread_stop(thread);
        }
 
-       ptlrpc_watchdog_disable(&thread->t_watchdog);
+       ptlrpc_watchdog_delete(&thread->t_watchdog);
 
 out_ctx_fini:
        lu_context_fini(&env->le_ctx);
@@ -3267,10 +3303,7 @@ static int ptlrpc_start_thread(struct ptlrpc_service_part *svcpt, int wait)
                spin_lock(&svcpt->scp_lock);
                --svcpt->scp_nthrs_starting;
                if (thread_is_stopping(thread)) {
-                       /*
-                        * this ptlrpc_thread is being hanled
-                        * by ptlrpc_svcpt_stop_threads now
-                        */
+                       /* thread now handled by ptlrpc_svcpt_stop_threads() */
                        thread_add_flags(thread, SVC_STOPPED);
                        wake_up(&thread->t_ctl_waitq);
                        spin_unlock(&svcpt->scp_lock);
index f627d90..41859b9 100755 (executable)
@@ -150,6 +150,18 @@ test_10a() {
 
        do_facet client checkstat -v -p 0777 $DIR ||
                error "client checkstat failed: $?"
+
+       # check that the thread watchdog is working properly
+       do_facet mds1 dmesg | tac | sed "/${TESTNAME/_/ }/,$ d" |
+               grep "[Ss]ervice thread pid .* was inactive" ||
+               error "no service thread inactive message for ${TESTNAME/_/ }"
+
+       # can't check for "completed" message where it was removed
+       (( $MDS1_VERSION > $(version_code v2_12_50-83-gfc9de679a4) &&
+          $MDS1_VERSION < $(version_code 2.15.62.59) )) ||
+               do_facet mds1 dmesg | tac | sed "/${TESTNAME/_/ }/,$ d" |
+                       grep "[Ss]ervice thread pid .* completed after" ||
+               error "no service thread completed message for ${TESTNAME/_/ }"
 }
 run_test 10a "finish request on server after client eviction (bug 1521)"