Whamcloud - gitweb
LU-17242 libcfs: use sched_show_task() for thread dumping 94/59394/2
authorShaun Tancheff <shaun.tancheff@hpe.com>
Fri, 23 May 2025 01:16:04 +0000 (08:16 +0700)
committerOleg Drokin <green@whamcloud.com>
Sat, 7 Jun 2025 23:04:59 +0000 (23:04 +0000)
Use sched_show_task() for thread dumping, since it should be
available on all kernels that Lustre supports. On some kernels,
libcfs_debug_dumpstack() is unable to show the thread stack.
Replacing this function avoids that issue.

Test-Parameters: trivial
Signed-off-by: Timothy Day <timday@amazon.com>
Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Change-Id: I421560b0d4223fd3503f4a3697a7615dd43bad8f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/59394
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
config/lustre-core.m4
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/service.c

index ee5a456..56442f3 100644 (file)
@@ -2647,6 +2647,18 @@ is_encrypted, [
 ]) # LC_IS_ENCRYPTED used by LC_CONFIG_CRYPTO
 
 #
+# LC_EXPORT_HAVE_SCHED_SHOW_TASK
+#
+# Linux commit v4.14-rc4-2-g0032f4e88976
+#  rcutorture: Dump writer stack if stalled
+#
+AC_DEFUN([LC_EXPORT_HAVE_SCHED_SHOW_TASK], [
+LB_CHECK_EXPORT([sched_show_task], [kernel/sched/core.c],
+       [AC_DEFINE(HAVE_SCHED_SHOW_TASK, 1,
+                       [sched_show_task() is exported by the kernel])])
+]) # LC_EXPORT_HAVE_SCHED_SHOW_TASK
+
+#
 # LC_I_PAGES
 #
 # kernel 4.17 commit b93b016313b3ba8003c3b8bb71f569af91f19fc7
@@ -5745,6 +5757,9 @@ AC_DEFUN([LC_PROG_LINUX], [
        # 4.8 - Check export
        LC_EXPORT_DEFAULT_FILE_SPLICE_READ
 
+       # 4.15 - Check export
+       LC_EXPORT_HAVE_SCHED_SHOW_TASK
+
        # 5.2 - Check export
        LC_ACCOUNT_PAGE_DIRTIED
 
index bac3ff2..fc4d99c 100644 (file)
 #include "../ldlm/ldlm_internal.h"
 #include "heap.h"
 
+#include <linux/sched.h>
+#ifdef HAVE_SCHED_SHOW_TASK
+#include <linux/sched/debug.h>
+#endif
+
 struct ldlm_namespace;
 struct obd_import;
 struct ldlm_res_id;
@@ -48,6 +53,10 @@ extern struct mutex pinger_mutex;
 extern lnet_handler_t ptlrpc_handler;
 extern struct percpu_ref ptlrpc_pending;
 
+#ifndef HAVE_SCHED_SHOW_TASK
+#define sched_show_task(task)          libcfs_debug_dumpstack((task))
+#endif
+
 /* ptlrpcd.c */
 int ptlrpcd_start(struct ptlrpcd_ctl *pc);
 
index 67ff96f..d6412cc 100644 (file)
@@ -2791,7 +2791,7 @@ static void ptlrpc_watchdog_fire(struct work_struct *work)
                              thread->t_task->comm, thread->t_task->pid,
                              ms_elapsed, ms_frac);
 
-               libcfs_debug_dumpstack(thread->t_task);
+               sched_show_task(thread->t_task);
        } else {
                LCONSOLE_WARN("%s: service thread pid %u was inactive for %llu.%03u seconds. Watchdog stack traces are limited to 3 per %u seconds, skipping this one.\n",
                              thread->t_task->comm, thread->t_task->pid,