From 18bf112e96ceb3f45505be86f3357369c9122992 Mon Sep 17 00:00:00 2001 From: Frederick Dilger Date: Thu, 13 Jun 2024 20:39:28 -0400 Subject: [PATCH] LU-17432 libcfs: new CDEBUG_SLOW message type Created new CDEBUG_SLOW message type that will skip the first SECONDS of messages, then continue printing to console as normal. Specifically CWARN_SLOW and CERROR_SLOW have been created which mirror the functionality of their respective CWARN and CERROR functions but with the additional SLOW behavior. Signed-off-by: Frederick Dilger Change-Id: I905fdff795488ff937faf4d04d5d3d6fec24950a Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55439 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Arshad Hussain Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- contrib/scripts/checkpatch.pl | 1 + contrib/scripts/lustre-checkpatch.patch | 1 + libcfs/include/libcfs/libcfs_debug.h | 16 ++++++++++++++++ libcfs/libcfs/tracefile.c | 10 ++++++++-- lustre/ldlm/ldlm_lib.c | 7 ++++--- lustre/ptlrpc/client.c | 1 + 6 files changed, 31 insertions(+), 5 deletions(-) diff --git a/contrib/scripts/checkpatch.pl b/contrib/scripts/checkpatch.pl index 4f581ea..a802abe 100755 --- a/contrib/scripts/checkpatch.pl +++ b/contrib/scripts/checkpatch.pl @@ -601,6 +601,7 @@ our $logFunctions = qr{(?x: TP_printk| WARN(?:_RATELIMIT|_ONCE|)| CDEBUG|CERROR|CNETERR|CEMERG|CL_LOCK_DEBUG|CWARN|DEBUG_REQ|LCONSOLE_[A-Z]*| + CERROR_SLOW|CWARN_SLOW| panic| MODULE_[A-Z_]+| seq_vprintf|seq_printf|seq_puts| diff --git a/contrib/scripts/lustre-checkpatch.patch b/contrib/scripts/lustre-checkpatch.patch index 1fa7d12..b46eef5 100644 --- a/contrib/scripts/lustre-checkpatch.patch +++ b/contrib/scripts/lustre-checkpatch.patch @@ -63,6 +63,7 @@ index 2b812210b4..a6c52d361c 100755 TP_printk| WARN(?:_RATELIMIT|_ONCE|)| + CDEBUG|CERROR|CNETERR|CEMERG|CL_LOCK_DEBUG|CWARN|DEBUG_REQ|LCONSOLE_[A-Z]*| ++ CERROR_SLOW|CWARN_SLOW| panic| MODULE_[A-Z_]+| - seq_vprintf|seq_printf|seq_puts diff --git a/libcfs/include/libcfs/libcfs_debug.h b/libcfs/include/libcfs/libcfs_debug.h index 8e4157a..4bdbd63 100644 --- a/libcfs/include/libcfs/libcfs_debug.h +++ b/libcfs/include/libcfs/libcfs_debug.h @@ -158,6 +158,16 @@ do { \ mask, &cdls, format, ## __VA_ARGS__); \ } while (0) +# define CDEBUG_SLOW(delay, mask, format, ...) \ +do { \ + static struct cfs_debug_limit_state cdls = { \ + .cdls_count = -delay, \ + .cdls_delay = delay, \ + }; \ + \ + __CDEBUG_WITH_LOC(__FILE__, __func__, __LINE__, \ + mask, &cdls, format, ## __VA_ARGS__); \ +} while (0) # else /* !CDEBUG_ENABLED */ static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem) { @@ -166,6 +176,7 @@ static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem) # define CDEBUG(mask, format, ...) (void)(0) # define CDEBUG_LIMIT(mask, format, ...) (void)(0) # define CDEBUG_LIMIT_LOC(file, func, line, mask, format, ...) (void)(0) +# define CDEBUG_SLOW(delay, mask, format, ...) (void)(0) # warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!" # endif /* CDEBUG_ENABLED */ @@ -181,6 +192,11 @@ static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem) #define CNETERR(format, a...) CDEBUG_LIMIT(D_NETERROR, format, ## a) #define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__) +#define CWARN_SLOW(delay, format, ...) CDEBUG_SLOW(delay, D_WARNING, format, \ + ## __VA_ARGS__) +#define CERROR_SLOW(delay, format, ...) CDEBUG_SLOW(delay, D_ERROR, format, \ + ## __VA_ARGS__) + #define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__) #define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__) #define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__) diff --git a/libcfs/libcfs/tracefile.c b/libcfs/libcfs/tracefile.c index 1a0ce6f..583e540 100644 --- a/libcfs/libcfs/tracefile.c +++ b/libcfs/libcfs/tracefile.c @@ -684,6 +684,13 @@ console: } if (cdls != NULL) { + /* avoid unlikely case of many errors between printing */ + if (unlikely(cdls->cdls_count < 0 && + cdls->cdls_count >= -600)) { + cdls->cdls_next = jiffies + + cfs_time_seconds(-cdls->cdls_count); + cdls->cdls_count = 1; + } if (libcfs_console_ratelimit && cdls->cdls_next != 0 && /* not first time ever */ time_before(jiffies, cdls->cdls_next)) { @@ -729,8 +736,7 @@ console: cfs_print_to_console(&header, file, fn, "Skipped %d previous similar message%s\n", cdls->cdls_count, - (cdls->cdls_count > 1) ? "s" : ""); - + cdls->cdls_count > 1 ? "s" : ""); cdls->cdls_count = 0; } out: diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index f2490c8..26fb2cd 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1086,9 +1086,10 @@ int target_handle_connect(struct ptlrpc_request *req) target = class_str2obd(str); if (!target) { deuuidify(str, NULL, &target_start, &target_len); - LCONSOLE_ERROR("%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n", - target_len, target_start, - libcfs_nidstr(&req->rq_peer.nid)); + CERROR_SLOW(5, + "%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n", + target_len, target_start, + libcfs_nidstr(&req->rq_peer.nid)); GOTO(out, rc = -ENODEV); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 3795720..8cd8b63 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -2354,6 +2354,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) if (ptlrpc_console_allow(req, opc, lustre_msg_get_status(req->rq_reqmsg))) debug_mask = D_WARNING; + /* this message is used in replay-single test_200, DO NOT MODIFY */ DEBUG_REQ(debug_mask, req, "Request sent has %s: [sent %lld/real %lld]", req->rq_net_err ? "failed due to network error" : ((req->rq_real_sent == 0 || -- 1.8.3.1