Whamcloud - gitweb
LU-17432 libcfs: new CDEBUG_SLOW message type 39/55439/19
authorFrederick Dilger <fdilger@whamcloud.com>
Fri, 14 Jun 2024 00:39:28 +0000 (20:39 -0400)
committerOleg Drokin <green@whamcloud.com>
Sun, 24 Nov 2024 06:04:06 +0000 (06:04 +0000)
Created new CDEBUG_SLOW message type that will skip the first
SECONDS of messages, then continue printing to console as normal.

Specifically CWARN_SLOW and CERROR_SLOW have been created which
mirror the functionality of their respective CWARN and CERROR
functions but with the additional SLOW behavior.

Signed-off-by: Frederick Dilger <fdilger@whamcloud.com>
Change-Id: I905fdff795488ff937faf4d04d5d3d6fec24950a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55439
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
contrib/scripts/checkpatch.pl
contrib/scripts/lustre-checkpatch.patch
libcfs/include/libcfs/libcfs_debug.h
libcfs/libcfs/tracefile.c
lustre/ldlm/ldlm_lib.c
lustre/ptlrpc/client.c

index 4f581ea..a802abe 100755 (executable)
@@ -601,6 +601,7 @@ our $logFunctions = qr{(?x:
        TP_printk|
        WARN(?:_RATELIMIT|_ONCE|)|
        CDEBUG|CERROR|CNETERR|CEMERG|CL_LOCK_DEBUG|CWARN|DEBUG_REQ|LCONSOLE_[A-Z]*|
+       CERROR_SLOW|CWARN_SLOW|
        panic|
        MODULE_[A-Z_]+|
        seq_vprintf|seq_printf|seq_puts|
index 1fa7d12..b46eef5 100644 (file)
@@ -63,6 +63,7 @@ index 2b812210b4..a6c52d361c 100755
        TP_printk|
        WARN(?:_RATELIMIT|_ONCE|)|
 +      CDEBUG|CERROR|CNETERR|CEMERG|CL_LOCK_DEBUG|CWARN|DEBUG_REQ|LCONSOLE_[A-Z]*|
++      CERROR_SLOW|CWARN_SLOW|
        panic|
        MODULE_[A-Z_]+|
 -      seq_vprintf|seq_printf|seq_puts
index 8e4157a..4bdbd63 100644 (file)
@@ -158,6 +158,16 @@ do {                                                                       \
                          mask, &cdls, format, ## __VA_ARGS__);         \
 } while (0)
 
+#  define CDEBUG_SLOW(delay, mask, format, ...)                                \
+do {                                                                   \
+       static struct cfs_debug_limit_state cdls = {                    \
+       .cdls_count = -delay,                                           \
+       .cdls_delay = delay,                                            \
+       };                                                              \
+                                                                       \
+       __CDEBUG_WITH_LOC(__FILE__, __func__, __LINE__,                 \
+                         mask, &cdls, format, ## __VA_ARGS__);         \
+} while (0)
 # else /* !CDEBUG_ENABLED */
 static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
 {
@@ -166,6 +176,7 @@ static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
 #  define CDEBUG(mask, format, ...) (void)(0)
 #  define CDEBUG_LIMIT(mask, format, ...) (void)(0)
 #  define CDEBUG_LIMIT_LOC(file, func, line, mask, format, ...) (void)(0)
+#  define CDEBUG_SLOW(delay, mask, format, ...) (void)(0)
 #  warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!"
 # endif /* CDEBUG_ENABLED */
 
@@ -181,6 +192,11 @@ static inline int cfs_cdebug_show(unsigned int mask, unsigned int subsystem)
 #define CNETERR(format, a...)       CDEBUG_LIMIT(D_NETERROR, format, ## a)
 #define CEMERG(format, ...)         CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
 
+#define CWARN_SLOW(delay, format, ...)  CDEBUG_SLOW(delay, D_WARNING, format, \
+                  ## __VA_ARGS__)
+#define CERROR_SLOW(delay, format, ...) CDEBUG_SLOW(delay, D_ERROR, format, \
+                   ## __VA_ARGS__)
+
 #define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
 #define LCONSOLE_INFO(format, ...)  CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
 #define LCONSOLE_WARN(format, ...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
index 1a0ce6f..583e540 100644 (file)
@@ -684,6 +684,13 @@ console:
        }
 
        if (cdls != NULL) {
+               /* avoid unlikely case of many errors between printing */
+               if (unlikely(cdls->cdls_count < 0 &&
+                            cdls->cdls_count >= -600)) {
+                       cdls->cdls_next = jiffies +
+                                   cfs_time_seconds(-cdls->cdls_count);
+                       cdls->cdls_count = 1;
+               }
                if (libcfs_console_ratelimit &&
                    cdls->cdls_next != 0 &&     /* not first time ever */
                    time_before(jiffies, cdls->cdls_next)) {
@@ -729,8 +736,7 @@ console:
                cfs_print_to_console(&header, file, fn,
                                     "Skipped %d previous similar message%s\n",
                                     cdls->cdls_count,
-                                    (cdls->cdls_count > 1) ? "s" : "");
-
+                                    cdls->cdls_count > 1 ? "s" : "");
                cdls->cdls_count = 0;
        }
 out:
index f2490c8..26fb2cd 100644 (file)
@@ -1086,9 +1086,10 @@ int target_handle_connect(struct ptlrpc_request *req)
        target = class_str2obd(str);
        if (!target) {
                deuuidify(str, NULL, &target_start, &target_len);
-               LCONSOLE_ERROR("%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
-                              target_len, target_start,
-                              libcfs_nidstr(&req->rq_peer.nid));
+               CERROR_SLOW(5,
+                           "%.*s: not available for connect from %s (no target). If you are running an HA pair check that the target is mounted on the other server.\n",
+                           target_len, target_start,
+                           libcfs_nidstr(&req->rq_peer.nid));
                GOTO(out, rc = -ENODEV);
        }
 
index 3795720..8cd8b63 100644 (file)
@@ -2354,6 +2354,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
        if (ptlrpc_console_allow(req, opc,
                                 lustre_msg_get_status(req->rq_reqmsg)))
                debug_mask = D_WARNING;
+       /* this message is used in replay-single test_200, DO NOT MODIFY */
        DEBUG_REQ(debug_mask, req, "Request sent has %s: [sent %lld/real %lld]",
                  req->rq_net_err ? "failed due to network error" :
                     ((req->rq_real_sent == 0 ||