From 9ce04000fba07706c73b8adb3605c959e5b62712 Mon Sep 17 00:00:00 2001 From: Aurelien Degremont Date: Tue, 18 Jan 2022 13:55:01 +0000 Subject: [PATCH] LU-930 ptlrpc: clarify AT error message Clarify the error message printed when request deadlines have already passed before AT early replies could be sent. It indicated that the system was CPU-bound, which is wrong most of the time, as the issue is more often a communication failure delaying RPC traffic. This could confuse people, who would look at CPU resource consumption when network traffic is more likely the cause. Also try to use less cryptic keywords, since the old ones only make sense to the feature's developers and not to admins. Test-Parameters: trivial Signed-off-by: Aurelien Degremont Change-Id: Icdff8f4c6fb9905233f6b8ed1b961b2fd1127667 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49548 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Yang Sheng Reviewed-by: Oleg Drokin --- lustre/ptlrpc/service.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 4135de3..4cda678 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1622,12 +1622,11 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service_part *svcpt) * We're already past request deadlines before we even get a * chance to send early replies */ - LCONSOLE_WARN("%s: This server is not able to keep up with request traffic (cpu-bound).\n", - svcpt->scp_service->srv_name); - CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, delay=%lldms\n", - counter, svcpt->scp_nreqs_incoming, - svcpt->scp_nreqs_active, - at_get(&svcpt->scp_at_estimate), delay_ms); + LCONSOLE_WARN("'%s' is processing requests too slowly, client may timeout. 
Late by %ds, missed %d early replies (reqs waiting=%d active=%d, at_estimate=%d, delay=%lldms)\n", + svcpt->scp_service->srv_name, -first, counter, + svcpt->scp_nreqs_incoming, + svcpt->scp_nreqs_active, + at_get(&svcpt->scp_at_estimate), delay_ms); } /* -- 1.8.3.1