From e500f49c302c6f10ba3b701e83db4da2b4b68a11 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Tue, 1 Nov 2022 13:39:39 -0600
Subject: [PATCH 1/1] LU-16451 kfilnd: Improve CQ error logging

Improve CQ error logging for send events by printing the errno from
the CQ event as well as the provider error. This should allow us to
better root cause TN failures.

Also remove an extra newline character.

HPE-bug-id: LUS-11314
Test-Parameters: trivial
Signed-off-by: Chris Horn
Change-Id: I79bbe0312a9124dd34285d43b6e83f9d897923c1
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49589
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Ron Gredvig
Reviewed-by: Ian Ziemba
Reviewed-by: Oleg Drokin
---
 lnet/klnds/kfilnd/kfilnd_cq.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lnet/klnds/kfilnd/kfilnd_cq.c b/lnet/klnds/kfilnd/kfilnd_cq.c
index d070afe..5b5678e 100644
--- a/lnet/klnds/kfilnd/kfilnd_cq.c
+++ b/lnet/klnds/kfilnd/kfilnd_cq.c
@@ -48,7 +48,7 @@ void kfilnd_cq_process_error(struct kfilnd_ep *ep,
 	switch (error->flags) {
 	case KFI_MSG | KFI_RECV:
 		if (error->err != ECANCELED) {
-			KFILND_EP_ERROR(ep, "Dropping error receive event %d\n",
+			KFILND_EP_ERROR(ep, "Dropping error receive event %d",
 					-error->err);
 			return;
 		}
@@ -76,6 +76,10 @@ void kfilnd_cq_process_error(struct kfilnd_ep *ep,
 		tn = error->op_context;
 		tn_event = TN_EVENT_TX_FAIL;
 		status = -error->err;
+		KFILND_EP_ERROR(ep,
+				"msg send error %d prov error %d flags %llx",
+				status, -error->prov_errno, error->flags);
+
 		break;
 
 	case KFI_TAGGED | KFI_SEND:
@@ -84,6 +88,9 @@ void kfilnd_cq_process_error(struct kfilnd_ep *ep,
 		tn = error->op_context;
 		tn_event = TN_EVENT_TAG_TX_FAIL;
 		status = -error->err;
+		KFILND_EP_ERROR(ep,
+				"tagged error %d prov error %d flags %llx",
+				status, -error->prov_errno, error->flags);
 		break;
 
 	default:
-- 
1.8.3.1