From 6e5909b72ff0b21a328c0aefbab931033f539eb7 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Tue, 25 Oct 2022 13:21:17 -0600 Subject: [PATCH] LU-16450 kfilnd: Cancel TNs if handshake fails When sending a message to a new peer, a HELLO is sent first and the original message waits for the handshake to complete. If the HELLO fails to be sent, then the original message will continue to wait for the full LND timeout. When we retry the original message, we should check whether there is actually an outstanding HELLO. If not, then this indicates the HELLO failed and we should cancel the TN. HPE-bug-id: LUS-11310 Test-Parameters: trivial Signed-off-by: Chris Horn Change-Id: I4ed07964d5af0bcc3bdca33c1ea46fd436af2e98 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49590 Reviewed-by: Ron Gredvig Reviewed-by: Ian Ziemba Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lnet/klnds/kfilnd/kfilnd_tn.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lnet/klnds/kfilnd/kfilnd_tn.c b/lnet/klnds/kfilnd/kfilnd_tn.c index 505577e..9666d85 100644 --- a/lnet/klnds/kfilnd/kfilnd_tn.c +++ b/lnet/klnds/kfilnd/kfilnd_tn.c @@ -634,6 +634,20 @@ static int kfilnd_tn_state_idle(struct kfilnd_transaction *tn, KFILND_TN_DEBUG(tn, "Dropping message to stale peer %s\n", libcfs_nid2str(tn->tn_kp->kp_nid)); + } else if (kfilnd_peer_needs_hello(tn->tn_kp, false)) { + /* This transaction was setup against a new peer, which + * implies a HELLO was sent. If a HELLO is no longer + * in flight then that means it has failed, and we + * should cancel this TN. Otherwise we are stuck + * waiting for the TN deadline. + * + * We assign NETWORK_TIMEOUT health status below because + * we do not know why the HELLO failed. 
+ */ + rc = -ECANCELED; + KFILND_TN_DEBUG(tn, + "Peer is new but there is no outstanding hello %s\n", + libcfs_nid2str(tn->tn_kp->kp_nid)); } else if (ktime_after(tn->deadline, ktime_get_seconds())) { /* If transaction deadline has not been met, return * -EAGAIN. This will cause this transaction event to be -- 1.8.3.1