From f9d837b479232bfc4f271f23cd3729ca67cb6c1d Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Fri, 19 Mar 2021 13:22:26 -0500 Subject: [PATCH] LU-14540 o2iblnd: Use REMOTE_DROPPED for ECONNREFUSED ECONNREFUSED means that we received a response from the remote end, so setting the LNet health status to REMOTE_DROPPED is more appropriate than setting LOCAL_DROPPED. Using REMOTE_DROPPED will decrement the peer NI health and allow us to try other peer NIs for future sends. Decrementing the peer NI health will also result in routes being marked down, as appropriate, for cases where a router has refused the connection request. Test-Parameters: trivial HPE-bug-id: LUS-9853 Signed-off-by: Chris Horn Change-Id: I8190f5d78a76ec25553908c4f215362c0c2051fc Reviewed-on: https://review.whamcloud.com/42114 Tested-by: jenkins Reviewed-by: James Simmons Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd_cb.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 90ba35b..e22f946 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -2208,10 +2208,11 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active, int error) { LIST_HEAD(zombies); - unsigned long flags; + unsigned long flags; + enum lnet_msg_hstatus hstatus; - LASSERT (error != 0); - LASSERT (!in_interrupt()); + LASSERT(error != 0); + LASSERT(!in_interrupt()); write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -2254,12 +2255,20 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active, CNETERR("Deleting messages for %s: connection failed\n", libcfs_nid2str(peer_ni->ibp_nid)); - if (error == -EHOSTUNREACH || error == -ETIMEDOUT) - kiblnd_txlist_done(&zombies, error, - LNET_MSG_STATUS_NETWORK_TIMEOUT); - else - kiblnd_txlist_done(&zombies, error, - LNET_MSG_STATUS_LOCAL_DROPPED); + switch (error) { + case -EHOSTUNREACH: + case -ETIMEDOUT: + hstatus = LNET_MSG_STATUS_NETWORK_TIMEOUT; + break; + case -ECONNREFUSED: + hstatus = LNET_MSG_STATUS_REMOTE_DROPPED; + break; + default: + hstatus = LNET_MSG_STATUS_LOCAL_DROPPED; + break; + } + + kiblnd_txlist_done(&zombies, error, hstatus); } static void -- 1.8.3.1