From 39a169cd02738a13866f3b88fbe3304dc20565d6 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Wed, 15 Jul 2020 22:38:52 -0500
Subject: [PATCH] LU-13569 lnet: Only recover known good peer NIs

A peer NI should not be eligible for recovery if we've never received
a message from it.

Test-Parameters: trivial
HPE-bug-id: LUS-9109
Signed-off-by: Chris Horn
Change-Id: Iec2fd015f6410ab91c6ef7c222cbed0204243106
Reviewed-on: https://review.whamcloud.com/39719
Reviewed-by: Serguei Smirnov
Reviewed-by: James Simmons
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 lnet/lnet/peer.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index a1d2552..903bf78 100644
--- a/lnet/lnet/peer.c
+++ b/lnet/lnet/peer.c
@@ -4017,6 +4017,14 @@ lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni,
 	if (atomic_read(&lpni->lpni_healthv) == LNET_MAX_HEALTH_VALUE)
 		return;
 
+	if (!lpni->lpni_last_alive) {
+		CDEBUG(D_NET,
+		       "lpni %s(%p) not eligible for recovery last alive %lld\n",
+		       libcfs_nid2str(lpni->lpni_nid), lpni,
+		       lpni->lpni_last_alive);
+		return;
+	}
+
 	if (now > lpni->lpni_last_alive + lnet_recovery_limit) {
 		CDEBUG(D_NET, "lpni %s aged out last alive %lld\n",
 		       libcfs_nid2str(lpni->lpni_nid),
-- 
1.8.3.1