From 07427ce089e33f7fd8d305c265e447b95b934477 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Wed, 15 Jul 2020 22:38:52 -0500
Subject: [PATCH] LU-13569 lnet: Only recover known good peer NIs

A peer NI should not be eligible for recovery if we've never received
a message from it.

Lustre-change: https://review.whamcloud.com/39719
Lustre-commit: 39a169cd02738a13866f3b88fbe3304dc20565d6

Test-Parameters: trivial
HPE-bug-id: LUS-9109
Signed-off-by: Chris Horn
Change-Id: Iec2fd015f6410ab91c6ef7c222cbed0204243106
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54401
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Frank Sehr
Reviewed-by: Cyril Bordage
Reviewed-by: Andreas Dilger
---
 lnet/lnet/peer.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index a4e5966..20ac9f6c 100644
--- a/lnet/lnet/peer.c
+++ b/lnet/lnet/peer.c
@@ -4190,6 +4190,14 @@ lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni,
 	if (atomic_read(&lpni->lpni_healthv) == LNET_MAX_HEALTH_VALUE)
 		return;
 
+	if (!lpni->lpni_last_alive) {
+		CDEBUG(D_NET,
+		       "lpni %s(%p) not eligible for recovery last alive %lld\n",
+		       libcfs_nid2str(lpni->lpni_nid), lpni,
+		       lpni->lpni_last_alive);
+		return;
+	}
+
 	if (now > lpni->lpni_last_alive + lnet_recovery_limit) {
 		CDEBUG(D_NET, "lpni %s aged out last alive %lld\n",
 		       libcfs_nid2str(lpni->lpni_nid),
-- 
1.8.3.1