From 94f4e1f517d71ffd6662fb4a82e3dee9aa8f6796 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 2 Feb 2022 18:37:00 +0000 Subject: [PATCH] LU-15512 lnet: Stop discovery on deleted peer NI lnet_discover_peer_locked() needs to check whether the peer NI that is undergoing discovery has been deleted (i.e. its associated peer has LNET_PEER_MARK_DELETED state). Otherwise, we may enter an infinite loop because this peer will never be considered up to date. Test-Parameters: trivial testlist=sanity-lnet Fixes: fd32cd817c ("LU-13895 lnet: Prevent discovery on deleted peer") Signed-off-by: Chris Horn Change-Id: I43d276fc460241c1724c8e30913bb6c5cbb7c8f4 Reviewed-on: https://review.whamcloud.com/46429 Tested-by: jenkins Reviewed-by: Serguei Smirnov Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lnet/lnet/peer.c | 6 +++++- lustre/tests/sanity-lnet.sh | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index ba2f84d..ad77384 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -2593,6 +2593,8 @@ again: break; if (lnet_peer_is_uptodate(lp)) break; + if (lp->lp_state & LNET_PEER_MARK_DELETED) + break; lnet_peer_queue_for_discovery(lp); count++; CDEBUG(D_NET, "Discovery attempt # %d\n", count); @@ -2637,7 +2639,9 @@ again: rc = lp->lp_dc_error; else if (!block) CDEBUG(D_NET, "non-blocking discovery\n"); - else if (!lnet_peer_is_uptodate(lp) && !lnet_is_discovery_disabled(lp)) + else if (!lnet_peer_is_uptodate(lp) && + !(lnet_is_discovery_disabled(lp) || + (lp->lp_state & LNET_PEER_MARK_DELETED))) goto again; CDEBUG(D_NET, "peer %s NID %s: %d. %s\n", diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index c2d6f34..aa52943 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -2402,6 +2402,27 @@ test_218() { } run_test 218 "Local recovery pings should exercise all available paths" +test_219() { + reinit_dlc || return $? 
+ add_net "tcp" "${INTERFACES[0]}" || return $? + add_net "tcp1" "${INTERFACES[0]}" || return $? + + local nid1=$(lctl list_nids | head -n 1) + local nid2=$(lctl list_nids | tail --lines 1) + + do_lnetctl ping $nid1 || + error "Ping failed $?" + do_lnetctl ping $nid2 || + error "Ping failed $?" + + do_lnetctl discover $nid2 || + error "Discovery failed" + + $LNETCTL peer show --nid $nid1 | grep -q $nid2 || + error "$nid2 is not listed under $nid1" +} +run_test 219 "Consolidate peer entries" + test_230() { # LU-12815 echo "Check valid values; Should succeed" -- 1.8.3.1