Whamcloud - gitweb
LU-15512 lnet: Stop discovery on deleted peer NI 29/46429/2
author Chris Horn <chris.horn@hpe.com>
Wed, 2 Feb 2022 18:37:00 +0000 (18:37 +0000)
committer Oleg Drokin <green@whamcloud.com>
Wed, 23 Feb 2022 17:14:55 +0000 (17:14 +0000)
lnet_discover_peer_locked() needs to check whether the peer NI that is
undergoing discovery has been deleted (i.e. its associated peer has
LNET_PEER_MARK_DELETED state). Otherwise, we may enter an infinite
loop because this peer will never be considered up to date.

Test-Parameters: trivial testlist=sanity-lnet
Fixes: fd32cd817c ("LU-13895 lnet: Prevent discovery on deleted peer")
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I43d276fc460241c1724c8e30913bb6c5cbb7c8f4
Reviewed-on: https://review.whamcloud.com/46429
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/peer.c
lustre/tests/sanity-lnet.sh

index ba2f84d..ad77384 100644 (file)
@@ -2593,6 +2593,8 @@ again:
                        break;
                if (lnet_peer_is_uptodate(lp))
                        break;
+               if (lp->lp_state & LNET_PEER_MARK_DELETED)
+                       break;
                lnet_peer_queue_for_discovery(lp);
                count++;
                CDEBUG(D_NET, "Discovery attempt # %d\n", count);
@@ -2637,7 +2639,9 @@ again:
                rc = lp->lp_dc_error;
        else if (!block)
                CDEBUG(D_NET, "non-blocking discovery\n");
-       else if (!lnet_peer_is_uptodate(lp) && !lnet_is_discovery_disabled(lp))
+       else if (!lnet_peer_is_uptodate(lp) &&
+                !(lnet_is_discovery_disabled(lp) ||
+                  (lp->lp_state & LNET_PEER_MARK_DELETED)))
                goto again;
 
        CDEBUG(D_NET, "peer %s NID %s: %d. %s\n",
index c2d6f34..aa52943 100755 (executable)
@@ -2402,6 +2402,27 @@ test_218() {
 }
 run_test 218 "Local recovery pings should exercise all available paths"
 
+test_219() {
+       reinit_dlc || return $?
+       add_net "tcp" "${INTERFACES[0]}" || return $?
+       add_net "tcp1" "${INTERFACES[0]}" || return $?
+
+       local nid1=$(lctl list_nids | head -n 1)
+       local nid2=$(lctl list_nids | tail --lines 1)
+
+       do_lnetctl ping $nid1 ||
+               error "Ping failed $?"
+       do_lnetctl ping $nid2 ||
+               error "Ping failed $?"
+
+       do_lnetctl discover $nid2 ||
+               error "Discovery failed"
+
+       $LNETCTL peer show --nid $nid1 | grep -q $nid2 ||
+               error "$nid2 is not listed under $nid1"
+}
+run_test 219 "Consolidate peer entries"
+
 test_230() {
        # LU-12815
        echo "Check valid values; Should succeed"