From 143893381d428466d4c71e075a041a9cbbd28818 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 27 Jan 2021 12:22:09 -0600 Subject: [PATCH] LU-14661 lnet: Check if discovery toggled off in ping reply If a peer is initially discovered and found to have discovery enabled, but the peer later reloads LNet with discovery disabled, then we can delete the peer and re-create it the next time the peer is discovered. It is safe to delete and re-create the peer as long as it wasn't configured manually. In lnet_peer_deletion(), we need to use list_del_init() when removing the peer from the discovery queue because the lnet_peer_del() code path can result in a call to lnet_peer_queue_for_discovery() where we check if the lp_dc_list is empty. Test-Parameters: trivial HPE-bug-id: LUS-9178 Fixes: aa7de0af69 ("LU-13895 lnet: Prevent discovery on peer marked deletion") Signed-off-by: Chris Horn Change-Id: I0b43d7541711a3b94c492082d4a29487ebe72b09 Reviewed-on: https://review.whamcloud.com/43508 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- lnet/lnet/peer.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 155419d..92701fa 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -2565,20 +2565,41 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) goto out; } - /* * The peer may have discovery disabled at its end. Set * NO_DISCOVERY as appropriate. 
*/ - if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY) && - !lnet_peer_discovery_disabled) { - CDEBUG(D_NET, "Peer %s has discovery enabled\n", - libcfs_nid2str(lp->lp_primary_nid)); - lp->lp_state &= ~LNET_PEER_NO_DISCOVERY; - } else { + if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY) || + lnet_peer_discovery_disabled) { CDEBUG(D_NET, "Peer %s has discovery disabled\n", libcfs_nid2str(lp->lp_primary_nid)); + + /* Detect whether this peer has toggled discovery from on to + * off and whether we can delete and re-create the peer. Peers + * that were manually configured cannot be deleted by discovery. + * We need to delete this peer and re-create it if the peer was + * not configured manually, is currently considered DD capable, + * and either: + * 1. We've already discovered the peer (the peer has toggled + * the discovery feature from on to off), or + * 2. The peer is considered MR, but it was not user configured + * (this was a "temporary" peer created via the kernel APIs + * that we're discovering for the first time) + */ + if (!(lp->lp_state & (LNET_PEER_CONFIGURED | + LNET_PEER_NO_DISCOVERY)) && + (lp->lp_state & (LNET_PEER_DISCOVERED | + LNET_PEER_MULTI_RAIL))) { + CDEBUG(D_NET, "Marking %s:0x%x for deletion\n", + libcfs_nid2str(lp->lp_primary_nid), + lp->lp_state); + lp->lp_state |= LNET_PEER_MARK_DELETION; + } lp->lp_state |= LNET_PEER_NO_DISCOVERY; + } else { + CDEBUG(D_NET, "Peer %s has discovery enabled\n", + libcfs_nid2str(lp->lp_primary_nid)); + lp->lp_state &= ~LNET_PEER_NO_DISCOVERY; } /* @@ -3099,7 +3120,7 @@ __must_hold(&lp->lp_lock) * of deleting it. */ if (!list_empty(&lp->lp_dc_list)) - list_del(&lp->lp_dc_list); + list_del_init(&lp->lp_dc_list); list_for_each_entry_safe(route, tmp, &lp->lp_routes, lr_gwlist) -- 1.8.3.1