From: Chris Horn Date: Thu, 6 Aug 2020 21:21:29 +0000 (-0500) Subject: LU-13895 lnet: Prevent discovery on deleted peer X-Git-Tag: 2.14.51~68 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=fd32cd817cba336c684fe3ab7aac79705061e8b5;hp=aa7de0af6969df77a896e3a2e90c971a5081e324 LU-13895 lnet: Prevent discovery on deleted peer We needn't perform any discovery activities on a peer that has had lnet_peer_del() called on it. Test-Parameters: trivial HPE-bug-id: LUS-9192 Signed-off-by: Chris Horn Change-Id: I5c89dc89038d2c8bf4d2a29029af7720963b81a2 Reviewed-on: https://review.whamcloud.com/39605 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 4fdcd87..b719414 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -969,6 +969,8 @@ lnet_peer_needs_push(struct lnet_peer *lp) { if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) return false; + if (lp->lp_state & LNET_PEER_MARK_DELETED) + return false; if (lp->lp_state & LNET_PEER_FORCE_PUSH) return true; if (lp->lp_state & LNET_PEER_NO_DISCOVERY) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 22a1bdd..41565ee 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -764,6 +764,8 @@ struct lnet_peer { /* peer is marked for deletion */ #define LNET_PEER_MARK_DELETION BIT(18) +/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */ +#define LNET_PEER_MARK_DELETED BIT(19) struct lnet_peer_net { /* chain on lp_peer_nets */ diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index ff2ce69..e3eb262 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -459,6 +459,10 @@ lnet_peer_del_locked(struct lnet_peer *peer) CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid)); + spin_lock(&peer->lp_lock); + peer->lp_state |= LNET_PEER_MARK_DELETED; + spin_unlock(&peer->lp_lock); + lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni); while (lpni != NULL) { lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni); @@ -471,9 +475,41 @@ lnet_peer_del_locked(struct lnet_peer *peer) return rc2; } +/* + * Discovering this peer is taking too long. Cancel any Ping or Push + * that discovery is waiting on by unlinking the relevant MDs. The + * lnet_discovery_event_handler() will proceed from here and complete + * the cleanup. + */ +static void lnet_peer_cancel_discovery(struct lnet_peer *lp) +{ + struct lnet_handle_md ping_mdh; + struct lnet_handle_md push_mdh; + + LNetInvalidateMDHandle(&ping_mdh); + LNetInvalidateMDHandle(&push_mdh); + + spin_lock(&lp->lp_lock); + if (lp->lp_state & LNET_PEER_PING_SENT) { + ping_mdh = lp->lp_ping_mdh; + LNetInvalidateMDHandle(&lp->lp_ping_mdh); + } + if (lp->lp_state & LNET_PEER_PUSH_SENT) { + push_mdh = lp->lp_push_mdh; + LNetInvalidateMDHandle(&lp->lp_push_mdh); + } + spin_unlock(&lp->lp_lock); + + if (!LNetMDHandleIsInvalid(ping_mdh)) + LNetMDUnlink(ping_mdh); + if (!LNetMDHandleIsInvalid(push_mdh)) + LNetMDUnlink(push_mdh); +} + static int lnet_peer_del(struct lnet_peer *peer) { + lnet_peer_cancel_discovery(peer); lnet_net_lock(LNET_LOCK_EX); lnet_peer_del_locked(peer); lnet_net_unlock(LNET_LOCK_EX); @@ -2970,6 +3006,10 @@ __must_hold(&lp->lp_lock) CDEBUG(D_NET, "peer %s(%p) state %#x\n", libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state); + /* no-op if lnet_peer_del() has already been called on this peer */ + if (lp->lp_state & LNET_PEER_MARK_DELETED) + return 0; + if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) return -ESHUTDOWN; @@ -3402,37 +3442,6 @@ static void lnet_peer_discovery_error(struct lnet_peer *lp, int error) } /* - * Discovering this peer is taking too long. Cancel any Ping or Push - * that discovery is waiting on by unlinking the relevant MDs. The - * lnet_discovery_event_handler() will proceed from here and complete - * the cleanup. - */ -static void lnet_peer_cancel_discovery(struct lnet_peer *lp) -{ - struct lnet_handle_md ping_mdh; - struct lnet_handle_md push_mdh; - - LNetInvalidateMDHandle(&ping_mdh); - LNetInvalidateMDHandle(&push_mdh); - - spin_lock(&lp->lp_lock); - if (lp->lp_state & LNET_PEER_PING_SENT) { - ping_mdh = lp->lp_ping_mdh; - LNetInvalidateMDHandle(&lp->lp_ping_mdh); - } - if (lp->lp_state & LNET_PEER_PUSH_SENT) { - push_mdh = lp->lp_push_mdh; - LNetInvalidateMDHandle(&lp->lp_push_mdh); - } - spin_unlock(&lp->lp_lock); - - if (!LNetMDHandleIsInvalid(ping_mdh)) - LNetMDUnlink(ping_mdh); - if (!LNetMDHandleIsInvalid(push_mdh)) - LNetMDUnlink(push_mdh); -} - -/* * Wait for work to be queued or some other change that must be * attended to. Returns non-zero if the discovery thread should shut * down. @@ -3588,7 +3597,8 @@ static int lnet_peer_discovery(void *arg) CDEBUG(D_NET, "peer %s(%p) state %#x\n", libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state); - if (lp->lp_state & LNET_PEER_MARK_DELETION) + if (lp->lp_state & (LNET_PEER_MARK_DELETION | + LNET_PEER_MARK_DELETED)) rc = lnet_peer_deletion(lp); else if (lp->lp_state & LNET_PEER_DATA_PRESENT) rc = lnet_peer_data_present(lp);