From: Chris Horn Date: Thu, 6 Aug 2020 21:21:29 +0000 (-0500) Subject: LU-13895 lnet: Prevent discovery on deleted peer X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=d393b4d2b5e92d35e704f953157a0d8ffbbbdb52;p=fs%2Flustre-release.git LU-13895 lnet: Prevent discovery on deleted peer We needn't perform any discovery activities on a peer that has had lnet_peer_del() called on it. Lustre-change: https://review.whamcloud.com/39605 Lustre-commit: fd32cd817cba336c684fe3ab7aac79705061e8b5 Test-Parameters: trivial testlist=sanity-lnet HPE-bug-id: LUS-9192 Signed-off-by: Chris Horn Change-Id: I5c89dc89038d2c8bf4d2a29029af7720963b81a2 Reviewed-on: https://review.whamcloud.com/48292 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Andreas Dilger --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index e79e53d..81e7bb1 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -961,6 +961,8 @@ lnet_peer_needs_push(struct lnet_peer *lp) { if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) return false; + if (lp->lp_state & LNET_PEER_MARK_DELETED) + return false; if (lp->lp_state & LNET_PEER_FORCE_PUSH) return true; if (lp->lp_state & LNET_PEER_NO_DISCOVERY) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 0d1d00d..12d29bd 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -749,6 +749,8 @@ struct lnet_peer { /* peer is marked for deletion */ #define LNET_PEER_MARK_DELETION BIT(18) +/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */ +#define LNET_PEER_MARK_DELETED BIT(19) struct lnet_peer_net { /* chain on lp_peer_nets */ diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 4eb4e3a..56c20667 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -455,6 +455,10 @@ lnet_peer_del_locked(struct lnet_peer *peer) CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid)); + spin_lock(&peer->lp_lock); + peer->lp_state |= LNET_PEER_MARK_DELETED; + spin_unlock(&peer->lp_lock); + lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni); while (lpni != NULL) { lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni); @@ -467,9 +471,41 @@ lnet_peer_del_locked(struct lnet_peer *peer) return rc2; } +/* + * Discovering this peer is taking too long. Cancel any Ping or Push + * that discovery is waiting on by unlinking the relevant MDs. The + * lnet_discovery_event_handler() will proceed from here and complete + * the cleanup. + */ +static void lnet_peer_cancel_discovery(struct lnet_peer *lp) +{ + struct lnet_handle_md ping_mdh; + struct lnet_handle_md push_mdh; + + LNetInvalidateMDHandle(&ping_mdh); + LNetInvalidateMDHandle(&push_mdh); + + spin_lock(&lp->lp_lock); + if (lp->lp_state & LNET_PEER_PING_SENT) { + ping_mdh = lp->lp_ping_mdh; + LNetInvalidateMDHandle(&lp->lp_ping_mdh); + } + if (lp->lp_state & LNET_PEER_PUSH_SENT) { + push_mdh = lp->lp_push_mdh; + LNetInvalidateMDHandle(&lp->lp_push_mdh); + } + spin_unlock(&lp->lp_lock); + + if (!LNetMDHandleIsInvalid(ping_mdh)) + LNetMDUnlink(ping_mdh); + if (!LNetMDHandleIsInvalid(push_mdh)) + LNetMDUnlink(push_mdh); +} + static int lnet_peer_del(struct lnet_peer *peer) { + lnet_peer_cancel_discovery(peer); lnet_net_lock(LNET_LOCK_EX); lnet_peer_del_locked(peer); lnet_net_unlock(LNET_LOCK_EX); @@ -2841,6 +2877,10 @@ __must_hold(&lp->lp_lock) CDEBUG(D_NET, "peer %s(%p) state %#x\n", libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state); + /* no-op if lnet_peer_del() has already been called on this peer */ + if (lp->lp_state & LNET_PEER_MARK_DELETED) + return 0; + if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) return -ESHUTDOWN; @@ -3273,37 +3313,6 @@ static void lnet_peer_discovery_error(struct lnet_peer *lp, int error) } /* - * Discovering this peer is taking too long. Cancel any Ping or Push - * that discovery is waiting on by unlinking the relevant MDs. The - * lnet_discovery_event_handler() will proceed from here and complete - * the cleanup. - */ -static void lnet_peer_cancel_discovery(struct lnet_peer *lp) -{ - struct lnet_handle_md ping_mdh; - struct lnet_handle_md push_mdh; - - LNetInvalidateMDHandle(&ping_mdh); - LNetInvalidateMDHandle(&push_mdh); - - spin_lock(&lp->lp_lock); - if (lp->lp_state & LNET_PEER_PING_SENT) { - ping_mdh = lp->lp_ping_mdh; - LNetInvalidateMDHandle(&lp->lp_ping_mdh); - } - if (lp->lp_state & LNET_PEER_PUSH_SENT) { - push_mdh = lp->lp_push_mdh; - LNetInvalidateMDHandle(&lp->lp_push_mdh); - } - spin_unlock(&lp->lp_lock); - - if (!LNetMDHandleIsInvalid(ping_mdh)) - LNetMDUnlink(ping_mdh); - if (!LNetMDHandleIsInvalid(push_mdh)) - LNetMDUnlink(push_mdh); -} - -/* * Wait for work to be queued or some other change that must be * attended to. Returns non-zero if the discovery thread should shut * down. @@ -3459,7 +3468,8 @@ static int lnet_peer_discovery(void *arg) CDEBUG(D_NET, "peer %s(%p) state %#x\n", libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state); - if (lp->lp_state & LNET_PEER_MARK_DELETION) + if (lp->lp_state & (LNET_PEER_MARK_DELETION | + LNET_PEER_MARK_DELETED)) rc = lnet_peer_deletion(lp); else if (lp->lp_state & LNET_PEER_DATA_PRESENT) rc = lnet_peer_data_present(lp);