From: Chris Horn Date: Mon, 9 Sep 2019 17:54:08 +0000 (-0500) Subject: LU-12739 lnet: Don't queue msg when discovery has completed X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=0ae34155c59de68a1cf3b9c58cd539081ff851ac;p=fs%2Flustre-release.git LU-12739 lnet: Don't queue msg when discovery has completed In lnet_initiate_peer_discovery(), it is possible for the peer object to change after the call to lnet_discover_peer_locked(), and it is also possible for the peer to complete discovery between the first call to lnet_peer_is_uptodate() and our placing the lnet_msg onto the peer's lp_dc_pendq. After the call to lnet_discover_peer_locked(), check whether the (potentially new) peer object is up to date while holding the lp_lock. If the peer is up to date, then we needn't queue the message. Otherwise, we continue to hold the lock to place the message on the peer's lp_dc_pendq. Lustre-change: https://review.whamcloud.com/36139 Lustre-commit: 4ef62976448d6821df9aab3e720fd8d9d0bdefce Test-Parameters: trivial testlist=sanity-lnet Cray-bug-id: LUS-7596 Signed-off-by: Chris Horn Change-Id: Ib3da7447588479bb35afcc3fe176b9120d915a89 --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 048531b..3e572d3 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -994,6 +994,7 @@ lnet_peer_ni_is_primary(struct lnet_peer_ni *lpni) } bool lnet_peer_is_uptodate(struct lnet_peer *lp); +bool lnet_peer_is_uptodate_locked(struct lnet_peer *lp); static inline bool lnet_peer_needs_push(struct lnet_peer *lp) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index b178fd0..9bcc5cd 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2574,9 +2574,9 @@ again: msg->msg_src_nid_param = src_nid; /* - * Now that we have a peer_ni, check if we want to discover - * the peer. Traffic to the LNET_RESERVED_PORTAL should not - * trigger discovery. + * If necessary, perform discovery on the peer that owns this peer_ni. 
+ * Note, this can result in the ownership of this peer_ni changing + * to another peer object. */ peer = lpni->lpni_peer_net->lpn_peer; if (lnet_msg_discovery(msg) && !lnet_peer_is_uptodate(peer)) { @@ -2589,20 +2589,23 @@ again: } /* The peer may have changed. */ peer = lpni->lpni_peer_net->lpn_peer; - /* queue message and return */ - msg->msg_rtr_nid_param = rtr_nid; - msg->msg_sending = 0; spin_lock(&peer->lp_lock); - list_add_tail(&msg->msg_list, &peer->lp_dc_pendq); - spin_unlock(&peer->lp_lock); - lnet_peer_ni_decref_locked(lpni); - primary_nid = peer->lp_primary_nid; - lnet_net_unlock(cpt); + if (!lnet_peer_is_uptodate_locked(peer)) { + /* queue message and return */ + msg->msg_rtr_nid_param = rtr_nid; + msg->msg_sending = 0; + list_add_tail(&msg->msg_list, &peer->lp_dc_pendq); + lnet_peer_ni_decref_locked(lpni); + primary_nid = peer->lp_primary_nid; + spin_unlock(&peer->lp_lock); + lnet_net_unlock(cpt); - CDEBUG(D_NET, "%s pending discovery\n", - libcfs_nid2str(primary_nid)); + CDEBUG(D_NET, "%s pending discovery\n", + libcfs_nid2str(primary_nid)); - return LNET_DC_WAIT; + return LNET_DC_WAIT; + } + spin_unlock(&peer->lp_lock); } lnet_peer_ni_decref_locked(lpni); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index ea70e38..27aa94b 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -1744,6 +1744,17 @@ out_mutex_unlock: return lpni; } +bool +lnet_peer_is_uptodate(struct lnet_peer *lp) +{ + bool rc; + + spin_lock(&lp->lp_lock); + rc = lnet_peer_is_uptodate_locked(lp); + spin_unlock(&lp->lp_lock); + return rc; +} + /* * Is a peer uptodate from the point of view of discovery? * @@ -1753,11 +1764,11 @@ out_mutex_unlock: * Otherwise look at whether the peer needs rediscovering. 
*/ bool -lnet_peer_is_uptodate(struct lnet_peer *lp) +lnet_peer_is_uptodate_locked(struct lnet_peer *lp) +__must_hold(&lp->lp_lock) { bool rc; - spin_lock(&lp->lp_lock); if (lp->lp_state & (LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH)) { @@ -1779,7 +1790,6 @@ lnet_peer_is_uptodate(struct lnet_peer *lp) } else { rc = false; } - spin_unlock(&lp->lp_lock); return rc; }