From a966b624ac76e34e8ed28c6980c3f58cb441eeb0 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Mon, 12 Sep 2022 15:09:38 -0600
Subject: [PATCH] LU-16149 lnet: Discovery queue and deletion race

lnet_peer_deletion() can race with another thread calling
lnet_peer_queue_for_discovery().

Discovery thread:
- Calls lnet_peer_deletion():
  - LNET_PEER_DISCOVERING bit is cleared from lnet_peer::lp_state
  - releases lnet_peer::lp_lock

Another thread:
- Acquires lnet_net_lock/EX
- Calls lnet_peer_queue_for_discovery()
  - Takes lnet_peer::lp_lock
  - Sets LNET_PEER_DISCOVERING bit
  - Releases lnet_peer::lp_lock
  - Sees lnet_peer::lp_dc_list is not empty, so it does not add peer
    to dc request queue
- lnet_peer_queue_for_discovery() returns, lnet_net_lock/EX released

Discovery thread:
- Acquires lnet_net_lock/EX
- Deletes peer from ln_dc_working list
- Performs the peer deletion

At this point, the peer is not on any discovery list, and it has the
LNET_PEER_DISCOVERING bit set. This peer is now stranded, and any
messages on the peer's lnet_peer::lp_dc_pendq are likewise stranded.

To solve this, we modify lnet_peer_deletion() so that it waits to
clear the LNET_PEER_DISCOVERING bit until it has completed deleting
the peer and re-acquired the lnet_peer::lp_lock. This ensures we
cannot race with any other thread that may add the
LNET_PEER_DISCOVERING bit back to the peer. We also avoid deleting
the peer from the ln_dc_working list in lnet_peer_deletion(). This is
already done by lnet_peer_discovery_complete().

There is another window where the LNET_PEER_DISCOVERING bit can be
added: when the discovery thread drops the lp_lock just before
acquiring the net_lock/EX and calling lnet_peer_discovery_complete().
Have lnet_peer_discovery_complete() clear LNET_PEER_DISCOVERING to
deal with this (it already does so for the case where discovery hit
an error). Also move the deletion of lp_dc_list to after we clear the
DISCOVERING bit. This mirrors the behavior of
lnet_peer_queue_for_discovery(), which sets the DISCOVERING bit and
then manipulates the lp_dc_list.

Also tweak the logic in lnet_peer_deletion() to call
lnet_peer_del_locked() in order to avoid extra calls to
lnet_net_lock()/lnet_net_unlock().

HPE-bug-id: LUS-11237
Signed-off-by: Chris Horn
Change-Id: Ifcfef1d49f216af4ddfcdaf928024e8ee3952555
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48532
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Frank Sehr
Reviewed-by: Cyril Bordage
Reviewed-by: Oleg Drokin
---
 lnet/lnet/peer.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index 1650b9f..00bbe87 100644
--- a/lnet/lnet/peer.c
+++ b/lnet/lnet/peer.c
@@ -2216,15 +2216,19 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp, int dc_error)
 	CDEBUG(D_NET, "Discovery complete. Dequeue peer %s\n",
 	       libcfs_nidstr(&lp->lp_primary_nid));
 
-	list_del_init(&lp->lp_dc_list);
 	spin_lock(&lp->lp_lock);
+	/* Our caller dropped lp_lock which may have allowed another thread to
+	 * set LNET_PEER_DISCOVERING, or it may be set if dc_error is non-zero.
+	 * Ensure it is cleared.
+	 */
+	lp->lp_state &= ~LNET_PEER_DISCOVERING;
 	if (dc_error) {
 		lp->lp_dc_error = dc_error;
-		lp->lp_state &= ~LNET_PEER_DISCOVERING;
 		lp->lp_state |= LNET_PEER_REDISCOVER;
 	}
 	list_splice_init(&lp->lp_dc_pendq, &pending_msgs);
 	spin_unlock(&lp->lp_lock);
+	list_del_init(&lp->lp_dc_list);
 	wake_up(&lp->lp_dc_waitq);
 
 	if (lp->lp_rtr_refcount > 0)
@@ -3167,18 +3171,16 @@ __must_hold(&lp->lp_lock)
 	struct list_head rlist;
 	struct lnet_route *route, *tmp;
 	int sensitivity = lp->lp_health_sensitivity;
-	int rc;
+	int rc = 0;
 
 	INIT_LIST_HEAD(&rlist);
 
-	lp->lp_state &= ~(LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING |
-			  LNET_PEER_FORCE_PUSH);
 	CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 	       libcfs_nidstr(&lp->lp_primary_nid), lp, lp->lp_state);
 
 	/* no-op if lnet_peer_del() has already been called on this peer */
 	if (lp->lp_state & LNET_PEER_MARK_DELETED)
-		return 0;
+		goto clear_discovering;
 
 	spin_unlock(&lp->lp_lock);
 
@@ -3187,28 +3189,25 @@ __must_hold(&lp->lp_lock)
 	    the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) {
 		mutex_unlock(&the_lnet.ln_api_mutex);
 		spin_lock(&lp->lp_lock);
-		return -ESHUTDOWN;
+		rc = -ESHUTDOWN;
+		goto clear_discovering;
 	}
 
+	lnet_peer_cancel_discovery(lp);
 	lnet_net_lock(LNET_LOCK_EX);
-	/* remove the peer from the discovery work
-	 * queue if it's on there in preparation
-	 * of deleting it.
-	 */
-	if (!list_empty(&lp->lp_dc_list))
-		list_del_init(&lp->lp_dc_list);
 	list_for_each_entry_safe(route, tmp,
 				 &lp->lp_routes,
 				 lr_gwlist)
 		lnet_move_route(route, NULL, &rlist);
-	lnet_net_unlock(LNET_LOCK_EX);
 
-	/* lnet_peer_del() deletes all the peer NIs owned by this peer */
-	rc = lnet_peer_del(lp);
+	/* lnet_peer_del_locked() deletes all the peer NIs owned by this peer */
+	rc = lnet_peer_del_locked(lp);
 	if (rc)
 		CNETERR("Internal error: Unable to delete peer %s rc %d\n",
 			libcfs_nidstr(&lp->lp_primary_nid), rc);
 
+	lnet_net_unlock(LNET_LOCK_EX);
+
 	list_for_each_entry_safe(route, tmp,
 				 &rlist, lr_list) {
 		/* re-add these routes */
@@ -3224,7 +3223,13 @@ __must_hold(&lp->lp_lock)
 
 	spin_lock(&lp->lp_lock);
 
-	return 0;
+	rc = 0;
+
+clear_discovering:
+	lp->lp_state &= ~(LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING |
+			  LNET_PEER_FORCE_PUSH);
+
+	return rc;
 }
 
 /*
-- 
1.8.3.1
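
For readers who want the locking pattern in isolation, below is a minimal,
self-contained sketch of the ordering the fix enforces: the deletion path
clears the DISCOVERING flag only after its work is finished and the peer lock
has been re-acquired, so a flag set by a racing queue-for-discovery call
cannot strand the peer. This is illustrative pthread code, not LNet code and
not part of the patch; the toy_* names are hypothetical, and the single mutex
stands in for both lnet_peer::lp_lock and the lnet_net_lock/EX protection of
discovery-list membership.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define TOY_DISCOVERING 0x01u

struct toy_peer {
	pthread_mutex_t lock;	/* stands in for lnet_peer::lp_lock */
	unsigned int state;	/* stands in for lnet_peer::lp_state */
	bool on_dc_list;	/* stands in for lp_dc_list membership */
};

/* Racing thread: mark the peer for discovery and queue it if needed. */
static void toy_queue_for_discovery(struct toy_peer *p)
{
	pthread_mutex_lock(&p->lock);
	p->state |= TOY_DISCOVERING;
	if (!p->on_dc_list)
		p->on_dc_list = true;	/* add to the discovery queue */
	pthread_mutex_unlock(&p->lock);
}

/* Deletion path: entered with p->lock held, returns with it held. */
static void toy_peer_deletion(struct toy_peer *p)
{
	/* drop the peer lock to do the heavy deletion work */
	pthread_mutex_unlock(&p->lock);

	/* ... the actual deletion happens here; a racing
	 * toy_queue_for_discovery() may set TOY_DISCOVERING now ... */

	pthread_mutex_lock(&p->lock);
	p->on_dc_list = false;		/* removed from the discovery queue */
	/* the fix: clear the flag only after the deletion is complete,
	 * with the lock re-acquired, so any racing setter is undone too */
	p->state &= ~TOY_DISCOVERING;
}

int main(void)
{
	struct toy_peer p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.state = TOY_DISCOVERING,
		.on_dc_list = true,
	};

	/* the deletion path is entered with the peer lock held */
	pthread_mutex_lock(&p.lock);
	toy_peer_deletion(&p);
	pthread_mutex_unlock(&p.lock);

	/* a later queue-for-discovery starts from a clean state */
	toy_queue_for_discovery(&p);
	printf("state %#x, on_dc_list %d\n", p.state, (int)p.on_dc_list);
	return 0;
}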