Whamcloud - gitweb
LU-16149 lnet: Discovery queue and deletion race 32/48532/3
authorChris Horn <chris.horn@hpe.com>
Mon, 12 Sep 2022 21:09:38 +0000 (15:09 -0600)
committerOleg Drokin <green@whamcloud.com>
Tue, 25 Oct 2022 17:24:24 +0000 (17:24 +0000)
lnet_peer_deletion() can race with another thread calling
lnet_peer_queue_for_discovery.

Discovery thread:
 - Calls lnet_peer_deletion():
 - LNET_PEER_DISCOVERING bit is cleared from lnet_peer::lp_state
 - releases lnet_peer::lp_lock

Another thread:
 - Acquires lnet_net_lock/EX
 - Calls lnet_peer_queue_for_discovery()
 - Takes lnet_peer::lp_lock
 - Sets LNET_PEER_DISCOVERING bit
 - Releases lnet_peer::lp_lock
 - Sees lnet_peer::lp_dc_list is not empty, so it does not add peer
   to dc request queue
 - lnet_peer_queue_for_discovery() returns, lnet_net_lock/EX releases

Discovery thread:
 - Acquires lnet_net_lock/EX
 - Deletes peer from ln_dc_working list
 - performs the peer deletion

At this point, the peer is not on any discovery list, and it has
LNET_PEER_DISCOVERING bit set. This peer is now stranded, and any
messages on the peer's lnet_peer::lp_dc_pendq are likewise stranded.

To solve this, we modify lnet_peer_deletion() so that it waits to
clear the LNET_PEER_DISCOVERING bit until it has completed deleting
the peer and re-acquired the lnet_peer::lp_lock. This ensures we
cannot race with any other thread that may add the
LNET_PEER_DISCOVERING bit back to the peer. We also avoid deleting
the peer from the ln_dc_working list in lnet_peer_deletion(). This is
already done by lnet_peer_discovery_complete().

There is another window where the LNET_PEER_DISCOVERING bit can be
added when the discovery thread drops the lp_lock just before
acquiring the net_lock/EX and calling lnet_peer_discovery_complete().
Have lnet_peer_discovery_complete() clear LNET_PEER_DISCOVERING to
deal with this (it already does this for the case where discovery hit
an error). Also move the deletion of lp_dc_list to after we clear the
DISCOVERING bit. This is to mirror the behavior of
lnet_peer_queue_for_discovery() which sets the DISCOVERING bit and
then manipulates the lp_dc_list.

Also tweak the logic in lnet_peer_deletion() to call
lnet_peer_del_locked() in order to avoid extra calls to
lnet_net_lock()/lnet_net_unlock().

HPE-bug-id: LUS-11237
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Ifcfef1d49f216af4ddfcdaf928024e8ee3952555
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48532
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/peer.c

index 1650b9f..00bbe87 100644 (file)
@@ -2216,15 +2216,19 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp, int dc_error)
        CDEBUG(D_NET, "Discovery complete. Dequeue peer %s\n",
               libcfs_nidstr(&lp->lp_primary_nid));
 
-       list_del_init(&lp->lp_dc_list);
        spin_lock(&lp->lp_lock);
+       /* Our caller dropped lp_lock which may have allowed another thread to
+        * set LNET_PEER_DISCOVERING, or it may be set if dc_error is non-zero.
+        * Ensure it is cleared.
+        */
+       lp->lp_state &= ~LNET_PEER_DISCOVERING;
        if (dc_error) {
                lp->lp_dc_error = dc_error;
-               lp->lp_state &= ~LNET_PEER_DISCOVERING;
                lp->lp_state |= LNET_PEER_REDISCOVER;
        }
        list_splice_init(&lp->lp_dc_pendq, &pending_msgs);
        spin_unlock(&lp->lp_lock);
+       list_del_init(&lp->lp_dc_list);
        wake_up(&lp->lp_dc_waitq);
 
        if (lp->lp_rtr_refcount > 0)
@@ -3167,18 +3171,16 @@ __must_hold(&lp->lp_lock)
        struct list_head rlist;
        struct lnet_route *route, *tmp;
        int sensitivity = lp->lp_health_sensitivity;
-       int rc;
+       int rc = 0;
 
        INIT_LIST_HEAD(&rlist);
 
-       lp->lp_state &= ~(LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING |
-                         LNET_PEER_FORCE_PUSH);
        CDEBUG(D_NET, "peer %s(%p) state %#x\n",
               libcfs_nidstr(&lp->lp_primary_nid), lp, lp->lp_state);
 
        /* no-op if lnet_peer_del() has already been called on this peer */
        if (lp->lp_state & LNET_PEER_MARK_DELETED)
-               return 0;
+               goto clear_discovering;
 
        spin_unlock(&lp->lp_lock);
 
@@ -3187,28 +3189,25 @@ __must_hold(&lp->lp_lock)
            the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) {
                mutex_unlock(&the_lnet.ln_api_mutex);
                spin_lock(&lp->lp_lock);
-               return -ESHUTDOWN;
+               rc = -ESHUTDOWN;
+               goto clear_discovering;
        }
 
+       lnet_peer_cancel_discovery(lp);
        lnet_net_lock(LNET_LOCK_EX);
-       /* remove the peer from the discovery work
-        * queue if it's on there in preparation
-        * of deleting it.
-        */
-       if (!list_empty(&lp->lp_dc_list))
-               list_del_init(&lp->lp_dc_list);
        list_for_each_entry_safe(route, tmp,
                                 &lp->lp_routes,
                                 lr_gwlist)
                lnet_move_route(route, NULL, &rlist);
-       lnet_net_unlock(LNET_LOCK_EX);
 
-       /* lnet_peer_del() deletes all the peer NIs owned by this peer */
-       rc = lnet_peer_del(lp);
+       /* lnet_peer_del_locked() deletes all the peer NIs owned by this peer */
+       rc = lnet_peer_del_locked(lp);
        if (rc)
                CNETERR("Internal error: Unable to delete peer %s rc %d\n",
                        libcfs_nidstr(&lp->lp_primary_nid), rc);
 
+       lnet_net_unlock(LNET_LOCK_EX);
+
        list_for_each_entry_safe(route, tmp,
                                 &rlist, lr_list) {
                /* re-add these routes */
@@ -3224,7 +3223,13 @@ __must_hold(&lp->lp_lock)
 
        spin_lock(&lp->lp_lock);
 
-       return 0;
+       rc = 0;
+
+clear_discovering:
+       lp->lp_state &= ~(LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING |
+                         LNET_PEER_FORCE_PUSH);
+
+       return rc;
 }
 
 /*