Whamcloud - gitweb
LU-13362 lnet: Disc reply race with finalize and routed recv 37/37937/2
authorChris Horn <hornc@cray.com>
Fri, 13 Mar 2020 19:34:23 +0000 (14:34 -0500)
committerOleg Drokin <green@whamcloud.com>
Tue, 24 Mar 2020 05:16:25 +0000 (05:16 +0000)
A race exists between a thread handling a discovery reply, and
another thread in the lnet_finalize() call path, or in any of
the code paths that result in lnet_post_routed_recv_locked().

The discovery reply handler takes the lp_lock, and while holding
that lock, tries to acquire the lpni_lock for each lpni associated
with the lnet_peer object.

In lnet_return_rx_credits_locked() (lnet_finalize() code path) and
lnet_post_routed_recv_locked() (called via a couple different code
paths) the lpni_lock is taken, and then the lp_lock is taken for the
associated lnet_peer object.

Thread A: spin_lock(lp_lock)
Thread B: spin_lock(lpni_lock)
Thread B: spin_lock(lp_lock)
Thread A: spin_lock(lpni_lock)

This results in deadlock. The lp_lock and lpni_lock do not need to be
held at the same time in lnet_return_rx_credits_locked() nor in
lnet_post_routed_recv_locked().

Cray-bug-id: LUS-8607
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Ie4e9a172b4705d9f5723a6da1ff251b380ad47ac
Reviewed-on: https://review.whamcloud.com/37937
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c

index ffcebb7..d74cfcc 100644 (file)
@@ -1012,8 +1012,6 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
        if (!msg->msg_peerrtrcredit) {
                /* lpni_lock protects the credit manipulation */
                spin_lock(&lpni->lpni_lock);
        if (!msg->msg_peerrtrcredit) {
                /* lpni_lock protects the credit manipulation */
                spin_lock(&lpni->lpni_lock);
-               /* lp_lock protects the lp_rtrq */
-               spin_lock(&lp->lp_lock);
 
                msg->msg_peerrtrcredit = 1;
                lpni->lpni_rtrcredits--;
 
                msg->msg_peerrtrcredit = 1;
                lpni->lpni_rtrcredits--;
@@ -1021,15 +1019,16 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv)
                        lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
 
                if (lpni->lpni_rtrcredits < 0) {
                        lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
 
                if (lpni->lpni_rtrcredits < 0) {
+                       spin_unlock(&lpni->lpni_lock);
                        /* must have checked eager_recv before here */
                        LASSERT(msg->msg_rx_ready_delay);
                        msg->msg_rx_delayed = 1;
                        /* must have checked eager_recv before here */
                        LASSERT(msg->msg_rx_ready_delay);
                        msg->msg_rx_delayed = 1;
+                       /* lp_lock protects the lp_rtrq */
+                       spin_lock(&lp->lp_lock);
                        list_add_tail(&msg->msg_list, &lp->lp_rtrq);
                        spin_unlock(&lp->lp_lock);
                        list_add_tail(&msg->msg_list, &lp->lp_rtrq);
                        spin_unlock(&lp->lp_lock);
-                       spin_unlock(&lpni->lpni_lock);
                        return LNET_CREDIT_WAIT;
                }
                        return LNET_CREDIT_WAIT;
                }
-               spin_unlock(&lp->lp_lock);
                spin_unlock(&lpni->lpni_lock);
        }
 
                spin_unlock(&lpni->lpni_lock);
        }
 
@@ -1256,15 +1255,15 @@ routing_off:
                LASSERT(rxpeerni->lpni_peer_net);
                LASSERT(rxpeerni->lpni_peer_net->lpn_peer);
 
                LASSERT(rxpeerni->lpni_peer_net);
                LASSERT(rxpeerni->lpni_peer_net->lpn_peer);
 
-               lp = rxpeerni->lpni_peer_net->lpn_peer;
-
                /* give back peer router credits */
                msg->msg_peerrtrcredit = 0;
 
                spin_lock(&rxpeerni->lpni_lock);
                /* give back peer router credits */
                msg->msg_peerrtrcredit = 0;
 
                spin_lock(&rxpeerni->lpni_lock);
-               spin_lock(&lp->lp_lock);
-
                rxpeerni->lpni_rtrcredits++;
                rxpeerni->lpni_rtrcredits++;
+               spin_unlock(&rxpeerni->lpni_lock);
+
+               lp = rxpeerni->lpni_peer_net->lpn_peer;
+               spin_lock(&lp->lp_lock);
 
                /* drop all messages which are queued to be routed on that
                 * peer. */
 
                /* drop all messages which are queued to be routed on that
                 * peer. */
@@ -1272,7 +1271,6 @@ routing_off:
                        LIST_HEAD(drop);
                        list_splice_init(&lp->lp_rtrq, &drop);
                        spin_unlock(&lp->lp_lock);
                        LIST_HEAD(drop);
                        list_splice_init(&lp->lp_rtrq, &drop);
                        spin_unlock(&lp->lp_lock);
-                       spin_unlock(&rxpeerni->lpni_lock);
                        lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt);
                } else if (!list_empty(&lp->lp_rtrq)) {
                        int msg2_cpt;
                        lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt);
                } else if (!list_empty(&lp->lp_rtrq)) {
                        int msg2_cpt;
@@ -1282,7 +1280,6 @@ routing_off:
                        list_del(&msg2->msg_list);
                        msg2_cpt = msg2->msg_rx_cpt;
                        spin_unlock(&lp->lp_lock);
                        list_del(&msg2->msg_list);
                        msg2_cpt = msg2->msg_rx_cpt;
                        spin_unlock(&lp->lp_lock);
-                       spin_unlock(&rxpeerni->lpni_lock);
                        /*
                         * messages on the lp_rtrq can be from any NID in
                         * the peer, which means they might have different
                        /*
                         * messages on the lp_rtrq can be from any NID in
                         * the peer, which means they might have different
@@ -1300,7 +1297,6 @@ routing_off:
                        }
                } else {
                        spin_unlock(&lp->lp_lock);
                        }
                } else {
                        spin_unlock(&lp->lp_lock);
-                       spin_unlock(&rxpeerni->lpni_lock);
                }
        }
        if (rxni != NULL) {
                }
        }
        if (rxni != NULL) {