From c700b4a410ab5542391d006ce541023ecf9b7a5d Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Fri, 13 Mar 2020 14:34:23 -0500 Subject: [PATCH] LU-13362 lnet: Disc reply race with finalize and routed recv A race exists between a thread handling a discovery reply, and another thread in the lnet_finalize() call path, or in any of the code paths that result in lnet_post_routed_recv_locked(). The discovery reply handler takes the lp_lock, and while holding that lock, tries to acquire the lpni_lock for each lpni associated with the lnet_peer object. In lnet_return_rx_credits_locked() (lnet_finalize() code path) and lnet_post_routed_recv_locked() (called via a couple different code paths) the lpni_lock is taken, and then the lp_lock is taken for the associated lnet_peer object. Thread A: spin_lock(lp_lock) Thread B: spin_lock(lpni_lock) Thread B: spin_lock(lp_lock) Thread A: spin_lock(lpni_lock) This results in deadlock. The lp_lock and lpni_lock do not need to be held at the same time in lnet_return_rx_credits_locked() nor in lnet_post_routed_recv_locked(). Cray-bug-id: LUS-8607 Signed-off-by: Chris Horn Change-Id: Ie4e9a172b4705d9f5723a6da1ff251b380ad47ac Reviewed-on: https://review.whamcloud.com/37937 Tested-by: jenkins Reviewed-by: Amir Shehata Reviewed-by: Serguei Smirnov Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/lnet/lib-move.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index ffcebb7..d74cfcc 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1012,8 +1012,6 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv) if (!msg->msg_peerrtrcredit) { /* lpni_lock protects the credit manipulation */ spin_lock(&lpni->lpni_lock); - /* lp_lock protects the lp_rtrq */ - spin_lock(&lp->lp_lock); msg->msg_peerrtrcredit = 1; lpni->lpni_rtrcredits--; @@ -1021,15 +1019,16 @@ lnet_post_routed_recv_locked(struct lnet_msg *msg, int do_recv) lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits; if (lpni->lpni_rtrcredits < 0) { + spin_unlock(&lpni->lpni_lock); /* must have checked eager_recv before here */ LASSERT(msg->msg_rx_ready_delay); msg->msg_rx_delayed = 1; + /* lp_lock protects the lp_rtrq */ + spin_lock(&lp->lp_lock); list_add_tail(&msg->msg_list, &lp->lp_rtrq); spin_unlock(&lp->lp_lock); - spin_unlock(&lpni->lpni_lock); return LNET_CREDIT_WAIT; } - spin_unlock(&lp->lp_lock); spin_unlock(&lpni->lpni_lock); } @@ -1256,15 +1255,15 @@ routing_off: LASSERT(rxpeerni->lpni_peer_net); LASSERT(rxpeerni->lpni_peer_net->lpn_peer); - lp = rxpeerni->lpni_peer_net->lpn_peer; - /* give back peer router credits */ msg->msg_peerrtrcredit = 0; spin_lock(&rxpeerni->lpni_lock); - spin_lock(&lp->lp_lock); - rxpeerni->lpni_rtrcredits++; + spin_unlock(&rxpeerni->lpni_lock); + + lp = rxpeerni->lpni_peer_net->lpn_peer; + spin_lock(&lp->lp_lock); /* drop all messages which are queued to be routed on that * peer. */ @@ -1272,7 +1271,6 @@ routing_off: LIST_HEAD(drop); list_splice_init(&lp->lp_rtrq, &drop); spin_unlock(&lp->lp_lock); - spin_unlock(&rxpeerni->lpni_lock); lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt); } else if (!list_empty(&lp->lp_rtrq)) { int msg2_cpt; @@ -1282,7 +1280,6 @@ routing_off: list_del(&msg2->msg_list); msg2_cpt = msg2->msg_rx_cpt; spin_unlock(&lp->lp_lock); - spin_unlock(&rxpeerni->lpni_lock); /* * messages on the lp_rtrq can be from any NID in * the peer, which means they might have different @@ -1300,7 +1297,6 @@ routing_off: } } else { spin_unlock(&lp->lp_lock); - spin_unlock(&rxpeerni->lpni_lock); } } if (rxni != NULL) { -- 1.8.3.1