A race exists between a thread handling a discovery reply, and
another thread in the lnet_finalize() call path, or in any of
the code paths that result in lnet_post_routed_recv_locked().
The discovery reply handler takes the lp_lock, and while holding
that lock, tries to acquire the lpni_lock for each lpni associated
with the lnet_peer object.
In lnet_return_rx_credits_locked() (lnet_finalize() code path) and
lnet_post_routed_recv_locked() (called via a couple different code
paths) the lpni_lock is taken, and then the lp_lock is taken for the
associated lnet_peer object.
Thread A: spin_lock(lp_lock)
Thread B: spin_lock(lpni_lock)
Thread B: spin_lock(lp_lock)
Thread A: spin_lock(lpni_lock)
This results in deadlock. The lp_lock and lpni_lock do not need to be
held at the same time in lnet_return_rx_credits_locked() nor in
lnet_post_routed_recv_locked().
Cray-bug-id: LUS-8607
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Ie4e9a172b4705d9f5723a6da1ff251b380ad47ac
Reviewed-on: https://review.whamcloud.com/37937
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
if (!msg->msg_peerrtrcredit) {
/* lpni_lock protects the credit manipulation */
spin_lock(&lpni->lpni_lock);
if (!msg->msg_peerrtrcredit) {
/* lpni_lock protects the credit manipulation */
spin_lock(&lpni->lpni_lock);
- /* lp_lock protects the lp_rtrq */
- spin_lock(&lp->lp_lock);
msg->msg_peerrtrcredit = 1;
lpni->lpni_rtrcredits--;
msg->msg_peerrtrcredit = 1;
lpni->lpni_rtrcredits--;
lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
if (lpni->lpni_rtrcredits < 0) {
lpni->lpni_minrtrcredits = lpni->lpni_rtrcredits;
if (lpni->lpni_rtrcredits < 0) {
+ spin_unlock(&lpni->lpni_lock);
/* must have checked eager_recv before here */
LASSERT(msg->msg_rx_ready_delay);
msg->msg_rx_delayed = 1;
/* must have checked eager_recv before here */
LASSERT(msg->msg_rx_ready_delay);
msg->msg_rx_delayed = 1;
+ /* lp_lock protects the lp_rtrq */
+ spin_lock(&lp->lp_lock);
list_add_tail(&msg->msg_list, &lp->lp_rtrq);
spin_unlock(&lp->lp_lock);
list_add_tail(&msg->msg_list, &lp->lp_rtrq);
spin_unlock(&lp->lp_lock);
- spin_unlock(&lpni->lpni_lock);
return LNET_CREDIT_WAIT;
}
return LNET_CREDIT_WAIT;
}
- spin_unlock(&lp->lp_lock);
spin_unlock(&lpni->lpni_lock);
}
spin_unlock(&lpni->lpni_lock);
}
LASSERT(rxpeerni->lpni_peer_net);
LASSERT(rxpeerni->lpni_peer_net->lpn_peer);
LASSERT(rxpeerni->lpni_peer_net);
LASSERT(rxpeerni->lpni_peer_net->lpn_peer);
- lp = rxpeerni->lpni_peer_net->lpn_peer;
-
/* give back peer router credits */
msg->msg_peerrtrcredit = 0;
spin_lock(&rxpeerni->lpni_lock);
/* give back peer router credits */
msg->msg_peerrtrcredit = 0;
spin_lock(&rxpeerni->lpni_lock);
- spin_lock(&lp->lp_lock);
-
rxpeerni->lpni_rtrcredits++;
rxpeerni->lpni_rtrcredits++;
+ spin_unlock(&rxpeerni->lpni_lock);
+
+ lp = rxpeerni->lpni_peer_net->lpn_peer;
+ spin_lock(&lp->lp_lock);
/* drop all messages which are queued to be routed on that
* peer. */
/* drop all messages which are queued to be routed on that
* peer. */
LIST_HEAD(drop);
list_splice_init(&lp->lp_rtrq, &drop);
spin_unlock(&lp->lp_lock);
LIST_HEAD(drop);
list_splice_init(&lp->lp_rtrq, &drop);
spin_unlock(&lp->lp_lock);
- spin_unlock(&rxpeerni->lpni_lock);
lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt);
} else if (!list_empty(&lp->lp_rtrq)) {
int msg2_cpt;
lnet_drop_routed_msgs_locked(&drop, msg->msg_rx_cpt);
} else if (!list_empty(&lp->lp_rtrq)) {
int msg2_cpt;
list_del(&msg2->msg_list);
msg2_cpt = msg2->msg_rx_cpt;
spin_unlock(&lp->lp_lock);
list_del(&msg2->msg_list);
msg2_cpt = msg2->msg_rx_cpt;
spin_unlock(&lp->lp_lock);
- spin_unlock(&rxpeerni->lpni_lock);
/*
* messages on the lp_rtrq can be from any NID in
* the peer, which means they might have different
/*
* messages on the lp_rtrq can be from any NID in
* the peer, which means they might have different
}
} else {
spin_unlock(&lp->lp_lock);
}
} else {
spin_unlock(&lp->lp_lock);
- spin_unlock(&rxpeerni->lpni_lock);