From: Amir Shehata Date: Wed, 24 May 2017 00:15:49 +0000 (-0700) Subject: LU-9549 lnet: prevent assert on ln_state X-Git-Tag: 2.9.59~55 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=43f0058037a376c0d5a4f0b88ac4b095dee84e19;hp=01d7ddd0edc1517b05136cb36314d7a39dbfeff3;p=fs%2Flustre-release.git LU-9549 lnet: prevent assert on ln_state lnet_peer_primary_nid() is called from lnet_parse(). It checks ln_state outside the net lock, causing a race condition during shutdown where the code expects the state to be running, but it's stopping or shutdown. Fixed the issue by renaming lnet_peer_primary_nid() to lnet_peer_primary_nid_locked(). This function is now called when lnet_net_lock is held in lnet_parse(). In lnet_create_reply_msg() we already have access to the msg_txpeer, so we look up the primary_nid directly. Signed-off-by: Amir Shehata Change-Id: I0518cdbec95b38bd8690517320b601676ae259f0 Reviewed-on: https://review.whamcloud.com/27262 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: Sonia Sharma Reviewed-by: Olaf Weber Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 37a0b11..ee75b19 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -805,7 +805,7 @@ struct lnet_peer_ni *lnet_nid2peerni_locked(lnet_nid_t nid, int cpt); struct lnet_peer_ni *lnet_nid2peerni_ex(lnet_nid_t nid, int cpt); struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid); void lnet_peer_net_added(struct lnet_net *net); -lnet_nid_t lnet_peer_primary_nid(lnet_nid_t nid); +lnet_nid_t lnet_peer_primary_nid_locked(lnet_nid_t nid); void lnet_peer_tables_cleanup(struct lnet_net *net); void lnet_peer_uninit(void); int lnet_peer_tables_create(void); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 9dc6565..b05df55 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2524,8 +2524,6 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, 
msg->msg_hdr.dest_pid = dest_pid; msg->msg_hdr.payload_length = payload_length; } - /* Multi-Rail: Primary NID of source. */ - msg->msg_initiator = lnet_peer_primary_nid(src_nid); lnet_net_lock(cpt); lpni = lnet_nid2peerni_locked(from_nid, cpt); @@ -2544,6 +2542,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, msg->msg_rxpeer = lpni; msg->msg_rxni = ni; lnet_ni_addref_locked(ni, cpt); + /* Multi-Rail: Primary NID of source. */ + msg->msg_initiator = lnet_peer_primary_nid_locked(src_nid); if (lnet_isrouter(msg->msg_rxpeer)) { lnet_peer_set_alive(msg->msg_rxpeer); @@ -2845,8 +2845,7 @@ lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg) libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); /* setup information for lnet_build_msg_event */ - msg->msg_initiator = lnet_peer_primary_nid(peer_id.nid); - /* Cheaper: msg->msg_initiator = getmsg->msg_txpeer->lp_nid; */ + msg->msg_initiator = getmsg->msg_txpeer->lpni_peer_net->lpn_peer->lp_primary_nid; msg->msg_from = peer_id.nid; msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ msg->msg_hdr.src_nid = peer_id.nid; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 47e5ce2..612af87 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -587,19 +587,16 @@ lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni, struct lnet_ni *ni) } lnet_nid_t -lnet_peer_primary_nid(lnet_nid_t nid) +lnet_peer_primary_nid_locked(lnet_nid_t nid) { struct lnet_peer_ni *lpni; lnet_nid_t primary_nid = nid; - int cpt; - cpt = lnet_net_lock_current(); lpni = lnet_find_peer_ni_locked(nid); if (lpni) { primary_nid = lpni->lpni_peer_net->lpn_peer->lp_primary_nid; lnet_peer_ni_decref_locked(lpni); } - lnet_net_unlock(cpt); return primary_nid; }