We fixed a race for the case that LNet is shutdown while the second
thread is in progress of creating a peer, but there is another race
if LNet is shutdown _before_ lnet_nid2peer_locked is called by the
second thread, for example: called from lnet_parse() by LND thread,
in this case, we should return error w/o trying to create peer,
otherwise the shuttingdown thread could think peer-table is empty
and move to the next step with leaving a peer on deathrow list.
This patch also passed error code into lnet_finalize() when it's
called from lnet_drop_delayed_msg_list, otherwise lnet_finalize()
will treat the message as a complete message and hit assertion
because the message is actually incomplet.
We also did some coding style cleanup in this patch.
Signed-off-by: Liang Zhen <liang@whamcloud.com>
Change-Id: I445298d639173840687412b11da41006ddc20c10
Reviewed-on: http://review.whamcloud.com/3369
Tested-by: Hudson
Reviewed-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-by: Doug Oucharek <doug@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
LASSERT(src_nid != LNET_NID_ANY);
lnet_msg_commit(msg, cpt);
LASSERT(src_nid != LNET_NID_ANY);
lnet_msg_commit(msg, cpt);
- if (!msg->msg_routing)
- msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
+ if (!msg->msg_routing)
+ msg->msg_hdr.src_nid = cpu_to_le64(src_nid);
- if (src_ni == the_lnet.ln_loni) {
- /* No send credit hassles with LOLND */
+ if (src_ni == the_lnet.ln_loni) {
+ /* No send credit hassles with LOLND */
lnet_net_unlock(cpt);
lnet_ni_send(src_ni, msg);
lnet_net_unlock(cpt);
lnet_ni_send(src_ni, msg);
lnet_build_msg_event(msg, LNET_EVENT_PUT);
lnet_build_msg_event(msg, LNET_EVENT_PUT);
- /* Must I ACK? If so I'll grab the ack_wmd out of the header and put
- * it back into the ACK during lnet_finalize() */
+ /* Must I ACK? If so I'll grab the ack_wmd out of the header and put
+ * it back into the ACK during lnet_finalize() */
msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
(msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
(msg->msg_md->md_options & LNET_MD_ACK_DISABLE) == 0);
rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
if (rc < 0) {
/* didn't get as far as lnet_ni_send() */
rc = lnet_send(ni->ni_nid, msg, LNET_NID_ANY);
if (rc < 0) {
/* didn't get as far as lnet_ni_send() */
- CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
+ CERROR("%s: Unable to send REPLY for GET from %s: %d\n",
libcfs_nid2str(ni->ni_nid),
libcfs_id2str(info.mi_id), rc);
libcfs_nid2str(ni->ni_nid),
libcfs_id2str(info.mi_id), rc);
lnet_drop_message(msg->msg_rxpeer->lp_ni,
msg->msg_rxpeer->lp_cpt,
msg->msg_private, msg->msg_len);
lnet_drop_message(msg->msg_rxpeer->lp_ni,
msg->msg_rxpeer->lp_cpt,
msg->msg_private, msg->msg_len);
- /* NB: message will not generate event because w/o attached MD,
- * so we just use 0 as the third parameter */
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, 0);
+ /*
+ * NB: message will not generate event because w/o attached MD,
+ * but we still should give error code so lnet_msg_decommit()
+ * can skip counters operations and other checks.
+ */
+ lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
cfs_list_t *peers;
lnet_peer_t *lp;
cfs_list_t *peers;
lnet_peer_t *lp;
- if (the_lnet.ln_shutdown)
- return NULL;
+ LASSERT(!the_lnet.ln_shutdown);
peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
cfs_list_for_each_entry(lp, peers, lp_hashlist) {
peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
cfs_list_for_each_entry(lp, peers, lp_hashlist) {
+ *lpp = NULL;
+ if (the_lnet.ln_shutdown) /* it's shutting down */
+ return -ESHUTDOWN;
+
/* cpt can be LNET_LOCK_EX if it's called from router functions */
cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
/* cpt can be LNET_LOCK_EX if it's called from router functions */
cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
cfs_list_del(&lp->lp_hashlist);
}
cfs_list_del(&lp->lp_hashlist);
}
/*
* take extra refcount in case another thread has shutdown LNet
* and destroyed locks and peer-table before I finish the allocation
/*
* take extra refcount in case another thread has shutdown LNet
* and destroyed locks and peer-table before I finish the allocation
+ if (the_lnet.ln_shutdown) {
+ lnet_net_unlock(cpt);
+ return -ESHUTDOWN;
+ }
+
lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
if (lp == NULL) {
/* nid not found */
lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
if (lp == NULL) {
/* nid not found */