return rc;
}
+/* NB: the caller must hold a ref on 'lp', because this function drops LNET_LOCK */
+void
+lnet_ni_peer_alive(lnet_peer_t *lp)
+{
+ cfs_time_t last_alive = 0;
+ lnet_ni_t *ni = lp->lp_ni;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+ LASSERT (ni->ni_lnd->lnd_query != NULL);
+
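+ /* lnd_query() may block inside the LND, so call it without LNET_LOCK;
+ * the caller's reference keeps 'lp' valid across the unlocked window */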
+ LNET_UNLOCK();
+ (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ LNET_LOCK();
+
+ lp->lp_last_query = cfs_time_current();
+
+ if (last_alive != 0) /* NI has updated timestamp */
+ lp->lp_last_alive = last_alive;
+}
+
+/* NB: always called with LNET_LOCK held */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ cfs_time_t deadline;
+ int alive;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+
+ /* Trust lnet_notify() if it has more recent aliveness news, but
+ * ignore the initial assumed death (see lnet_peers_start_down()).
+ */
+ if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+ cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ return 0;
+
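+ /* the peer counts as alive if it showed signs of life within the
+ * last ni_peertimeout seconds */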
+ deadline = cfs_time_add(lp->lp_last_alive,
+ cfs_time_seconds(ni->ni_peertimeout));
+ alive = cfs_time_after(deadline, now);
+
+ /* lp_alive is out of date: the NI has fresher news than the last
+ * lnet_notify(), so mark the peer alive again */
+ if (alive && !lp->lp_alive && lp->lp_timestamp != 0 &&
+ cfs_time_before(lp->lp_timestamp, lp->lp_last_alive))
+ lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+ return alive;
+}
+
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the LNET_LOCK */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ cfs_time_t now = cfs_time_current();
+
+ LASSERT (ni != NULL);
+
+ if (ni->ni_peertimeout <= 0) /* disabled */
+ return -ENODEV;
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* Peer appears dead, but we should avoid frequent NI queries (at
+ * most once per lnet_queryinterval seconds). */
+ if (lp->lp_last_query != 0) {
+ static const int lnet_queryinterval = 1;
+
+ cfs_time_t next_query =
+ cfs_time_add(lp->lp_last_query,
+ cfs_time_seconds(lnet_queryinterval));
+
+ if (cfs_time_before(now, next_query)) {
+ if (lp->lp_alive)
+ CWARN("Unexpected aliveness of peer %s: "
+ "%d < %d (%d/%d)\n",
+ libcfs_nid2str(lp->lp_nid),
+ (int)now, (int)next_query,
+ lnet_queryinterval, ni->ni_peertimeout);
+ return 0;
+ }
+ }
+
+ /* query NI for latest aliveness news */
+ lnet_ni_peer_alive(lp);
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
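+ /* the NI still reports no sign of life: record the peer as dead */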
+ lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ return 0;
+}
+
int
lnet_post_send_locked (lnet_msg_t *msg, int do_send)
{
/* lnet_send is going to LNET_UNLOCK immediately after this, so it sets
* do_send FALSE and I don't do the unlock/send/lock bit. I return
- * EAGAIN if msg blocked and 0 if sent or OK to send */
+ * EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer appears dead, and
+ * 0 if sent or OK to send */
lnet_peer_t *lp = msg->msg_txpeer;
lnet_ni_t *ni = lp->lp_ni;
LASSERT (!do_send || msg->msg_delayed);
LASSERT (!msg->msg_receiving);
+ /* NB 'lp' is always the next hop */
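+ /* skip the aliveness check for messages to userspace peers
+ * (LNET_PID_USERFLAG); otherwise drop the message early if the
+ * next hop appears dead */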
+ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
+ lnet_peer_alive_locked(lp) == 0) {
+ LNET_UNLOCK();
+
+ CDEBUG(D_NETERROR, "Dropping message for %s: peer not alive\n",
+ libcfs_id2str(msg->msg_target));
+ if (do_send)
+ lnet_finalize(ni, msg, -EHOSTUNREACH);
+
+ LNET_LOCK();
+ return EHOSTUNREACH;
+ }
+
if (!msg->msg_peertxcredit) {
LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq));
{
/* lnet_parse is going to LNET_UNLOCK immediately after this, so it
* sets do_recv FALSE and I don't do the unlock/send/lock bit. I
- * return EAGAIN if msg blocked and 0 if sent or OK to send */
+ * return EAGAIN if msg blocked and 0 if received or OK to receive */
lnet_peer_t *lp = msg->msg_rxpeer;
lnet_rtrbufpool_t *rbp;
lnet_rtrbuf_t *rb;
}
LASSERT (lp->lp_ni == src_ni);
} else {
+#ifndef __KERNEL__
+ LNET_UNLOCK();
+
+ /* NB
+ * - once the application finishes its computation, this updates
+ * router state before it waits on pending IO in LNetEQPoll
+ * - recursion breaker: the router checker never sends messages
+ * to remote networks, so it cannot re-enter here */
+ if (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING)
+ lnet_router_checker();
+
+ LNET_LOCK();
+#endif
/* sending to a remote network */
rnet = lnet_find_net_locked(LNET_NIDNET(dst_nid));
if (rnet == NULL) {
lp2 = route->lr_gateway;
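+ /* ignore this gateway if its NI on the remote net is known to be down */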
if (lp2->lp_alive &&
+ lnet_router_down_ni(lp2, rnet->lrn_net) <= 0 &&
(src_ni == NULL || lp2->lp_ni == src_ni) &&
(lp == NULL || lnet_compare_routers(lp2, lp) > 0)) {
best_route = route;
rc = lnet_post_send_locked(msg, 0);
LNET_UNLOCK();
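+ /* a positive EHOSTUNREACH means lnet_post_send_locked() found the
+ * next hop dead; hand it back to the caller as a negative errno */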
+ if (rc == EHOSTUNREACH)
+ return -EHOSTUNREACH;
+
if (rc == 0)
lnet_ni_send(src_ni, msg);
{
int rc;
int index;
+ __u64 version;
lnet_hdr_t *hdr = &msg->msg_hdr;
unsigned int rlength = hdr->payload_length;
unsigned int mlength = 0;
unsigned int offset = 0;
lnet_process_id_t src= {0};
lnet_libmd_t *md;
+ lnet_portal_t *ptl;
src.nid = hdr->src_nid;
src.pid = hdr->src_pid;
hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
index = hdr->msg.put.ptl_index;
+ ptl = &the_lnet.ln_portals[index];
LNET_LOCK();
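+ /* restart point: the code below snapshots ptl_ml_version and jumps
+ * back here if the portal's match-list changed in the meantime */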
+ again:
rc = lnet_match_md(index, LNET_MD_OP_PUT, src,
rlength, hdr->msg.put.offset,
hdr->msg.put.match_bits, msg,
case LNET_MATCHMD_OK:
LNET_UNLOCK();
- lnet_recv_put(md, msg, 0, offset, mlength);
+ lnet_recv_put(md, msg, msg->msg_delayed, offset, mlength);
return 0;
case LNET_MATCHMD_NONE:
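+ /* no match on a lazy portal: queue the PUT so it can be matched
+ * when an MD is attached later */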
- rc = lnet_eager_recv_locked(msg);
- if (rc == 0 && !the_lnet.ln_shutdown) {
- list_add_tail(&msg->msg_list,
- &the_lnet.ln_portals[index].ptl_msgq);
- the_lnet.ln_portals[index].ptl_msgq_version++;
+ version = ptl->ptl_ml_version;
+ rc = 0;
+ if (!msg->msg_delayed)
+ rc = lnet_eager_recv_locked(msg);
+
+ if (rc == 0 &&
+ !the_lnet.ln_shutdown &&
+ ((ptl->ptl_options & LNET_PTL_LAZY) != 0)) {
+ if (version != ptl->ptl_ml_version)
+ goto again;
+
+ list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
+ ptl->ptl_msgq_version++;
+ LNET_UNLOCK();
CDEBUG(D_NET, "Delaying PUT from %s portal %d match "
LPU64" offset %d length %d: no match \n",
libcfs_id2str(src), index,
hdr->msg.put.match_bits,
hdr->msg.put.offset, rlength);
-
- LNET_UNLOCK();
return 0;
}
/* fall through */
}
-
int
lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
void *private, int rdma_req)
int rc = 0;
int for_me;
lnet_msg_t *msg;
+ lnet_pid_t dest_pid;
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
__u32 payload_length;
type = le32_to_cpu(hdr->type);
src_nid = le64_to_cpu(hdr->src_nid);
dest_nid = le64_to_cpu(hdr->dest_nid);
+ dest_pid = le32_to_cpu(hdr->dest_pid);
payload_length = le32_to_cpu(hdr->payload_length);
for_me = (ni->ni_nid == dest_nid);
return -EPROTO;
}
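+ /* when acting as a router, incoming traffic proves this NI works:
+ * record the time and flip its published status back to UP */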
+ if (the_lnet.ln_routing) {
+ cfs_time_t now = cfs_time_current();
+
+ LNET_LOCK();
+
+ ni->ni_last_alive = now;
+ if (ni->ni_status != NULL &&
+ ni->ni_status->ns_status == LNET_NI_STATUS_DOWN)
+ ni->ni_status->ns_status = LNET_NI_STATUS_UP;
+
+ LNET_UNLOCK();
+ }
+
/* Regard a bad destination NID as a protocol error. Senders should
* know what they're doing; if they don't they're misconfigured, buggy
* or malicious so we chop them off at the knees :) */
LASSERT (for_me);
#else
if (!for_me) {
- msg->msg_target.pid = le32_to_cpu(hdr->dest_pid);
+ msg->msg_target.pid = dest_pid;
msg->msg_target.nid = dest_nid;
msg->msg_routing = 1;
msg->msg_offset = 0;
goto free_drop;
}
}
-
lnet_commit_routedmsg(msg);
rc = lnet_post_routed_recv_locked(msg, 0);
LNET_UNLOCK();
msg->msg_hdr.src_nid = src_nid;
msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid);
msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid);
+ msg->msg_hdr.dest_pid = dest_pid;
msg->msg_hdr.payload_length = payload_length;
msg->msg_ev.sender = from_nid;