return LNET_MATCHMD_DROP;
}
- list_for_each_entry_safe (me, tmp, &ptl->ptl_ml, me_list) {
+ cfs_list_for_each_entry_safe_typed (me, tmp, &ptl->ptl_ml,
+ lnet_me_t, me_list) {
md = me->me_md;
/* ME attached but MD not attached yet */
niov = 1;
for (;;) {
LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
+ LASSERT ((int)niov <= dst_niov);
frag_len = src->iov_len - offset;
dst->iov_base = ((char *)src->iov_base) + offset;
niov = 1;
for (;;) {
LASSERT (src_niov > 0);
- LASSERT (niov <= dst_niov);
+ LASSERT ((int)niov <= dst_niov);
frag_len = src->kiov_len - offset;
dst->kiov_page = src->kiov_page;
return rc;
}
+/* NB: caller shall hold a ref on 'lp' as I'd drop LNET_LOCK */
+/* Ask 'lp's LND for its latest aliveness timestamp and cache it in
+ * lp->lp_last_alive; lp->lp_last_query is always stamped so callers can
+ * rate-limit queries. LNET_LOCK is dropped around the lnd_query() call. */
+void
+lnet_ni_peer_alive(lnet_peer_t *lp)
+{
+ time_t last_alive = 0;
+ lnet_ni_t *ni = lp->lp_ni;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+ LASSERT (ni->ni_lnd->lnd_query != NULL);
+
+ /* lnd_query() may block/communicate, so it must run unlocked */
+ LNET_UNLOCK();
+ (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ LNET_LOCK();
+
+ /* record query time even if the LND reported nothing */
+ lp->lp_last_query = cfs_time_current_sec();
+
+ if (last_alive != 0) /* NI has updated timestamp */
+ lp->lp_last_alive = last_alive;
+ return;
+}
+
+/* NB: always called with LNET_LOCK held */
+/* Judge peer liveness from cached state only (no LND query): returns 1
+ * if 'lp' counts as alive at time 'now', 0 otherwise. May refresh a
+ * stale lp_alive flag via lnet_notify_locked(). */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, time_t now)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ time_t deadline;
+ int alive;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+
+ /* a death notification (lp_timestamp) at least as recent as the
+ * last aliveness report wins: treat the peer as dead */
+ if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+ cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ return 0;
+
+ /* alive while within ni_peertimeout of the last aliveness report */
+ deadline = cfs_time_add(lp->lp_last_alive, ni->ni_peertimeout);
+ alive = cfs_time_after(deadline, now);
+ if (alive && !lp->lp_alive) /* update obsolete lp_alive */
+ lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+ return alive;
+}
+
+/* don't query LND about aliveness of a dead peer more frequently than: */
+static int lnet_queryinterval = 1; /* 1 second */
+
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the LNET_LOCK */
+/* Full liveness check for 'lp': first consults cached state, then (at
+ * most once per lnet_queryinterval) asks the LND for fresh news.
+ * Returns -ENODEV when aliveness detection is disabled on the NI. */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ time_t now = cfs_time_current_sec();
+
+ LASSERT (ni != NULL);
+
+ if (ni->ni_peertimeout <= 0) /* disabled */
+ return -ENODEV;
+
+ /* cheap path: cached state already says alive */
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* peer appears dead, should we query right now? */
+ if (lp->lp_last_query != 0) {
+ time_t deadline =
+ cfs_time_add(lp->lp_last_query,
+ lnet_queryinterval);
+
+ if (cfs_time_before(now, deadline)) {
+ /* queried too recently; report dead without asking
+ * the LND again. lp_alive set here would be
+ * inconsistent with the cached-state check above */
+ if (lp->lp_alive)
+ CWARN("Unexpected aliveness of peer %s: "
+ "%d < %d (%d/%d)\n",
+ libcfs_nid2str(lp->lp_nid),
+ (int)now, (int)deadline,
+ lnet_queryinterval, ni->ni_peertimeout);
+ return 0;
+ }
+ }
+
+ /* query LND for latest aliveness news */
+ lnet_ni_peer_alive(lp);
+
+ /* NB 'now' was sampled before lnet_ni_peer_alive() dropped the
+ * lock, so this re-check uses the pre-query timestamp */
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* still dead after a fresh query: broadcast the verdict */
+ lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ return 0;
+}
+
int
lnet_post_send_locked (lnet_msg_t *msg, int do_send)
{
/* lnet_send is going to LNET_UNLOCK immediately after this, so it sets
* do_send FALSE and I don't do the unlock/send/lock bit. I return
- * EAGAIN if msg blocked and 0 if sent or OK to send */
+ * EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer appears dead, and
+ * 0 if sent or OK to send */
lnet_peer_t *lp = msg->msg_txpeer;
lnet_ni_t *ni = lp->lp_ni;
LASSERT (!do_send || msg->msg_delayed);
LASSERT (!msg->msg_receiving);
+ /* NB 'lp' is always the next hop */
+ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
+ lnet_peer_alive_locked(lp) == 0) {
+ LNET_UNLOCK();
+
+ CDEBUG(D_NETERROR, "Dropping message for %s: peer not alive\n",
+ libcfs_id2str(msg->msg_target));
+ if (do_send)
+ lnet_finalize(ni, msg, -EHOSTUNREACH);
+
+ LNET_LOCK();
+ return EHOSTUNREACH;
+ }
+
if (!msg->msg_peertxcredit) {
LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq));
lnet_rtrbufpool_t *rbp = &the_lnet.ln_rtrpools[0];
LASSERT (msg->msg_len <= LNET_MTU);
- while (msg->msg_len > rbp->rbp_npages * CFS_PAGE_SIZE) {
+ while (msg->msg_len > (unsigned int)rbp->rbp_npages * CFS_PAGE_SIZE) {
rbp++;
LASSERT (rbp < &the_lnet.ln_rtrpools[LNET_NRBPOOLS]);
}
{
/* lnet_parse is going to LNET_UNLOCK immediately after this, so it
* sets do_recv FALSE and I don't do the unlock/send/lock bit. I
- * return EAGAIN if msg blocked and 0 if sent or OK to send */
+ * return EAGAIN if msg blocked and 0 if received or OK to receive */
lnet_peer_t *lp = msg->msg_rxpeer;
lnet_rtrbufpool_t *rbp;
lnet_rtrbuf_t *rb;
}
LASSERT (lp->lp_ni == src_ni);
} else {
+#ifndef __KERNEL__
+ LNET_UNLOCK();
+
+ /* NB
+ * - once application finishes computation, check here to update
+ * router states before it waits for pending IO in LNetEQPoll
+ * - recursion breaker: router checker sends no message
+ * to remote networks */
+ if (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING)
+ lnet_router_checker();
+
+ LNET_LOCK();
+#endif
/* sending to a remote network */
rnet = lnet_find_net_locked(LNET_NIDNET(dst_nid));
if (rnet == NULL) {
rc = lnet_post_send_locked(msg, 0);
LNET_UNLOCK();
+ if (rc == EHOSTUNREACH)
+ return -EHOSTUNREACH;
+
if (rc == 0)
lnet_ni_send(src_ni, msg);
static void
lnet_drop_delayed_put(lnet_msg_t *msg, char *reason)
{
+ lnet_process_id_t id = {0};
+
+ id.nid = msg->msg_hdr.src_nid;
+ id.pid = msg->msg_hdr.src_pid;
+
LASSERT (msg->msg_md == NULL);
LASSERT (msg->msg_delayed);
LASSERT (msg->msg_rxpeer != NULL);
CWARN("Dropping delayed PUT from %s portal %d match "LPU64
" offset %d length %d: %s\n",
- libcfs_id2str((lnet_process_id_t){
- .nid = msg->msg_hdr.src_nid,
- .pid = msg->msg_hdr.src_pid}),
+ libcfs_id2str(id),
msg->msg_hdr.msg.put.ptl_index,
msg->msg_hdr.msg.put.match_bits,
msg->msg_hdr.msg.put.offset,
lnet_me_t *me = md->md_me;
lnet_portal_t *ptl = &the_lnet.ln_portals[me->me_portal];
- LASSERT (me->me_portal < the_lnet.ln_nportals);
+ LASSERT (me->me_portal < (unsigned int)the_lnet.ln_nportals);
if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) {
LASSERT (list_empty(&ptl->ptl_msgq));
{
int rc;
int index;
+ __u64 version;
lnet_hdr_t *hdr = &msg->msg_hdr;
unsigned int rlength = hdr->payload_length;
unsigned int mlength = 0;
unsigned int offset = 0;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
+ lnet_process_id_t src= {0};
lnet_libmd_t *md;
+ lnet_portal_t *ptl;
+
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
/* Convert put fields to host byte order */
hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
index = hdr->msg.put.ptl_index;
+ ptl = &the_lnet.ln_portals[index];
LNET_LOCK();
+ again:
rc = lnet_match_md(index, LNET_MD_OP_PUT, src,
rlength, hdr->msg.put.offset,
hdr->msg.put.match_bits, msg,
case LNET_MATCHMD_OK:
LNET_UNLOCK();
- lnet_recv_put(md, msg, 0, offset, mlength);
+ lnet_recv_put(md, msg, msg->msg_delayed, offset, mlength);
return 0;
case LNET_MATCHMD_NONE:
- rc = lnet_eager_recv_locked(msg);
- if (rc == 0 && !the_lnet.ln_shutdown) {
- list_add_tail(&msg->msg_list,
- &the_lnet.ln_portals[index].ptl_msgq);
+ version = ptl->ptl_ml_version;
+
+ rc = 0;
+ if (!msg->msg_delayed)
+ rc = lnet_eager_recv_locked(msg);
- the_lnet.ln_portals[index].ptl_msgq_version++;
+ if (rc == 0 &&
+ !the_lnet.ln_shutdown &&
+ ((ptl->ptl_options & LNET_PTL_LAZY) != 0)) {
+ if (version != ptl->ptl_ml_version)
+ goto again;
+
+ list_add_tail(&msg->msg_list, &ptl->ptl_msgq);
+ ptl->ptl_msgq_version++;
+ LNET_UNLOCK();
CDEBUG(D_NET, "Delaying PUT from %s portal %d match "
LPU64" offset %d length %d: no match \n",
libcfs_id2str(src), index,
hdr->msg.put.match_bits,
hdr->msg.put.offset, rlength);
-
- LNET_UNLOCK();
return 0;
}
/* fall through */
lnet_hdr_t *hdr = &msg->msg_hdr;
unsigned int mlength = 0;
unsigned int offset = 0;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
+ lnet_process_id_t src = {0};
lnet_handle_wire_t reply_wmd;
lnet_libmd_t *md;
int rc;
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
/* Convert get fields to host byte order */
hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits);
hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index);
LNET_UNLOCK();
+ msg->msg_ev.type = LNET_EVENT_GET;
+ msg->msg_ev.target.pid = hdr->dest_pid;
+ msg->msg_ev.target.nid = hdr->dest_nid;
+ msg->msg_ev.hdr_data = 0;
+
reply_wmd = hdr->msg.get.return_wmd;
lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength);
msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
- msg->msg_ev.type = LNET_EVENT_GET;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.hdr_data = 0;
-
if (rdma_get) {
/* The LND completes the REPLY from her recv procedure */
lnet_ni_recv(ni, msg->msg_private, msg, 0,
{
void *private = msg->msg_private;
lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
+ lnet_process_id_t src = {0};
lnet_libmd_t *md;
int rlength;
int mlength;
LNET_LOCK();
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
/* NB handles only looked up by creator (no flips) */
md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
LASSERT (md->md_offset == 0);
rlength = hdr->payload_length;
- mlength = MIN(rlength, md->md_length);
+ mlength = MIN(rlength, (int)md->md_length);
if (mlength < rlength &&
(md->md_options & LNET_MD_TRUNCATE) == 0) {
lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
{
lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_libmd_t *md;
+ lnet_process_id_t src = {0};
+ lnet_libmd_t *md;
+
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
/* Convert ack fields to host byte order */
hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
void
lnet_print_hdr(lnet_hdr_t * hdr)
{
- lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
- /* .pid = */ hdr->src_pid};
- lnet_process_id_t dst = {/* .nid = */ hdr->dest_nid,
- /* .pid = */ hdr->dest_pid};
+ lnet_process_id_t src = {0};
+ lnet_process_id_t dst = {0};
char *type_str = lnet_msgtyp2str (hdr->type);
+ src.nid = hdr->src_nid;
+ src.pid = hdr->src_pid;
+
+ dst.nid = hdr->dest_nid;
+ dst.pid = hdr->dest_pid;
+
CWARN("P3 Header at %p of type %s\n", hdr, type_str);
CWARN(" From %s\n", libcfs_id2str(src));
CWARN(" To %s\n", libcfs_id2str(dst));
int rc = 0;
int for_me;
lnet_msg_t *msg;
+ lnet_pid_t dest_pid;
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
__u32 payload_length;
type = le32_to_cpu(hdr->type);
src_nid = le64_to_cpu(hdr->src_nid);
dest_nid = le64_to_cpu(hdr->dest_nid);
+ dest_pid = le32_to_cpu(hdr->dest_pid);
payload_length = le32_to_cpu(hdr->payload_length);
for_me = (ni->ni_nid == dest_nid);
case LNET_MSG_PUT:
case LNET_MSG_REPLY:
- if (payload_length > (for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
+ if (payload_length > (__u32)(for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) {
CERROR("%s, src %s: bad %s payload %d "
"(%d max expected)\n",
libcfs_nid2str(from_nid),
LASSERT (for_me);
#else
if (!for_me) {
- msg->msg_target.pid = le32_to_cpu(hdr->dest_pid);
+ msg->msg_target.pid = dest_pid;
msg->msg_target.nid = dest_nid;
msg->msg_routing = 1;
msg->msg_offset = 0;
goto free_drop;
}
}
-
lnet_commit_routedmsg(msg);
rc = lnet_post_routed_recv_locked(msg, 0);
LNET_UNLOCK();
msg->msg_hdr.src_nid = src_nid;
msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid);
msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid);
+ msg->msg_hdr.dest_pid = dest_pid;
msg->msg_hdr.payload_length = payload_length;
msg->msg_ev.sender = from_nid;
msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
md->md_lh.lh_cookie;
} else {
- msg->msg_hdr.msg.put.ack_wmd = LNET_WIRE_HANDLE_NONE;
+ msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie =
+ LNET_WIRE_HANDLE_COOKIE_NONE;
+ msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie =
+ LNET_WIRE_HANDLE_COOKIE_NONE;
}
msg->msg_ev.type = LNET_EVENT_SEND;