spin_lock_irqsave(&peer->peer_lock, flags);
peer->peer_outstanding_credits++;
- LASSERT (peer->peer_outstanding_credits <=
+ LASSERT (peer->peer_outstanding_credits +
+ peer->peer_sent_credits <=
*kptllnd_tunables.kptl_peercredits);
- CDEBUG(D_NETTRACE, "%s[%d/%d]: rx %p done\n",
- libcfs_id2str(peer->peer_id),
- peer->peer_credits, peer->peer_outstanding_credits, rx);
+ CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n",
+ libcfs_id2str(peer->peer_id), peer->peer_credits,
+ peer->peer_outstanding_credits, peer->peer_sent_credits,
+ rx);
spin_unlock_irqrestore(&peer->peer_lock, flags);
unlinked = ev->type == PTL_EVENT_UNLINK;
#endif
- CDEBUG(D_NET, "RXB Callback %s(%d) rxb=%p id=%s unlink=%d rc %d\n",
- kptllnd_evtype2str(ev->type), ev->type, rxb,
+ CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n",
kptllnd_ptlid2str(ev->initiator),
- unlinked, ev->ni_fail_type);
+ kptllnd_evtype2str(ev->type), ev->type, rxb,
+ kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type,
+ unlinked);
LASSERT (!rxb->rxb_idle);
LASSERT (ev->md.start == rxb->rxb_buffer);
ev->match_bits == LNET_MSG_MATCHBITS);
if (ev->ni_fail_type != PTL_NI_OK)
- CERROR("event type %d, status %d from %s\n",
- ev->type, ev->ni_fail_type,
- kptllnd_ptlid2str(ev->initiator));
+ /* Event completed with a Portals failure: report who, what and why */
+ CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n",
+ kptllnd_ptlid2str(ev->initiator),
+ kptllnd_evtype2str(ev->type), ev->type, rxb,
+ kptllnd_errtype2str(ev->ni_fail_type),
+ ev->ni_fail_type, unlinked);
if (ev->type == PTL_EVENT_PUT_END &&
ev->ni_fail_type == PTL_NI_OK &&
}
rx->rx_initiator = ev->initiator;
+ rx->rx_treceived = jiffies;
#ifdef CRAY_XT3
rx->rx_uid = ev->uid;
#endif
kptl_msg_t *msg = rx->rx_msg;
kptl_peer_t *peer;
int rc;
- int credits;
unsigned long flags;
lnet_process_id_t srcid;
srcid.nid = msg->ptlm_srcnid;
srcid.pid = msg->ptlm_srcpid;
- CDEBUG(D_NETTRACE, "%s: RX %s c %d %p\n", libcfs_id2str(srcid),
- kptllnd_msgtype2str(msg->ptlm_type), msg->ptlm_credits, rx);
+ CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks\n",
+ libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
+ msg->ptlm_credits, rx, rx->rx_rxb, jiffies - rx->rx_treceived);
if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) {
CERROR("Bad source id %s from %s\n",
if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
peer = kptllnd_peer_handle_hello(rx->rx_initiator, msg);
- if (peer == NULL) {
- CWARN("No peer for %s\n",
- kptllnd_ptlid2str(rx->rx_initiator));
+ if (peer == NULL)
goto rx_done;
- }
} else {
peer = kptllnd_id2peer(srcid);
if (peer == NULL) {
goto rx_done;
}
- /* Ignore anything else while I'm waiting for HELLO */
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
+ /* Ignore anything apart from HELLO while I'm waiting for it and
+ * any messages for a previous incarnation of the connection */
+ if (peer->peer_state == PEER_STATE_WAITING_HELLO ||
+ msg->ptlm_dststamp < peer->peer_myincarnation) {
kptllnd_peer_decref(peer);
goto rx_done;
}
+
+ if (msg->ptlm_srcstamp != peer->peer_incarnation) {
+ CERROR("%s: Unexpected srcstamp "LPX64" "
+ "("LPX64" expected)\n",
+ libcfs_id2str(peer->peer_id),
+ msg->ptlm_srcstamp,
+ peer->peer_incarnation);
+ rc = -EPROTO;
+ goto failed;
+ }
+
+ if (msg->ptlm_dststamp != peer->peer_myincarnation) {
+ CERROR("%s: Unexpected dststamp "LPX64" "
+ "("LPX64" expected)\n",
+ libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
+ peer->peer_myincarnation);
+ rc = -EPROTO;
+ goto failed;
+ }
}
LASSERT (msg->ptlm_srcnid == peer->peer_id.nid &&
msg->ptlm_srcpid == peer->peer_id.pid);
- if (msg->ptlm_srcstamp != peer->peer_incarnation) {
- CERROR("Stale rx from %s srcstamp "LPX64" expected "LPX64"\n",
- libcfs_id2str(peer->peer_id),
- msg->ptlm_srcstamp,
- peer->peer_incarnation);
- rc = -EPROTO;
- goto failed;
- }
+ spin_lock_irqsave(&peer->peer_lock, flags);
- if (msg->ptlm_dststamp != kptllnd_data.kptl_incarnation &&
- (msg->ptlm_type != PTLLND_MSG_TYPE_HELLO || /* HELLO sends a */
- msg->ptlm_dststamp != 0)) { /* zero dststamp */
- CERROR("Stale rx from %s dststamp "LPX64" expected "LPX64"\n",
- libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
- kptllnd_data.kptl_incarnation);
- rc = -EPROTO;
+ /* Check peer only sends when I've sent her credits */
+ if (peer->peer_sent_credits == 0) {
+ int c = peer->peer_credits;
+ int oc = peer->peer_outstanding_credits;
+ int sc = peer->peer_sent_credits;
+
+ /* Counters were copied while peer_lock was held; drop it before logging */
+ spin_unlock_irqrestore(&peer->peer_lock, flags);
+
+ /* Same [credits/outstanding+sent] order as the rx-done trace */
+ CERROR("%s: buffer overrun [%d/%d+%d]\n",
+ libcfs_id2str(peer->peer_id), c, oc, sc);
+ /* Like the other protocol errors here, set rc before bailing out */
+ rc = -EPROTO;
+ goto failed;
+ }
+ peer->peer_sent_credits--;
- if (msg->ptlm_credits != 0) {
- spin_lock_irqsave(&peer->peer_lock, flags);
+ /* No check for credit overflow - the peer may post new
+ * buffers after the startup handshake. */
+ peer->peer_credits += msg->ptlm_credits;
- if (peer->peer_credits + msg->ptlm_credits >
- *kptllnd_tunables.kptl_peercredits) {
- credits = peer->peer_credits;
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CERROR("Credit overflow from %s: %d + %d > %d\n",
- libcfs_id2str(peer->peer_id),
- credits, msg->ptlm_credits,
- *kptllnd_tunables.kptl_peercredits);
- rc = -EPROTO;
- goto failed;
- }
-
- peer->peer_credits += msg->ptlm_credits;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
+ spin_unlock_irqrestore(&peer->peer_lock, flags);
+ /* See if something can go out now that credits have come in */
+ if (msg->ptlm_credits != 0)
kptllnd_peer_check_sends(peer);
- }
/* ptllnd-level protocol correct - rx takes my ref on peer and increments
* peer_outstanding_credits when it completes */