Whamcloud - gitweb
git://git.whamcloud.com
/
fs
/
lustre-release.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
| inline |
side by side
Severity : major
[fs/lustre-release.git]
/
lnet
/
ulnds
/
ptllnd
/
ptllnd_cb.c
diff --git
a/lnet/ulnds/ptllnd/ptllnd_cb.c
b/lnet/ulnds/ptllnd/ptllnd_cb.c
index
bc62e80
..
96b0345
100644
(file)
--- a/
lnet/ulnds/ptllnd/ptllnd_cb.c
+++ b/
lnet/ulnds/ptllnd/ptllnd_cb.c
@@
-38,6
+38,10
@@
ptllnd_destroy_peer(ptllnd_peer_t *peer)
{
lnet_ni_t *ni = peer->plp_ni;
ptllnd_ni_t *plni = ni->ni_data;
+ int nmsg = peer->plp_lazy_credits +
+ plni->plni_peer_credits;
+
+ ptllnd_size_buffers(ni, -nmsg);
LASSERT (peer->plp_closing);
LASSERT (plni->plni_npeers > 0);
@@
-111,7
+115,7
@@
ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
/* New peer: check first for enough posted buffers */
plni->plni_npeers++;
- rc = ptllnd_
grow_buffers(ni
);
+ rc = ptllnd_
size_buffers(ni, plni->plni_peer_credits
);
if (rc != 0) {
plni->plni_npeers--;
return NULL;
@@
-121,19
+125,20
@@
ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
if (plp == NULL) {
CERROR("Can't allocate new peer %s\n", libcfs_id2str(id));
plni->plni_npeers--;
+ ptllnd_size_buffers(ni, -plni->plni_peer_credits);
return NULL;
}
- CDEBUG(D_NET, "new peer=%p\n",plp);
-
plp->plp_ni = ni;
plp->plp_id = id;
plp->plp_ptlid.nid = LNET_NIDADDR(id.nid);
plp->plp_ptlid.pid = plni->plni_ptllnd_pid;
- plp->plp_max_credits =
plp->plp_credits = 1; /* add more later when she gives me credits */
plp->plp_max_msg_size = plni->plni_max_msg_size; /* until I hear from her */
+ plp->plp_sent_credits = 1; /* Implicit credit for HELLO */
plp->plp_outstanding_credits = plni->plni_peer_credits - 1;
+ plp->plp_lazy_credits = 0;
+ plp->plp_extra_lazy_credits = 0;
plp->plp_match = 0;
plp->plp_stamp = 0;
plp->plp_recvd_hello = 0;
@@
-157,9
+162,12
@@
ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create)
tx->tx_msg.ptlm_u.hello.kptlhm_matchbits = PTL_RESERVED_MATCHBITS;
tx->tx_msg.ptlm_u.hello.kptlhm_max_msg_size = plni->plni_max_msg_size;
- PTLLND_HISTORY("%s[%d/%d]: post hello %p", libcfs_id2str(id),
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: post hello %p", libcfs_id2str(id),
tx->tx_peer->plp_credits,
- tx->tx_peer->plp_outstanding_credits, tx);
+ tx->tx_peer->plp_outstanding_credits,
+ tx->tx_peer->plp_sent_credits,
+ plni->plni_peer_credits +
+ tx->tx_peer->plp_lazy_credits, tx);
ptllnd_post_tx(tx);
return plp;
@@
-233,7
+241,7
@@
ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id)
return;
}
- CDEBUG(D_WARNING, "%s %s%s [%d] "LPD64".%06d m "LPD64" q %d/%d c %d/%d(%d)\n",
+ CDEBUG(D_WARNING, "%s %s%s [%d] "LPD64".%06d m "LPD64" q %d/%d c %d/%d
+%d
(%d)\n",
libcfs_id2str(id),
plp->plp_recvd_hello ? "H" : "_",
plp->plp_closing ? "C" : "_",
@@
-242,7
+250,8
@@
ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id)
plp->plp_match,
ptllnd_count_q(&plp->plp_txq),
ptllnd_count_q(&plp->plp_activeq),
- plp->plp_credits, plp->plp_outstanding_credits, plp->plp_max_credits);
+ plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
+ plni->plni_peer_credits + plp->plp_lazy_credits);
CDEBUG(D_WARNING, "txq:\n");
list_for_each (tmp, &plp->plp_txq) {
@@
-287,7
+296,7
@@
ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
ptllnd_peer_t *peer;
time_t start = cfs_time_current_sec();
int w = PTLLND_WARN_LONG_WAIT;
-
+
/* This is only actually used to connect to routers at startup! */
if (!alive) {
LBUG();
@@
-315,6
+324,46
@@
ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive)
ptllnd_peer_decref(peer);
}
+int
+ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync)
+{
+ ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0);
+ int rc;
+
+ if (peer == NULL)
+ return -ENOMEM;
+
+ LASSERT (peer->plp_lazy_credits >= 0);
+ LASSERT (peer->plp_extra_lazy_credits >= 0);
+
+ /* If nasync < 0, we're being told we can reduce the total message
+ * headroom. We can't do this right now because our peer might already
+ * have credits for the extra buffers, so we just account the extra
+ * headroom in case we need it later and only destroy buffers when the
+ * peer closes.
+ *
+ * Note that the following condition handles this case, where it
+ * actually increases the extra lazy credit counter. */
+
+ if (nasync <= peer->plp_extra_lazy_credits) {
+ peer->plp_extra_lazy_credits -= nasync;
+ return 0;
+ }
+
+ LASSERT (nasync > 0);
+
+ nasync -= peer->plp_extra_lazy_credits;
+ peer->plp_extra_lazy_credits = 0;
+
+ rc = ptllnd_size_buffers(ni, nasync);
+ if (rc == 0) {
+ peer->plp_lazy_credits += nasync;
+ peer->plp_outstanding_credits += nasync;
+ }
+
+ return rc;
+}
+
__u32
ptllnd_cksum (void *ptr, int nob)
{
@@
-336,7
+385,7
@@
ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob)
ptllnd_tx_t *tx;
int msgsize;
- CDEBUG(D_NET, "peer=%p type=%d payload=%d\n",
peer,type,
payload_nob);
+ CDEBUG(D_NET, "peer=%p type=%d payload=%d\n",
peer, type,
payload_nob);
switch (type) {
default:
@@
-375,8
+424,6
@@
ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob)
msgsize = (msgsize + 7) & ~7;
LASSERT (msgsize <= peer->plp_max_msg_size);
- CDEBUG(D_NET, "msgsize=%d\n",msgsize);
-
LIBCFS_ALLOC(tx, offsetof(ptllnd_tx_t, tx_msg) + msgsize);
if (tx == NULL) {
@@
-534,11
+581,6
@@
ptllnd_set_txiov(ptllnd_tx_t *tx,
return 0;
}
- CDEBUG(D_NET, "niov =%d\n",niov);
- CDEBUG(D_NET, "offset=%d\n",offset);
- CDEBUG(D_NET, "len =%d\n",len);
-
-
/*
* Remove iovec's at the beginning that
* are skipped because of the offset.
@@
-553,10
+595,6
@@
ptllnd_set_txiov(ptllnd_tx_t *tx,
iov++;
}
- CDEBUG(D_NET, "niov =%d (after)\n",niov);
- CDEBUG(D_NET, "offset=%d (after)\n",offset);
- CDEBUG(D_NET, "len =%d (after)\n",len);
-
for (;;) {
int temp_offset = offset;
int resid = len;
@@
-565,11
+603,6
@@
ptllnd_set_txiov(ptllnd_tx_t *tx,
return -ENOMEM;
for (npiov = 0;; npiov++) {
- CDEBUG(D_NET, "npiov=%d\n",npiov);
- CDEBUG(D_NET, "offset=%d\n",temp_offset);
- CDEBUG(D_NET, "len=%d\n",resid);
- CDEBUG(D_NET, "iov[npiov].iov_len=%lu\n",iov[npiov].iov_len);
-
LASSERT (npiov < niov);
LASSERT (iov->iov_len >= temp_offset);
@@
-588,8
+621,6
@@
ptllnd_set_txiov(ptllnd_tx_t *tx,
if (npiov == niov) {
tx->tx_niov = niov;
tx->tx_iov = piov;
- CDEBUG(D_NET, "tx->tx_iov=%p\n",tx->tx_iov);
- CDEBUG(D_NET, "tx->tx_niov=%d\n",tx->tx_niov);
return 0;
}
@@
-681,7
+712,10
@@
ptllnd_check_sends(ptllnd_peer_t *peer)
ptl_handle_md_t mdh;
int rc;
- CDEBUG(D_NET, "plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
+ CDEBUG(D_NET, "%s: [%d/%d+%d(%d)\n",
+ libcfs_id2str(peer->plp_id), peer->plp_credits,
+ peer->plp_outstanding_credits, peer->plp_sent_credits,
+ plni->plni_peer_credits + peer->plp_lazy_credits);
if (list_empty(&peer->plp_txq) &&
peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) &&
@@
-700,32
+734,34
@@
ptllnd_check_sends(ptllnd_peer_t *peer)
while (!list_empty(&peer->plp_txq)) {
tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list);
- CDEBUG(D_NET, "Looking at TX=%p\n",tx);
- CDEBUG(D_NET, "plp_credits=%d\n",peer->plp_credits);
- CDEBUG(D_NET, "plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
-
LASSERT (tx->tx_msgsize > 0);
LASSERT (peer->plp_outstanding_credits >= 0);
- LASSERT (peer->plp_outstanding_credits <=
- plni->plni_peer_credits);
+ LASSERT (peer->plp_sent_credits >= 0);
+ LASSERT (peer->plp_outstanding_credits + peer->plp_sent_credits
+ <= plni->plni_peer_credits + peer->plp_lazy_credits);
LASSERT (peer->plp_credits >= 0);
- LASSERT (peer->plp_credits <= peer->plp_max_credits);
if (peer->plp_credits == 0) { /* no credits */
- PTLLND_HISTORY("%s[%d/%d]: no creds for %p",
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: no creds for %p",
libcfs_id2str(peer->plp_id),
peer->plp_credits,
- peer->plp_outstanding_credits, tx);
+ peer->plp_outstanding_credits,
+ peer->plp_sent_credits,
+ plni->plni_peer_credits +
+ peer->plp_lazy_credits, tx);
break;
}
if (peer->plp_credits == 1 && /* last credit reserved for */
peer->plp_outstanding_credits == 0) { /* returning credits */
- PTLLND_HISTORY("%s[%d/%d]: too few creds for %p",
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: too few creds for %p",
libcfs_id2str(peer->plp_id),
peer->plp_credits,
- peer->plp_outstanding_credits, tx);
+ peer->plp_outstanding_credits,
+ peer->plp_sent_credits,
+ plni->plni_peer_credits +
+ peer->plp_lazy_credits, tx);
break;
}
@@
-748,12
+784,11
@@
ptllnd_check_sends(ptllnd_peer_t *peer)
* until I receive the HELLO back */
tx->tx_msg.ptlm_dststamp = peer->plp_stamp;
- CDEBUG(D_NET, "Returning %d to peer\n",peer->plp_outstanding_credits);
-
/*
* Return all the credits we have
*/
tx->tx_msg.ptlm_credits = peer->plp_outstanding_credits;
+ peer->plp_sent_credits += peer->plp_outstanding_credits;
peer->plp_outstanding_credits = 0;
/*
@@
-782,11
+817,19
@@
ptllnd_check_sends(ptllnd_peer_t *peer)
break;
}
+ LASSERT (tx->tx_type != PTLLND_RDMA_WRITE &&
+ tx->tx_type != PTLLND_RDMA_READ);
+
tx->tx_reqmdh = mdh;
PTLLND_DBGT_STAMP(tx->tx_req_posted);
- PTLLND_HISTORY("%s[%d/%d]: %s %p c %d", libcfs_id2str(peer->plp_id),
- peer->plp_credits, peer->plp_outstanding_credits,
+ PTLLND_HISTORY("%s[%d/%d+%d(%d)]: %s %p c %d",
+ libcfs_id2str(peer->plp_id),
+ peer->plp_credits,
+ peer->plp_outstanding_credits,
+ peer->plp_sent_credits,
+ plni->plni_peer_credits +
+ peer->plp_lazy_credits,
ptllnd_msgtype2str(tx->tx_type), tx,
tx->tx_msg.ptlm_credits);
@@
-881,13
+924,6
@@
ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg,
goto failed;
}
- CDEBUG(D_NET, "md.start=%p\n",md.start);
- CDEBUG(D_NET, "md.length=%llu\n",md.length);
- CDEBUG(D_NET, "md.threshold=%d\n",md.threshold);
- CDEBUG(D_NET, "md.max_size=%d\n",md.max_size);
- CDEBUG(D_NET, "md.options=0x%x\n",md.options);
- CDEBUG(D_NET, "md.user_ptr=%p\n",md.user_ptr);
-
PTLLND_DBGT_STAMP(tx->tx_bulk_posted);
rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh);
@@
-922,9
+958,11
@@
ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg,
}
tx->tx_lnetmsg = msg;
- PTLLND_HISTORY("%s[%d/%d]: post passive %s p %d %p",
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: post passive %s p %d %p",
libcfs_id2str(msg->msg_target),
peer->plp_credits, peer->plp_outstanding_credits,
+ peer->plp_sent_credits,
+ plni->plni_peer_credits + peer->plp_lazy_credits,
lnet_msgtyp2str(msg->msg_type),
(le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ?
le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) :
@@
-1049,14
+1087,10
@@
ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
LBUG();
case LNET_MSG_ACK:
- CDEBUG(D_NET, "LNET_MSG_ACK\n");
-
LASSERT (msg->msg_len == 0);
break; /* send IMMEDIATE */
case LNET_MSG_GET:
- CDEBUG(D_NET, "LNET_MSG_GET nob=%d\n",msg->msg_md->md_length);
-
if (msg->msg_target_is_router)
break; /* send IMMEDIATE */
@@
-1075,10
+1109,8
@@
ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
case LNET_MSG_REPLY:
case LNET_MSG_PUT:
- CDEBUG(D_NET, "LNET_MSG_PUT nob=%d\n",msg->msg_len);
nob = msg->msg_len;
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]);
- CDEBUG(D_NET, "msg_size=%d max=%d\n",msg->msg_len,plp->plp_max_msg_size);
if (nob <= plp->plp_max_msg_size)
break; /* send IMMEDIATE */
@@
-1092,7
+1124,6
@@
ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
/* send IMMEDIATE
* NB copy the payload so we don't have to do a fragmented send */
- CDEBUG(D_NET, "IMMEDIATE len=%d\n", msg->msg_len);
tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_IMMEDIATE, msg->msg_len);
if (tx == NULL) {
CERROR("Can't allocate tx for lnet type %d to %s\n",
@@
-1108,9
+1139,11
@@
ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg)
tx->tx_msg.ptlm_u.immediate.kptlim_hdr = msg->msg_hdr;
tx->tx_lnetmsg = msg;
- PTLLND_HISTORY("%s[%d/%d]: post immediate %s p %d %p",
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: post immediate %s p %d %p",
libcfs_id2str(msg->msg_target),
plp->plp_credits, plp->plp_outstanding_credits,
+ plp->plp_sent_credits,
+ plni->plni_peer_credits + plp->plp_lazy_credits,
lnet_msgtyp2str(msg->msg_type),
(le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ?
le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) :
@@
-1131,8
+1164,11
@@
ptllnd_rx_done(ptllnd_rx_t *rx)
plp->plp_outstanding_credits++;
- PTLLND_HISTORY("%s[%d/%d]: rx=%p done\n", libcfs_id2str(plp->plp_id),
- plp->plp_credits, plp->plp_outstanding_credits, rx);
+ PTLLND_HISTORY("%s[%d/%d+%d(%d)]: rx=%p done\n",
+ libcfs_id2str(plp->plp_id),
+ plp->plp_credits, plp->plp_outstanding_credits,
+ plp->plp_sent_credits,
+ plni->plni_peer_credits + plp->plp_lazy_credits, rx);
ptllnd_check_sends(rx->rx_peer);
@@
-1168,7
+1204,6
@@
ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
case PTLLND_MSG_TYPE_IMMEDIATE:
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[mlen]);
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE nob=%d\n",nob);
if (nob > rx->rx_nob) {
CERROR("Immediate message from %s too big: %d(%d)\n",
libcfs_id2str(rx->rx_peer->plp_id),
@@
-1184,14
+1219,12
@@
ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg,
break;
case PTLLND_MSG_TYPE_PUT:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT offset=%d mlen=%d\n",offset,mlen);
rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_READ, msg,
rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits,
niov, iov, offset, mlen);
break;
case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET\n");
if (msg != NULL)
rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, msg,
rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits,
@@
-1213,6
+1246,9
@@
ptllnd_abort_on_nak(lnet_ni_t *ni)
{
ptllnd_ni_t *plni = ni->ni_data;
+ if (plni->plni_dump_on_nak)
+ ptllnd_dump_history();
+
if (plni->plni_abort_on_nak)
abort();
}
@@
-1324,13
+1360,12
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
}
PTLLND_HISTORY("RX %s: %s %d %p", libcfs_id2str(srcid),
- ptllnd_msgtype2str(msg->ptlm_type), msg->ptlm_credits, &rx);
+ ptllnd_msgtype2str(msg->ptlm_type),
+ msg->ptlm_credits, &rx);
switch (msg->ptlm_type) {
case PTLLND_MSG_TYPE_PUT:
case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
- msg->ptlm_type==PTLLND_MSG_TYPE_PUT ? "PUT" : "GET");
if (nob < basenob + sizeof(kptl_rdma_msg_t)) {
CERROR("Short rdma request from %s(%s)\n",
libcfs_id2str(srcid),
@@
-1342,7
+1377,6
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
if (nob < offsetof(kptl_msg_t,
ptlm_u.immediate.kptlim_payload)) {
CERROR("Short immediate from %s(%s)\n",
@@
-1353,9
+1387,6
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
break;
case PTLLND_MSG_TYPE_HELLO:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO from %s(%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
if (nob < basenob + sizeof(kptl_hello_msg_t)) {
CERROR("Short hello from %s(%s)\n",
libcfs_id2str(srcid),
@@
-1369,9
+1400,6
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
break;
case PTLLND_MSG_TYPE_NOOP:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP from %s(%s)\n",
- libcfs_id2str(srcid),
- ptllnd_ptlid2str(initiator));
break;
default:
@@
-1381,8
+1409,7
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
return;
}
- plp = ptllnd_find_peer(ni, srcid,
- msg->ptlm_type == PTLLND_MSG_TYPE_HELLO);
+ plp = ptllnd_find_peer(ni, srcid, 0);
if (plp == NULL) {
CERROR("Can't find peer %s\n", libcfs_id2str(srcid));
return;
@@
-1396,20
+1423,11
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
return;
}
- CDEBUG(D_NET, "maxsz %d match "LPX64" stamp "LPX64"\n",
- msg->ptlm_u.hello.kptlhm_max_msg_size,
- msg->ptlm_u.hello.kptlhm_matchbits,
- msg->ptlm_srcstamp);
-
- plp->plp_max_msg_size = MAX(plni->plni_max_msg_size,
- msg->ptlm_u.hello.kptlhm_max_msg_size);
+ plp->plp_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size;
plp->plp_match = msg->ptlm_u.hello.kptlhm_matchbits;
plp->plp_stamp = msg->ptlm_srcstamp;
- plp->plp_max_credits += msg->ptlm_credits;
plp->plp_recvd_hello = 1;
- CDEBUG(D_NET, "plp_max_msg_size=%d\n",plp->plp_max_msg_size);
-
} else if (!plp->plp_recvd_hello) {
CERROR("Bad message type %d (HELLO expected) from %s\n",
@@
-1426,18
+1444,21
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
return;
}
+ /* Check peer only sends when I've sent her credits */
+ if (plp->plp_sent_credits == 0) {
+ CERROR("%s[%d/%d+%d(%d)]: unexpected message\n",
+ libcfs_id2str(plp->plp_id),
+ plp->plp_credits, plp->plp_outstanding_credits,
+ plp->plp_sent_credits,
+ plni->plni_peer_credits + plp->plp_lazy_credits);
+ return;
+ }
+ plp->plp_sent_credits--;
+
+ /* No check for credit overflow - the peer may post new buffers after
+ * the startup handshake. */
if (msg->ptlm_credits > 0) {
- CDEBUG(D_NET, "Getting back %d credits from peer\n",msg->ptlm_credits);
- if (plp->plp_credits + msg->ptlm_credits >
- plp->plp_max_credits) {
- CWARN("Too many credits from %s: %d + %d > %d\n",
- libcfs_id2str(srcid),
- plp->plp_credits, msg->ptlm_credits,
- plp->plp_max_credits);
- plp->plp_credits = plp->plp_max_credits;
- } else {
- plp->plp_credits += msg->ptlm_credits;
- }
+ plp->plp_credits += msg->ptlm_credits;
ptllnd_check_sends(plp);
}
@@
-1448,8
+1469,6
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
rx.rx_nob = nob;
plni->plni_nrxs++;
- CDEBUG(D_NET, "rx=%p type=%d\n",&rx,msg->ptlm_type);
-
switch (msg->ptlm_type) {
default: /* message types have been checked already */
ptllnd_rx_done(&rx);
@@
-1457,20
+1476,15
@@
ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator,
case PTLLND_MSG_TYPE_PUT:
case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
- msg->ptlm_type==PTLLND_MSG_TYPE_PUT ? "PUT" : "GET");
rc = lnet_parse(ni, &msg->ptlm_u.rdma.kptlrm_hdr,
msg->ptlm_srcnid, &rx, 1);
- CDEBUG(D_NET, "lnet_parse rc=%d\n",rc);
if (rc < 0)
ptllnd_rx_done(&rx);
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
rc = lnet_parse(ni, &msg->ptlm_u.immediate.kptlim_hdr,
msg->ptlm_srcnid, &rx, 0);
- CDEBUG(D_NET, "lnet_parse rc=%d\n",rc);
if (rc < 0)
ptllnd_rx_done(&rx);
break;
@@
-1492,12
+1506,12
@@
ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event)
LASSERT (event->type == PTL_EVENT_PUT_END ||
event->type == PTL_EVENT_UNLINK);
- CDEBUG(D_NET, "buf=%p event=%d\n",buf,event->type);
-
if (event->ni_fail_type != PTL_NI_OK) {
- CERROR("event type %d, status %d from %s\n",
- event->type, event->ni_fail_type,
+ CERROR("event type %s(%d), status %s(%d) from %s\n",
+ ptllnd_evtype2str(event->type), event->type,
+ ptllnd_errtype2str(event->ni_fail_type),
+ event->ni_fail_type,
ptllnd_ptlid2str(event->initiator));
} else if (event->type == PTL_EVENT_PUT_END) {
@@
-1528,8
+1542,6
@@
ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event)
repost = (event->type == PTL_EVENT_UNLINK);
#endif
- CDEBUG(D_NET, "repost=%d unlinked=%d\n",repost,unlinked);
-
if (unlinked) {
LASSERT(buf->plb_posted);
buf->plb_posted = 0;
@@
-1555,19
+1567,16
@@
ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
#endif
if (error)
- CERROR("Error event type %d for %s for %s\n",
- event->type, ptllnd_msgtype2str(tx->tx_type),
+ CERROR("Error %s(%d) event %s(%d) unlinked %d, %s(%d) for %s\n",
+ ptllnd_errtype2str(event->ni_fail_type),
+ event->ni_fail_type,
+ ptllnd_evtype2str(event->type), event->type,
+ unlinked, ptllnd_msgtype2str(tx->tx_type), tx->tx_type,
libcfs_id2str(tx->tx_peer->plp_id));
LASSERT (!PtlHandleIsEqual(event->md_handle, PTL_INVALID_HANDLE));
- CDEBUG(D_NET, "tx=%p type=%s (%d)\n",tx,
- ptllnd_msgtype2str(tx->tx_type),tx->tx_type);
- CDEBUG(D_NET, "unlinked=%d\n",unlinked);
- CDEBUG(D_NET, "error=%d\n",error);
-
isreq = PtlHandleIsEqual(event->md_handle, tx->tx_reqmdh);
- CDEBUG(D_NET, "isreq=%d\n",isreq);
if (isreq) {
LASSERT (event->md.start == (void *)&tx->tx_msg);
if (unlinked) {
@@
-1577,7
+1586,6
@@
ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
}
isbulk = PtlHandleIsEqual(event->md_handle, tx->tx_bulkmdh);
- CDEBUG(D_NET, "isbulk=%d\n",isbulk);
if ( isbulk && unlinked ) {
tx->tx_bulkmdh = PTL_INVALID_HANDLE;
PTLLND_DBGT_STAMP(tx->tx_bulk_done);
@@
-1585,10
+1593,12
@@
ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
LASSERT (!isreq != !isbulk); /* always one and only 1 match */
- PTLLND_HISTORY("%s[%d/%d]: TX done %p %s%s",
+ PTLLND_HISTORY("%s[%d/%d
+%d(%d)
]: TX done %p %s%s",
libcfs_id2str(tx->tx_peer->plp_id),
tx->tx_peer->plp_credits,
tx->tx_peer->plp_outstanding_credits,
+ tx->tx_peer->plp_sent_credits,
+ plni->plni_peer_credits + tx->tx_peer->plp_lazy_credits,
tx, isreq ? "REQ" : "BULK", unlinked ? "(unlinked)" : "");
LASSERT (!isreq != !isbulk); /* always one and only 1 match */
@@
-1650,7
+1660,6
@@
ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event)
tx->tx_status = -EIO;
list_del(&tx->tx_list);
list_add_tail(&tx->tx_list, &plni->plni_zombie_txs);
- CDEBUG(D_NET, "tx=%p ONTO ZOMBIE LIST\n",tx);
}
}
@@
-1683,8
+1692,6
@@
ptllnd_wait (lnet_ni_t *ni, int milliseconds)
for (;;) {
time_t then = cfs_time_current_sec();
- CDEBUG(D_NET, "Poll(%d)\n", timeout);
-
rc = PtlEQPoll(&plni->plni_eqh, 1,
(timeout < 0) ? PTL_TIME_FOREVER : timeout,
&event, &which);
@@
-1696,7
+1703,6
@@
ptllnd_wait (lnet_ni_t *ni, int milliseconds)
(int)(cfs_time_current_sec() - then));
}
- CDEBUG(D_NET, "PtlEQPoll rc=%d\n",rc);
timeout = 0;
if (rc == PTL_EQ_EMPTY) {
@@
-1717,9
+1723,6
@@
ptllnd_wait (lnet_ni_t *ni, int milliseconds)
CERROR("Event queue: size %d is too small\n",
plni->plni_eq_size);
- CDEBUG(D_NET, "event.type=%s(%d)\n",
- ptllnd_evtype2str(event.type),event.type);
-
found = 1;
switch (ptllnd_eventarg2type(event.md.user_ptr)) {
default:
@@
-1738,7
+1741,6
@@
ptllnd_wait (lnet_ni_t *ni, int milliseconds)
while (!list_empty(&plni->plni_zombie_txs)) {
tx = list_entry(plni->plni_zombie_txs.next,
ptllnd_tx_t, tx_list);
- CDEBUG(D_NET, "Process ZOMBIE tx=%p\n",tx);
ptllnd_tx_done(tx);
}