SOCKNAL = 2,
GMNAL = 3,
PTLLND = 4,
- TCPNAL = 5,
+ /* unused 5 */
/* unused 6 */
OPENIBNAL = 7,
IIBNAL = 8,
* lnet_finalize() */
int (*nal_send) (struct ptl_ni *ni, void *private, ptl_msg_t *msg,
ptl_hdr_t *hdr, int type, lnet_process_id_t target,
- int routing, unsigned int niov,
+ int target_is_router, int routing, unsigned int niov,
struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen);
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
int gmnal_send(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
- ptl_hdr_t *hdr, int type,
- lnet_process_id_t tgt, int routing,
+ ptl_hdr_t *hdr, int type, lnet_process_id_t tgt,
+ int target_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int len);
int
gmnal_send(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
- ptl_hdr_t *hdr, int type, lnet_process_id_t pid, int routing,
+ ptl_hdr_t *hdr, int type, lnet_process_id_t target,
+ int target_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int len)
{
/* I may not block for a tx if I'm responding to an incoming message */
tx = gmnal_get_tx(gmni,
- !(type == PTL_MSG_ACK || type == PTL_MSG_REPLY));
+ !(routing ||
+ type == PTL_MSG_ACK ||
+ type == PTL_MSG_REPLY));
if (tx == NULL) {
if (!gmni->gmni_shutdown)
CERROR ("Can't get tx for msg type %d for %s\n",
- type, libcfs_nid2str(pid.nid));
+ type, libcfs_nid2str(target.nid));
return -EIO;
}
- tx->tx_nid = pid.nid;
+ tx->tx_nid = target.nid;
- gmrc = gm_global_id_to_node_id(gmni->gmni_port, PTL_NIDADDR(pid.nid),
+ gmrc = gm_global_id_to_node_id(gmni->gmni_port, PTL_NIDADDR(target.nid),
&tx->tx_gmlid);
if (gmrc != GM_SUCCESS) {
CERROR("Can't map Nid %s to a GM local ID: %d\n",
- libcfs_nid2str(pid.nid), gmrc);
+ libcfs_nid2str(target.nid), gmrc);
/* NB tx_ptlmsg not set => doesn't finalize */
gmnal_tx_done(tx, -EIO);
return -EIO;
}
gmnal_pack_msg(gmni, GMNAL_NETBUF_MSG(&tx->tx_buf),
- pid.nid, GMNAL_MSG_IMMEDIATE);
+ target.nid, GMNAL_MSG_IMMEDIATE);
GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_hdr = *hdr;
tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0]);
extern int kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg);
int kibnal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
extern int kibnal_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
}
static int
-kibnal_start_passive_rdma (int type, lnet_nid_t nid,
+kibnal_start_passive_rdma (int type, int may_block, lnet_nid_t nid,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr)
{
int nob = ptlmsg->msg_md->md_length;
access.s.RdmaRead = 1;
access.s.RdmaWrite = 1;
- tx = kibnal_get_idle_tx (1); /* May block; caller is an app thread */
- LASSERT (tx != NULL);
+ tx = kibnal_get_idle_tx (may_block);
+ if (tx == NULL) {
+ CERROR("Can't allocate %s txd for %s\n",
+ (type == IBNAL_MSG_PUT_RDMA) ? "PUT/REPLY" : "GET",
+ libcfs_nid2str(nid));
+ return -ENOMEM;
+ }
if ((ptlmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
rc = kibnal_map_iov (tx, access,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,
/* payload is either all vaddrs or all pages */
LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
- if (routing) {
- CERROR ("Can't route\n");
- return -EIO;
- }
-
switch (type) {
default:
LBUG();
return (-EIO);
+ case PTL_MSG_ACK:
+ LASSERT (payload_nob == 0);
+ break;
+
+ case PTL_MSG_GET:
+ if (routing || target_is_router)
+ break; /* send IMMEDIATE */
+
+ /* is the REPLY message too small for RDMA? */
+ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]);
+ if (nob <= IBNAL_MSG_SIZE)
+ break; /* send IMMEDIATE */
+
+ return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, 1,
+ target.nid, ptlmsg, hdr);
+
case PTL_MSG_REPLY: {
/* reply's 'private' is the incoming receive */
kib_rx_t *rx = private;
+ LASSERT (routing || rx != NULL);
+
/* RDMA reply expected? */
- if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) {
+ if (!routing && rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
+                /* Incoming message consistent with RDMA? */
+ if (rx->rx_msg->ibm_type != IBNAL_MSG_GET_RDMA) {
+ CERROR ("REPLY to %s bad ibm type %d!!!\n",
+ libcfs_nid2str(target.nid),
+ rx->rx_msg->ibm_type);
+ return (-EIO);
+ }
+
kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
rx, ptlmsg, payload_niov,
payload_iov, payload_kiov,
payload_offset, payload_nob);
return (0);
}
-
- /* Incoming message consistent with immediate reply? */
- if (rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
- CERROR ("REPLY to %s bad ibm type %d!!!\n",
- libcfs_nid2str(target.nid),
- rx->rx_msg->ibm_type);
- return (-EIO);
- }
-
- /* Will it fit in a message? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob >= IBNAL_MSG_SIZE) {
- CERROR("REPLY for %s too big (RDMA not requested): %d\n",
- libcfs_nid2str(target.nid), payload_nob);
- return (-EIO);
- }
- break;
+ /* Fall through to handle like PUT */
}
- case PTL_MSG_GET:
- /* might the REPLY message be big enough to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]);
- if (nob > IBNAL_MSG_SIZE)
- return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA,
- target.nid, ptlmsg, hdr));
- break;
-
- case PTL_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
case PTL_MSG_PUT:
- /* Is the payload big enough to need RDMA? */
+ /* Is the payload small enough not to need RDMA? */
nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob > IBNAL_MSG_SIZE)
- return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
- target.nid, ptlmsg, hdr));
+ if (nob <= IBNAL_MSG_SIZE)
+ break; /* send IMMEDIATE */
- break;
+ return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
+ !(routing || type == PTL_MSG_REPLY),
+ target.nid, ptlmsg, hdr);
}
- tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
+ /* send IMMEDIATE */
+
+ tx = kibnal_get_idle_tx(!(routing ||
+ type == PTL_MSG_ACK ||
type == PTL_MSG_REPLY ||
in_interrupt()));
if (tx == NULL) {
int kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg);
int kibnal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
int kibnal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
}
int
-kibnal_start_passive_rdma (int type, lnet_nid_t nid,
- ptl_msg_t *ptlmsg, ptl_hdr_t *hdr)
+kibnal_start_passive_rdma (int type, int may_block, lnet_nid_t nid,
+                           ptl_msg_t *ptlmsg, ptl_hdr_t *hdr)
{
int nob = ptlmsg->msg_md->md_length;
kib_tx_t *tx;
IB_ACCESS_LOCAL_WRITE;
}
- tx = kibnal_get_idle_tx (1); /* May block; caller is an app thread */
- LASSERT (tx != NULL);
+ tx = kibnal_get_idle_tx (may_block);
+ if (tx == NULL) {
+ CERROR("Can't allocate %s txd for %s\n",
+ (type == IBNAL_MSG_PUT_RDMA) ? "PUT/REPLY" : "GET",
+ libcfs_nid2str(nid));
+ return -ENOMEM;
+ }
if ((ptlmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
rc = kibnal_map_iov (tx, access,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,
/* payload is either all vaddrs or all pages */
LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
- if (routing) {
- CERROR ("Can't route\n");
- return -EIO;
- }
-
switch (type) {
default:
LBUG();
return (-EIO);
+ case PTL_MSG_ACK:
+ LASSERT (payload_nob == 0);
+ break;
+
+ case PTL_MSG_GET:
+ if (routing || target_is_router)
+ break; /* send IMMEDIATE */
+
+ /* is the REPLY message too small for RDMA? */
+ nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]);
+ if (nob <= IBNAL_MSG_SIZE)
+ break; /* send IMMEDIATE */
+
+ return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, 1,
+ target.nid, ptlmsg, hdr);
+
case PTL_MSG_REPLY: {
/* reply's 'private' is the incoming receive */
kib_rx_t *rx = private;
+ LASSERT (routing || rx != NULL);
+
/* RDMA reply expected? */
- if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) {
+ if (!routing && rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
+ /* Incoming message consistent with RDMA? */
+ if (rx->rx_msg->ibm_type != IBNAL_MSG_GET_RDMA) {
+ CERROR ("REPLY to %s bad ibm type %d!!!\n",
+ libcfs_nid2str(target.nid),
+ rx->rx_msg->ibm_type);
+ return (-EIO);
+ }
+
kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
rx, ptlmsg, payload_niov,
payload_iov, payload_kiov,
payload_offset, payload_nob);
return (0);
}
-
- /* Incoming message consistent with immediate reply? */
- if (rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
- CERROR ("REPLY to %s bad opbm type %d!!!\n",
- libcfs_nid2str(target.nid),
- rx->rx_msg->ibm_type);
- return (-EIO);
- }
-
- /* Will it fit in a message? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob > IBNAL_MSG_SIZE) {
- CERROR("REPLY for %s too big (RDMA not requested): %d\n",
- libcfs_nid2str(target.nid), payload_nob);
- return (-EIO);
- }
- break;
+ /* Fall through to handle like PUT */
}
- case PTL_MSG_GET:
- /* might the REPLY message be big enough to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]);
- if (nob > IBNAL_MSG_SIZE)
- return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA,
- target.nid, ptlmsg, hdr));
- break;
-
- case PTL_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
case PTL_MSG_PUT:
- /* Is the payload big enough to need RDMA? */
+ /* Is the payload small enough not to need RDMA? */
nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob > IBNAL_MSG_SIZE)
- return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
- target.nid, ptlmsg, hdr));
+ if (nob <= IBNAL_MSG_SIZE)
+ break; /* send IMMEDIATE */
- break;
+ return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
+ !(routing || type == PTL_MSG_REPLY),
+ target.nid, ptlmsg, hdr);
}
- tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
+ /* Send IMMEDIATE */
+
+ tx = kibnal_get_idle_tx(!(routing ||
+ type == PTL_MSG_ACK ||
type == PTL_MSG_REPLY ||
in_interrupt()));
if (tx == NULL) {
int kqswnal_ctl (ptl_ni_t *ni, unsigned int cmd, void *arg);
int kqswnal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
int kqswnal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,
return (-EIO);
}
- if (type == PTL_MSG_REPLY && /* can I look in 'private' */
- ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
- /* Must be a REPLY for an optimized GET */
- rc = kqswnal_rdma ((kqswnal_rx_t *)private, ptlmsg, PTL_MSG_GET,
- payload_niov, payload_iov, payload_kiov,
- payload_offset, payload_nob);
- return ((rc == 0) ? 0 : -EIO);
+ if (type == PTL_MSG_REPLY) {
+ kqswnal_rx_t *rx = (kqswnal_rx_t *)private;
+
+ LASSERT (routing || rx != NULL);
+
+ if (!routing && rx->krx_rpc_reply_needed) { /* is it an RPC */
+ /* Must be a REPLY for an optimized GET */
+ rc = kqswnal_rdma (
+ rx, ptlmsg, PTL_MSG_GET,
+ payload_niov, payload_iov, payload_kiov,
+ payload_offset, payload_nob);
+ return ((rc == 0) ? 0 : -EIO);
+ }
}
-
if (kqswnal_nid2elanid (target.nid) < 0) {
CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid));
return -EIO;
}
/* I may not block for a transmit descriptor if I might block the
- * receiver, or an interrupt handler. */
- ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK ||
+ * router, receiver, or an interrupt handler. */
+ ktx = kqswnal_get_idle_tx(NULL, !(routing ||
+ type == PTL_MSG_ACK ||
type == PTL_MSG_REPLY ||
in_interrupt()));
if (ktx == NULL) {
* portals header. */
ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
- if ((!routing && /* target.nid is final dest */
+ if ((!target_is_router && /* target.nid is final dest */
+ !routing && /* I'm the source */
type == PTL_MSG_GET && /* optimize GET? */
*kqswnal_tunables.kqn_optimized_gets != 0 &&
ptlmsg->msg_md->md_length >=
out:
CDEBUG(rc == 0 ? D_NET : D_ERROR, "%s %u bytes to %s%s: rc %d\n",
- rc == 0 ? "Sent" : "Failed to send",
+ routing ? (rc == 0 ? "Routed" : "Failed to route") :
+ (rc == 0 ? "Sent" : "Failed to send"),
payload_nob, libcfs_nid2str(target.nid),
- routing ? "(routing)" : "", rc);
+ target_is_router ? "(router)" : "", rc);
if (rc != 0) {
if (ktx->ktx_state == KTX_GETTING &&
int kranal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg);
int kranal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
int kranal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int niov,
struct iovec *iov,
default:
LBUG();
- case PTL_MSG_REPLY: {
- /* reply's 'private' is the conn that received the GET_REQ */
- conn = private;
- LASSERT (conn->rac_rxmsg != NULL);
-
- if (conn->rac_rxmsg->ram_type == RANAL_MSG_IMMEDIATE) {
- if (nob > RANAL_FMA_MAX_DATA) {
- CERROR("Can't REPLY IMMEDIATE %d to %s\n",
- nob, libcfs_nid2str(target.nid));
- return -EIO;
- }
- break; /* RDMA not expected */
- }
-
- /* Incoming message consistent with RDMA? */
- if (conn->rac_rxmsg->ram_type != RANAL_MSG_GET_REQ) {
- CERROR("REPLY to %s bad msg type %x!!!\n",
- libcfs_nid2str(target.nid),
- conn->rac_rxmsg->ram_type);
- return -EIO;
- }
-
- tx = kranal_get_idle_tx(0);
- if (tx == NULL)
- return -EIO;
-
- rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- tx->tx_conn = conn;
- tx->tx_ptlmsg[0] = ptlmsg;
-
- rc = kranal_map_buffer(tx);
- if (rc != 0) {
- kranal_tx_done(tx, rc);
- return -EIO;
- }
-
- kranal_rdma(tx, RANAL_MSG_GET_DONE,
- &conn->rac_rxmsg->ram_u.get.ragm_desc, nob,
- conn->rac_rxmsg->ram_u.get.ragm_cookie);
-
- /* flag matched by consuming rx message */
- kranal_consume_rxmsg(conn, NULL, 0);
- return 0;
- }
+ case PTL_MSG_ACK:
+ LASSERT (nob == 0);
+ break;
case PTL_MSG_GET:
LASSERT (niov == 0);
* IMMEDIATE GET if the sink buffer is mapped already and small
* enough for FMA */
+ if (routing || target_is_router)
+ break; /* send IMMEDIATE */
+
if ((ptlmsg->msg_md->md_options & LNET_MD_KIOV) == 0 &&
ptlmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA &&
ptlmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate)
- break;
+ break; /* send IMMEDIATE */
tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_GET_REQ);
if (tx == NULL)
kranal_launch_tx(tx, target.nid);
return 0;
- case PTL_MSG_ACK:
- LASSERT (nob == 0);
- break;
+ case PTL_MSG_REPLY:
+ /* reply's 'private' is the conn that received the GET_REQ */
+ conn = private;
+
+ LASSERT (routing || conn != NULL);
+
+        LASSERT (routing || conn->rac_rxmsg != NULL);
+
+ if (!routing && conn->rac_rxmsg->ram_type != RANAL_MSG_IMMEDIATE) {
+ /* Incoming message consistent with RDMA? */
+ if (conn->rac_rxmsg->ram_type != RANAL_MSG_GET_REQ) {
+ CERROR("REPLY to %s bad msg type %x!!!\n",
+ libcfs_nid2str(target.nid),
+ conn->rac_rxmsg->ram_type);
+ return -EIO;
+ }
+
+ tx = kranal_get_idle_tx(0);
+ if (tx == NULL)
+ return -EIO;
+
+ rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov,
+ offset, nob);
+ if (rc != 0) {
+ kranal_tx_done(tx, rc);
+ return -EIO;
+ }
+
+ tx->tx_conn = conn;
+ tx->tx_ptlmsg[0] = ptlmsg;
+
+ rc = kranal_map_buffer(tx);
+ if (rc != 0) {
+ kranal_tx_done(tx, rc);
+ return -EIO;
+ }
+
+ kranal_rdma(tx, RANAL_MSG_GET_DONE,
+ &conn->rac_rxmsg->ram_u.get.ragm_desc, nob,
+ conn->rac_rxmsg->ram_u.get.ragm_cookie);
+
+ /* flag matched by consuming rx message */
+ kranal_consume_rxmsg(conn, NULL, 0);
+ return 0;
+ }
+
+ /* Fall through and handle like PUT */
case PTL_MSG_PUT:
if (kiov == NULL && /* not paged */
nob <= *kranal_tunables.kra_max_immediate)
break; /* send IMMEDIATE */
- tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_PUT_REQ);
+ tx = kranal_new_tx_msg(!(routing ||
+ type == PTL_MSG_REPLY ||
+ in_interrupt()),
+ RANAL_MSG_PUT_REQ);
if (tx == NULL)
return -ENOMEM;
return 0;
}
+ /* send IMMEDIATE */
+
LASSERT (kiov == NULL);
LASSERT (nob <= RANAL_FMA_MAX_DATA);
- tx = kranal_new_tx_msg(!(type == PTL_MSG_ACK ||
+ tx = kranal_new_tx_msg(!(routing ||
+ type == PTL_MSG_ACK ||
type == PTL_MSG_REPLY ||
in_interrupt()),
RANAL_MSG_IMMEDIATE);
int ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg);
int ksocknal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
int ksocknal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,
int kibnal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg);
int kibnal_send (ptl_ni_t *ni, void *private,
ptl_msg_t *ptlmsg, ptl_hdr_t *hdr,
- int type, lnet_process_id_t tgt, int routing,
+ int type, lnet_process_id_t tgt,
+ int tgt_is_router, int routing,
unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int nob);
int kibnal_recv(ptl_ni_t *ni, void *private, ptl_msg_t *ptlmsg,
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,
/* payload is either all vaddrs or all pages */
LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
- if (routing) {
- CERROR ("Can't route\n");
- return -EIO;
- }
-
switch (type) {
default:
LBUG();
return (-EIO);
- case PTL_MSG_REPLY: {
- /* reply's 'private' is the incoming receive */
- kib_rx_t *rx = private;
-
- LASSERT(rx != NULL);
-
- if (rx->rx_msg->ibm_type == IBNAL_MSG_IMMEDIATE) {
- /* RDMA not expected */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob > IBNAL_MSG_SIZE) {
- CERROR("REPLY for %s too big (RDMA not requested):"
- "%d (max for message is %d)\n",
- libcfs_nid2str(target.nid), payload_nob,
- IBNAL_MSG_SIZE);
- CERROR("Can't REPLY IMMEDIATE %d to %s\n",
- nob, libcfs_nid2str(target.nid));
- return -EIO;
- }
- break;
- }
-
- /* Incoming message consistent with RDMA? */
- if (rx->rx_msg->ibm_type != IBNAL_MSG_GET_REQ) {
- CERROR("REPLY to %s bad msg type %x!!!\n",
- libcfs_nid2str(target.nid), rx->rx_msg->ibm_type);
- return -EIO;
- }
-
- /* NB rx_complete() will send GET_NAK when I return to it from
- * here, unless I set rx_responded! */
-
- tx = kibnal_get_idle_tx(0);
- if (tx == NULL) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (payload_nob == 0)
- rc = 0;
- else if (payload_kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, payload_nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- } else if (rc == 0) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize (kibnal_data.kib_ni, NULL, ptlmsg, 0);
- } else {
- /* RDMA: lnet_finalize(ptlmsg) when it completes */
- tx->tx_ptlmsg[0] = ptlmsg;
- }
-
- kibnal_queue_tx(tx, rx->rx_conn);
- rx->rx_responded = 1;
- return (rc >= 0) ? 0 : -EIO;
- }
+ case PTL_MSG_ACK:
+ LASSERT (payload_nob == 0);
+ break;
case PTL_MSG_GET:
- /* will the REPLY message be small enough not to need RDMA? */
+ if (routing || target_is_router)
+ break; /* send IMMEDIATE */
+
+ /* is the REPLY message too small for RDMA? */
nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[ptlmsg->msg_md->md_length]);
if (nob <= IBNAL_MSG_SIZE)
- break;
+ break; /* send IMMEDIATE */
tx = kibnal_get_idle_tx(1); /* may block; caller is an app thread */
LASSERT (tx != NULL);
#endif
kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob);
- tx->tx_ptlmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, target.nid, ptlmsg);
+ tx->tx_ptlmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni,
+ target.nid, ptlmsg);
if (tx->tx_ptlmsg[1] == NULL) {
CERROR("Can't create reply for GET -> %s\n",
libcfs_nid2str(target.nid));
kibnal_launch_tx(tx, target.nid);
return 0;
- case PTL_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
+ case PTL_MSG_REPLY: {
+ /* reply's 'private' is the incoming receive */
+ kib_rx_t *rx = private;
+
+ LASSERT(routing || rx != NULL);
+
+ if (!routing && rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
+ /* Incoming message consistent with RDMA? */
+ if (rx->rx_msg->ibm_type != IBNAL_MSG_GET_REQ) {
+ CERROR("REPLY to %s bad msg type %x!!!\n",
+ libcfs_nid2str(target.nid),
+ rx->rx_msg->ibm_type);
+ return -EIO;
+ }
+
+ /* NB handle_rx() will send GET_NAK when I return to
+ * it from here, unless I set rx_responded! */
+
+ tx = kibnal_get_idle_tx(0);
+ if (tx == NULL) {
+ CERROR("Can't get tx for REPLY to %s\n",
+ libcfs_nid2str(target.nid));
+ return -ENOMEM;
+ }
+
+ if (payload_nob == 0)
+ rc = 0;
+ else if (payload_kiov == NULL)
+ rc = kibnal_setup_rd_iov(
+ tx, tx->tx_rd, 0,
+ payload_niov, payload_iov,
+ payload_offset, payload_nob);
+ else
+ rc = kibnal_setup_rd_kiov(
+ tx, tx->tx_rd, 0,
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+ if (rc != 0) {
+ CERROR("Can't setup GET src for %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ kibnal_tx_done(tx);
+ return -EIO;
+ }
+
+ rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE,
+ payload_nob,
+ &rx->rx_msg->ibm_u.get.ibgm_rd,
+ rx->rx_msg->ibm_u.get.ibgm_cookie);
+ if (rc < 0) {
+ CERROR("Can't setup rdma for GET from %s: %d\n",
+ libcfs_nid2str(target.nid), rc);
+ } else if (rc == 0) {
+ /* No RDMA: local completion may happen now! */
+ lnet_finalize (kibnal_data.kib_ni, NULL,
+ ptlmsg, 0);
+ } else {
+ /* RDMA: lnet_finalize(ptlmsg) when it
+ * completes */
+ tx->tx_ptlmsg[0] = ptlmsg;
+ }
+
+ kibnal_queue_tx(tx, rx->rx_conn);
+ rx->rx_responded = 1;
+ return (rc >= 0) ? 0 : -EIO;
+ }
+ /* fall through to handle like PUT */
+ }
case PTL_MSG_PUT:
/* Is the payload small enough not to need RDMA? */
nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
if (nob <= IBNAL_MSG_SIZE)
- break;
+ break; /* send IMMEDIATE */
- tx = kibnal_get_idle_tx(1); /* may block: caller is app thread */
- LASSERT (tx != NULL);
+ /* may block if caller is app thread */
+ tx = kibnal_get_idle_tx(!(routing || type == PTL_MSG_REPLY));
+ if (tx == NULL) {
+ CERROR("Can't allocate %s txd for %s\n",
+ type == PTL_MSG_PUT ? "PUT" : "REPLY",
+ libcfs_nid2str(target.nid));
+ return -ENOMEM;
+ }
if (payload_kiov == NULL)
rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0,
return 0;
}
+ /* send IMMEDIATE */
+
LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
<= IBNAL_MSG_SIZE);
- tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
+ tx = kibnal_get_idle_tx(!(routing ||
+ type == PTL_MSG_ACK ||
type == PTL_MSG_REPLY));
if (tx == NULL) {
CERROR ("Can't send %d to %s: tx descs exhausted\n",
return (0);
case IBNAL_MSG_PUT_REQ:
- /* NB rx_complete() will send PUT_NAK when I return to it from
+ /* NB handle_rx() will send PUT_NAK when I return to it from
* here, unless I set rx_responded! */
if (mlen == 0) { /* No payload to RDMA */
int niov = 0;
struct iovec *iov = NULL;
lnet_kiov_t *kiov = NULL;
- int routing = 0;
+ int target_is_router = 0;
int rc;
/* CAVEAT EMPTOR! ni != NULL == interface pre-determined (ACK) */
/* it's not for me: will the gateway have to forward? */
if (gw_nid != target.nid &&
lnet_apini.apini_ptlcompat == 0) {
- routing = 1;
+ target_is_router = 1;
target.pid = LUSTRE_SRV_PTL_PID;
target.nid = gw_nid;
}
iov = md->md_iov.iov;
}
- rc = (ni->ni_nal->nal_send)(ni, private, msg, hdr, type, target, routing,
+ rc = (ni->ni_nal->nal_send)(ni, private, msg, hdr, type, target,
+ target_is_router, 0,
niov, iov, kiov, offset, len);
ptl_ni_decref(ni); /* lose ref from lnet_lookup */
ptl_hdr_t *hdr,
int type,
lnet_process_id_t target,
+ int target_is_router,
int routing,
unsigned int payload_niov,
struct iovec *payload_iov,