void vibnal_assert_wire_constants (void)
{
/* Wire protocol assertions generated by 'wirecheck'
- * running on Linux robert.bartonsoftware.com 2.6.5-1.358 #1 Sat May 8 09:04:50 EDT 2004 i686
- * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
+ * running on Linux robert 2.6.11-1.27_FC3 #1 Tue May 17 20:27:37 EDT 2005 i686 athlon i386 G
+ * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */
/* Constants... */
CLASSERT (IBNAL_MSG_MAGIC == 0x0be91b91);
- CLASSERT (IBNAL_MSG_VERSION == 6);
+ CLASSERT (IBNAL_MSG_VERSION == 0x11);
CLASSERT (IBNAL_MSG_CONNREQ == 0xc0);
CLASSERT (IBNAL_MSG_CONNACK == 0xc1);
CLASSERT (IBNAL_MSG_NOOP == 0xd0);
CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_hdr) == 72);
CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_payload[13]) == 85);
CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_payload[13]) == 1);
-
- /* Checks for struct kib_rdma_frag_t */
- CLASSERT ((int)sizeof(kib_rdma_frag_t) == 12);
- CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_nob) == 0);
- CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_nob) == 4);
- CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_addr_lo) == 4);
- CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_addr_lo) == 4);
- CLASSERT ((int)offsetof(kib_rdma_frag_t, rf_addr_hi) == 8);
- CLASSERT ((int)sizeof(((kib_rdma_frag_t *)0)->rf_addr_hi) == 4);
+ CLASSERT (IBNAL_USE_FMR == 1);
/* Checks for struct kib_rdma_desc_t */
- CLASSERT ((int)sizeof(kib_rdma_desc_t) == 8);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 0);
+ CLASSERT ((int)sizeof(kib_rdma_desc_t) == 16);
+ CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_addr) == 0);
+ CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_addr) == 8);
+ CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nob) == 8);
+ CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nob) == 4);
+ CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 12);
CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_key) == 4);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nfrag) == 4);
- CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nfrag) == 4);
- CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_frags[13]) == 164);
- CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_frags[13]) == 12);
/* Checks for struct kib_putreq_msg_t */
CLASSERT ((int)sizeof(kib_putreq_msg_t) == 80);
CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_cookie) == 8);
/* Checks for struct kib_putack_msg_t */
- CLASSERT ((int)sizeof(kib_putack_msg_t) == 24);
+ CLASSERT ((int)sizeof(kib_putack_msg_t) == 32);
CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_src_cookie) == 0);
CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_src_cookie) == 8);
CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_dst_cookie) == 8);
CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_dst_cookie) == 8);
CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_rd) == 16);
- CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 8);
+ CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 16);
/* Checks for struct kib_get_msg_t */
- CLASSERT ((int)sizeof(kib_get_msg_t) == 88);
+ CLASSERT ((int)sizeof(kib_get_msg_t) == 96);
CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_hdr) == 0);
CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_hdr) == 72);
CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_cookie) == 72);
CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_cookie) == 8);
CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_rd) == 80);
- CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 8);
+ CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 16);
/* Checks for struct kib_completion_msg_t */
CLASSERT ((int)sizeof(kib_completion_msg_t) == 12);
CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_status) == 4);
/* Checks for struct kib_msg_t */
- CLASSERT ((int)sizeof(kib_msg_t) == 144);
+ CLASSERT ((int)sizeof(kib_msg_t) == 152);
CLASSERT ((int)offsetof(kib_msg_t, ibm_magic) == 0);
CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_magic) == 4);
CLASSERT ((int)offsetof(kib_msg_t, ibm_version) == 4);
CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putreq) == 56);
CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putreq) == 80);
CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putack) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 24);
+ CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 32);
CLASSERT ((int)offsetof(kib_msg_t, ibm_u.get) == 56);
- CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 88);
+ CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 96);
CLASSERT ((int)offsetof(kib_msg_t, ibm_u.completion) == 56);
CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12);
}
__u32 msg_cksum;
int flip;
int msg_nob;
+#if !IBNAL_USE_FMR
int i;
int n;
-
+#endif
/* 6 bytes are enough to have received magic + version */
if (nob < 6) {
CERROR("Short message: %d\n", nob);
break;
case IBNAL_MSG_PUT_REQ:
- if (msg_nob < sizeof(msg->ibm_u.putreq)) {
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
(int)(hdr_size + sizeof(msg->ibm_u.putreq)));
return -EPROTO;
break;
case IBNAL_MSG_PUT_ACK:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0])) {
+#if IBNAL_USE_FMR
+ if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[0]));
+ (int)(hdr_size + sizeof(msg->ibm_u.putack)));
return -EPROTO;
}
if (flip) {
+ __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
+ __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
+ }
+#else
+ if (flip) {
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
}
return -EPROTO;
}
- if (flip)
+ if (flip) {
for (i = 0; i < n; i++) {
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo);
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi);
}
+ }
+#endif
break;
case IBNAL_MSG_GET_REQ:
(int)(hdr_size + sizeof(msg->ibm_u.get)));
return -EPROTO;
}
+#if IBNAL_USE_FMR
+ if (flip) {
+ __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
+ __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
+ }
+#else
if (flip) {
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo);
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi);
}
+#endif
break;
case IBNAL_MSG_PUT_NAK:
{
kib_conn_t *conn;
int i;
- __u64 vaddr = 0;
- __u64 vaddr_base;
int page_offset;
int ipage;
vv_return_t vvrc;
if (rc != 0)
goto failed;
- vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
+ struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
+ kib_rx_t *rx = &conn->ibc_rxs[i];
+ vv_mem_reg_h_t mem_h;
+ vv_r_key_t r_key;
rx->rx_conn = conn;
rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
-#if IBNAL_WHOLE_MEM
- {
- vv_mem_reg_h_t mem_h;
- vv_r_key_t r_key;
-
- /* Voltaire stack already registers the whole
- * memory, so use that API. */
- vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- rx->rx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &rx->rx_lkey,
- &r_key);
- LASSERT (vvrc == vv_return_ok);
- }
-#else
- rx->rx_vaddr = vaddr;
-#endif
- CDEBUG(D_NET, "Rx[%d] %p->%p[%x:"LPX64"]\n", i, rx,
- rx->rx_msg, KIBNAL_RX_LKEY(rx), KIBNAL_RX_VADDR(rx));
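+ /* The Voltaire stack already registers all memory, so just look
+ * up the lkey covering this receive buffer */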
+ vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+ rx->rx_msg,
+ IBNAL_MSG_SIZE,
+ &mem_h,
+ &rx->rx_lkey,
+ &r_key);
+ LASSERT (vvrc == vv_return_ok);
+
+ CDEBUG(D_NET, "Rx[%d] %p->%p[%x]\n", i, rx,
+ rx->rx_msg, rx->rx_lkey);
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-
page_offset += IBNAL_MSG_SIZE;
LASSERT (page_offset <= PAGE_SIZE);
kibnal_free_pages (kib_pages_t *p)
{
int npages = p->ibp_npages;
- vv_return_t vvrc;
int i;
- if (p->ibp_mapped) {
- vvrc = vv_mem_region_destroy(kibnal_data.kib_hca,
- p->ibp_handle);
- if (vvrc != vv_return_ok)
- CERROR ("Deregister error: %d\n", vvrc);
- }
-
for (i = 0; i < npages; i++)
if (p->ibp_pages[i] != NULL)
__free_page(p->ibp_pages[i]);
{
kib_pages_t *p;
int i;
-#if !IBNAL_WHOLE_MEM
- vv_phy_list_t vv_phys;
- vv_phy_buf_t *phys_pages;
- vv_return_t vvrc;
- vv_access_con_bit_mask_t access;
-#endif
PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
if (p == NULL) {
}
}
-#if !IBNAL_WHOLE_MEM
- PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
- if (phys_pages == NULL) {
- CERROR ("Can't allocate physarray for %d pages\n", npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
-
- vv_phys.number_of_buff = npages;
- vv_phys.phy_list = phys_pages;
-
- for (i = 0; i < npages; i++) {
- phys_pages[i].size = PAGE_SIZE;
- phys_pages[i].start = kibnal_page2phys(p->ibp_pages[i]);
- }
-
- VV_ACCESS_CONTROL_MASK_SET_ALL(access);
-
- vvrc = vv_phy_mem_region_register(kibnal_data.kib_hca,
- &vv_phys,
- 0, /* requested vaddr */
- npages * PAGE_SIZE, 0, /* offset */
- kibnal_data.kib_pd,
- access,
- &p->ibp_handle,
- &p->ibp_vaddr,
- &p->ibp_lkey,
- &p->ibp_rkey);
-
- PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
-
- if (vvrc != vv_return_ok) {
- CERROR ("Error %d mapping %d pages\n", vvrc, npages);
- kibnal_free_pages(p);
- return (-EFAULT);
- }
-
- CDEBUG(D_NET, "registered %d pages; handle: %x vaddr "LPX64" "
- "lkey %x rkey %x\n", npages, p->ibp_handle,
- p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
-
- p->ibp_mapped = 1;
-#endif
*pp = p;
return (0);
}
for (i = 0; i < IBNAL_TX_MSGS; i++) {
kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+#if IBNAL_USE_FMR
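+ /* page array passed to vv_map_fmr() when this tx maps an RDMA payload */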
+ PORTAL_ALLOC(tx->tx_pages, PTL_MD_MAX_IOV *
+ sizeof(*tx->tx_pages));
+ if (tx->tx_pages == NULL)
+ return -ENOMEM;
+#else
PORTAL_ALLOC(tx->tx_wrq,
(1 + IBNAL_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_wrq));
rd_frags[IBNAL_MAX_RDMA_FRAGS]));
if (tx->tx_rd == NULL)
return -ENOMEM;
+#endif
}
return 0;
for (i = 0; i < IBNAL_TX_MSGS; i++) {
kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
+#if IBNAL_USE_FMR
+ if (tx->tx_pages != NULL)
+ PORTAL_FREE(tx->tx_pages, PTL_MD_MAX_IOV *
+ sizeof(*tx->tx_pages));
+#else
if (tx->tx_wrq != NULL)
PORTAL_FREE(tx->tx_wrq,
(1 + IBNAL_MAX_RDMA_FRAGS) *
PORTAL_FREE(tx->tx_rd,
offsetof(kib_rdma_desc_t,
rd_frags[IBNAL_MAX_RDMA_FRAGS]));
+#endif
}
PORTAL_FREE(kibnal_data.kib_tx_descs,
IBNAL_TX_MSGS * sizeof(kib_tx_t));
}
+#if IBNAL_USE_FMR
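+ /* Free the FMRs of the first 'n' tx descriptors: all of them on
+ * shutdown, or just those allocated so far when setup fails part way */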
+void
+kibnal_free_fmrs (int n)
+{
+ int i;
+ vv_return_t vvrc;
+ kib_tx_t *tx;
+
+ for (i = 0; i < n; i++) {
+ tx = &kibnal_data.kib_tx_descs[i];
+
+ vvrc = vv_free_fmr(kibnal_data.kib_hca,
+ tx->tx_md.md_fmrhandle);
+ if (vvrc != vv_return_ok)
+ CWARN("vv_free_fmr[%d]: %d\n", i, vvrc);
+ }
+}
+#endif
+
int
kibnal_setup_tx_descs (void)
{
- int ipage = 0;
- int page_offset = 0;
- __u64 vaddr;
- __u64 vaddr_base;
- struct page *page;
- kib_tx_t *tx;
- int i;
- int rc;
+ int ipage = 0;
+ int page_offset = 0;
+ struct page *page;
+ kib_tx_t *tx;
+ vv_mem_reg_h_t mem_h;
+ vv_r_key_t rkey;
+ vv_return_t vvrc;
+ int i;
+ int rc;
+#if IBNAL_USE_FMR
+ vv_fmr_t fmr_props;
+#endif
/* pre-mapped messages are not bigger than 1 page */
CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
if (rc != 0)
return (rc);
- /* ignored for the whole_mem case */
- vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
for (i = 0; i < IBNAL_TX_MSGS; i++) {
page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
tx = &kibnal_data.kib_tx_descs[i];
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-#if IBNAL_WHOLE_MEM
- {
- vv_mem_reg_h_t mem_h;
- vv_r_key_t rkey;
- vv_return_t vvrc;
-
- /* Voltaire stack already registers the whole
- * memory, so use that API. */
- vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
- tx->tx_msg,
- IBNAL_MSG_SIZE,
- &mem_h,
- &tx->tx_lkey,
- &rkey);
- LASSERT (vvrc == vv_return_ok);
+#if IBNAL_USE_FMR
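+ /* Allocate one FMR per tx, sized for the largest RDMA
+ * (PTL_MD_MAX_IOV pages) and reusable for IBNAL_FMR_NMAPS
+ * mappings before it must be unmapped */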
+ memset(&fmr_props, 0, sizeof(fmr_props));
+ fmr_props.pd_hndl = kibnal_data.kib_pd;
+ fmr_props.acl = (vv_acc_r_mem_read |
+ vv_acc_r_mem_write |
+ vv_acc_l_mem_write);
+ fmr_props.max_pages = PTL_MD_MAX_IOV;
+ fmr_props.log2_page_sz = PAGE_SHIFT;
+ fmr_props.max_outstanding_maps = IBNAL_FMR_NMAPS;
+
+ vvrc = vv_alloc_fmr(kibnal_data.kib_hca,
+ &fmr_props,
+ &tx->tx_md.md_fmrhandle);
+ if (vvrc != vv_return_ok) {
+ CERROR("Can't allocate fmr %d: %d\n", i, vvrc);
+
+ kibnal_free_fmrs(i);
+ kibnal_free_pages (kibnal_data.kib_tx_pages);
+ return -ENOMEM;
}
-#else
- tx->tx_vaddr = vaddr;
+
+ tx->tx_md.md_fmrcount = IBNAL_FMR_NMAPS;
+ tx->tx_md.md_active = 0;
#endif
+ tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
+ page_offset);
+
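+ /* The Voltaire stack already registers all memory, so just look
+ * up the lkey covering this tx buffer */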
+ vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
+ tx->tx_msg,
+ IBNAL_MSG_SIZE,
+ &mem_h,
+ &tx->tx_lkey,
+ &rkey);
+ LASSERT (vvrc == vv_return_ok);
+
tx->tx_isnblk = (i >= IBNAL_NTX);
- tx->tx_mapped = KIB_TX_UNMAPPED;
- CDEBUG(D_NET, "Tx[%d] %p->%p[%x:"LPX64"]\n", i, tx,
- tx->tx_msg, KIBNAL_TX_LKEY(tx), KIBNAL_TX_VADDR(tx));
+ CDEBUG(D_NET, "Tx[%d] %p->%p[%x]\n", i, tx,
+ tx->tx_msg, tx->tx_lkey);
if (tx->tx_isnblk)
list_add (&tx->tx_list,
list_add (&tx->tx_list,
&kibnal_data.kib_idle_txs);
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
-
page_offset += IBNAL_MSG_SIZE;
LASSERT (page_offset <= PAGE_SIZE);
case IBNAL_INIT_TXD:
kibnal_free_pages (kibnal_data.kib_tx_pages);
+#if IBNAL_USE_FMR
+ kibnal_free_fmrs(IBNAL_TX_MSGS);
+#endif
/* fall through */
case IBNAL_INIT_PD:
-#if !IBNAL_WHOLE_MEM
+#if 0
+ /* Only deallocate a PD if we actually allocated one */
vvrc = vv_pd_deallocate(kibnal_data.kib_hca,
kibnal_data.kib_pd);
if (vvrc != vv_return_ok)
/*****************************************************/
-#if !IBNAL_WHOLE_MEM
- vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
-#else
+#if 1
+ /* We use a pre-allocated PD */
vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd);
+#else
+ vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd);
#endif
- if (vvrc != 0) {
- CERROR ("Can't create PD: %d\n", vvrc);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Can't init PD: %d\n", vvrc);
goto failed;
}
<= cm_REQ_priv_data_len);
CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
<= cm_REP_priv_data_len);
+ CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE);
+#if !IBNAL_USE_FMR
CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
<= IBNAL_MSG_SIZE);
CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS])
<= IBNAL_MSG_SIZE);
-
+#endif
/* the following must be sizeof(int) for proc_dointvec() */
CLASSERT (sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
#define IBNAL_CONCURRENT_PEERS 1000 /* # nodes all talking at once to me */
-#define IBNAL_RDMA_BASE 0x0eeb0000
#define IBNAL_CKSUM 0
-#define IBNAL_WHOLE_MEM 1
-#if !IBNAL_WHOLE_MEM
-# error "incompatible with voltaire adaptor-tavor (REGISTER_RAM_IN_ONE_PHY_MR)"
-#endif
/* default vals for runtime tunables */
#define IBNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */
#define IBNAL_TX_MSG_BYTES (IBNAL_TX_MSGS * IBNAL_MSG_SIZE)
#define IBNAL_TX_MSG_PAGES ((IBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-#if IBNAL_WHOLE_MEM
-# define IBNAL_MAX_RDMA_FRAGS PTL_MD_MAX_IOV
-#else
+#define IBNAL_USE_FMR 1
+
+#if IBNAL_USE_FMR
# define IBNAL_MAX_RDMA_FRAGS 1
+# define IBNAL_FMR_NMAPS 1000
+#else
+# define IBNAL_MAX_RDMA_FRAGS PTL_MD_MAX_IOV
#endif
/* RX messages (per connection) */
typedef struct
{
int ibp_npages; /* # pages */
- int ibp_mapped; /* mapped? */
- __u64 ibp_vaddr; /* mapped region vaddr */
- __u32 ibp_lkey; /* mapped region lkey */
- __u32 ibp_rkey; /* mapped region rkey */
- vv_mem_reg_h_t ibp_handle; /* mapped region handle */
struct page *ibp_pages[0];
} kib_pages_t;
+#if IBNAL_USE_FMR
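+ /* FMR mapping state for a tx descriptor */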
typedef struct
{
- vv_mem_reg_h_t md_handle;
- __u32 md_lkey;
- __u32 md_rkey;
- __u64 md_addr;
+ vv_fmr_h_t md_fmrhandle; /* FMR handle */
+ int md_fmrcount; /* # mappings left */
+ int md_active; /* mapping in use? */
+ __u32 md_lkey; /* local key */
+ __u32 md_rkey; /* remote key */
+ __u64 md_addr; /* IO VM address */
} kib_md_t;
+#endif
typedef struct
{
struct kib_conn *rx_conn; /* owning conn */
int rx_responded; /* responded to peer? */
int rx_posted; /* posted? */
-#if IBNAL_WHOLE_MEM
vv_l_key_t rx_lkey; /* local key */
-#else
- __u64 rx_vaddr; /* pre-mapped buffer (hca vaddr) */
-#endif
kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */
vv_wr_t rx_wrq; /* receive work item */
vv_scatgat_t rx_gl; /* and its memory */
} kib_rx_t;
-#if IBNAL_WHOLE_MEM
-# define KIBNAL_RX_VADDR(rx) ((__u64)((unsigned long)((rx)->rx_msg)))
-# define KIBNAL_RX_LKEY(rx) ((rx)->rx_lkey)
-#else
-# define KIBNAL_RX_VADDR(rx) ((rx)->rx_vaddr)
-# define KIBNAL_RX_LKEY(rx) ((rx)->rx_conn->ibc_rx_pages->ibp_lkey)
-#endif
-
typedef struct kib_tx /* transmit message */
{
struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
int tx_isnblk; /* I'm reserved for non-blocking sends */
struct kib_conn *tx_conn; /* owning conn */
- int tx_mapped; /* mapped for RDMA? */
int tx_sending; /* # tx callbacks outstanding */
int tx_queued; /* queued for sending */
int tx_waiting; /* waiting for peer */
unsigned long tx_deadline; /* completion deadline */
__u64 tx_cookie; /* completion cookie */
lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */
-#if IBNAL_WHOLE_MEM
vv_l_key_t tx_lkey; /* local key for message buffer */
-#else
- kib_md_t tx_md; /* RDMA mapping (active/passive) */
- __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */
-#endif
kib_msg_t *tx_msg; /* message buffer (host vaddr) */
int tx_nwrq; /* # send work items */
+#if IBNAL_USE_FMR
+ vv_wr_t tx_wrq[2]; /* send work items... */
+ vv_scatgat_t tx_gl[2]; /* ...and their memory */
+ kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */
+ kib_md_t tx_md; /* FMR mapping descriptor */
+ __u64 *tx_pages; /* page array for mapping */
+#else
vv_wr_t *tx_wrq; /* send work items... */
vv_scatgat_t *tx_gl; /* ...and their memory */
kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */
-} kib_tx_t;
-
-#if IBNAL_WHOLE_MEM
-# define KIBNAL_TX_VADDR(tx) ((__u64)((unsigned long)((tx)->tx_msg)))
-# define KIBNAL_TX_LKEY(tx) ((tx)->tx_lkey)
-#else
-# define KIBNAL_TX_VADDR(tx) ((tx)->tx_vaddr)
-# define KIBNAL_TX_LKEY(tx) (kibnal_data.kib_tx_pages->ibp_lkey)
#endif
+} kib_tx_t;
#define KIB_TX_UNMAPPED 0
#define KIB_TX_MAPPED 1
mb();
}
+#if IBNAL_USE_FMR
+
+static inline int
+kibnal_rd_size (kib_rdma_desc_t *rd)
+{
+ return rd->rd_nob;
+}
+
+#else
static inline __u64
kibnal_rf_addr (kib_rdma_frag_t *rf)
{
return size;
}
+#endif
LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */
LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */
-#if !IBNAL_WHOLE_MEM
- switch (tx->tx_mapped) {
- default:
- LBUG();
-
- case KIB_TX_UNMAPPED:
- break;
-
- case KIB_TX_MAPPED: {
+#if IBNAL_USE_FMR
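+ /* The FMR is only flushed with vv_unmap_fmr() once its map quota
+ * is exhausted; until then completed mappings are just marked inactive */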
+ if (tx->tx_md.md_fmrcount == 0) {
vv_return_t vvrc;
- vvrc = vv_mem_region_destroy(kibnal_data.kib_hca,
- tx->tx_md.md_handle);
+ /* mapping must be active (it dropped fmrcount to 0) */
+ LASSERT (tx->tx_md.md_active);
+
+ vvrc = vv_unmap_fmr(kibnal_data.kib_hca,
+ 1, &tx->tx_md.md_fmrhandle);
LASSERT (vvrc == vv_return_ok);
- tx->tx_mapped = KIB_TX_UNMAPPED;
- break;
- }
+
+ tx->tx_md.md_fmrcount = IBNAL_FMR_NMAPS;
}
+ tx->tx_md.md_active = 0;
#endif
for (i = 0; i < 2; i++) {
/* tx may have up to 2 libmsgs to finalise */
spin_lock(&kibnal_data.kib_tx_lock);
if (tx->tx_isnblk) {
- list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_nblk_txs);
+ list_add (&tx->tx_list, &kibnal_data.kib_idle_nblk_txs);
} else {
- list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs);
+ list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
wake_up (&kibnal_data.kib_idle_tx_waitq);
}
* but we've got a lock right now and we're unlikely to
* wrap... */
tx->tx_cookie = kibnal_data.kib_next_tx_cookie++;
-#if IBNAL_WHOLE_MEM
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-#endif
+
LASSERT (tx->tx_nwrq == 0);
LASSERT (!tx->tx_queued);
LASSERT (tx->tx_sending == 0);
{
kib_conn_t *conn = rx->rx_conn;
int rc = 0;
+ __u64 addr = (__u64)((unsigned long)((rx)->rx_msg));
vv_return_t vvrc;
LASSERT (!in_interrupt());
rx->rx_gl = (vv_scatgat_t) {
- .v_address = KIBNAL_ADDR2SG(KIBNAL_RX_VADDR(rx)),
- .l_key = KIBNAL_RX_LKEY(rx),
+ .v_address = KIBNAL_ADDR2SG(addr),
+ .l_key = rx->rx_lkey,
.length = IBNAL_MSG_SIZE,
};
kibnal_conn_decref(conn);
}
-#if IBNAL_WHOLE_MEM
+struct page *
+kibnal_kvaddr_to_page (unsigned long vaddr)
+{
+ struct page *page;
+
+ if (vaddr >= VMALLOC_START &&
+ vaddr < VMALLOC_END) {
+ page = vmalloc_to_page ((void *)vaddr);
+ LASSERT (page != NULL);
+ return page;
+ }
+#if CONFIG_HIGHMEM
+ if (vaddr >= PKMAP_BASE &&
+ vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
+ /* No highmem pages only used for bulk (kiov) I/O */
+ CERROR("find page for address in highmem\n");
+ LBUG();
+ }
+#endif
+ page = virt_to_page (vaddr);
+ LASSERT (page != NULL);
+ return page;
+}
+
+#if !IBNAL_USE_FMR
int
kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
unsigned long page_offset, unsigned long len)
return -EMSGSIZE;
}
- /* Try to create an address that adapter-tavor will munge into a valid
+ /* Try to create an address that adaptor-tavor will munge into a valid
* network address, given how it maps all phys mem into 1 region */
addr = kibnal_page2phys(page) + page_offset + PAGE_OFFSET;
return 0;
}
-struct page *
-kibnal_kvaddr_to_page (unsigned long vaddr)
-{
- struct page *page;
-
- if (vaddr >= VMALLOC_START &&
- vaddr < VMALLOC_END) {
- page = vmalloc_to_page ((void *)vaddr);
- LASSERT (page != NULL);
- return page;
- }
-#if CONFIG_HIGHMEM
- if (vaddr >= PKMAP_BASE &&
- vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
- /* No highmem pages only used for bulk (kiov) I/O */
- CERROR("find page for address in highmem\n");
- LBUG();
- }
-#endif
- page = virt_to_page (vaddr);
- LASSERT (page != NULL);
- return page;
-}
-
int
kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd,
vv_access_con_bit_mask_t access,
}
#else
int
+kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
+ int npages, unsigned long page_offset, int nob)
+{
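+ /* Map the physical pages collected in tx->tx_pages through this tx's
+ * FMR and load the single RDMA descriptor with the resulting I/O VM
+ * address, length and key */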
+ vv_return_t vvrc;
+ vv_fmr_map_t map_props;
+
+ LASSERT ((rd != tx->tx_rd) == !active);
+ LASSERT (!tx->tx_md.md_active);
+ LASSERT (tx->tx_md.md_fmrcount > 0);
+ LASSERT (page_offset < PAGE_SIZE);
+ LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
+ LASSERT (npages <= PTL_MD_MAX_IOV);
+
+ memset(&map_props, 0, sizeof(map_props));
+
+ map_props.start = (void *)page_offset;
+ map_props.size = nob;
+ map_props.page_array_len = npages;
+ map_props.page_array = tx->tx_pages;
+
+ vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle,
+ &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey);
+ if (vvrc != vv_return_ok) {
+ CERROR ("Can't map vaddr %p for %d in %d pages: %d\n",
+ map_props.start, nob, npages, vvrc);
+ return -EFAULT;
+ }
+
+ tx->tx_md.md_addr = (unsigned long)map_props.start;
+ tx->tx_md.md_active = 1;
+ tx->tx_md.md_fmrcount--;
+
+ rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
+ rd->rd_nob = nob;
+ rd->rd_addr = tx->tx_md.md_addr;
+
+ /* Compensate for adaptor-tavor's munging of gatherlist addresses */
+ if (active)
+ rd->rd_addr += PAGE_OFFSET;
+
+ return 0;
+}
+
+int
kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd,
vv_access_con_bit_mask_t access,
int niov, struct iovec *iov, int offset, int nob)
{
/* active if I'm sending */
- int active = ((access & vv_acc_r_mem_write) == 0);
- void *vaddr;
- vv_return_t vvrc;
-
+ int active = ((access & vv_acc_r_mem_write) == 0);
+ int resid;
+ int fragnob;
+ struct page *page;
+ int npages;
+ unsigned long page_offset;
+ unsigned long vaddr;
+
LASSERT (nob > 0);
LASSERT (niov > 0);
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
- LASSERT ((rd != tx->tx_rd) == !active);
while (offset >= iov->iov_len) {
offset -= iov->iov_len;
return (-EMSGSIZE);
}
- vaddr = (void *)(((unsigned long)iov->iov_base) + offset);
- tx->tx_md.md_addr = (__u64)((unsigned long)vaddr);
+ vaddr = ((unsigned long)iov->iov_base) + offset;
+
+ page_offset = vaddr & (PAGE_SIZE - 1);
+ resid = nob;
+ npages = 0;
- vvrc = vv_mem_region_register(kibnal_data.kib_hca, vaddr, nob,
- kibnal_data.kib_pd, access,
- &tx->tx_md.md_handle,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
- if (vvrc != vv_return_ok) {
- CERROR ("Can't map vaddr %p: %d\n", vaddr, vvrc);
- return -EFAULT;
- }
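+ /* Walk the virtually contiguous region page by page, collecting the
+ * physical address of each page for the FMR map */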
+ do {
+ LASSERT (npages < PTL_MD_MAX_IOV);
- tx->tx_mapped = KIB_TX_MAPPED;
+ page = kibnal_kvaddr_to_page(vaddr);
+ if (page == NULL) {
+ CERROR("Can't find page for %lu\n", vaddr);
+ return -EFAULT;
+ }
- rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
- rd->rd_nfrag = 1;
- kibnal_rf_set(&rd->rd_frags[0], tx->tx_md.md_addr, nob);
-
- return (0);
+ tx->tx_pages[npages++] = kibnal_page2phys(page);
+
+ fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
+ vaddr += fragnob;
+ resid -= fragnob;
+
+ } while (resid > 0);
+
+ return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}
int
{
/* active if I'm sending */
int active = ((access & vv_acc_r_mem_write) == 0);
- vv_return_t vvrc;
- vv_phy_list_t phys_pages;
- vv_phy_buf_t *phys;
- int page_offset;
- int nphys;
int resid;
- int phys_size;
- int rc;
-
+ int npages;
+ unsigned long page_offset;
+
CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
LASSERT (nob > 0);
LASSERT (nkiov > 0);
- LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
+ LASSERT (nkiov <= PTL_MD_MAX_IOV);
+ LASSERT (!tx->tx_md.md_active);
LASSERT ((rd != tx->tx_rd) == !active);
while (offset >= kiov->kiov_len) {
LASSERT (nkiov > 0);
}
- phys_size = nkiov * sizeof (*phys);
- PORTAL_ALLOC(phys, phys_size);
- if (phys == NULL) {
- CERROR ("Can't allocate tmp phys\n");
- return (-ENOMEM);
- }
-
page_offset = kiov->kiov_offset + offset;
+
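+ /* NB resid includes the offset consumed within the first kiov; each
+ * pass subtracts the whole kiov_len, so the loop ends once 'nob'
+ * bytes of payload have been covered */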
+ resid = offset + nob;
+ npages = 0;
- phys[0].start = kibnal_page2phys(kiov->kiov_page);
- phys[0].size = PAGE_SIZE;
-
- nphys = 1;
- resid = nob - (kiov->kiov_len - offset);
-
- while (resid > 0) {
- kiov++;
- nkiov--;
+ do {
+ LASSERT (npages < PTL_MD_MAX_IOV);
LASSERT (nkiov > 0);
- if (kiov->kiov_offset != 0 ||
- ((resid > PAGE_SIZE) &&
- kiov->kiov_len < PAGE_SIZE)) {
- int i;
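+ /* Fragments after the first must start on a page boundary, and any
+ * fragment with data still to follow must end on one */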
+ if ((npages > 0 && kiov->kiov_offset != 0) ||
+ (resid > kiov->kiov_len &&
+ (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
/* Can't have gaps */
CERROR ("Can't make payload contiguous in I/O VM:"
- "page %d, offset %d, len %d \n", nphys,
- kiov->kiov_offset, kiov->kiov_len);
-
- for (i = -nphys; i < nkiov; i++)
- CERROR("kiov[%d] %p +%d for %d\n",
- i, kiov[i].kiov_page,
- kiov[i].kiov_offset,
- kiov[i].kiov_len);
+ "page %d, offset %d, len %d \n",
+ npages, kiov->kiov_offset, kiov->kiov_len);
- rc = -EINVAL;
- goto out;
+ return -EINVAL;
}
- LASSERT (nphys * sizeof (*phys) < phys_size);
- phys[nphys].start = kibnal_page2phys(kiov->kiov_page);
- phys[nphys].size = PAGE_SIZE;
-
- nphys++;
- resid -= PAGE_SIZE;
- }
-
-#if 0
- CWARN ("nphys %d, nob %d, page_offset %d\n", nphys, nob, page_offset);
- for (i = 0; i < nphys; i++)
- CWARN (" [%d] "LPX64"\n", i, phys[i]);
-#endif
-
- vvrc = vv_phy_mem_region_register(kibnal_data.kib_hca,
- &phys_pages,
- IBNAL_RDMA_BASE,
- nphys,
- page_offset,
- kibnal_data.kib_pd,
- access,
- &tx->tx_md.md_handle,
- &tx->tx_md.md_addr,
- &tx->tx_md.md_lkey,
- &tx->tx_md.md_rkey);
-
- if (vvrc != vv_return_ok) {
- CERROR ("Can't map phys: %d\n", vvrc);
- rc = -EFAULT;
- goto out;
- }
-
- CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: "
- "lkey %x, rkey %x, addr "LPX64"\n",
- nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey,
- tx->tx_md.md_addr);
-
- tx->tx_mapped = KIB_TX_MAPPED;
- rc = 0;
+ tx->tx_pages[npages++] = kibnal_page2phys(kiov->kiov_page);
+ resid -= kiov->kiov_len;
+ kiov++;
+ nkiov--;
+ } while (resid > 0);
- rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey;
- rd->rd_nfrag = 1;
- kibnal_rf_set(&rd->rd_frags[0], tx->tx_md.md_addr, nob);
-
- out:
- PORTAL_FREE(phys, phys_size);
- return (rc);
+ return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
}
#endif
* QP!! */
LASSERT (tx->tx_nwrq > 0);
-
+#if 0
+ if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write)
+ CDEBUG(D_WARNING, "WORK[0]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
+ tx->tx_wrq[0].scatgat_list->v_address,
+ tx->tx_wrq[0].scatgat_list->length,
+ tx->tx_wrq[0].scatgat_list->l_key,
+ tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr,
+ tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key);
+ else
+ CDEBUG(D_WARNING, "WORK[0]: %s gl %p for %d k %x\n",
+ tx->tx_wrq[0].wr_type == vv_wr_send ? "SEND" : "????",
+ tx->tx_wrq[0].scatgat_list->v_address,
+ tx->tx_wrq[0].scatgat_list->length,
+ tx->tx_wrq[0].scatgat_list->l_key);
+
+ if (tx->tx_nwrq > 1) {
+ if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write)
+ CDEBUG(D_WARNING, "WORK[1]: RDMA gl %p for %d k %x -> "LPX64" k %x\n",
+ tx->tx_wrq[1].scatgat_list->v_address,
+ tx->tx_wrq[1].scatgat_list->length,
+ tx->tx_wrq[1].scatgat_list->l_key,
+ tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr,
+ tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key);
+ else
+ CDEBUG(D_WARNING, "WORK[1]: %s gl %p for %d k %x\n",
+ tx->tx_wrq[1].wr_type == vv_wr_send ? "SEND" : "????",
+ tx->tx_wrq[1].scatgat_list->v_address,
+ tx->tx_wrq[1].scatgat_list->length,
+ tx->tx_wrq[1].scatgat_list->l_key);
+ }
+#endif
rc = -ECONNABORTED;
vvrc = vv_return_ok;
if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
vv_scatgat_t *gl = &tx->tx_gl[tx->tx_nwrq];
vv_wr_t *wrq = &tx->tx_wrq[tx->tx_nwrq];
int nob = offsetof (kib_msg_t, ibm_u) + body_nob;
+ __u64 addr = (__u64)((unsigned long)((tx)->tx_msg));
LASSERT (tx->tx_nwrq >= 0 &&
tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS));
kibnal_init_msg(tx->tx_msg, type, body_nob);
*gl = (vv_scatgat_t) {
- .v_address = KIBNAL_ADDR2SG(KIBNAL_TX_VADDR(tx)),
- .l_key = KIBNAL_TX_LKEY(tx),
+ .v_address = KIBNAL_ADDR2SG(addr),
+ .l_key = tx->tx_lkey,
.length = nob,
};
kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
kib_rdma_desc_t *dstrd, __u64 dstcookie)
{
- /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
- int resid = nob;
kib_msg_t *ibmsg = tx->tx_msg;
kib_rdma_desc_t *srcrd = tx->tx_rd;
+ vv_scatgat_t *gl;
+ vv_wr_t *wrq;
+ int rc;
+
+#if IBNAL_USE_FMR
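+ /* With FMR the source and destination are each a single virtually
+ * contiguous region, so the whole transfer is one RDMA write work request */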
+ LASSERT (tx->tx_nwrq == 0);
+
+ gl = &tx->tx_gl[0];
+ gl->length = nob;
+ gl->v_address = KIBNAL_ADDR2SG(srcrd->rd_addr);
+ gl->l_key = srcrd->rd_key;
+
+ wrq = &tx->tx_wrq[0];
+
+ wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
+ wrq->completion_notification = 0;
+ wrq->scatgat_list = gl;
+ wrq->num_of_data_segments = 1;
+ wrq->wr_type = vv_wr_rdma_write;
+ wrq->type.send.solicited_event = 0;
+ wrq->type.send.send_qp_type.rc_type.fance_indicator = 0;
+ wrq->type.send.send_qp_type.rc_type.r_addr = dstrd->rd_addr;
+ wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key;
+
+ tx->tx_nwrq = 1;
+ rc = nob;
+#else
+ /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
+ int resid = nob;
kib_rdma_frag_t *srcfrag;
int srcidx;
kib_rdma_frag_t *dstfrag;
int dstidx;
- vv_scatgat_t *gl;
- vv_wr_t *wrq;
int wrknob;
- int rc;
/* Called by scheduler */
LASSERT (!in_interrupt());
if (rc < 0) /* no RDMA if completing with failure */
tx->tx_nwrq = 0;
+#endif
ibmsg->ibm_u.completion.ibcm_status = rc;
ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
kib_tx_t *tx;
int nob;
int rc;
- int n;
/* NB 'private' is different depending on what we're sending.... */
return PTL_FAIL;
}
- n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag;
- nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]);
+#if IBNAL_USE_FMR
+ nob = sizeof(kib_get_msg_t);
+#else
+ {
+ int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag;
+
+ nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]);
+ }
+#endif
kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob);
tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, nid, libmsg);
kib_msg_t *txmsg;
int nob;
int rc;
- int n;
LASSERT (mlen <= rlen);
LASSERT (mlen >= 0);
txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
+#if IBNAL_USE_FMR
+ nob = sizeof(kib_putack_msg_t);
+#else
+ {
+ int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag;
- n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag;
- nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
+ nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
+ }
+#endif
kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob);
tx->tx_libmsg[0] = libmsg; /* finalise libmsg on completion */
* already dealing with it (either to set it up or tear it down).
* Caller holds kib_global_lock exclusively in irq context */
kib_peer_t *peer = conn->ibc_peer;
- struct list_head *tmp;
LASSERT (error != 0 || conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
/* CAVEAT EMPTOR: tasklet context */
kib_conn_t *conn = (kib_conn_t *)arg;
kib_connvars_t *cv = conn->ibc_connvars;
- unsigned long flags;
LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT);
cv->cv_conndata = *cd;
/* CAVEAT EMPTOR: tasklet context */
kib_conn_t *conn = (kib_conn_t *)arg;
kib_peer_t *peer = conn->ibc_peer;
- unsigned long flags;
if (arprc != ibat_stat_ok)
CERROR("Arp "LPX64"@%u.%u.%u.%u failed: %d\n",
char ibim_payload[0]; /* piggy-backed payload */
} WIRE_ATTR kib_immediate_msg_t;
+#ifndef IBNAL_USE_FMR
+# error "IBNAL_USE_FMR must be defined 1 or 0 before including this file"
+#endif
+
+#if IBNAL_USE_FMR
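+ /* With FMR an RDMA buffer is mapped to a single virtually contiguous
+ * region, so one (address, length, key) triple describes it on the wire */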
+typedef struct
+{
+ __u64 rd_addr; /* IO VMA address */
+ __u32 rd_nob; /* # of bytes */
+ __u32 rd_key; /* remote key */
+} WIRE_ATTR kib_rdma_desc_t;
+#else
/* YEUCH! the __u64 address is split into 2 __u32 fields to ensure proper
* packing. Otherwise we can't fit enough frags into an IBNAL message (<=
* smallest page size on any arch). */
__u32 rd_nfrag; /* # fragments */
kib_rdma_frag_t rd_frags[0]; /* buffer frags */
} WIRE_ATTR kib_rdma_desc_t;
-
-/* CAVEAT EMPTOR! We don't actually put ibprm_rd on the wire; it's just there
- * to remember the source buffers while we wait for the PUT_ACK */
+#endif
typedef struct
{
} WIRE_ATTR kib_msg_t;
#define IBNAL_MSG_MAGIC 0x0be91b91 /* unique magic */
-#define IBNAL_MSG_VERSION 6 /* current protocol version */
+
+#if IBNAL_USE_FMR /* ensure version changes on FMR */
+#define IBNAL_MSG_VERSION 0x11
+#else
+#define IBNAL_MSG_VERSION 0x10
+#endif
#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */
#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */
#include <portals/api-support.h>
#include <portals/lib-types.h>
+#define IBNAL_USE_FMR 1
#include "vibnal_wire.h"
#ifndef HAVE_STRNLEN
CHECK_MEMBER (kib_immediate_msg_t, ibim_hdr);
CHECK_MEMBER (kib_immediate_msg_t, ibim_payload[13]);
+ CHECK_DEFINE (IBNAL_USE_FMR);
+#if IBNAL_USE_FMR
+ CHECK_STRUCT (kib_rdma_desc_t);
+ CHECK_MEMBER (kib_rdma_desc_t, rd_addr);
+ CHECK_MEMBER (kib_rdma_desc_t, rd_nob);
+ CHECK_MEMBER (kib_rdma_desc_t, rd_key);
+#else
CHECK_STRUCT (kib_rdma_frag_t);
CHECK_MEMBER (kib_rdma_frag_t, rf_nob);
CHECK_MEMBER (kib_rdma_frag_t, rf_addr_lo);
CHECK_MEMBER (kib_rdma_desc_t, rd_key);
CHECK_MEMBER (kib_rdma_desc_t, rd_nfrag);
CHECK_MEMBER (kib_rdma_desc_t, rd_frags[13]);
-
+#endif
CHECK_STRUCT (kib_putreq_msg_t);
CHECK_MEMBER (kib_putreq_msg_t, ibprm_hdr);
CHECK_MEMBER (kib_putreq_msg_t, ibprm_cookie);