From f2600449ced3a2f78c4b3e650ff29bb88d1e7e45 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin
Date: Tue, 26 Jan 2016 16:49:17 -0500
Subject: [PATCH] LU-5783 o2iblnd: Add Fast Reg memory registration support

FMR is deprecated and is not supported by the mlx5 driver. This patch
adds memory management extensions (FastReg) support as a fallback for
FMR.

Change-Id: I58f01aac3cbef0edc0934d75bcf13888f84beb0d
Signed-off-by: Dmitry Eremin
Reviewed-on: http://review.whamcloud.com/17606
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: James Simmons
Tested-by: James Simmons
Reviewed-by: Doug Oucharek
Reviewed-by: Oleg Drokin
---
 lnet/autoconf/lustre-lnet.m4    |  28 ++++
 lnet/klnds/o2iblnd/o2iblnd.c    | 331 +++++++++++++++++++++++++++++++++-------
 lnet/klnds/o2iblnd/o2iblnd.h    |  54 +++++--
 lnet/klnds/o2iblnd/o2iblnd_cb.c |  58 ++++---
 4 files changed, 381 insertions(+), 90 deletions(-)

diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4
index 842873d1..f049677 100644
--- a/lnet/autoconf/lustre-lnet.m4
+++ b/lnet/autoconf/lustre-lnet.m4
@@ -528,6 +528,33 @@ EXTRA_KCFLAGS="$tmp_flags"
 ]) # LN_CONFIG_SK_DATA_READY
 
 #
+# LN_CONFIG_IB_INC_RKEY
+#
+AC_DEFUN([LN_CONFIG_IB_INC_RKEY], [
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_CHECK_COMPILE([if function 'ib_inc_rkey' is defined],
+ib_inc_rkey, [
+	#ifdef HAVE_COMPAT_RDMA
+	#undef PACKAGE_NAME
+	#undef PACKAGE_TARNAME
+	#undef PACKAGE_VERSION
+	#undef PACKAGE_STRING
+	#undef PACKAGE_BUGREPORT
+	#undef PACKAGE_URL
+	#include <linux/compat-2.6.h>
+	#endif
+	#include <rdma/ib_verbs.h>
+],[
+	(void)ib_inc_rkey(0);
+],[
+	AC_DEFINE(HAVE_IB_INC_RKEY, 1,
+		[function ib_inc_rkey exist])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+]) # LN_CONFIG_IB_INC_RKEY
+
+#
 # LN_PROG_LINUX
 #
 # LNet linux kernel checks
@@ -540,6 +567,7 @@ LN_CONFIG_AFFINITY
 LN_CONFIG_BACKOFF
 LN_CONFIG_O2IB
 LN_CONFIG_GNILND
+LN_CONFIG_IB_INC_RKEY
 # 2.6.35
 LN_CONFIG_SK_SLEEP
 # 2.6.36
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index d9761ee..957dc0e 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1390,28 +1390,44 @@ kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
 }
 
 static void
-kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
+kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
 {
-	LASSERT (pool->fpo_map_count == 0);
+	LASSERT(fpo->fpo_map_count == 0);
 
-	if (pool->fpo_fmr_pool != NULL)
-		ib_destroy_fmr_pool(pool->fpo_fmr_pool);
+	if (fpo->fpo_is_fmr) {
+		if (fpo->fmr.fpo_fmr_pool)
+			ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
+	} else {
+		struct kib_fast_reg_descriptor *frd, *tmp;
+		int i = 0;
+
+		list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+					 frd_list) {
+			list_del(&frd->frd_list);
+			ib_free_fast_reg_page_list(frd->frd_frpl);
+			ib_dereg_mr(frd->frd_mr);
+			LIBCFS_FREE(frd, sizeof(*frd));
+			i++;
+		}
+		if (i < fpo->fast_reg.fpo_pool_size)
+			CERROR("FastReg pool still has %d regions registered\n",
+				fpo->fast_reg.fpo_pool_size - i);
+	}
 
-	if (pool->fpo_hdev != NULL)
-		kiblnd_hdev_decref(pool->fpo_hdev);
+	if (fpo->fpo_hdev)
+		kiblnd_hdev_decref(fpo->fpo_hdev);
 
-	LIBCFS_FREE(pool, sizeof(kib_fmr_pool_t));
+	LIBCFS_FREE(fpo, sizeof(*fpo));
 }
 
 static void
 kiblnd_destroy_fmr_pool_list(struct list_head *head)
 {
-	kib_fmr_pool_t *pool;
+	kib_fmr_pool_t *fpo, *tmp;
 
-	while (!list_empty(head)) {
-		pool = list_entry(head->next, kib_fmr_pool_t, fpo_list);
-		list_del(&pool->fpo_list);
-		kiblnd_destroy_fmr_pool(pool);
+	list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
+		list_del(&fpo->fpo_list);
+		kiblnd_destroy_fmr_pool(fpo);
 	}
 }
 
@@ -1429,45 +1445,159 @@ static int
 kiblnd_fmr_flush_trigger(int ncpts)
 	return max(IBLND_FMR_POOL_FLUSH, size);
 }
 
-static int
-kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
 {
-	/* FMR pool for RDMA */
-	kib_dev_t               *dev = fps->fps_net->ibn_dev;
-	kib_fmr_pool_t          *fpo;
-	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
-		.page_shift        = PAGE_SHIFT,
-		.access            = (IB_ACCESS_LOCAL_WRITE |
-				      IB_ACCESS_REMOTE_WRITE),
+	struct ib_fmr_pool_param param = {
+		.max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
+		.page_shift        = PAGE_SHIFT,
+		.access            = (IB_ACCESS_LOCAL_WRITE |
+				      IB_ACCESS_REMOTE_WRITE),
 		.pool_size         = fps->fps_pool_size,
 		.dirty_watermark   = fps->fps_flush_trigger,
 		.flush_function    = NULL,
 		.flush_arg         = NULL,
 		.cache             = !!*kiblnd_tunables.kib_fmr_cache};
+	int rc = 0;
+
+	fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
+						   &param);
+	if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
+		rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
+		if (rc != -ENOSYS)
+			CERROR("Failed to create FMR pool: %d\n", rc);
+		else
+			CERROR("FMRs are not supported\n");
+	}
+
+	return rc;
+}
+
+static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
+{
+	struct kib_fast_reg_descriptor *frd, *tmp;
+	int i, rc;
+
+	INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
+	fpo->fast_reg.fpo_pool_size = 0;
+	for (i = 0; i < fps->fps_pool_size; i++) {
+		LIBCFS_CPT_ALLOC(frd, lnet_cpt_table(), fps->fps_cpt,
+				 sizeof(*frd));
+		if (!frd) {
+			CERROR("Failed to allocate a new fast_reg descriptor\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+		frd->frd_mr = NULL;
+
+		frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
+							    LNET_MAX_PAYLOAD/PAGE_SIZE);
+		if (IS_ERR(frd->frd_frpl)) {
+			rc = PTR_ERR(frd->frd_frpl);
+			CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
+				rc);
+			goto out_middle;
+		}
+
+		frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
+						   LNET_MAX_PAYLOAD/PAGE_SIZE);
+		if (IS_ERR(frd->frd_mr)) {
+			rc = PTR_ERR(frd->frd_mr);
+			CERROR("Failed to allocate ib_fast_reg_mr: %d\n", rc);
+			goto out_middle;
+		}
+
+		frd->frd_valid = true;
+
+		list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+		fpo->fast_reg.fpo_pool_size++;
+	}
+
+	return 0;
+
+out_middle:
+	if (frd->frd_mr)
+		ib_dereg_mr(frd->frd_mr);
+	if (frd->frd_frpl)
+		ib_free_fast_reg_page_list(frd->frd_frpl);
+	LIBCFS_FREE(frd, sizeof(*frd));
+
+out:
+	list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+				 frd_list) {
+		list_del(&frd->frd_list);
+		ib_free_fast_reg_page_list(frd->frd_frpl);
+		ib_dereg_mr(frd->frd_mr);
+		LIBCFS_FREE(frd, sizeof(*frd));
+	}
+
+	return rc;
+}
+
+static int
+kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+{
+	struct ib_device_attr *dev_attr;
+	kib_dev_t *dev = fps->fps_net->ibn_dev;
+	kib_fmr_pool_t *fpo;
 	int rc;
 
-	LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
-	if (fpo == NULL)
+	dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+	if (!dev_attr)
 		return -ENOMEM;
 
+	LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
+	if (!fpo) {
+		rc = -ENOMEM;
+		goto out_dev_attr;
+	}
+
 	fpo->fpo_hdev = kiblnd_current_hdev(dev);
 
-	fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
-	if (IS_ERR(fpo->fpo_fmr_pool)) {
-		rc = PTR_ERR(fpo->fpo_fmr_pool);
-		CERROR("Failed to create FMR pool: %d\n", rc);
+	rc = ib_query_device(fpo->fpo_hdev->ibh_ibdev, dev_attr);
+	if (rc) {
+		CERROR("Query device failed for %s: %d\n",
+		       fpo->fpo_hdev->ibh_ibdev->name, rc);
+		goto out_dev_attr;
+	}
 
-		kiblnd_hdev_decref(fpo->fpo_hdev);
-		LIBCFS_FREE(fpo, sizeof(kib_fmr_pool_t));
-		return rc;
-	}
+	/* Check for FMR or FastReg support */
+	fpo->fpo_is_fmr = 0;
+	if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
+	    fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
+		LCONSOLE_INFO("Using FMR for registration\n");
+		fpo->fpo_is_fmr = 1;
+	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+		LCONSOLE_INFO("Using FastReg for registration\n");
+	} else {
+		rc = -ENOSYS;
+		LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n");
+		goto out_dev_attr;
+	}
 
-	fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
-	fpo->fpo_owner = fps;
-	*pp_fpo = fpo;
+	if (fpo->fpo_is_fmr)
+		rc = kiblnd_alloc_fmr_pool(fps, fpo);
+	else
+		rc = kiblnd_alloc_freg_pool(fps, fpo);
+	if (rc)
+		goto out_fpo;
 
-	return 0;
+	kfree(dev_attr);
+	fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+	fpo->fpo_owner = fps;
+	*pp_fpo = fpo;
+
+	return 0;
+
+out_fpo:
+	kiblnd_hdev_decref(fpo->fpo_hdev);
+	LIBCFS_FREE(fpo, sizeof(*fpo));
+
+out_dev_attr:
+	kfree(dev_attr);
+
+	return rc;
 }
 
 static void
@@ -1540,21 +1670,38 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 {
 	struct list_head zombies = LIST_HEAD_INIT(zombies);
 	kib_fmr_pool_t *fpo = fmr->fmr_pool;
-	kib_fmr_poolset_t *fps = fpo->fpo_owner;
+	kib_fmr_poolset_t *fps;
 	cfs_time_t now = cfs_time_current();
 	kib_fmr_pool_t *tmp;
 	int rc;
 
-	rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
-	LASSERT(rc == 0);
+	if (!fpo)
+		return;
 
-	if (status != 0) {
-		rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
-		LASSERT(rc == 0);
-	}
+	fps = fpo->fpo_owner;
+	if (fpo->fpo_is_fmr) {
+		if (fmr->fmr_pfmr) {
+			rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+			LASSERT(!rc);
+			fmr->fmr_pfmr = NULL;
+		}
+		if (status) {
+			rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
+			LASSERT(!rc);
+		}
+	} else {
+		struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
+
+		if (frd) {
+			frd->frd_valid = false;
+			spin_lock(&fps->fps_lock);
+			list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+			spin_unlock(&fps->fps_lock);
+			fmr->fmr_frd = NULL;
+		}
+	}
 	fmr->fmr_pool = NULL;
-	fmr->fmr_pfmr = NULL;
 
 	spin_lock(&fps->fps_lock);
 	fpo->fpo_map_count--;	/* decref the pool */
@@ -1577,12 +1724,11 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 
 int
 kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
-		    __u64 iov, kib_fmr_t *fmr)
+		    __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr)
 {
-	struct ib_pool_fmr *pfmr;
-	kib_fmr_pool_t     *fpo;
-	__u64               version;
-	int                 rc;
+	kib_fmr_pool_t *fpo;
+	__u64 version;
+	int rc;
 
 again:
 	spin_lock(&fps->fps_lock);
@@ -1590,21 +1736,88 @@ again:
 	list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
 		fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
 		fpo->fpo_map_count++;
-		spin_unlock(&fps->fps_lock);
-		pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
-					    pages, npages, iov);
-		if (likely(!IS_ERR(pfmr))) {
-			fmr->fmr_pool = fpo;
-			fmr->fmr_pfmr = pfmr;
-			return 0;
-		}
+		if (fpo->fpo_is_fmr) {
+			struct ib_pool_fmr *pfmr;
+
+			spin_unlock(&fps->fps_lock);
+			pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
+						    pages, npages, iov);
+			if (likely(!IS_ERR(pfmr))) {
+				fmr->fmr_key = is_rx ? pfmr->fmr->rkey
+						     : pfmr->fmr->lkey;
+				fmr->fmr_frd = NULL;
+				fmr->fmr_pfmr = pfmr;
+				fmr->fmr_pool = fpo;
+				return 0;
+			}
+			rc = PTR_ERR(pfmr);
+		} else {
+			if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
+				struct ib_send_wr *wr;
+				struct kib_fast_reg_descriptor *frd;
+				struct ib_fast_reg_page_list *frpl;
+				struct ib_mr *mr;
+
+				frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
+						       struct kib_fast_reg_descriptor,
+						       frd_list);
+				list_del(&frd->frd_list);
+				spin_unlock(&fps->fps_lock);
+
+				frpl = frd->frd_frpl;
+				mr = frd->frd_mr;
+
+				if (!frd->frd_valid) {
+					struct ib_send_wr *inv_wr;
+					__u32 key = is_rx ? mr->rkey : mr->lkey;
+
+					inv_wr = &frd->frd_inv_wr;
+					memset(inv_wr, 0, sizeof(*inv_wr));
+					inv_wr->opcode = IB_WR_LOCAL_INV;
+					inv_wr->wr_id = IBLND_WID_MR;
+					inv_wr->ex.invalidate_rkey = key;
+
+					/* Bump the key */
+					key = ib_inc_rkey(key);
+					ib_update_fast_reg_key(mr, key);
+				}
+
+				LASSERT(npages <= frpl->max_page_list_len);
+				memcpy(frpl->page_list, pages,
+				       sizeof(*pages) * npages);
+
+				/* Prepare FastReg WR */
+				wr = &frd->frd_fastreg_wr;
+				memset(wr, 0, sizeof(*wr));
+				wr->opcode = IB_WR_FAST_REG_MR;
+				wr->wr_id = IBLND_WID_MR;
+				wr->wr.fast_reg.iova_start = iov;
+				wr->wr.fast_reg.page_list = frpl;
+				wr->wr.fast_reg.page_list_len = npages;
+				wr->wr.fast_reg.page_shift = PAGE_SHIFT;
+				wr->wr.fast_reg.length = nob;
+				wr->wr.fast_reg.rkey = is_rx ? mr->rkey
+							     : mr->lkey;
+				wr->wr.fast_reg.access_flags =
+					(IB_ACCESS_LOCAL_WRITE |
+					 IB_ACCESS_REMOTE_WRITE);
+
+				fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
+				fmr->fmr_frd = frd;
+				fmr->fmr_pfmr = NULL;
+				fmr->fmr_pool = fpo;
+				return 0;
+			}
+			spin_unlock(&fps->fps_lock);
+			rc = -EBUSY;
+		}
 
 		spin_lock(&fps->fps_lock);
 		fpo->fpo_map_count--;
-		if (PTR_ERR(pfmr) != -EAGAIN) {
+		if (rc != -EAGAIN) {
 			spin_unlock(&fps->fps_lock);
-			return PTR_ERR(pfmr);
+			return rc;
 		}
 
 		/* EAGAIN and ... */
diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index b3d9332..162742b 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/lnet/klnds/o2iblnd/o2iblnd.h
@@ -337,20 +337,40 @@ typedef struct
 	cfs_time_t		fps_next_retry;
 } kib_fmr_poolset_t;
 
+struct kib_fast_reg_descriptor { /* For fast registration */
+	struct list_head		 frd_list;
+	struct ib_send_wr		 frd_inv_wr;
+	struct ib_send_wr		 frd_fastreg_wr;
+	struct ib_mr			*frd_mr;
+	struct ib_fast_reg_page_list	*frd_frpl;
+	bool				 frd_valid;
+};
+
 typedef struct
 {
 	struct list_head	fpo_list;	/* chain on pool list */
 	struct kib_hca_dev     *fpo_hdev;	/* device for this pool */
 	kib_fmr_poolset_t      *fpo_owner;	/* owner of this pool */
-	struct ib_fmr_pool     *fpo_fmr_pool;	/* IB FMR pool */
+	union {
+		struct {
+			struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
+		} fmr;
+		struct { /* For fast registration */
+			struct list_head    fpo_pool_list;
+			int		    fpo_pool_size;
+		} fast_reg;
+	};
 	cfs_time_t		fpo_deadline;	/* deadline of this pool */
 	int			fpo_failed;	/* fmr pool is failed */
 	int			fpo_map_count;	/* # of mapped FMR */
+	int			fpo_is_fmr;
 } kib_fmr_pool_t;
 
 typedef struct {
-	struct ib_pool_fmr     *fmr_pfmr;	/* IB pool fmr */
-	kib_fmr_pool_t	       *fmr_pool;	/* pool of FMR */
+	kib_fmr_pool_t			*fmr_pool;	/* pool of FMR */
+	struct ib_pool_fmr		*fmr_pfmr;	/* IB pool fmr */
+	struct kib_fast_reg_descriptor	*fmr_frd;
+	u32				 fmr_key;
 } kib_fmr_t;
 
 typedef struct kib_net
@@ -755,6 +775,19 @@ typedef struct kib_peer
 	__u16			ibp_queue_depth;
 } kib_peer_t;
 
+#ifndef HAVE_IB_INC_RKEY
+/**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey - the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+	const u32 mask = 0x000000ff;
+
+	return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+#endif
+
 extern kib_data_t kiblnd_data;
 
 extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
@@ -939,11 +972,12 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q)
 
 /* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
  * lowest bits of the work request id to stash the work item type. */
-#define IBLND_WID_INVAL 0
-#define IBLND_WID_TX    1
-#define IBLND_WID_RX    2
-#define IBLND_WID_RDMA  3
-#define IBLND_WID_MASK  3UL
+#define IBLND_WID_INVAL	0
+#define IBLND_WID_TX	1
+#define IBLND_WID_RX	2
+#define IBLND_WID_RDMA	3
+#define IBLND_WID_MR	4
+#define IBLND_WID_MASK	7UL
 
 static inline __u64
 kiblnd_ptr2wreqid (void *ptr, int type)
@@ -1099,8 +1133,8 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
 
-int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
-			 int npages, __u64 iov, kib_fmr_t *fmr);
+int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
+			 __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
 
 int  kiblnd_tunables_init(void);
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 6887c07..5ba9d24 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -562,7 +562,7 @@ kiblnd_kvaddr_to_page (unsigned long vaddr)
 }
 
 static int
-kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob)
 {
 	kib_hca_dev_t *hdev;
 	__u64 *pages = tx->tx_pages;
@@ -589,16 +589,16 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
 	cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
 
 	fps = net->ibn_fmr_ps[cpt];
-	rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->fmr);
-	if (rc != 0) {
-		CERROR ("Can't map %d pages: %d\n", npages, rc);
-		return rc;
-	}
+	rc = kiblnd_fmr_pool_map(fps, pages, npages, nob, 0, (rd != tx->tx_rd),
+				 &tx->fmr);
+	if (rc != 0) {
+		CERROR("Can't map %d pages: %d\n", npages, rc);
+		return rc;
+	}
 
 	/* If rd is not tx_rd, it's going to get sent to a peer, who will need
 	 * the rkey */
-	rd->rd_key = (rd != tx->tx_rd) ? tx->fmr.fmr_pfmr->fmr->rkey :
-					 tx->fmr.fmr_pfmr->fmr->lkey;
+	rd->rd_key = tx->fmr.fmr_key;
 	rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
 	rd->rd_frags[0].rf_nob   = nob;
 	rd->rd_nfrags = 1;
@@ -613,10 +613,8 @@ kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
 	LASSERT(net != NULL);
 
-	if (net->ibn_fmr_ps != NULL && tx->fmr.fmr_pfmr != NULL) {
+	if (net->ibn_fmr_ps != NULL)
 		kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-		tx->fmr.fmr_pfmr = NULL;
-	}
 
 	if (tx->tx_nfrags != 0) {
 		kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
@@ -631,8 +629,8 @@ kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
 	kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
 	kib_net_t *net = ni->ni_data;
 	struct ib_mr *mr = NULL;
-	__u32 nob;
-	int i;
+	__u32		 nob;
+	int		 i;
 
 	/* If rd is not tx_rd, it's going to get sent to a peer and I'm the
 	 * RDMA sink */
@@ -847,14 +845,26 @@ __must_hold(&conn->ibc_lock)
 		/* close_conn will launch failover */
 		rc = -ENETDOWN;
 	} else {
-		struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1];
+		struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
+		struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1];
+		struct ib_send_wr *wrq = tx->tx_wrq;
+
+		if (frd != NULL) {
+			if (!frd->frd_valid) {
+				wrq = &frd->frd_inv_wr;
+				wrq->next = &frd->frd_fastreg_wr;
+			} else {
+				wrq = &frd->frd_fastreg_wr;
+			}
+			frd->frd_fastreg_wr.next = tx->tx_wrq;
+		}
 
-		LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
+		LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
 			 "bad wr_id "LPX64", opc %d, flags %d, peer: %s\n",
-			 wrq->wr_id, wrq->opcode, wrq->send_flags,
+			 bad->wr_id, bad->opcode, bad->send_flags,
 			 libcfs_nid2str(conn->ibc_peer->ibp_nid));
-		wrq = NULL;
-		rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &wrq);
+		bad = NULL;
+		rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
 	}
 
 	conn->ibc_last_send = jiffies;
@@ -3398,9 +3408,15 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
 static void
 kiblnd_complete (struct ib_wc *wc)
 {
-	switch (kiblnd_wreqid2type(wc->wr_id)) {
-	default:
-		LBUG();
+	switch (kiblnd_wreqid2type(wc->wr_id)) {
+	default:
+		LBUG();
+
+	case IBLND_WID_MR:
+		if (wc->status != IB_WC_SUCCESS &&
+		    wc->status != IB_WC_WR_FLUSH_ERR)
+			CNETERR("FastReg failed: %d\n", wc->status);
+		return;
 
 	case IBLND_WID_RDMA:
 		/* We only get RDMA completion notification if it fails.  All
-- 
1.8.3.1
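
Note on the ib_inc_rkey() fallback added to o2iblnd.h above: it bumps only the
low 8 bits of an rkey (the key portion) and preserves the upper 24 bits that
identify the memory region, which is what lets kiblnd_fmr_pool_map() hand out a
fresh key when it reuses a FastReg descriptor whose previous mapping is still
pending invalidation. A minimal user-space sketch of that arithmetic follows;
the demo_inc_rkey() helper mirrors the fallback from the patch, while the
main() harness and its sample value are illustrative only and are not part of
the patch.

	/* Standalone illustration of the masked increment used by the
	 * ib_inc_rkey() fallback in o2iblnd.h.  main() is only a user-space
	 * demonstration harness. */
	#include <stdio.h>
	#include <stdint.h>

	static inline uint32_t demo_inc_rkey(uint32_t rkey)
	{
		const uint32_t mask = 0x000000ff;

		/* Only the low byte changes; the MR index in the upper
		 * 24 bits is preserved. */
		return ((rkey + 1) & mask) | (rkey & ~mask);
	}

	int main(void)
	{
		uint32_t rkey = 0x1234abff;	/* low byte about to wrap */

		rkey = demo_inc_rkey(rkey);
		printf("0x%08x\n", rkey);	/* prints 0x1234ab00 */
		return 0;
	}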