From 2366ba2b6f8ac2a6eeff796e89f1df9ce7ac546e Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 4 May 2016 13:11:15 -0400 Subject: [PATCH] LU-6215 o2iblnd: port to new fast reg API introduced in 4.4 Remove the allocation of fastreg page list, as the page vector is now private to the provider. Just pass tx_frags to ib_map_mr_sg() and construct ib_reg_wr. Defer the conversion of tx_frags to tx_pages and only do it when the new API is not available. Linux-commit: 4c67e2bfc8b7121d51434362fa7c2d012f8bcf1b Linux-commit: 39bfc271bd687be2c8e396e976c0fb9a97963400 Signed-off-by: Li Dongyang Change-Id: I5c62b0370ad6ddcc93102a29343491968e4446d0 Reviewed-on: http://review.whamcloud.com/19186 Tested-by: Jenkins Reviewed-by: James Simmons Reviewed-by: Dmitry Eremin Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/autoconf/lustre-lnet.m4 | 22 +++++++++++ lnet/klnds/o2iblnd/o2iblnd.c | 81 +++++++++++++++++++++++++++++++++++++++-- lnet/klnds/o2iblnd/o2iblnd.h | 11 ++++-- lnet/klnds/o2iblnd/o2iblnd_cb.c | 28 ++++++-------- 4 files changed, 119 insertions(+), 23 deletions(-) diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 253d590..221feda 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -423,6 +423,28 @@ AS_IF([test $ENABLEO2IB != "no"], [ [rdma_create_id wants 5 args]) ]) ]) + +# new fast registration API introduced in 4.4 +AS_IF([test $ENABLEO2IB != "no"], [ + LB_CHECK_COMPILE([if 'ib_map_mr_sg' exists], + ib_map_mr_sg, [ + #ifdef HAVE_COMPAT_RDMA + #undef PACKAGE_NAME + #undef PACKAGE_TARNAME + #undef PACKAGE_VERSION + #undef PACKAGE_STRING + #undef PACKAGE_BUGREPORT + #undef PACKAGE_URL + #include <linux/compat-2.6.h> + #endif + #include <rdma/ib_verbs.h> + ],[ + ib_map_mr_sg(NULL, NULL, 0, 0); + ],[ + AC_DEFINE(HAVE_IB_MAP_MR_SG, 1, + [ib_map_mr_sg exists]) + ]) +]) ]) # LN_CONFIG_O2IB # diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index b6396d2..e4bb8b9 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ 
b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1422,7 +1422,9 @@ kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo) list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, frd_list) { list_del(&frd->frd_list); +#ifndef HAVE_IB_MAP_MR_SG ib_free_fast_reg_page_list(frd->frd_frpl); +#endif ib_dereg_mr(frd->frd_mr); LIBCFS_FREE(frd, sizeof(*frd)); i++; @@ -1511,6 +1513,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) } frd->frd_mr = NULL; +#ifndef HAVE_IB_MAP_MR_SG frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev, LNET_MAX_PAYLOAD/PAGE_SIZE); if (IS_ERR(frd->frd_frpl)) { @@ -1520,6 +1523,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) frd->frd_frpl = NULL; goto out_middle; } +#endif #ifdef HAVE_IB_ALLOC_FAST_REG_MR frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd, @@ -1547,15 +1551,19 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) out_middle: if (frd->frd_mr) ib_dereg_mr(frd->frd_mr); +#ifndef HAVE_IB_MAP_MR_SG if (frd->frd_frpl) ib_free_fast_reg_page_list(frd->frd_frpl); +#endif LIBCFS_FREE(frd, sizeof(*frd)); out: list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, frd_list) { list_del(&frd->frd_list); +#ifndef HAVE_IB_MAP_MR_SG ib_free_fast_reg_page_list(frd->frd_frpl); +#endif ib_dereg_mr(frd->frd_mr); LIBCFS_FREE(frd, sizeof(*frd)); } @@ -1699,6 +1707,28 @@ kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, cfs_time_t now) return cfs_time_aftereq(now, fpo->fpo_deadline); } +static int +kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd) +{ + kib_hca_dev_t *hdev; + __u64 *pages = tx->tx_pages; + int npages; + int size; + int i; + + hdev = tx->tx_pool->tpo_hdev; + + for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { + for (size = 0; size < rd->rd_frags[i].rf_nob; + size += hdev->ibh_page_size) { + pages[npages++] = (rd->rd_frags[i].rf_addr & + hdev->ibh_page_mask) + size; + } + } + + return npages; +} + void 
kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) { @@ -1757,11 +1787,15 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) } int -kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, - __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr) +kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, kib_rdma_desc_t *rd, + __u32 nob, __u64 iov, kib_fmr_t *fmr) { kib_fmr_pool_t *fpo; + __u64 *pages = tx->tx_pages; __u64 version; + bool is_rx = (rd != tx->tx_rd); + bool tx_pages_mapped = 0; + int npages = 0; int rc; again: @@ -1775,6 +1809,12 @@ again: struct ib_pool_fmr *pfmr; spin_unlock(&fps->fps_lock); + + if (!tx_pages_mapped) { + npages = kiblnd_map_tx_pages(tx, rd); + tx_pages_mapped = 1; + } + pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool, pages, npages, iov); if (likely(!IS_ERR(pfmr))) { @@ -1788,9 +1828,14 @@ again: rc = PTR_ERR(pfmr); } else { if (!list_empty(&fpo->fast_reg.fpo_pool_list)) { - struct ib_send_wr *wr; struct kib_fast_reg_descriptor *frd; +#ifdef HAVE_IB_MAP_MR_SG + struct ib_reg_wr *wr; + int n; +#else + struct ib_send_wr *wr; struct ib_fast_reg_page_list *frpl; +#endif struct ib_mr *mr; frd = list_first_entry(&fpo->fast_reg.fpo_pool_list, @@ -1799,7 +1844,9 @@ again: list_del(&frd->frd_list); spin_unlock(&fps->fps_lock); +#ifndef HAVE_IB_MAP_MR_SG frpl = frd->frd_frpl; +#endif mr = frd->frd_mr; if (!frd->frd_valid) { @@ -1817,6 +1864,33 @@ again: ib_update_fast_reg_key(mr, key); } +#ifdef HAVE_IB_MAP_MR_SG + n = ib_map_mr_sg(mr, tx->tx_frags, + tx->tx_nfrags, PAGE_SIZE); + if (unlikely(n != tx->tx_nfrags)) { + CERROR("Failed to map mr %d/%d " + "elements\n", n, tx->tx_nfrags); + return n < 0 ? n : -EINVAL; + } + + mr->iova = iov; + + wr = &frd->frd_fastreg_wr; + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR; + wr->wr.wr_id = IBLND_WID_MR; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = mr; + wr->key = is_rx ? 
mr->rkey : mr->lkey; + wr->access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); +#else + if (!tx_pages_mapped) { + npages = kiblnd_map_tx_pages(tx, rd); + tx_pages_mapped = 1; + } + LASSERT(npages <= frpl->max_page_list_len); memcpy(frpl->page_list, pages, sizeof(*pages) * npages); @@ -1836,6 +1910,7 @@ again: wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); +#endif fmr->fmr_key = is_rx ? mr->rkey : mr->lkey; fmr->fmr_frd = frd; diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 23cc03c..3e40cc7 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -313,9 +313,13 @@ typedef struct struct kib_fast_reg_descriptor { /* For fast registration */ struct list_head frd_list; struct ib_send_wr frd_inv_wr; +#ifdef HAVE_IB_MAP_MR_SG + struct ib_reg_wr frd_fastreg_wr; +#else struct ib_send_wr frd_fastreg_wr; + struct ib_fast_reg_page_list *frd_frpl; +#endif struct ib_mr *frd_mr; - struct ib_fast_reg_page_list *frd_frpl; bool frd_valid; }; @@ -1151,8 +1155,9 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn); void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node); struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps); -int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, - __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr); +int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, + kib_rdma_desc_t *rd, __u32 nob, __u64 iov, + kib_fmr_t *fmr); void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status); int kiblnd_tunables_setup(struct lnet_ni *ni); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 6160ea5..1917dc4 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -565,34 +565,20 @@ static int kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob) { kib_hca_dev_t *hdev; - __u64 *pages = tx->tx_pages; kib_fmr_poolset_t *fps; - int npages; - int size; int 
cpt; int rc; - int i; LASSERT(tx->tx_pool != NULL); LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL); - hdev = tx->tx_pool->tpo_hdev; - - for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { - for (size = 0; size < rd->rd_frags[i].rf_nob; - size += hdev->ibh_page_size) { - pages[npages ++] = (rd->rd_frags[i].rf_addr & - hdev->ibh_page_mask) + size; - } - } - + hdev = tx->tx_pool->tpo_hdev; cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt; fps = net->ibn_fmr_ps[cpt]; - rc = kiblnd_fmr_pool_map(fps, pages, npages, nob, 0, (rd != tx->tx_rd), - &tx->fmr); + rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr); if (rc != 0) { - CERROR("Can't map %d pages: %d\n", npages, rc); + CERROR("Can't map %u pages: %d\n", nob, rc); return rc; } @@ -854,11 +840,19 @@ __must_hold(&conn->ibc_lock) if (frd != NULL) { if (!frd->frd_valid) { wrq = &frd->frd_inv_wr; +#ifdef HAVE_IB_MAP_MR_SG + wrq->next = &frd->frd_fastreg_wr.wr; + } else { + wrq = &frd->frd_fastreg_wr.wr; + } + frd->frd_fastreg_wr.wr.next = tx->tx_wrq; +#else wrq->next = &frd->frd_fastreg_wr; } else { wrq = &frd->frd_fastreg_wr; } frd->frd_fastreg_wr.next = tx->tx_wrq; +#endif } LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), -- 1.8.3.1