Whamcloud - gitweb
LU-6215 o2iblnd: port to new fast reg API introduced in 4.4 86/19186/5
author Li Dongyang <dongyang.li@anu.edu.au>
Wed, 4 May 2016 17:11:15 +0000 (13:11 -0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 16 May 2016 16:48:04 +0000 (16:48 +0000)
Remove the allocation of the fastreg page list, as the page
vector is now private to the provider. Just pass tx_frags
to ib_map_mr_sg() and construct an ib_reg_wr.
Defer the conversion of the fragments to tx_pages so that it is
done only when needed: when mapping through an FMR pool, or for
fast registration when the new API is not available.

Linux-commit: 4c67e2bfc8b7121d51434362fa7c2d012f8bcf1b
Linux-commit: 39bfc271bd687be2c8e396e976c0fb9a97963400

Signed-off-by: Li Dongyang <dongyang.li@anu.edu.au>
Change-Id: I5c62b0370ad6ddcc93102a29343491968e4446d0
Reviewed-on: http://review.whamcloud.com/19186
Tested-by: Jenkins
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/autoconf/lustre-lnet.m4
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 253d590..221feda 100644 (file)
@@ -423,6 +423,28 @@ AS_IF([test $ENABLEO2IB != "no"], [
                        [rdma_create_id wants 5 args])
        ])
 ])
+
+# new fast registration API introduced in 4.4
+AS_IF([test $ENABLEO2IB != "no"], [
+       LB_CHECK_COMPILE([if 'ib_map_mr_sg' exists],
+       ib_map_mr_sg, [
+               #ifdef HAVE_COMPAT_RDMA
+               #undef PACKAGE_NAME
+               #undef PACKAGE_TARNAME
+               #undef PACKAGE_VERSION
+               #undef PACKAGE_STRING
+               #undef PACKAGE_BUGREPORT
+               #undef PACKAGE_URL
+               #include <linux/compat-2.6.h>
+               #endif
+               #include <rdma/ib_verbs.h>
+       ],[
+               ib_map_mr_sg(NULL, NULL, 0, 0);
+       ],[
+               AC_DEFINE(HAVE_IB_MAP_MR_SG, 1,
+                       [ib_map_mr_sg exists])
+       ])
+])
 ]) # LN_CONFIG_O2IB
 
 #
index b6396d2..e4bb8b9 100644 (file)
@@ -1422,7 +1422,9 @@ kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
                list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
                                         frd_list) {
                        list_del(&frd->frd_list);
+#ifndef HAVE_IB_MAP_MR_SG
                        ib_free_fast_reg_page_list(frd->frd_frpl);
+#endif
                        ib_dereg_mr(frd->frd_mr);
                        LIBCFS_FREE(frd, sizeof(*frd));
                        i++;
@@ -1511,6 +1513,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
                }
                frd->frd_mr = NULL;
 
+#ifndef HAVE_IB_MAP_MR_SG
                frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
                                                            LNET_MAX_PAYLOAD/PAGE_SIZE);
                if (IS_ERR(frd->frd_frpl)) {
@@ -1520,6 +1523,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
                        frd->frd_frpl = NULL;
                        goto out_middle;
                }
+#endif
 
 #ifdef HAVE_IB_ALLOC_FAST_REG_MR
                frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
@@ -1547,15 +1551,19 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
 out_middle:
        if (frd->frd_mr)
                ib_dereg_mr(frd->frd_mr);
+#ifndef HAVE_IB_MAP_MR_SG
        if (frd->frd_frpl)
                ib_free_fast_reg_page_list(frd->frd_frpl);
+#endif
        LIBCFS_FREE(frd, sizeof(*frd));
 
 out:
        list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
                                 frd_list) {
                list_del(&frd->frd_list);
+#ifndef HAVE_IB_MAP_MR_SG
                ib_free_fast_reg_page_list(frd->frd_frpl);
+#endif
                ib_dereg_mr(frd->frd_mr);
                LIBCFS_FREE(frd, sizeof(*frd));
        }
@@ -1699,6 +1707,28 @@ kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, cfs_time_t now)
         return cfs_time_aftereq(now, fpo->fpo_deadline);
 }
 
+static int
+kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd)
+{
+       kib_hca_dev_t   *hdev;
+       __u64           *pages = tx->tx_pages;
+       int             npages;
+       int             size;
+       int             i;
+
+       hdev = tx->tx_pool->tpo_hdev;
+
+       for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
+               for (size = 0; size <  rd->rd_frags[i].rf_nob;
+                       size += hdev->ibh_page_size) {
+                       pages[npages++] = (rd->rd_frags[i].rf_addr &
+                                          hdev->ibh_page_mask) + size;
+               }
+       }
+
+       return npages;
+}
+
 void
 kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 {
@@ -1757,11 +1787,15 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 }
 
 int
-kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
-                   __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr)
+kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, kib_rdma_desc_t *rd,
+                   __u32 nob, __u64 iov, kib_fmr_t *fmr)
 {
        kib_fmr_pool_t *fpo;
+       __u64 *pages = tx->tx_pages;
        __u64 version;
+       bool is_rx = (rd != tx->tx_rd);
+       bool tx_pages_mapped = 0;
+       int npages = 0;
        int rc;
 
 again:
@@ -1775,6 +1809,12 @@ again:
                        struct ib_pool_fmr *pfmr;
 
                        spin_unlock(&fps->fps_lock);
+
+                       if (!tx_pages_mapped) {
+                               npages = kiblnd_map_tx_pages(tx, rd);
+                               tx_pages_mapped = 1;
+                       }
+
                        pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
                                                    pages, npages, iov);
                        if (likely(!IS_ERR(pfmr))) {
@@ -1788,9 +1828,14 @@ again:
                        rc = PTR_ERR(pfmr);
                } else {
                        if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
-                               struct ib_send_wr *wr;
                                struct kib_fast_reg_descriptor *frd;
+#ifdef HAVE_IB_MAP_MR_SG
+                               struct ib_reg_wr *wr;
+                               int n;
+#else
+                               struct ib_send_wr *wr;
                                struct ib_fast_reg_page_list *frpl;
+#endif
                                struct ib_mr *mr;
 
                                frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
@@ -1799,7 +1844,9 @@ again:
                                list_del(&frd->frd_list);
                                spin_unlock(&fps->fps_lock);
 
+#ifndef HAVE_IB_MAP_MR_SG
                                frpl = frd->frd_frpl;
+#endif
                                mr   = frd->frd_mr;
 
                                if (!frd->frd_valid) {
@@ -1817,6 +1864,33 @@ again:
                                        ib_update_fast_reg_key(mr, key);
                                }
 
+#ifdef HAVE_IB_MAP_MR_SG
+                               n = ib_map_mr_sg(mr, tx->tx_frags,
+                                                tx->tx_nfrags, PAGE_SIZE);
+                               if (unlikely(n != tx->tx_nfrags)) {
+                                       CERROR("Failed to map mr %d/%d "
+                                              "elements\n", n, tx->tx_nfrags);
+                                       return n < 0 ? n : -EINVAL;
+                               }
+
+                               mr->iova = iov;
+
+                               wr = &frd->frd_fastreg_wr;
+                               memset(wr, 0, sizeof(*wr));
+                               wr->wr.opcode = IB_WR_REG_MR;
+                               wr->wr.wr_id = IBLND_WID_MR;
+                               wr->wr.num_sge = 0;
+                               wr->wr.send_flags = 0;
+                               wr->mr = mr;
+                               wr->key = is_rx ? mr->rkey : mr->lkey;
+                               wr->access = (IB_ACCESS_LOCAL_WRITE |
+                                             IB_ACCESS_REMOTE_WRITE);
+#else
+                               if (!tx_pages_mapped) {
+                                       npages = kiblnd_map_tx_pages(tx, rd);
+                                       tx_pages_mapped = 1;
+                               }
+
                                LASSERT(npages <= frpl->max_page_list_len);
                                memcpy(frpl->page_list, pages,
                                        sizeof(*pages) * npages);
@@ -1836,6 +1910,7 @@ again:
                                wr->wr.fast_reg.access_flags =
                                                (IB_ACCESS_LOCAL_WRITE |
                                                 IB_ACCESS_REMOTE_WRITE);
+#endif
 
                                fmr->fmr_key  = is_rx ? mr->rkey : mr->lkey;
                                fmr->fmr_frd  = frd;
index 23cc03c..3e40cc7 100644 (file)
@@ -313,9 +313,13 @@ typedef struct
 struct kib_fast_reg_descriptor { /* For fast registration */
        struct list_head                 frd_list;
        struct ib_send_wr                frd_inv_wr;
+#ifdef HAVE_IB_MAP_MR_SG
+       struct ib_reg_wr                 frd_fastreg_wr;
+#else
        struct ib_send_wr                frd_fastreg_wr;
+       struct ib_fast_reg_page_list    *frd_frpl;
+#endif
        struct ib_mr                    *frd_mr;
-       struct ib_fast_reg_page_list    *frd_frpl;
        bool                             frd_valid;
 };
 
@@ -1151,8 +1155,9 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
 
-int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
-                        __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
+int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+                        kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+                        kib_fmr_t *fmr);
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
 
 int  kiblnd_tunables_setup(struct lnet_ni *ni);
index 6160ea5..1917dc4 100644 (file)
@@ -565,34 +565,20 @@ static int
 kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob)
 {
        kib_hca_dev_t           *hdev;
-       __u64                   *pages = tx->tx_pages;
        kib_fmr_poolset_t       *fps;
-       int                     npages;
-       int                     size;
        int                     cpt;
        int                     rc;
-       int                     i;
 
        LASSERT(tx->tx_pool != NULL);
        LASSERT(tx->tx_pool->tpo_pool.po_owner != NULL);
 
-       hdev  = tx->tx_pool->tpo_hdev;
-
-        for (i = 0, npages = 0; i < rd->rd_nfrags; i++) {
-                for (size = 0; size <  rd->rd_frags[i].rf_nob;
-                               size += hdev->ibh_page_size) {
-                        pages[npages ++] = (rd->rd_frags[i].rf_addr &
-                                            hdev->ibh_page_mask) + size;
-                }
-        }
-
+       hdev = tx->tx_pool->tpo_hdev;
        cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
 
        fps = net->ibn_fmr_ps[cpt];
-       rc = kiblnd_fmr_pool_map(fps, pages, npages, nob, 0, (rd != tx->tx_rd),
-                                &tx->fmr);
+       rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->fmr);
        if (rc != 0) {
-               CERROR("Can't map %d pages: %d\n", npages, rc);
+               CERROR("Can't map %u pages: %d\n", nob, rc);
                return rc;
        }
 
@@ -854,11 +840,19 @@ __must_hold(&conn->ibc_lock)
                if (frd != NULL) {
                        if (!frd->frd_valid) {
                                wrq = &frd->frd_inv_wr;
+#ifdef HAVE_IB_MAP_MR_SG
+                               wrq->next = &frd->frd_fastreg_wr.wr;
+                       } else {
+                               wrq = &frd->frd_fastreg_wr.wr;
+                       }
+                       frd->frd_fastreg_wr.wr.next = tx->tx_wrq;
+#else
                                wrq->next = &frd->frd_fastreg_wr;
                        } else {
                                wrq = &frd->frd_fastreg_wr;
                        }
                        frd->frd_fastreg_wr.next = tx->tx_wrq;
+#endif
                }
 
                LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),