LU-5783 o2iblnd: Add Fast Reg memory registration support 06/17606/9
author Dmitry Eremin <dmitry.eremin@intel.com>
Tue, 26 Jan 2016 21:49:17 +0000 (16:49 -0500)
committer Oleg Drokin <oleg.drokin@intel.com>
Sun, 13 Mar 2016 06:26:03 +0000 (06:26 +0000)
FMR is deprecated and is not supported by the mlx5 driver.
This patch adds Fast Registration (memory management
extensions) support as a fallback for FMR.

Change-Id: I58f01aac3cbef0edc0934d75bcf13888f84beb0d
Signed-off-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-on: http://review.whamcloud.com/17606
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
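
For readers unfamiliar with the two schemes, the selection logic this patch adds to kiblnd_create_fmr_pool() (in the diff below) can be summarized as follows. This is a condensed sketch, not the patch verbatim: the helper name pick_reg_scheme is hypothetical, while the verbs function pointers and the IB_DEVICE_MEM_MGT_EXTENSIONS capability bit follow the pre-4.x kernel RDMA API this patch targets.

#include <linux/errno.h>
#include <rdma/ib_verbs.h>

/* Returns 1 for FMR, 0 for FastReg, -ENOSYS when neither is usable. */
static int pick_reg_scheme(struct ib_device *ibdev,
                           struct ib_device_attr *attr)
{
        /* FMR is usable only if the driver supplies all four FMR verbs */
        if (ibdev->alloc_fmr && ibdev->dealloc_fmr &&
            ibdev->map_phys_fmr && ibdev->unmap_fmr)
                return 1;

        /* mlx5 and other FMR-less HCAs advertise FastReg via this bit */
        if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
                return 0;

        return -ENOSYS;
}

FMR is tried first, so existing mlx4 setups keep their current behavior; FastReg is engaged only where FMR is unavailable.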
lnet/autoconf/lustre-lnet.m4
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c

diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4
index 842873d..f049677 100644
@@ -528,6 +528,33 @@ EXTRA_KCFLAGS="$tmp_flags"
 ]) # LN_CONFIG_SK_DATA_READY
 
 #
+# LN_CONFIG_IB_INC_RKEY
+#
+AC_DEFUN([LN_CONFIG_IB_INC_RKEY], [
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_CHECK_COMPILE([if function 'ib_inc_rkey' is defined],
+ib_inc_rkey, [
+       #ifdef HAVE_COMPAT_RDMA
+       #undef PACKAGE_NAME
+       #undef PACKAGE_TARNAME
+       #undef PACKAGE_VERSION
+       #undef PACKAGE_STRING
+       #undef PACKAGE_BUGREPORT
+       #undef PACKAGE_URL
+       #include <linux/compat-2.6.h>
+       #endif
+       #include <rdma/ib_verbs.h>
+],[
+       (void)ib_inc_rkey(0);
+],[
+       AC_DEFINE(HAVE_IB_INC_RKEY, 1,
+                 [function ib_inc_rkey exists])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+]) # LN_CONFIG_IB_INC_RKEY
+
+#
 # LN_PROG_LINUX
 #
 # LNet linux kernel checks
@@ -540,6 +567,7 @@ LN_CONFIG_AFFINITY
 LN_CONFIG_BACKOFF
 LN_CONFIG_O2IB
 LN_CONFIG_GNILND
+LN_CONFIG_IB_INC_RKEY
 # 2.6.35
 LN_CONFIG_SK_SLEEP
 # 2.6.36
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index d9761ee..957dc0e 100644
@@ -1390,28 +1390,44 @@ kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd,
 }
 
 static void
-kiblnd_destroy_fmr_pool(kib_fmr_pool_t *pool)
+kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
 {
-        LASSERT (pool->fpo_map_count == 0);
+       LASSERT(fpo->fpo_map_count == 0);
 
-        if (pool->fpo_fmr_pool != NULL)
-                ib_destroy_fmr_pool(pool->fpo_fmr_pool);
+       if (fpo->fpo_is_fmr) {
+               if (fpo->fmr.fpo_fmr_pool)
+                       ib_destroy_fmr_pool(fpo->fmr.fpo_fmr_pool);
+       } else {
+               struct kib_fast_reg_descriptor *frd, *tmp;
+               int i = 0;
+
+               list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+                                        frd_list) {
+                       list_del(&frd->frd_list);
+                       ib_free_fast_reg_page_list(frd->frd_frpl);
+                       ib_dereg_mr(frd->frd_mr);
+                       LIBCFS_FREE(frd, sizeof(*frd));
+                       i++;
+               }
+               if (i < fpo->fast_reg.fpo_pool_size)
+                       CERROR("FastReg pool still has %d regions registered\n",
+                               fpo->fast_reg.fpo_pool_size - i);
+       }
 
-        if (pool->fpo_hdev != NULL)
-                kiblnd_hdev_decref(pool->fpo_hdev);
+       if (fpo->fpo_hdev)
+               kiblnd_hdev_decref(fpo->fpo_hdev);
 
-        LIBCFS_FREE(pool, sizeof(kib_fmr_pool_t));
+       LIBCFS_FREE(fpo, sizeof(*fpo));
 }
 
 static void
 kiblnd_destroy_fmr_pool_list(struct list_head *head)
 {
-       kib_fmr_pool_t *pool;
+       kib_fmr_pool_t *fpo, *tmp;
 
-       while (!list_empty(head)) {
-               pool = list_entry(head->next, kib_fmr_pool_t, fpo_list);
-               list_del(&pool->fpo_list);
-               kiblnd_destroy_fmr_pool(pool);
+       list_for_each_entry_safe(fpo, tmp, head, fpo_list) {
+               list_del(&fpo->fpo_list);
+               kiblnd_destroy_fmr_pool(fpo);
        }
 }
 
@@ -1429,45 +1445,159 @@ static int kiblnd_fmr_flush_trigger(int ncpts)
        return max(IBLND_FMR_POOL_FLUSH, size);
 }
 
-static int
-kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
 {
-        /* FMR pool for RDMA */
-        kib_dev_t               *dev = fps->fps_net->ibn_dev;
-        kib_fmr_pool_t          *fpo;
-        struct ib_fmr_pool_param param = {
-                .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
-                .page_shift        = PAGE_SHIFT,
-                .access            = (IB_ACCESS_LOCAL_WRITE |
-                                      IB_ACCESS_REMOTE_WRITE),
+       struct ib_fmr_pool_param param = {
+               .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
+               .page_shift        = PAGE_SHIFT,
+               .access            = (IB_ACCESS_LOCAL_WRITE |
+                                     IB_ACCESS_REMOTE_WRITE),
                .pool_size         = fps->fps_pool_size,
                .dirty_watermark   = fps->fps_flush_trigger,
                .flush_function    = NULL,
                .flush_arg         = NULL,
                .cache             = !!*kiblnd_tunables.kib_fmr_cache};
+       int rc = 0;
+
+       fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd,
+                                                  &param);
+       if (IS_ERR(fpo->fmr.fpo_fmr_pool)) {
+               rc = PTR_ERR(fpo->fmr.fpo_fmr_pool);
+               if (rc != -ENOSYS)
+                       CERROR("Failed to create FMR pool: %d\n", rc);
+               else
+                       CERROR("FMRs are not supported\n");
+       }
+
+       return rc;
+}
+
+static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo)
+{
+       struct kib_fast_reg_descriptor *frd, *tmp;
+       int i, rc;
+
+       INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list);
+       fpo->fast_reg.fpo_pool_size = 0;
+       for (i = 0; i < fps->fps_pool_size; i++) {
+               LIBCFS_CPT_ALLOC(frd, lnet_cpt_table(), fps->fps_cpt,
+                                sizeof(*frd));
+               if (!frd) {
+                       CERROR("Failed to allocate a new fast_reg descriptor\n");
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               frd->frd_mr = NULL;
+
+               frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
+                                                           LNET_MAX_PAYLOAD/PAGE_SIZE);
+               if (IS_ERR(frd->frd_frpl)) {
+                       rc = PTR_ERR(frd->frd_frpl);
+                       CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
+                               rc);
+                       goto out_middle;
+               }
+
+               frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
+                                                  LNET_MAX_PAYLOAD/PAGE_SIZE);
+               if (IS_ERR(frd->frd_mr)) {
+                       rc = PTR_ERR(frd->frd_mr);
+                       CERROR("Failed to allocate ib_fast_reg_mr: %d\n", rc);
+                       goto out_middle;
+               }
+
+               frd->frd_valid = true;
+
+               list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+               fpo->fast_reg.fpo_pool_size++;
+       }
+
+       return 0;
+
+out_middle:
+       if (frd->frd_mr)
+               ib_dereg_mr(frd->frd_mr);
+       if (frd->frd_frpl)
+               ib_free_fast_reg_page_list(frd->frd_frpl);
+       LIBCFS_FREE(frd, sizeof(*frd));
+
+out:
+       list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list,
+                                frd_list) {
+               list_del(&frd->frd_list);
+               ib_free_fast_reg_page_list(frd->frd_frpl);
+               ib_dereg_mr(frd->frd_mr);
+               LIBCFS_FREE(frd, sizeof(*frd));
+       }
+
+       return rc;
+}
+
+static int
+kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo)
+{
+       struct ib_device_attr *dev_attr;
+       kib_dev_t *dev = fps->fps_net->ibn_dev;
+       kib_fmr_pool_t *fpo;
        int rc;
 
-       LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
-       if (fpo == NULL)
+       dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
+       if (!dev_attr)
                return -ENOMEM;
 
+       LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo));
+       if (!fpo) {
+               rc = -ENOMEM;
+               goto out_dev_attr;
+       }
+
        fpo->fpo_hdev = kiblnd_current_hdev(dev);
 
-       fpo->fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, &param);
-       if (IS_ERR(fpo->fpo_fmr_pool)) {
-               rc = PTR_ERR(fpo->fpo_fmr_pool);
-               CERROR("Failed to create FMR pool: %d\n", rc);
+       rc = ib_query_device(fpo->fpo_hdev->ibh_ibdev, dev_attr);
+       if (rc) {
+               CERROR("Query device failed for %s: %d\n",
+                       fpo->fpo_hdev->ibh_ibdev->name, rc);
+               goto out_dev_attr;
+       }
 
-                kiblnd_hdev_decref(fpo->fpo_hdev);
-                LIBCFS_FREE(fpo, sizeof(kib_fmr_pool_t));
-                return rc;
-        }
+       /* Check for FMR or FastReg support */
+       fpo->fpo_is_fmr = 0;
+       if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr &&
+           fpo->fpo_hdev->ibh_ibdev->dealloc_fmr &&
+           fpo->fpo_hdev->ibh_ibdev->map_phys_fmr &&
+           fpo->fpo_hdev->ibh_ibdev->unmap_fmr) {
+               LCONSOLE_INFO("Using FMR for registration\n");
+               fpo->fpo_is_fmr = 1;
+       } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+               LCONSOLE_INFO("Using FastReg for registration\n");
+       } else {
+               rc = -ENOSYS;
+               LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n");
+               goto out_dev_attr;
+       }
 
-        fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
-        fpo->fpo_owner    = fps;
-        *pp_fpo = fpo;
+       if (fpo->fpo_is_fmr)
+               rc = kiblnd_alloc_fmr_pool(fps, fpo);
+       else
+               rc = kiblnd_alloc_freg_pool(fps, fpo);
+       if (rc)
+               goto out_fpo;
 
-        return 0;
+       kfree(dev_attr);
+       fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+       fpo->fpo_owner    = fps;
+       *pp_fpo = fpo;
+
+       return 0;
+
+out_fpo:
+       kiblnd_hdev_decref(fpo->fpo_hdev);
+       LIBCFS_FREE(fpo, sizeof(*fpo));
+
+out_dev_attr:
+       kfree(dev_attr);
+
+       return rc;
 }
 
 static void
@@ -1540,21 +1670,38 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 {
        struct list_head   zombies = LIST_HEAD_INIT(zombies);
        kib_fmr_pool_t    *fpo = fmr->fmr_pool;
-       kib_fmr_poolset_t *fps = fpo->fpo_owner;
+       kib_fmr_poolset_t *fps;
        cfs_time_t         now = cfs_time_current();
        kib_fmr_pool_t    *tmp;
        int                rc;
 
-       rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
-       LASSERT(rc == 0);
+       if (!fpo)
+               return;
 
-       if (status != 0) {
-               rc = ib_flush_fmr_pool(fpo->fpo_fmr_pool);
-               LASSERT(rc == 0);
-       }
+       fps = fpo->fpo_owner;
+       if (fpo->fpo_is_fmr) {
+               if (fmr->fmr_pfmr) {
+                       rc = ib_fmr_pool_unmap(fmr->fmr_pfmr);
+                       LASSERT(!rc);
+                       fmr->fmr_pfmr = NULL;
+               }
 
+               if (status) {
+                       rc = ib_flush_fmr_pool(fpo->fmr.fpo_fmr_pool);
+                       LASSERT(!rc);
+               }
+       } else {
+               struct kib_fast_reg_descriptor *frd = fmr->fmr_frd;
+
+               if (frd) {
+                       frd->frd_valid = false;
+                       spin_lock(&fps->fps_lock);
+                       list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
+                       spin_unlock(&fps->fps_lock);
+                       fmr->fmr_frd = NULL;
+               }
+       }
        fmr->fmr_pool = NULL;
-       fmr->fmr_pfmr = NULL;
 
        spin_lock(&fps->fps_lock);
        fpo->fpo_map_count--;   /* decref the pool */
@@ -1577,12 +1724,11 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
 
 int
 kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
-                    __u64 iov, kib_fmr_t *fmr)
+                   __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr)
 {
-        struct ib_pool_fmr *pfmr;
-        kib_fmr_pool_t     *fpo;
-        __u64               version;
-        int                 rc;
+       kib_fmr_pool_t *fpo;
+       __u64 version;
+       int rc;
 
 again:
        spin_lock(&fps->fps_lock);
@@ -1590,21 +1736,88 @@ again:
        list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
                fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
                fpo->fpo_map_count++;
-               spin_unlock(&fps->fps_lock);
 
-                pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
-                                            pages, npages, iov);
-                if (likely(!IS_ERR(pfmr))) {
-                        fmr->fmr_pool = fpo;
-                        fmr->fmr_pfmr = pfmr;
-                        return 0;
-                }
+               if (fpo->fpo_is_fmr) {
+                       struct ib_pool_fmr *pfmr;
+
+                       spin_unlock(&fps->fps_lock);
+                       pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool,
+                                                   pages, npages, iov);
+                       if (likely(!IS_ERR(pfmr))) {
+                               fmr->fmr_key  = is_rx ? pfmr->fmr->rkey
+                                                     : pfmr->fmr->lkey;
+                               fmr->fmr_frd  = NULL;
+                               fmr->fmr_pfmr = pfmr;
+                               fmr->fmr_pool = fpo;
+                               return 0;
+                       }
+                       rc = PTR_ERR(pfmr);
+               } else {
+                       if (!list_empty(&fpo->fast_reg.fpo_pool_list)) {
+                               struct ib_send_wr *wr;
+                               struct kib_fast_reg_descriptor *frd;
+                               struct ib_fast_reg_page_list *frpl;
+                               struct ib_mr *mr;
+
+                               frd = list_first_entry(&fpo->fast_reg.fpo_pool_list,
+                                                       struct kib_fast_reg_descriptor,
+                                                       frd_list);
+                               list_del(&frd->frd_list);
+                               spin_unlock(&fps->fps_lock);
+
+                               frpl = frd->frd_frpl;
+                               mr   = frd->frd_mr;
+
+                               if (!frd->frd_valid) {
+                                       struct ib_send_wr *inv_wr;
+                                       __u32 key = is_rx ? mr->rkey : mr->lkey;
+
+                                       inv_wr = &frd->frd_inv_wr;
+                                       memset(inv_wr, 0, sizeof(*inv_wr));
+                                       inv_wr->opcode = IB_WR_LOCAL_INV;
+                                       inv_wr->wr_id = IBLND_WID_MR;
+                                       inv_wr->ex.invalidate_rkey = key;
+
+                                       /* Bump the key */
+                                       key = ib_inc_rkey(key);
+                                       ib_update_fast_reg_key(mr, key);
+                               }
+
+                               LASSERT(npages <= frpl->max_page_list_len);
+                               memcpy(frpl->page_list, pages,
+                                       sizeof(*pages) * npages);
+
+                               /* Prepare FastReg WR */
+                               wr = &frd->frd_fastreg_wr;
+                               memset(wr, 0, sizeof(*wr));
+                               wr->opcode = IB_WR_FAST_REG_MR;
+                               wr->wr_id = IBLND_WID_MR;
+                               wr->wr.fast_reg.iova_start = iov;
+                               wr->wr.fast_reg.page_list  = frpl;
+                               wr->wr.fast_reg.page_list_len = npages;
+                               wr->wr.fast_reg.page_shift = PAGE_SHIFT;
+                               wr->wr.fast_reg.length = nob;
+                               wr->wr.fast_reg.rkey = is_rx ? mr->rkey
+                                                            : mr->lkey;
+                               wr->wr.fast_reg.access_flags =
+                                               (IB_ACCESS_LOCAL_WRITE |
+                                                IB_ACCESS_REMOTE_WRITE);
+
+                               fmr->fmr_key  = is_rx ? mr->rkey : mr->lkey;
+                               fmr->fmr_frd  = frd;
+                               fmr->fmr_pfmr = NULL;
+                               fmr->fmr_pool = fpo;
+                               return 0;
+                       }
+                       spin_unlock(&fps->fps_lock);
+                       rc = -EBUSY;
+               }
 
                spin_lock(&fps->fps_lock);
                fpo->fpo_map_count--;
-               if (PTR_ERR(pfmr) != -EAGAIN) {
+               if (rc != -EAGAIN) {
                        spin_unlock(&fps->fps_lock);
-                       return PTR_ERR(pfmr);
+                       return rc;
                }
 
                /* EAGAIN and ... */
diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index b3d9332..162742b 100644
@@ -337,20 +337,40 @@ typedef struct
        cfs_time_t              fps_next_retry;
 } kib_fmr_poolset_t;
 
+struct kib_fast_reg_descriptor { /* For fast registration */
+       struct list_head                 frd_list;
+       struct ib_send_wr                frd_inv_wr;
+       struct ib_send_wr                frd_fastreg_wr;
+       struct ib_mr                    *frd_mr;
+       struct ib_fast_reg_page_list    *frd_frpl;
+       bool                             frd_valid;
+};
+
 typedef struct
 {
        struct list_head        fpo_list;       /* chain on pool list */
        struct kib_hca_dev     *fpo_hdev;       /* device for this pool */
        kib_fmr_poolset_t      *fpo_owner;      /* owner of this pool */
-       struct ib_fmr_pool     *fpo_fmr_pool;   /* IB FMR pool */
+       union {
+               struct {
+                       struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
+               } fmr;
+               struct { /* For fast registration */
+                       struct list_head  fpo_pool_list;
+                       int               fpo_pool_size;
+               } fast_reg;
+       };
        cfs_time_t              fpo_deadline;   /* deadline of this pool */
        int                     fpo_failed;     /* fmr pool is failed */
        int                     fpo_map_count;  /* # of mapped FMR */
+       int                     fpo_is_fmr;
 } kib_fmr_pool_t;
 
 typedef struct {
-        struct ib_pool_fmr     *fmr_pfmr;               /* IB pool fmr */
-        kib_fmr_pool_t         *fmr_pool;               /* pool of FMR */
+       kib_fmr_pool_t                  *fmr_pool;      /* pool of FMR */
+       struct ib_pool_fmr              *fmr_pfmr;      /* IB pool fmr */
+       struct kib_fast_reg_descriptor  *fmr_frd;
+       u32                              fmr_key;
 } kib_fmr_t;
 
 typedef struct kib_net
@@ -755,6 +775,19 @@ typedef struct kib_peer
        __u16                   ibp_queue_depth;
 } kib_peer_t;
 
+#ifndef HAVE_IB_INC_RKEY
+/**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey - the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+       const u32 mask = 0x000000ff;
+       return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+#endif
+
 extern kib_data_t      kiblnd_data;
 
 extern void kiblnd_hdev_destroy(kib_hca_dev_t *hdev);
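
A worked example of the fallback helper above, with hypothetical key values: only the low byte, the consumer-owned portion of the rkey, increments modulo 256, while the upper 24 bits that index the MR are preserved.

static void inc_rkey_example(void)
{
        u32 a = ib_inc_rkey(0x12345678);        /* -> 0x12345679 */
        u32 b = ib_inc_rkey(0x123456ff);        /* -> 0x12345600: the low
                                                 * byte wraps, the MR index
                                                 * bits stay intact */
        (void)a;
        (void)b;
}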
@@ -939,11 +972,12 @@ kiblnd_queue2str(kib_conn_t *conn, struct list_head *q)
 /* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the
  * lowest bits of the work request id to stash the work item type. */
 
-#define IBLND_WID_INVAL 0
-#define IBLND_WID_TX    1
-#define IBLND_WID_RX    2
-#define IBLND_WID_RDMA  3
-#define IBLND_WID_MASK  3UL
+#define IBLND_WID_INVAL        0
+#define IBLND_WID_TX   1
+#define IBLND_WID_RX   2
+#define IBLND_WID_RDMA 3
+#define IBLND_WID_MR   4
+#define IBLND_WID_MASK 7UL
 
 static inline __u64
 kiblnd_ptr2wreqid (void *ptr, int type)
 
 static inline __u64
 kiblnd_ptr2wreqid (void *ptr, int type)
@@ -1099,8 +1133,8 @@ void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
 
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
 
-int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages,
-                         int npages, __u64 iov, kib_fmr_t *fmr);
+int  kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
+                        __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
 void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
 
 int  kiblnd_tunables_init(void);
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 6887c07..5ba9d24 100644
@@ -562,7 +562,7 @@ kiblnd_kvaddr_to_page (unsigned long vaddr)
 }
 
 static int
-kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
+kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, __u32 nob)
 {
        kib_hca_dev_t           *hdev;
        __u64                   *pages = tx->tx_pages;
@@ -589,16 +589,16 @@ kiblnd_fmr_map_tx(kib_net_t *net, kib_tx_t *tx, kib_rdma_desc_t *rd, int nob)
        cpt = tx->tx_pool->tpo_pool.po_owner->ps_cpt;
 
        fps = net->ibn_fmr_ps[cpt];
-       rc = kiblnd_fmr_pool_map(fps, pages, npages, 0, &tx->fmr);
-        if (rc != 0) {
-                CERROR ("Can't map %d pages: %d\n", npages, rc);
-                return rc;
-        }
+       rc = kiblnd_fmr_pool_map(fps, pages, npages, nob, 0, (rd != tx->tx_rd),
+                                &tx->fmr);
+       if (rc != 0) {
+               CERROR("Can't map %d pages: %d\n", npages, rc);
+               return rc;
+       }
 
        /* If rd is not tx_rd, it's going to get sent to a peer, who will need
         * the rkey */
-       rd->rd_key = (rd != tx->tx_rd) ? tx->fmr.fmr_pfmr->fmr->rkey :
-                                        tx->fmr.fmr_pfmr->fmr->lkey;
+       rd->rd_key = tx->fmr.fmr_key;
        rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
        rd->rd_frags[0].rf_nob   = nob;
        rd->rd_nfrags = 1;
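
The new is_rx argument to kiblnd_fmr_pool_map() encodes the comment above: rd != tx->tx_rd means the descriptor is sent to the peer, who performs the RDMA, so the remote key must be published; otherwise the local key suffices. An illustrative helper (pick_key is not in the patch):

static u32 pick_key(struct ib_mr *mr, bool is_rx)
{
        /* is_rx: the peer initiates RDMA against this buffer and needs
         * the remote key; for locally driven access the lkey is enough. */
        return is_rx ? mr->rkey : mr->lkey;
}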
@@ -613,10 +613,8 @@ kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx)
 
        LASSERT(net != NULL);
 
-       if (net->ibn_fmr_ps != NULL && tx->fmr.fmr_pfmr != NULL) {
+       if (net->ibn_fmr_ps != NULL)
                kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
                kiblnd_fmr_pool_unmap(&tx->fmr, tx->tx_status);
-               tx->fmr.fmr_pfmr = NULL;
-       }
 
         if (tx->tx_nfrags != 0) {
                 kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
@@ -631,8 +629,8 @@ kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
        kib_hca_dev_t *hdev  = tx->tx_pool->tpo_hdev;
        kib_net_t     *net   = ni->ni_data;
        struct ib_mr  *mr    = NULL;
-       __u32          nob;
-       int            i;
+       __u32 nob;
+       int i;
 
         /* If rd is not tx_rd, it's going to get sent to a peer and I'm the
          * RDMA sink */
@@ -847,14 +845,26 @@ __must_hold(&conn->ibc_lock)
                 /* close_conn will launch failover */
                 rc = -ENETDOWN;
         } else {
-               struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq - 1];
+               struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd;
+               struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1];
+               struct ib_send_wr *wrq = tx->tx_wrq;
+
+               if (frd != NULL) {
+                       if (!frd->frd_valid) {
+                               wrq = &frd->frd_inv_wr;
+                               wrq->next = &frd->frd_fastreg_wr;
+                       } else {
+                               wrq = &frd->frd_fastreg_wr;
+                       }
+                       frd->frd_fastreg_wr.next = tx->tx_wrq;
+               }
 
-               LASSERTF(wrq->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
+               LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
                         "bad wr_id "LPX64", opc %d, flags %d, peer: %s\n",
                         "bad wr_id "LPX64", opc %d, flags %d, peer: %s\n",
-                        wrq->wr_id, wrq->opcode, wrq->send_flags,
+                        bad->wr_id, bad->opcode, bad->send_flags,
                         libcfs_nid2str(conn->ibc_peer->ibp_nid));
-               wrq = NULL;
-               rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &wrq);
+               bad = NULL;
+               rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad);
        }
 
         conn->ibc_last_send = jiffies;
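
Condensed from the posting logic above, assuming the same field names: the FastReg work request is linked ahead of the payload work requests, and a LOCAL_INV is linked ahead of the FastReg WR whenever the descriptor's key is stale, so a single ib_post_send() submits the whole chain in order.

static int post_tx_chain(struct ib_qp *qp, struct ib_send_wr *payload,
                         struct kib_fast_reg_descriptor *frd)
{
        struct ib_send_wr *first = payload;
        struct ib_send_wr *bad = NULL;

        if (frd != NULL) {
                /* registration must complete before the payload uses the key */
                frd->frd_fastreg_wr.next = payload;
                first = &frd->frd_fastreg_wr;
                if (!frd->frd_valid) {
                        /* stale key: invalidate it first, then re-register */
                        frd->frd_inv_wr.next = &frd->frd_fastreg_wr;
                        first = &frd->frd_inv_wr;
                }
        }
        return ib_post_send(qp, first, &bad);
}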
@@ -3398,9 +3408,15 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
 static void
 kiblnd_complete (struct ib_wc *wc)
 {
-        switch (kiblnd_wreqid2type(wc->wr_id)) {
-        default:
-                LBUG();
+       switch (kiblnd_wreqid2type(wc->wr_id)) {
+       default:
+               LBUG();
+
+       case IBLND_WID_MR:
+               if (wc->status != IB_WC_SUCCESS &&
+                   wc->status != IB_WC_WR_FLUSH_ERR)
+                       CNETERR("FastReg failed: %d\n", wc->status);
+               return;
 
         case IBLND_WID_RDMA:
                 /* We only get RDMA completion notification if it fails.  All