Whamcloud - gitweb
LU-9026 o2iblnd: Adapt to the removal of ib_get_dma_mr() 77/25277/10
authorDmitry Eremin <dmitry.eremin@intel.com>
Thu, 2 Mar 2017 18:32:47 +0000 (21:32 +0300)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 23 Mar 2017 01:42:12 +0000 (01:42 +0000)
In Linux kernel 4.9-rc1, the function ib_get_dma_mr()
was removed and a second parameter was added to ib_alloc_pd().

This patch fixes this breakage by:

- Detect if ib_get_dma_mr() has been removed; if so, do the
  following:
- Make it so the module parameter map_on_demand can no longer be
  zero (we have to configure FMR/FastReg pools; it can no longer be
  off).
- No longer try to use the global DMA memory region, but make use
  of the FMR/FastReg pool for all RDMA Tx operations.
- Everywhere we are using the device DMA mr to derive the
  L-key for non-registered memory regions, use the
  pd->local_dma_lkey value instead.
- Make the default map_on_demand = 256.  This will allow nodes with
  this patch to still connect to older nodes without this patch
  and with FMR/FastReg turned off.  When FMR/FastReg is turned off, we
  use 256 as the max frags so the two sides will still be able to
  communicate and work.

Signed-off-by: Doug Oucharek <doug.s.oucharek@intel.com>
Change-Id: Iab967d1fdff760c2b06aed395152772fe71bc26d
Signed-off-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-on: https://review.whamcloud.com/25277
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/autoconf/lustre-lnet.m4
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c

index a1fdb5e..32167d5 100644 (file)
@@ -427,6 +427,27 @@ AS_IF([test $ENABLEO2IB != "no"], [
                        [ib_alloc_fast_reg_mr is defined])
        ])
 
                        [ib_alloc_fast_reg_mr is defined])
        ])
 
+       # 4.9 must stop using ib_get_dma_mr and the global MR
+       # We then have to use FMR/Fastreg for all RDMA.
+       LB_CHECK_COMPILE([if 'ib_get_dma_mr' exists],
+       ib_get_dma_mr, [
+               #ifdef HAVE_COMPAT_RDMA
+               #undef PACKAGE_NAME
+               #undef PACKAGE_TARNAME
+               #undef PACKAGE_VERSION
+               #undef PACKAGE_STRING
+               #undef PACKAGE_BUGREPORT
+               #undef PACKAGE_URL
+               #include <linux/compat-2.6.h>
+               #endif
+               #include <rdma/ib_verbs.h>
+       ],[
+               ib_get_dma_mr(NULL, 0);
+       ],[
+               AC_DEFINE(HAVE_IB_GET_DMA_MR, 1,
+                       [ib_get_dma_mr is defined])
+       ])
+
        # In v4.4 Linux kernel,
        # commit e622f2f4ad2142d2a613a57fb85f8cf737935ef5
        # split up struct ib_send_wr so that all non-trivial verbs
        # In v4.4 Linux kernel,
        # commit e622f2f4ad2142d2a613a57fb85f8cf737935ef5
        # split up struct ib_send_wr so that all non-trivial verbs
index 164af12..a5e7543 100644 (file)
@@ -1388,6 +1388,7 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
        }
 }
 
        }
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 struct ib_mr *
 kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                      int negotiated_nfrags)
 struct ib_mr *
 kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                      int negotiated_nfrags)
@@ -1409,6 +1410,7 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
 
        return hdev->ibh_mrs;
 }
 
        return hdev->ibh_mrs;
 }
+#endif
 
 static void
 kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
 
 static void
 kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
@@ -2398,13 +2400,16 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
                      int ncpts)
 {
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
                      int ncpts)
 {
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+#ifdef HAVE_IB_GET_DMA_MR
        unsigned long   flags;
        unsigned long   flags;
+#endif
        int             cpt;
        int             rc;
        int             i;
 
        tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
        int             cpt;
        int             rc;
        int             i;
 
        tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
+#ifdef HAVE_IB_GET_DMA_MR
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        if (tunables->lnd_map_on_demand == 0) {
                read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        if (tunables->lnd_map_on_demand == 0) {
                read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
@@ -2413,6 +2418,7 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
        }
 
        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
        }
 
        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+#endif
 
        if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
                CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
 
        if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
                CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
@@ -2451,7 +2457,9 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
        if (i > 0)
                LASSERT(i == ncpts);
 
        if (i > 0)
                LASSERT(i == ncpts);
 
+#ifdef HAVE_IB_GET_DMA_MR
  create_tx_pool:
  create_tx_pool:
+#endif
        net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
                                          sizeof(kib_tx_poolset_t));
        if (net->ibn_tx_ps == NULL) {
        net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
                                          sizeof(kib_tx_poolset_t));
        if (net->ibn_tx_ps == NULL) {
@@ -2526,6 +2534,7 @@ kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
         return -EINVAL;
 }
 
         return -EINVAL;
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 static void
 kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
 {
 static void
 kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
 {
@@ -2536,11 +2545,14 @@ kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
 
        hdev->ibh_mrs = NULL;
 }
 
        hdev->ibh_mrs = NULL;
 }
+#endif
 
 void
 kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
 {
 
 void
 kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
 {
+#ifdef HAVE_IB_GET_DMA_MR
         kiblnd_hdev_cleanup_mrs(hdev);
         kiblnd_hdev_cleanup_mrs(hdev);
+#endif
 
         if (hdev->ibh_pd != NULL)
                 ib_dealloc_pd(hdev->ibh_pd);
 
         if (hdev->ibh_pd != NULL)
                 ib_dealloc_pd(hdev->ibh_pd);
@@ -2551,6 +2563,7 @@ kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
         LIBCFS_FREE(hdev, sizeof(*hdev));
 }
 
         LIBCFS_FREE(hdev, sizeof(*hdev));
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 static int
 kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
 {
 static int
 kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
 {
@@ -2574,6 +2587,7 @@ kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
 
        return 0;
 }
 
        return 0;
 }
+#endif
 
 static int
 kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 
 static int
 kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
@@ -2710,12 +2724,16 @@ kiblnd_dev_failover(kib_dev_t *dev)
         hdev->ibh_cmid  = cmid;
         hdev->ibh_ibdev = cmid->device;
 
         hdev->ibh_cmid  = cmid;
         hdev->ibh_ibdev = cmid->device;
 
-        pd = ib_alloc_pd(cmid->device);
-        if (IS_ERR(pd)) {
-                rc = PTR_ERR(pd);
-                CERROR("Can't allocate PD: %d\n", rc);
-                goto out;
-        }
+#ifdef HAVE_IB_GET_DMA_MR
+       pd = ib_alloc_pd(cmid->device);
+#else
+       pd = ib_alloc_pd(cmid->device, 0);
+#endif
+       if (IS_ERR(pd)) {
+               rc = PTR_ERR(pd);
+               CERROR("Can't allocate PD: %d\n", rc);
+               goto out;
+       }
 
         hdev->ibh_pd = pd;
 
 
         hdev->ibh_pd = pd;
 
@@ -2725,11 +2743,19 @@ kiblnd_dev_failover(kib_dev_t *dev)
                 goto out;
         }
 
                 goto out;
         }
 
-        rc = kiblnd_hdev_setup_mrs(hdev);
-        if (rc != 0) {
-                CERROR("Can't setup device: %d\n", rc);
-                goto out;
-        }
+#ifdef HAVE_IB_GET_DMA_MR
+       rc = kiblnd_hdev_setup_mrs(hdev);
+       if (rc != 0) {
+               CERROR("Can't setup device: %d\n", rc);
+               goto out;
+       }
+#else
+       rc = kiblnd_hdev_get_attr(hdev);
+       if (rc != 0) {
+               CERROR("Can't get device attributes: %d\n", rc);
+               goto out;
+       }
+#endif
 
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
 
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
index 298a355..e0409b5 100644 (file)
@@ -203,7 +203,9 @@ typedef struct kib_hca_dev
        __u64                ibh_page_mask;     /* page mask of current HCA */
        int                  ibh_mr_shift;      /* bits shift of max MR size */
        __u64                ibh_mr_size;       /* size of MR */
        __u64                ibh_page_mask;     /* page mask of current HCA */
        int                  ibh_mr_shift;      /* bits shift of max MR size */
        __u64                ibh_mr_size;       /* size of MR */
+#ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr        *ibh_mrs;           /* global MR */
        struct ib_mr        *ibh_mrs;           /* global MR */
+#endif
        struct ib_pd        *ibh_pd;            /* PD */
        kib_dev_t           *ibh_dev;           /* owner */
        atomic_t             ibh_ref;           /* refcount */
        struct ib_pd        *ibh_pd;            /* PD */
        kib_dev_t           *ibh_dev;           /* owner */
        atomic_t             ibh_ref;           /* refcount */
@@ -1158,8 +1160,10 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM(e)            ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e)        ((e)->param.conn.private_data_len)
 
 #define KIBLND_CONN_PARAM(e)            ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e)        ((e)->param.conn.private_data_len)
 
+#ifdef HAVE_IB_GET_DMA_MR
 struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                                    int negotiated_nfrags);
 struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                                    int negotiated_nfrags);
+#endif
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
index 309f8c3..603013d 100644 (file)
@@ -153,7 +153,9 @@ kiblnd_post_rx (kib_rx_t *rx, int credit)
        kib_conn_t         *conn = rx->rx_conn;
        kib_net_t          *net = conn->ibc_peer->ibp_ni->ni_data;
        struct ib_recv_wr  *bad_wrq = NULL;
        kib_conn_t         *conn = rx->rx_conn;
        kib_net_t          *net = conn->ibc_peer->ibp_ni->ni_data;
        struct ib_recv_wr  *bad_wrq = NULL;
+#ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr       *mr = conn->ibc_hdev->ibh_mrs;
        struct ib_mr       *mr = conn->ibc_hdev->ibh_mrs;
+#endif
        int                 rc;
 
        LASSERT (net != NULL);
        int                 rc;
 
        LASSERT (net != NULL);
@@ -161,9 +163,13 @@ kiblnd_post_rx (kib_rx_t *rx, int credit)
        LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
                 credit == IBLND_POSTRX_PEER_CREDIT ||
                 credit == IBLND_POSTRX_RSRVD_CREDIT);
        LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
                 credit == IBLND_POSTRX_PEER_CREDIT ||
                 credit == IBLND_POSTRX_RSRVD_CREDIT);
+#ifdef HAVE_IB_GET_DMA_MR
        LASSERT(mr != NULL);
 
        LASSERT(mr != NULL);
 
-        rx->rx_sge.lkey   = mr->lkey;
+       rx->rx_sge.lkey   = mr->lkey;
+#else
+       rx->rx_sge.lkey   = conn->ibc_hdev->ibh_pd->local_dma_lkey;
+#endif
         rx->rx_sge.addr   = rx->rx_msgaddr;
         rx->rx_sge.length = IBLND_MSG_SIZE;
 
         rx->rx_sge.addr   = rx->rx_msgaddr;
         rx->rx_sge.length = IBLND_MSG_SIZE;
 
@@ -605,7 +611,9 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
 {
        kib_net_t     *net   = ni->ni_data;
        kib_hca_dev_t *hdev  = net->ibn_dev->ibd_hdev;
 {
        kib_net_t     *net   = ni->ni_data;
        kib_hca_dev_t *hdev  = net->ibn_dev->ibd_hdev;
+#ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr  *mr    = NULL;
        struct ib_mr  *mr    = NULL;
+#endif
        __u32 nob;
        int i;
 
        __u32 nob;
        int i;
 
@@ -625,6 +633,7 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
                 nob += rd->rd_frags[i].rf_nob;
         }
 
                 nob += rd->rd_frags[i].rf_nob;
         }
 
+#ifdef HAVE_IB_GET_DMA_MR
        mr = kiblnd_find_rd_dma_mr(ni, rd,
                                   (tx->tx_conn != NULL) ?
                                   tx->tx_conn->ibc_max_frags : -1);
        mr = kiblnd_find_rd_dma_mr(ni, rd,
                                   (tx->tx_conn != NULL) ?
                                   tx->tx_conn->ibc_max_frags : -1);
@@ -633,6 +642,7 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
                rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
                return 0;
        }
                rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
                return 0;
        }
+#endif
 
        if (net->ibn_fmr_ps != NULL)
                return kiblnd_fmr_map_tx(net, tx, rd, nob);
 
        if (net->ibn_fmr_ps != NULL)
                return kiblnd_fmr_map_tx(net, tx, rd, nob);
@@ -1017,18 +1027,26 @@ kiblnd_init_tx_msg(struct lnet_ni *ni, kib_tx_t *tx, int type, int body_nob)
        struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
        struct ib_rdma_wr *wrq;
        int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
        struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
        struct ib_rdma_wr *wrq;
        int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
+#ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr *mr = hdev->ibh_mrs;
        struct ib_mr *mr = hdev->ibh_mrs;
+#endif
 
        LASSERT(tx->tx_nwrq >= 0);
        LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
        LASSERT(nob <= IBLND_MSG_SIZE);
 
        LASSERT(tx->tx_nwrq >= 0);
        LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
        LASSERT(nob <= IBLND_MSG_SIZE);
+#ifdef HAVE_IB_GET_DMA_MR
        LASSERT(mr != NULL);
        LASSERT(mr != NULL);
+#endif
 
 
-        kiblnd_init_msg(tx->tx_msg, type, body_nob);
+       kiblnd_init_msg(tx->tx_msg, type, body_nob);
 
 
-        sge->lkey   = mr->lkey;
-        sge->addr   = tx->tx_msgaddr;
-        sge->length = nob;
+#ifdef HAVE_IB_GET_DMA_MR
+       sge->lkey   = mr->lkey;
+#else
+       sge->lkey   = hdev->ibh_pd->local_dma_lkey;
+#endif
+       sge->addr   = tx->tx_msgaddr;
+       sge->length = nob;
 
        wrq = &tx->tx_wrq[tx->tx_nwrq];
        memset(wrq, 0, sizeof(*wrq));
 
        wrq = &tx->tx_wrq[tx->tx_nwrq];
        memset(wrq, 0, sizeof(*wrq));
index f2195cb..4911431 100644 (file)
@@ -104,7 +104,14 @@ static int concurrent_sends;
 module_param(concurrent_sends, int, 0444);
 MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
 
 module_param(concurrent_sends, int, 0444);
 MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
 
-static int map_on_demand;
+#ifdef HAVE_IB_GET_DMA_MR
+#define IBLND_DEFAULT_MAP_ON_DEMAND 0
+#define IBLND_MIN_MAP_ON_DEMAND 0
+#else
+#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
+#define IBLND_MIN_MAP_ON_DEMAND 1
+#endif
+static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
 module_param(map_on_demand, int, 0444);
 MODULE_PARM_DESC(map_on_demand, "map on demand");
 
 module_param(map_on_demand, int, 0444);
 MODULE_PARM_DESC(map_on_demand, "map on demand");
 
@@ -228,10 +235,13 @@ kiblnd_tunables_setup(struct lnet_ni *ni)
        if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
                tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
 
        if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
                tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
 
-       if (tunables->lnd_map_on_demand < 0 ||
+       if (tunables->lnd_map_on_demand < IBLND_MIN_MAP_ON_DEMAND ||
            tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
            tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
-               /* disable map-on-demand */
-               tunables->lnd_map_on_demand = 0;
+               /* Use the default */
+               CWARN("Invalid map_on_demand (%d), expects %d - %d. Using default of %d\n",
+                     tunables->lnd_map_on_demand, IBLND_MIN_MAP_ON_DEMAND,
+                     IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
+               tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
        }
 
        if (tunables->lnd_map_on_demand == 1) {
        }
 
        if (tunables->lnd_map_on_demand == 1) {