From: Dmitry Eremin
Date: Thu, 2 Mar 2017 18:32:47 +0000 (+0300)
Subject: LU-9026 o2iblnd: Adapt to the removal of ib_get_dma_mr()
X-Git-Tag: 2.9.55~31
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F77%2F25277%2F10;p=fs%2Flustre-release.git

LU-9026 o2iblnd: Adapt to the removal of ib_get_dma_mr()

In Linux kernel 4.9-rc1, the function ib_get_dma_mr() was removed
and a second parameter was added to ib_alloc_pd().

This patch fixes the breakage by:

- Detect whether ib_get_dma_mr() has been removed and, if so, do the
  following:
  - Make it so the module parameter map_on_demand can no longer be
    zero (we have to configure FMR/FastReg pools; it can no longer
    be off).
  - No longer try to use the global DMA memory region; instead, use
    the FMR/FastReg pool for all RDMA Tx operations.
  - Everywhere the device DMA MR was used to derive the L-key for
    non-registered memory regions, use the pd->local_dma_lkey value
    instead.
  - Make the default map_on_demand = 256. This allows nodes with
    this patch to still connect to older nodes that lack this patch
    and have FMR/FastReg turned off: when FMR/FastReg is off, we use
    256 as the max frags, so the two sides can still communicate
    and work.

Signed-off-by: Doug Oucharek
Change-Id: Iab967d1fdff760c2b06aed395152772fe71bc26d
Signed-off-by: Dmitry Eremin
Reviewed-on: https://review.whamcloud.com/25277
Reviewed-by: James Simmons
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4
index a1fdb5e..32167d5 100644
--- a/lnet/autoconf/lustre-lnet.m4
+++ b/lnet/autoconf/lustre-lnet.m4
@@ -427,6 +427,27 @@ AS_IF([test $ENABLEO2IB != "no"], [
                 [ib_alloc_fast_reg_mr is defined])
         ])
 
+        # 4.9 must stop using ib_get_dma_mr and the global MR
+        # We then have to use FMR/Fastreg for all RDMA.
+        LB_CHECK_COMPILE([if 'ib_get_dma_mr' exists],
+        ib_get_dma_mr, [
+                #ifdef HAVE_COMPAT_RDMA
+                #undef PACKAGE_NAME
+                #undef PACKAGE_TARNAME
+                #undef PACKAGE_VERSION
+                #undef PACKAGE_STRING
+                #undef PACKAGE_BUGREPORT
+                #undef PACKAGE_URL
+                #include <linux/compat-2.6.h>
+                #endif
+                #include <rdma/ib_verbs.h>
+        ],[
+                ib_get_dma_mr(NULL, 0);
+        ],[
+                AC_DEFINE(HAVE_IB_GET_DMA_MR, 1,
+                        [ib_get_dma_mr is defined])
+        ])
+
         # In v4.4 Linux kernel,
         # commit e622f2f4ad2142d2a613a57fb85f8cf737935ef5
         # split up struct ib_send_wr so that all non-trivial verbs
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index 164af12..a5e7543 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1388,6 +1388,7 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
         }
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 struct ib_mr *
 kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                       int negotiated_nfrags)
@@ -1409,6 +1410,7 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
 
         return hdev->ibh_mrs;
 }
+#endif
 
 static void
 kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo)
@@ -2398,13 +2400,16 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
                       int ncpts)
 {
         struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+#ifdef HAVE_IB_GET_DMA_MR
         unsigned long   flags;
+#endif
         int             cpt;
         int             rc;
         int             i;
 
         tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
+#ifdef HAVE_IB_GET_DMA_MR
         read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
         if (tunables->lnd_map_on_demand == 0) {
                 read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
@@ -2413,6 +2418,7 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
         }
         read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+#endif
 
         if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) {
                 CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
@@ -2451,7 +2457,9 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts,
         if (i > 0)
                 LASSERT(i == ncpts);
 
+#ifdef HAVE_IB_GET_DMA_MR
 create_tx_pool:
+#endif
         net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
                                           sizeof(kib_tx_poolset_t));
         if (net->ibn_tx_ps == NULL) {
@@ -2526,6 +2534,7 @@ kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
         return -EINVAL;
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 static void
 kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
 {
@@ -2536,11 +2545,14 @@ kiblnd_hdev_cleanup_mrs(kib_hca_dev_t *hdev)
 
         hdev->ibh_mrs = NULL;
 }
+#endif
 
 void
 kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
 {
+#ifdef HAVE_IB_GET_DMA_MR
         kiblnd_hdev_cleanup_mrs(hdev);
+#endif
         if (hdev->ibh_pd != NULL)
                 ib_dealloc_pd(hdev->ibh_pd);
 
@@ -2551,6 +2563,7 @@ kiblnd_hdev_destroy(kib_hca_dev_t *hdev)
         LIBCFS_FREE(hdev, sizeof(*hdev));
 }
 
+#ifdef HAVE_IB_GET_DMA_MR
 static int
 kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
 {
@@ -2574,6 +2587,7 @@ kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev)
 
         return 0;
 }
+#endif
 
 static int
 kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
@@ -2710,12 +2724,16 @@ kiblnd_dev_failover(kib_dev_t *dev)
         hdev->ibh_cmid  = cmid;
         hdev->ibh_ibdev = cmid->device;
 
-        pd = ib_alloc_pd(cmid->device);
-        if (IS_ERR(pd)) {
-                rc = PTR_ERR(pd);
-                CERROR("Can't allocate PD: %d\n", rc);
-                goto out;
-        }
+#ifdef HAVE_IB_GET_DMA_MR
+        pd = ib_alloc_pd(cmid->device);
+#else
+        pd = ib_alloc_pd(cmid->device, 0);
+#endif
+        if (IS_ERR(pd)) {
+                rc = PTR_ERR(pd);
+                CERROR("Can't allocate PD: %d\n", rc);
+                goto out;
+        }
 
         hdev->ibh_pd = pd;
 
@@ -2725,11 +2743,19 @@ kiblnd_dev_failover(kib_dev_t *dev)
                 goto out;
         }
 
-        rc = kiblnd_hdev_setup_mrs(hdev);
-        if (rc != 0) {
-                CERROR("Can't setup device: %d\n", rc);
-                goto out;
-        }
+#ifdef HAVE_IB_GET_DMA_MR
+        rc = kiblnd_hdev_setup_mrs(hdev);
+        if (rc != 0) {
+                CERROR("Can't setup device: %d\n", rc);
+                goto out;
+        }
+#else
+        rc = kiblnd_hdev_get_attr(hdev);
+        if (rc != 0) {
+                CERROR("Can't get device attributes: %d\n", rc);
+                goto out;
+        }
+#endif
 
         write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index 298a355..e0409b5 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/lnet/klnds/o2iblnd/o2iblnd.h
@@ -203,7 +203,9 @@ typedef struct kib_hca_dev
         __u64                ibh_page_mask; /* page mask of current HCA */
         int                  ibh_mr_shift;  /* bits shift of max MR size */
         __u64                ibh_mr_size;   /* size of MR */
+#ifdef HAVE_IB_GET_DMA_MR
         struct ib_mr        *ibh_mrs;       /* global MR */
+#endif
         struct ib_pd        *ibh_pd;        /* PD */
         kib_dev_t           *ibh_dev;       /* owner */
         atomic_t             ibh_ref;       /* refcount */
@@ -1158,8 +1160,10 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 #define KIBLND_CONN_PARAM(e)     ((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
 
+#ifdef HAVE_IB_GET_DMA_MR
 struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
                                     int negotiated_nfrags);
+#endif
 void kiblnd_map_rx_descs(kib_conn_t *conn);
 void kiblnd_unmap_rx_descs(kib_conn_t *conn);
 void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 309f8c3..603013d 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -153,7 +153,9 @@ kiblnd_post_rx (kib_rx_t *rx, int credit)
         kib_conn_t        *conn = rx->rx_conn;
         kib_net_t         *net = conn->ibc_peer->ibp_ni->ni_data;
         struct ib_recv_wr *bad_wrq = NULL;
+#ifdef HAVE_IB_GET_DMA_MR
         struct ib_mr      *mr = conn->ibc_hdev->ibh_mrs;
+#endif
         int                rc;
 
         LASSERT (net != NULL);
@@ -161,9 +163,13 @@ kiblnd_post_rx (kib_rx_t *rx, int credit)
         LASSERT (credit == IBLND_POSTRX_NO_CREDIT ||
                  credit == IBLND_POSTRX_PEER_CREDIT ||
                  credit == IBLND_POSTRX_RSRVD_CREDIT);
+#ifdef HAVE_IB_GET_DMA_MR
         LASSERT(mr != NULL);
 
-        rx->rx_sge.lkey   = mr->lkey;
+        rx->rx_sge.lkey   = mr->lkey;
+#else
+        rx->rx_sge.lkey   = conn->ibc_hdev->ibh_pd->local_dma_lkey;
+#endif
         rx->rx_sge.addr   = rx->rx_msgaddr;
         rx->rx_sge.length = IBLND_MSG_SIZE;
@@ -605,7 +611,9 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
 {
         kib_net_t     *net = ni->ni_data;
         kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev;
+#ifdef HAVE_IB_GET_DMA_MR
         struct ib_mr  *mr = NULL;
+#endif
         __u32          nob;
         int            i;
@@ -625,6 +633,7 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
                 nob += rd->rd_frags[i].rf_nob;
         }
 
+#ifdef HAVE_IB_GET_DMA_MR
         mr = kiblnd_find_rd_dma_mr(ni, rd,
                                    (tx->tx_conn != NULL) ?
                                    tx->tx_conn->ibc_max_frags : -1);
@@ -633,6 +642,7 @@ kiblnd_map_tx(struct lnet_ni *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, int nfrags)
                 rd->rd_key = (rd != tx->tx_rd) ? mr->rkey : mr->lkey;
                 return 0;
         }
+#endif
 
         if (net->ibn_fmr_ps != NULL)
                 return kiblnd_fmr_map_tx(net, tx, rd, nob);
@@ -1017,18 +1027,26 @@ kiblnd_init_tx_msg(struct lnet_ni *ni, kib_tx_t *tx, int type, int body_nob)
         struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
         struct ib_rdma_wr *wrq;
         int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
+#ifdef HAVE_IB_GET_DMA_MR
         struct ib_mr *mr = hdev->ibh_mrs;
+#endif
 
         LASSERT(tx->tx_nwrq >= 0);
         LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
         LASSERT(nob <= IBLND_MSG_SIZE);
+#ifdef HAVE_IB_GET_DMA_MR
         LASSERT(mr != NULL);
+#endif
 
-        kiblnd_init_msg(tx->tx_msg, type, body_nob);
+        kiblnd_init_msg(tx->tx_msg, type, body_nob);
 
-        sge->lkey   = mr->lkey;
-        sge->addr   = tx->tx_msgaddr;
-        sge->length = nob;
+#ifdef HAVE_IB_GET_DMA_MR
+        sge->lkey   = mr->lkey;
+#else
+        sge->lkey   = hdev->ibh_pd->local_dma_lkey;
+#endif
+        sge->addr   = tx->tx_msgaddr;
+        sge->length = nob;
 
         wrq = &tx->tx_wrq[tx->tx_nwrq];
         memset(wrq, 0, sizeof(*wrq));
diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c
index f2195cb..4911431 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c
@@ -104,7 +104,14 @@ static int concurrent_sends;
 module_param(concurrent_sends, int, 0444);
 MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
 
-static int map_on_demand;
+#ifdef HAVE_IB_GET_DMA_MR
+#define IBLND_DEFAULT_MAP_ON_DEMAND 0
+#define IBLND_MIN_MAP_ON_DEMAND 0
+#else
+#define IBLND_DEFAULT_MAP_ON_DEMAND IBLND_MAX_RDMA_FRAGS
+#define IBLND_MIN_MAP_ON_DEMAND 1
+#endif
+static int map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
 module_param(map_on_demand, int, 0444);
 MODULE_PARM_DESC(map_on_demand, "map on demand");
 
@@ -228,10 +235,13 @@ kiblnd_tunables_setup(struct lnet_ni *ni)
         if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
                 tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
 
-        if (tunables->lnd_map_on_demand < 0 ||
+        if (tunables->lnd_map_on_demand < IBLND_MIN_MAP_ON_DEMAND ||
             tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
-                /* disable map-on-demand */
-                tunables->lnd_map_on_demand = 0;
+                /* Use the default */
+                CWARN("Invalid map_on_demand (%d), expects %d - %d. Using default of %d\n",
+                      tunables->lnd_map_on_demand, IBLND_MIN_MAP_ON_DEMAND,
+                      IBLND_MAX_RDMA_FRAGS, IBLND_DEFAULT_MAP_ON_DEMAND);
+                tunables->lnd_map_on_demand = IBLND_DEFAULT_MAP_ON_DEMAND;
         }
 
         if (tunables->lnd_map_on_demand == 1) {
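
For reference, the compatibility pattern used throughout this patch
reduces to the sketch below. This is illustrative only and not part of
the patch: HAVE_IB_GET_DMA_MR is the macro defined by the configure
test added above, while the kiblnd_sketch_* helper names are
hypothetical and exist only to show the two code paths side by side.

/*
 * Sketch of the pre-4.9 vs. 4.9+ split: on older kernels the lkey for
 * non-registered memory comes from the global DMA MR, while on 4.9+
 * kernels the PD exposes local_dma_lkey directly and ib_alloc_pd()
 * takes a flags argument.
 */
#include <rdma/ib_verbs.h>

static struct ib_pd *kiblnd_sketch_alloc_pd(struct ib_device *ibdev)
{
#ifdef HAVE_IB_GET_DMA_MR
	/* Pre-4.9: single-argument ib_alloc_pd(). */
	return ib_alloc_pd(ibdev);
#else
	/* 4.9+: the second argument carries PD creation flags (none here). */
	return ib_alloc_pd(ibdev, 0);
#endif
}

static u32 kiblnd_sketch_lkey(struct ib_pd *pd, struct ib_mr *global_mr)
{
#ifdef HAVE_IB_GET_DMA_MR
	/* Pre-4.9: derive the lkey from the global DMA MR. */
	return global_mr->lkey;
#else
	/* 4.9+: every PD exposes a local DMA lkey directly. */
	return pd->local_dma_lkey;
#endif
}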