X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lnet%2Fklnds%2Fo2iblnd%2Fo2iblnd.c;h=92254b8a7fc50fd5bbf11b208557aaa16a2b8ae1;hb=ac25785328d31f63bd76a03f9cbb76f7f31f2ab0;hp=d13ace0ed23d020ba7140cc0eb68ec3bd5758ea2;hpb=e5ef85126953f866e155bb4f55cd85da963bde52;p=fs%2Flustre-release.git diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index d13ace0..92254b8 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -337,8 +337,8 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) peer->ibp_nid = nid; peer->ibp_error = 0; peer->ibp_last_alive = 0; - peer->ibp_max_frags = IBLND_CFG_RDMA_FRAGS; - peer->ibp_queue_depth = *kiblnd_tunables.kib_peertxcredits; + peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); + peer->ibp_queue_depth = ni->ni_peertxcredits; atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ @@ -1386,16 +1386,22 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo) } struct ib_mr * -kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev, kib_rdma_desc_t *rd, +kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int negotiated_nfrags) { - __u16 nfrags = (negotiated_nfrags != -1) ? - negotiated_nfrags : *kiblnd_tunables.kib_map_on_demand; + kib_net_t *net = ni->ni_data; + kib_hca_dev_t *hdev = net->ibn_dev->ibd_hdev; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; + int mod; + __u16 nfrags; + + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + mod = tunables->lnd_map_on_demand; + nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod; LASSERT(hdev->ibh_mrs != NULL); - if (*kiblnd_tunables.kib_map_on_demand > 0 && - nfrags <= rd->rd_nfrags) + if (mod > 0 && nfrags <= rd->rd_nfrags) return NULL; return hdev->ibh_mrs; @@ -1416,7 +1422,9 @@ kiblnd_destroy_fmr_pool(kib_fmr_pool_t *fpo) list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, frd_list) { list_del(&frd->frd_list); +#ifndef HAVE_IB_MAP_MR_SG ib_free_fast_reg_page_list(frd->frd_frpl); +#endif ib_dereg_mr(frd->frd_mr); LIBCFS_FREE(frd, sizeof(*frd)); i++; @@ -1443,16 +1451,20 @@ kiblnd_destroy_fmr_pool_list(struct list_head *head) } } -static int kiblnd_fmr_pool_size(int ncpts) +static int +kiblnd_fmr_pool_size(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_pool_size / ncpts; + int size = tunables->lnd_fmr_pool_size / ncpts; return max(IBLND_FMR_POOL, size); } -static int kiblnd_fmr_flush_trigger(int ncpts) +static int +kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, + int ncpts) { - int size = *kiblnd_tunables.kib_fmr_flush_trigger / ncpts; + int size = tunables->lnd_fmr_flush_trigger / ncpts; return max(IBLND_FMR_POOL_FLUSH, size); } @@ -1468,7 +1480,7 @@ static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) .dirty_watermark = fps->fps_flush_trigger, .flush_function = NULL, .flush_arg = NULL, - .cache = !!*kiblnd_tunables.kib_fmr_cache}; + .cache = !!fps->fps_cache }; int rc = 0; fpo->fmr.fpo_fmr_pool = ib_create_fmr_pool(fpo->fpo_hdev->ibh_pd, @@ -1501,20 +1513,30 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) } frd->frd_mr = NULL; +#ifndef HAVE_IB_MAP_MR_SG frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev, LNET_MAX_PAYLOAD/PAGE_SIZE); if (IS_ERR(frd->frd_frpl)) { rc = PTR_ERR(frd->frd_frpl); CERROR("Failed to allocate ib_fast_reg_page_list: %d\n", rc); + frd->frd_frpl = NULL; goto out_middle; } +#endif +#ifdef HAVE_IB_ALLOC_FAST_REG_MR frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd, LNET_MAX_PAYLOAD/PAGE_SIZE); +#else + frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd, + IB_MR_TYPE_MEM_REG, + LNET_MAX_PAYLOAD/PAGE_SIZE); +#endif if (IS_ERR(frd->frd_mr)) { rc = PTR_ERR(frd->frd_mr); CERROR("Failed to allocate ib_fast_reg_mr: %d\n", rc); + frd->frd_mr = NULL; goto out_middle; } @@ -1529,15 +1551,19 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) out_middle: if (frd->frd_mr) ib_dereg_mr(frd->frd_mr); +#ifndef HAVE_IB_MAP_MR_SG if (frd->frd_frpl) ib_free_fast_reg_page_list(frd->frd_frpl); +#endif LIBCFS_FREE(frd, sizeof(*frd)); out: list_for_each_entry_safe(frd, tmp, &fpo->fast_reg.fpo_pool_list, frd_list) { list_del(&frd->frd_list); +#ifndef HAVE_IB_MAP_MR_SG ib_free_fast_reg_page_list(frd->frd_frpl); +#endif ib_dereg_mr(frd->frd_mr); LIBCFS_FREE(frd, sizeof(*frd)); } @@ -1644,8 +1670,9 @@ kiblnd_fini_fmr_poolset(kib_fmr_poolset_t *fps) } static int -kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net, - int pool_size, int flush_trigger) +kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, int ncpts, + kib_net_t *net, + struct lnet_ioctl_config_o2iblnd_tunables *tunables) { kib_fmr_pool_t *fpo; int rc; @@ -1654,8 +1681,11 @@ kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net, fps->fps_net = net; fps->fps_cpt = cpt; - fps->fps_pool_size = pool_size; - fps->fps_flush_trigger = flush_trigger; + + fps->fps_pool_size = kiblnd_fmr_pool_size(tunables, ncpts); + fps->fps_flush_trigger = kiblnd_fmr_flush_trigger(tunables, ncpts); + fps->fps_cache = tunables->lnd_fmr_cache; + spin_lock_init(&fps->fps_lock); INIT_LIST_HEAD(&fps->fps_pool_list); INIT_LIST_HEAD(&fps->fps_failed_pool_list); @@ -1677,6 +1707,28 @@ kiblnd_fmr_pool_is_idle(kib_fmr_pool_t *fpo, cfs_time_t now) return cfs_time_aftereq(now, fpo->fpo_deadline); } +static int +kiblnd_map_tx_pages(kib_tx_t *tx, kib_rdma_desc_t *rd) +{ + kib_hca_dev_t *hdev; + __u64 *pages = tx->tx_pages; + int npages; + int size; + int i; + + hdev = tx->tx_pool->tpo_hdev; + + for (i = 0, npages = 0; i < rd->rd_nfrags; i++) { + for (size = 0; size < rd->rd_frags[i].rf_nob; + size += hdev->ibh_page_size) { + pages[npages++] = (rd->rd_frags[i].rf_addr & + hdev->ibh_page_mask) + size; + } + } + + return npages; +} + void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) { @@ -1735,11 +1787,15 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status) } int -kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages, - __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr) +kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx, kib_rdma_desc_t *rd, + __u32 nob, __u64 iov, kib_fmr_t *fmr) { kib_fmr_pool_t *fpo; + __u64 *pages = tx->tx_pages; __u64 version; + bool is_rx = (rd != tx->tx_rd); + bool tx_pages_mapped = 0; + int npages = 0; int rc; again: @@ -1753,6 +1809,12 @@ again: struct ib_pool_fmr *pfmr; spin_unlock(&fps->fps_lock); + + if (!tx_pages_mapped) { + npages = kiblnd_map_tx_pages(tx, rd); + tx_pages_mapped = 1; + } + pfmr = ib_fmr_pool_map_phys(fpo->fmr.fpo_fmr_pool, pages, npages, iov); if (likely(!IS_ERR(pfmr))) { @@ -1766,9 +1828,14 @@ again: rc = PTR_ERR(pfmr); } else { if (!list_empty(&fpo->fast_reg.fpo_pool_list)) { - struct ib_send_wr *wr; struct kib_fast_reg_descriptor *frd; +#ifdef HAVE_IB_MAP_MR_SG + struct ib_reg_wr *wr; + int n; +#else + struct ib_rdma_wr *wr; struct ib_fast_reg_page_list *frpl; +#endif struct ib_mr *mr; frd = list_first_entry(&fpo->fast_reg.fpo_pool_list, @@ -1777,24 +1844,55 @@ again: list_del(&frd->frd_list); spin_unlock(&fps->fps_lock); +#ifndef HAVE_IB_MAP_MR_SG frpl = frd->frd_frpl; +#endif mr = frd->frd_mr; if (!frd->frd_valid) { - struct ib_send_wr *inv_wr; + struct ib_rdma_wr *inv_wr; __u32 key = is_rx ? mr->rkey : mr->lkey; inv_wr = &frd->frd_inv_wr; memset(inv_wr, 0, sizeof(*inv_wr)); - inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = IBLND_WID_MR; - inv_wr->ex.invalidate_rkey = key; + + inv_wr->wr.opcode = IB_WR_LOCAL_INV; + inv_wr->wr.wr_id = IBLND_WID_MR; + inv_wr->wr.ex.invalidate_rkey = key; /* Bump the key */ key = ib_inc_rkey(key); ib_update_fast_reg_key(mr, key); } +#ifdef HAVE_IB_MAP_MR_SG + n = ib_map_mr_sg(mr, tx->tx_frags, + tx->tx_nfrags, PAGE_SIZE); + if (unlikely(n != tx->tx_nfrags)) { + CERROR("Failed to map mr %d/%d " + "elements\n", n, tx->tx_nfrags); + return n < 0 ? n : -EINVAL; + } + + mr->iova = iov; + + wr = &frd->frd_fastreg_wr; + memset(wr, 0, sizeof(*wr)); + + wr->wr.opcode = IB_WR_REG_MR; + wr->wr.wr_id = IBLND_WID_MR; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = mr; + wr->key = is_rx ? mr->rkey : mr->lkey; + wr->access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); +#else + if (!tx_pages_mapped) { + npages = kiblnd_map_tx_pages(tx, rd); + tx_pages_mapped = 1; + } + LASSERT(npages <= frpl->max_page_list_len); memcpy(frpl->page_list, pages, sizeof(*pages) * npages); @@ -1802,18 +1900,21 @@ again: /* Prepare FastReg WR */ wr = &frd->frd_fastreg_wr; memset(wr, 0, sizeof(*wr)); - wr->opcode = IB_WR_FAST_REG_MR; - wr->wr_id = IBLND_WID_MR; - wr->wr.fast_reg.iova_start = iov; - wr->wr.fast_reg.page_list = frpl; - wr->wr.fast_reg.page_list_len = npages; - wr->wr.fast_reg.page_shift = PAGE_SHIFT; - wr->wr.fast_reg.length = nob; - wr->wr.fast_reg.rkey = is_rx ? mr->rkey - : mr->lkey; - wr->wr.fast_reg.access_flags = + + wr->wr.opcode = IB_WR_FAST_REG_MR; + wr->wr.wr_id = IBLND_WID_MR; + + wr->wr.wr.fast_reg.iova_start = iov; + wr->wr.wr.fast_reg.page_list = frpl; + wr->wr.wr.fast_reg.page_list_len = npages; + wr->wr.wr.fast_reg.page_shift = PAGE_SHIFT; + wr->wr.wr.fast_reg.length = nob; + wr->wr.wr.fast_reg.rkey = + is_rx ? mr->rkey : mr->lkey; + wr->wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); +#endif fmr->fmr_key = is_rx ? mr->rkey : mr->lkey; fmr->fmr_frd = frd; @@ -2271,15 +2372,18 @@ kiblnd_net_fini_pools(kib_net_t *net) } static int -kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) +kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts) { + struct lnet_ioctl_config_o2iblnd_tunables *tunables; unsigned long flags; int cpt; int rc; int i; + tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (*kiblnd_tunables.kib_map_on_demand == 0) { + if (tunables->lnd_map_on_demand == 0) { read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); goto create_tx_pool; @@ -2287,10 +2391,9 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (*kiblnd_tunables.kib_fmr_pool_size < - *kiblnd_tunables.kib_ntx / 4) { + if (tunables->lnd_fmr_pool_size < *kiblnd_tunables.kib_ntx / 4) { CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n", - *kiblnd_tunables.kib_fmr_pool_size, + tunables->lnd_fmr_pool_size, *kiblnd_tunables.kib_ntx / 4); rc = -EINVAL; goto failed; @@ -2313,9 +2416,8 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts) for (i = 0; i < ncpts; i++) { cpt = (cpts == NULL) ? i : cpts[i]; - rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, net, - kiblnd_fmr_pool_size(ncpts), - kiblnd_fmr_flush_trigger(ncpts)); + rc = kiblnd_init_fmr_poolset(net->ibn_fmr_ps[cpt], cpt, ncpts, + net, tunables); if (rc != 0) { CERROR("Can't initialize FMR pool for CPT %d: %d\n", cpt, rc); @@ -2677,7 +2779,6 @@ kiblnd_create_dev(char *ifname) if (dev == NULL) return NULL; - memset(dev, 0, sizeof(*dev)); netdev = dev_get_by_name(&init_net, ifname); if (netdev == NULL) { dev->ibd_can_failover = 0; @@ -3068,15 +3169,10 @@ kiblnd_startup (lnet_ni_t *ni) if (net == NULL) goto failed; - memset(net, 0, sizeof(*net)); - do_gettimeofday(&tv); net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout; - ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; - ni->ni_peertxcredits = *kiblnd_tunables.kib_peertxcredits; - ni->ni_peerrtrcredits = *kiblnd_tunables.kib_peerrtrcredits; + kiblnd_tunables_setup(ni); if (ni->ni_interfaces[0] != NULL) { /* Use the IPoIB interface specified in 'networks=' */ @@ -3115,7 +3211,7 @@ kiblnd_startup (lnet_ni_t *ni) if (rc != 0) goto failed; - rc = kiblnd_net_init_pools(net, ni->ni_cpts, ni->ni_ncpts); + rc = kiblnd_net_init_pools(net, ni, ni->ni_cpts, ni->ni_ncpts); if (rc != 0) { CERROR("Failed to initialize NI pools: %d\n", rc); goto failed; @@ -3153,7 +3249,6 @@ static lnd_t the_o2iblnd = { static void __exit ko2iblnd_exit(void) { lnet_unregister_lnd(&the_o2iblnd); - kiblnd_tunables_fini(); } static int __init ko2iblnd_init(void)