From: Chris Horn
Date: Thu, 16 Sep 2021 17:12:38 +0000 (-0500)
Subject: LU-15092 o2iblnd: Fix logic for unaligned transfer
X-Git-Tag: 2.14.56~88
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=23a2c92f203ff2f39bcc083e6b6220968c17b475

LU-15092 o2iblnd: Fix logic for unaligned transfer

It's possible for the first page of a transfer to start at a non-zero
offset. However, the o2iblnd code handling this case has two bugs.

The first is that this use case requires LNET_MAX_IOV + 1 local RDMA
fragments, but the corresponding maximum page-list size is not passed
to ib_alloc_fast_reg_page_list(), ib_alloc_fast_reg_mr(), etc.

The second is that the logic in kiblnd_setup_rd_kiov() attempts to
obtain one more scatterlist entry than is actually needed. This
causes the transfer to fail with -EFAULT.

Test-Parameters: trivial
HPE-bug-id: LUS-10407
Fixes: d226464aca ("LU-8057 ko2iblnd: Replace sg++ with sg = sg_next(sg)")
Signed-off-by: Chris Horn
Change-Id: Ifb843f11ae34a99b7d8f93d94966e3dfa1ce90e5
Reviewed-on: https://review.whamcloud.com/45216
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: James Simmons
Reviewed-by: Andriy Skulysh
Reviewed-by: Oleg Drokin
---

diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index 341bef6..e3a8845 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1464,7 +1464,7 @@ static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps,
 				 struct kib_fmr_pool *fpo)
 {
 	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_IOV,
+		.max_pages_per_fmr = IBLND_MAX_RDMA_FRAGS,
 		.page_shift        = PAGE_SHIFT,
 		.access            = (IB_ACCESS_LOCAL_WRITE |
 				      IB_ACCESS_REMOTE_WRITE),
@@ -1515,7 +1515,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 
 #ifndef HAVE_IB_MAP_MR_SG
 		frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
-							    LNET_MAX_IOV);
+							    IBLND_MAX_RDMA_FRAGS);
 		if (IS_ERR(frd->frd_frpl)) {
 			rc = PTR_ERR(frd->frd_frpl);
 			CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
@@ -1527,7 +1527,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 
 #ifdef HAVE_IB_ALLOC_FAST_REG_MR
 		frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
-						   LNET_MAX_IOV);
+						   IBLND_MAX_RDMA_FRAGS);
 #else
 		/*
 		 * it is expected to get here if this is an MLX-5 card.
@@ -1545,7 +1545,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 #else
 						IB_MR_TYPE_MEM_REG,
 #endif
-						LNET_MAX_IOV);
+						IBLND_MAX_RDMA_FRAGS);
 		if ((*kiblnd_tunables.kib_use_fastreg_gaps == 1) &&
 		    (dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT))
 			CWARN("using IB_MR_TYPE_SG_GAPS, expect a performance drop\n");
@@ -2223,13 +2223,13 @@ kiblnd_destroy_tx_pool(struct kib_pool *pool)
 			CFS_FREE_PTR_ARRAY(tx->tx_pages, LNET_MAX_IOV);
 		if (tx->tx_frags != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_frags,
-					   (1 + IBLND_MAX_RDMA_FRAGS));
+					   IBLND_MAX_RDMA_FRAGS);
 		if (tx->tx_wrq != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_wrq,
-					   (1 + IBLND_MAX_RDMA_FRAGS));
+					   IBLND_MAX_RDMA_FRAGS);
 		if (tx->tx_sge != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_sge,
-					   (1 + IBLND_MAX_RDMA_FRAGS) *
+					   IBLND_MAX_RDMA_FRAGS *
 					   wrq_sge);
 		if (tx->tx_rd != NULL)
 			LIBCFS_FREE(tx->tx_rd,
@@ -2304,21 +2304,21 @@ kiblnd_create_tx_pool(struct kib_poolset *ps, int size, struct kib_pool **pp_po)
 		}
 
 		LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 IBLND_MAX_RDMA_FRAGS *
 				 sizeof(*tx->tx_frags));
 		if (tx->tx_frags == NULL)
 			break;
 
-		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
+		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
 
 		LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 IBLND_MAX_RDMA_FRAGS *
 				 sizeof(*tx->tx_wrq));
 		if (tx->tx_wrq == NULL)
 			break;
 
 		LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
+				 IBLND_MAX_RDMA_FRAGS * wrq_sge *
 				 sizeof(*tx->tx_sge));
 		if (tx->tx_sge == NULL)
 			break;
diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index 3d3f9f0..f6d2a06 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/lnet/klnds/o2iblnd/o2iblnd.h
@@ -149,8 +149,10 @@ extern struct kib_tunables kiblnd_tunables;
 #define IBLND_OOB_CAPABLE(v)       ((v) != IBLND_MSG_VERSION_1)
 #define IBLND_OOB_MSGS(v)          (IBLND_OOB_CAPABLE(v) ? 2 : 0)
 
-#define IBLND_MSG_SIZE             (4<<10)        /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS       LNET_MAX_IOV   /* max # of fragments supported */
+/* max size of queued messages (inc hdr) */
+#define IBLND_MSG_SIZE		(4<<10)
+/* max # of fragments supported. + 1 for unaligned case */
+#define IBLND_MAX_RDMA_FRAGS	(LNET_MAX_IOV + 1)
 
 /************************/
 /* derived constants... */
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 07c2efa..bb5910ec 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -749,8 +749,9 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 {
 	struct kib_net *net = ni->ni_data;
 	struct scatterlist *sg;
-	int		fragnob;
-	int		max_nkiov;
+	int fragnob;
+	int max_nkiov;
+	int sg_count = 0;
 
 	CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
 
@@ -771,6 +772,12 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 	do {
 		LASSERT(nkiov > 0);
 
+		if (!sg) {
+			CERROR("lacking enough sg entries to map tx\n");
+			return -EFAULT;
+		}
+		sg_count++;
+
 		fragnob = min((int)(kiov->bv_len - offset), nob);
 
 		/*
@@ -790,10 +797,6 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 		sg_set_page(sg, kiov->bv_page, fragnob,
 			    kiov->bv_offset + offset);
 		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
 
 		offset = 0;
 		kiov++;
@@ -801,7 +804,7 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 		nob -= fragnob;
 	} while (nob > 0);
 
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+	return kiblnd_map_tx(ni, tx, rd, sg_count);
 }
 
 static int
@@ -1100,7 +1103,7 @@ kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
 #endif
 
 	LASSERT(tx->tx_nwrq >= 0);
-	LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+	LASSERT(tx->tx_nwrq <= IBLND_MAX_RDMA_FRAGS);
 	LASSERT(nob <= IBLND_MSG_SIZE);
 #ifdef HAVE_IB_GET_DMA_MR
 	LASSERT(mr != NULL);
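
Note on the first issue: when a transfer's first page starts at a non-zero
offset, LNET_MAX_IOV pages' worth of payload can straddle one extra page
boundary, so up to LNET_MAX_IOV + 1 scatter/gather fragments are needed.
The standalone sketch below is not o2iblnd code; page_size, max_iov and
frags_needed() are illustrative stand-ins that simply work through that
arithmetic.

/*
 * Standalone sketch (not kernel code): fragment count for a transfer that
 * may start at a non-zero offset within its first page.
 */
#include <stdio.h>

static unsigned int frags_needed(unsigned int nob, unsigned int offset,
				 unsigned int page_size)
{
	/* bytes from the start of the first page to the end of the data,
	 * rounded up to whole pages */
	return (offset + nob + page_size - 1) / page_size;
}

int main(void)
{
	const unsigned int page_size = 4096;
	const unsigned int max_iov = 256;	/* stand-in value for LNET_MAX_IOV */
	unsigned int nob = max_iov * page_size;	/* maximum-sized transfer */

	/* page-aligned: exactly max_iov fragments */
	printf("aligned:   %u\n", frags_needed(nob, 0, page_size));
	/* first page starts mid-page: one extra fragment is required */
	printf("unaligned: %u\n", frags_needed(nob, 100, page_size));
	return 0;
}

With these example values it prints 256 for the aligned case and 257 for the
unaligned one, which is why the allocation sizes above move from LNET_MAX_IOV
to IBLND_MAX_RDMA_FRAGS = LNET_MAX_IOV + 1.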
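
Note on the second issue: the old loop in kiblnd_setup_rd_kiov() advanced sg
with sg_next() after filling each entry and then treated a NULL result as an
error, even when the transfer had just been fully mapped, so a scatterlist
sized exactly to the transfer failed with -EFAULT; the pointer-arithmetic
return value (sg - tx->tx_frags) is also unusable once sg is NULL. The fix
checks for exhaustion before filling the next entry and returns a running
count. The sketch below is a user-space analogue, not kernel code; entry,
next_entry() and map_fragments() are made-up stand-ins for the scatterlist
handling, shown only to illustrate the corrected loop ordering.

/*
 * User-space analogue of the fixed loop shape: fail only when another entry
 * is needed and none is left, and count entries as they are consumed.
 */
#include <stdio.h>

#define NENTRIES 4

struct entry { int nob; };

static struct entry table[NENTRIES];

/* stand-in for sg_next(): returns NULL after the last entry of the table */
static struct entry *next_entry(struct entry *e)
{
	return (e == &table[NENTRIES - 1]) ? NULL : e + 1;
}

static int map_fragments(int nfrags, const int *frag_nob)
{
	struct entry *e = &table[0];
	int count = 0;
	int i;

	for (i = 0; i < nfrags; i++) {
		/* check *before* filling: only an error if more data remains */
		if (!e) {
			fprintf(stderr, "lacking enough entries\n");
			return -1;
		}
		count++;

		e->nob = frag_nob[i];
		e = next_entry(e);	/* may be NULL after the last entry */
	}
	return count;			/* entries actually used */
}

int main(void)
{
	int frags[NENTRIES] = { 4096, 4096, 4096, 100 };

	/* uses every entry in the table; the old ordering (advance, then
	 * fail on NULL) would have rejected this last fragment */
	printf("mapped %d entries\n", map_fragments(NENTRIES, frags));
	return 0;
}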