From 9f772b0a47fd786606f05bb1643af99efeb4b9c3 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Thu, 16 Sep 2021 12:12:38 -0500
Subject: [PATCH] LU-15092 o2iblnd: Fix logic for unaligned transfer

It's possible for there to be an offset for the first page of a
transfer. However, there are two bugs with this code in o2iblnd.

The first is that this use-case will require LNET_MAX_IOV + 1 local
RDMA fragments, but we do not specify the correct corresponding values
for the max page list to ib_alloc_fast_reg_page_list(),
ib_alloc_fast_reg_mr(), etc.

The second issue is that the logic in kiblnd_setup_rd_kiov() attempts
to obtain one more scatterlist entry than is actually needed. This
causes the transfer to fail with -EFAULT.

Lustre-change: https://review.whamcloud.com/45216
Lustre-commit: 23a2c92f203ff2f39bcc083e6b6220968c17b475
Test-Parameters: trivial
HPE-bug-id: LUS-10407
Fixes: d226464aca ("LU-8057 ko2iblnd: Replace sg++ with sg = sg_next(sg)")
Signed-off-by: Chris Horn
Change-Id: Ifb843f11ae34a99b7d8f93d94966e3dfa1ce90e5
Reviewed-on: https://review.whamcloud.com/46474
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Cyril Bordage
Reviewed-by: Andreas Dilger
---
 lnet/klnds/o2iblnd/o2iblnd.c    | 22 +++++++++++-----------
 lnet/klnds/o2iblnd/o2iblnd.h    |  6 ++++--
 lnet/klnds/o2iblnd/o2iblnd_cb.c | 19 +++++++++++--------
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index 013da49..7586c95 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -1478,7 +1478,7 @@ static int kiblnd_alloc_fmr_pool(struct kib_fmr_poolset *fps,
 				 struct kib_fmr_pool *fpo)
 {
 	struct ib_fmr_pool_param param = {
-		.max_pages_per_fmr = LNET_MAX_IOV,
+		.max_pages_per_fmr = IBLND_MAX_RDMA_FRAGS,
 		.page_shift        = PAGE_SHIFT,
 		.access            = (IB_ACCESS_LOCAL_WRITE |
 				      IB_ACCESS_REMOTE_WRITE),
@@ -1529,7 +1529,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 
 #ifndef HAVE_IB_MAP_MR_SG
 		frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
-							    LNET_MAX_IOV);
+							    IBLND_MAX_RDMA_FRAGS);
 		if (IS_ERR(frd->frd_frpl)) {
 			rc = PTR_ERR(frd->frd_frpl);
 			CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
@@ -1541,7 +1541,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 
 #ifdef HAVE_IB_ALLOC_FAST_REG_MR
 		frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
-						   LNET_MAX_IOV);
+						   IBLND_MAX_RDMA_FRAGS);
 #else
 		/*
 		 * it is expected to get here if this is an MLX-5 card.
@@ -1559,7 +1559,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps,
 #else
 						  IB_MR_TYPE_MEM_REG,
 #endif
-						  LNET_MAX_IOV);
+						  IBLND_MAX_RDMA_FRAGS);
 		if ((*kiblnd_tunables.kib_use_fastreg_gaps == 1) &&
 		    (dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT))
 			CWARN("using IB_MR_TYPE_SG_GAPS, expect a performance drop\n");
@@ -2234,13 +2234,13 @@ kiblnd_destroy_tx_pool(struct kib_pool *pool)
 			CFS_FREE_PTR_ARRAY(tx->tx_pages, LNET_MAX_IOV);
 		if (tx->tx_frags != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_frags,
-					   (1 + IBLND_MAX_RDMA_FRAGS));
+					   IBLND_MAX_RDMA_FRAGS);
 		if (tx->tx_wrq != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_wrq,
-					   (1 + IBLND_MAX_RDMA_FRAGS));
+					   IBLND_MAX_RDMA_FRAGS);
 		if (tx->tx_sge != NULL)
 			CFS_FREE_PTR_ARRAY(tx->tx_sge,
-					   (1 + IBLND_MAX_RDMA_FRAGS) *
+					   IBLND_MAX_RDMA_FRAGS *
 					   wrq_sge);
 		if (tx->tx_rd != NULL)
 			LIBCFS_FREE(tx->tx_rd,
@@ -2315,21 +2315,21 @@ kiblnd_create_tx_pool(struct kib_poolset *ps, int size, struct kib_pool **pp_po)
 		}
 
 		LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 IBLND_MAX_RDMA_FRAGS *
 				 sizeof(*tx->tx_frags));
 		if (tx->tx_frags == NULL)
 			break;
 
-		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
+		sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
 
 		LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) *
+				 IBLND_MAX_RDMA_FRAGS *
 				 sizeof(*tx->tx_wrq));
 		if (tx->tx_wrq == NULL)
 			break;
 
 		LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
-				 (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
+				 IBLND_MAX_RDMA_FRAGS * wrq_sge *
 				 sizeof(*tx->tx_sge));
 		if (tx->tx_sge == NULL)
 			break;
diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index 1540a04..9b185d7 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/lnet/klnds/o2iblnd/o2iblnd.h
@@ -147,8 +147,10 @@ extern struct kib_tunables kiblnd_tunables;
 #define IBLND_OOB_CAPABLE(v)    ((v) != IBLND_MSG_VERSION_1)
 #define IBLND_OOB_MSGS(v)       (IBLND_OOB_CAPABLE(v) ? 2 : 0)
 
-#define IBLND_MSG_SIZE          (4<<10)        /* max size of queued messages (inc hdr) */
-#define IBLND_MAX_RDMA_FRAGS    LNET_MAX_IOV   /* max # of fragments supported */
+/* max size of queued messages (inc hdr) */
+#define IBLND_MSG_SIZE          (4<<10)
+/* max # of fragments supported. + 1 for unaligned case */
+#define IBLND_MAX_RDMA_FRAGS    (LNET_MAX_IOV + 1)
 
 /************************/
 /* derived constants... */
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 1ddd45d..adafb25 100644
--- a/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -758,8 +758,9 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 {
 	struct kib_net *net = ni->ni_data;
 	struct scatterlist *sg;
-	int		    fragnob;
-	int		    max_nkiov;
+	int fragnob;
+	int max_nkiov;
+	int sg_count = 0;
 
 	CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
 
@@ -780,6 +781,12 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 	do {
 		LASSERT(nkiov > 0);
 
+		if (!sg) {
+			CERROR("lacking enough sg entries to map tx\n");
+			return -EFAULT;
+		}
+		sg_count++;
+
 		fragnob = min((int)(kiov->bv_len - offset), nob);
 
 		/*
@@ -799,10 +806,6 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 		sg_set_page(sg, kiov->bv_page, fragnob,
 			    kiov->bv_offset + offset);
 		sg = sg_next(sg);
-		if (!sg) {
-			CERROR("lacking enough sg entries to map tx\n");
-			return -EFAULT;
-		}
 
 		offset = 0;
 		kiov++;
@@ -810,7 +813,7 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 		nob -= fragnob;
 	} while (nob > 0);
 
-	return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+	return kiblnd_map_tx(ni, tx, rd, sg_count);
 }
 
 static int
@@ -1110,7 +1113,7 @@ kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
 #endif
 
 	LASSERT(tx->tx_nwrq >= 0);
-	LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+	LASSERT(tx->tx_nwrq <= IBLND_MAX_RDMA_FRAGS);
 	LASSERT(nob <= IBLND_MSG_SIZE);
 #ifdef HAVE_IB_GET_DMA_MR
 	LASSERT(mr != NULL);
-- 
1.8.3.1
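
Background on the first bug described above: a transfer covering LNET_MAX_IOV pages' worth of
data that begins at a non-zero offset within its first page spills onto one additional page, so
up to LNET_MAX_IOV + 1 local RDMA fragments are needed. The following is a minimal standalone
sketch of that arithmetic, not Lustre code; the LNET_MAX_IOV value of 256 and the 4 KiB page
size are illustrative assumptions only.

#include <stdio.h>

#define PAGE_SZ      4096u   /* assumed page size, for illustration only */
#define LNET_MAX_IOV 256u    /* assumed typical value (1 MiB of 4 KiB pages) */

/* number of pages (== local RDMA fragments) touched by nob bytes starting at offset */
static unsigned int frags_needed(unsigned int offset, unsigned int nob)
{
	return (offset + nob + PAGE_SZ - 1) / PAGE_SZ;
}

int main(void)
{
	unsigned int nob = LNET_MAX_IOV * PAGE_SZ;	/* 1 MiB payload */

	/* page-aligned start: exactly LNET_MAX_IOV fragments */
	printf("aligned:   %u fragments\n", frags_needed(0, nob));
	/* start 512 bytes into the first page: LNET_MAX_IOV + 1 fragments */
	printf("unaligned: %u fragments\n", frags_needed(512, nob));
	return 0;
}

This is why the patch raises IBLND_MAX_RDMA_FRAGS to (LNET_MAX_IOV + 1) and drops the ad hoc
"1 +" adjustments that previously compensated for it at individual call sites.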