From b5166e5fe1409a8467f02d4cfec127aa8be4753c Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sun, 31 Jan 2021 10:20:54 -0600 Subject: [PATCH] LU-14392 gnilnd: re-enable large I/O buffers DVS on gni breaks the LNet 1M handshake of LNET_MAX_IOV. Introduce GNILND_MAX_IOV with a 4M I/O maximum and a hint LNET_MD_GNILND so LNet can accept the large buffer without complaint. Test-Parameters: trivial Signed-off-by: Shaun Tancheff Change-Id: I4e78c0022fdece0d6945bbcc47e2e64d4d181dca Reviewed-on: https://review.whamcloud.com/41373 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Alexey Lyashkov Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/lnet-types.h | 3 +++ lnet/klnds/gnilnd/gnilnd.c | 4 ++-- lnet/klnds/gnilnd/gnilnd.h | 2 ++ lnet/klnds/gnilnd/gnilnd_cb.c | 8 ++++---- lnet/klnds/gnilnd/gnilnd_proc.c | 14 +++++++------- lnet/lnet/lib-md.c | 3 +++ 6 files changed, 21 insertions(+), 13 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/lnet-types.h b/lnet/include/uapi/linux/lnet/lnet-types.h index bfc4bb3..ca887fd 100644 --- a/lnet/include/uapi/linux/lnet/lnet-types.h +++ b/lnet/include/uapi/linux/lnet/lnet-types.h @@ -393,6 +393,7 @@ struct lnet_md { * regardless of the value of the lnet_response_tracking param. * - LNET_MD_NO_TRACK_RESPONSE: Disable response tracking on this MD * regardless of the value of the lnet_response_tracking param. + * - LNET_MD_GNILND: Disable warning about exceeding LNET_MAX_IOV. * * Note: * - LNET_MD_KIOV allows for a scatter/gather capability for memory @@ -457,6 +458,8 @@ struct lnet_md { #define LNET_MD_TRACK_RESPONSE (1 << 10) /** See struct lnet_md::options. */ #define LNET_MD_NO_TRACK_RESPONSE (1 << 11) +/** See struct lnet_md::options. */ +#define LNET_MD_GNILND (1 << 12) /** Infinite threshold on MD operations. 
See struct lnet_md::threshold */ #define LNET_MD_THRESH_INF (-1) diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index c6b50cf..857dc20 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -2268,7 +2268,7 @@ int kgnilnd_base_startup(void) kgnilnd_data.kgn_tx_phys_cache = kmem_cache_create("kgn_tx_phys", - LNET_MAX_IOV * sizeof(gni_mem_segment_t), + GNILND_MAX_IOV * sizeof(gni_mem_segment_t), 0, 0, NULL); if (kgnilnd_data.kgn_tx_phys_cache == NULL) { CERROR("Can't create slab for kgn_tx_phys\n"); @@ -2294,7 +2294,7 @@ int kgnilnd_base_startup(void) kgnilnd_data.kgn_cksum_npages * sizeof (struct page *)); for (i = 0; i < kgnilnd_data.kgn_cksum_npages; i++) { - kgnilnd_data.kgn_cksum_map_pages[i] = kmalloc(LNET_MAX_IOV * sizeof (struct page *), + kgnilnd_data.kgn_cksum_map_pages[i] = kmalloc(GNILND_MAX_IOV * sizeof (struct page *), GFP_KERNEL); if (kgnilnd_data.kgn_cksum_map_pages[i] == NULL) { CERROR("Can't allocate vmap cksum pages for cpu %d\n", i); diff --git a/lnet/klnds/gnilnd/gnilnd.h b/lnet/klnds/gnilnd/gnilnd.h index c5282fe..f307b79 100644 --- a/lnet/klnds/gnilnd/gnilnd.h +++ b/lnet/klnds/gnilnd/gnilnd.h @@ -148,6 +148,8 @@ static inline time_t cfs_duration_sec(long duration_jiffies) /* need sane upper bound to limit copy overhead */ #define GNILND_MAX_IMMEDIATE (64<<10) +/* allow for 4M transfers over gni. 
Note 2.5M used by DVS */ +#define GNILND_MAX_IOV 1024 /* Max number of connections to keep in purgatory per peer */ #define GNILND_PURGATORY_MAX 5 diff --git a/lnet/klnds/gnilnd/gnilnd_cb.c b/lnet/klnds/gnilnd/gnilnd_cb.c index 124339b..a012a76 100644 --- a/lnet/klnds/gnilnd/gnilnd_cb.c +++ b/lnet/klnds/gnilnd/gnilnd_cb.c @@ -259,7 +259,7 @@ kgnilnd_free_tx(kgn_tx_t *tx) if (tx->tx_phys != NULL) { kmem_cache_free(kgnilnd_data.kgn_tx_phys_cache, tx->tx_phys); CDEBUG(D_MALLOC, "slab-freed 'tx_phys': %lu at %p.\n", - LNET_MAX_IOV * sizeof(gni_mem_segment_t), tx->tx_phys); + GNILND_MAX_IOV * sizeof(gni_mem_segment_t), tx->tx_phys); } /* Only free the buffer if we used it */ @@ -639,7 +639,7 @@ kgnilnd_setup_phys_buffer(kgn_tx_t *tx, int nkiov, struct bio_vec *kiov, } CDEBUG(D_MALLOC, "slab-alloced 'tx->tx_phys': %lu at %p.\n", - LNET_MAX_IOV * sizeof(gni_mem_segment_t), tx->tx_phys); + GNILND_MAX_IOV * sizeof(gni_mem_segment_t), tx->tx_phys); /* if loops changes, please change kgnilnd_cksum_kiov * and kgnilnd_setup_immediate_buffer */ @@ -686,7 +686,7 @@ kgnilnd_setup_phys_buffer(kgn_tx_t *tx, int nkiov, struct bio_vec *kiov, GOTO(error, rc); } - if ((phys - tx->tx_phys) == LNET_MAX_IOV) { + if ((phys - tx->tx_phys) == GNILND_MAX_IOV) { CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys)); rc = -EMSGSIZE; GOTO(error, rc); @@ -2040,7 +2040,7 @@ kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) LASSERTF(nob == 0 || niov > 0, "lntmsg %p nob %d niov %d\n", lntmsg, nob, niov); - LASSERTF(niov <= LNET_MAX_IOV, + LASSERTF(niov <= GNILND_MAX_IOV, "lntmsg %p niov %d\n", lntmsg, niov); if (msg_vmflush) diff --git a/lnet/klnds/gnilnd/gnilnd_proc.c b/lnet/klnds/gnilnd/gnilnd_proc.c index 0cdc6aa..901392f 100644 --- a/lnet/klnds/gnilnd/gnilnd_proc.c +++ b/lnet/klnds/gnilnd/gnilnd_proc.c @@ -48,15 +48,15 @@ _kgnilnd_proc_run_cksum_test(int caseno, int nloops, int nob) __u16 cksum, cksum2; __u64 mbytes; - CFS_ALLOC_PTR_ARRAY(src, LNET_MAX_IOV); - 
CFS_ALLOC_PTR_ARRAY(dest, LNET_MAX_IOV); + CFS_ALLOC_PTR_ARRAY(src, GNILND_MAX_IOV); + CFS_ALLOC_PTR_ARRAY(dest, GNILND_MAX_IOV); if (src == NULL || dest == NULL) { CERROR("couldn't allocate iovs\n"); GOTO(unwind, rc = -ENOMEM); } - for (i = 0; i < LNET_MAX_IOV; i++) { + for (i = 0; i < GNILND_MAX_IOV; i++) { src[i].bv_offset = 0; src[i].bv_len = PAGE_SIZE; src[i].bv_page = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -78,9 +78,9 @@ _kgnilnd_proc_run_cksum_test(int caseno, int nloops, int nob) /* add extra 2 pages - one for offset of src, 2nd to allow dest offset */ niov = (nob / PAGE_SIZE) + 2; - if (niov > LNET_MAX_IOV) { + if (niov > GNILND_MAX_IOV) { CERROR("bytes %d too large, requires niov %d > %d\n", - nob, niov, LNET_MAX_IOV); + nob, niov, GNILND_MAX_IOV); GOTO(unwind, rc = -E2BIG); } @@ -151,9 +151,9 @@ unwind: } if (src != NULL) - CFS_FREE_PTR_ARRAY(src, LNET_MAX_IOV); + CFS_FREE_PTR_ARRAY(src, GNILND_MAX_IOV); if (dest != NULL) - CFS_FREE_PTR_ARRAY(dest, LNET_MAX_IOV); + CFS_FREE_PTR_ARRAY(dest, GNILND_MAX_IOV); return rc; } diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index 68da79e..ba318a2 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -251,6 +251,9 @@ lnet_md_build(const struct lnet_md *umd, int unlink) pa += plen; i += 1; } + WARN(!(lmd->md_options & LNET_MD_GNILND) && i > LNET_MAX_IOV, + "Max IOV exceeded: %d should be < %d\n", + i, LNET_MAX_IOV); if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ (umd->max_size < 0 || umd->max_size > (int)umd->length)) { /* illegal max_size */ -- 1.8.3.1