From: James Simmons Date: Tue, 20 Mar 2018 20:44:56 +0000 (-0400) Subject: LU-10157 lnet: make LNET_MAX_IOV dependent on page size X-Git-Tag: 2.11.51~20 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=272e49ce2d5d6883e6ca1b00a9322b3a23b2e55a LU-10157 lnet: make LNET_MAX_IOV dependent on page size The default behavior of LNet is to always use 256 pages, which is LNET_MAX_IOV, and to assume that LNET_MAX_PAYLOAD is always one megabyte. This assumes pages are always 4K in size, which is not the case. This causes bulk I/O errors when using platforms like PowerPC or ARM, which tend to use 64K pages. This is resolved by first making LNET_MAX_PAYLOAD always one megabyte, since this is what configure sets it to by default and no one ever changes it. In theory it could be set as high as 16MB, but that would cause I/O errors since the ptlrpc layer expects the packets to always be 1 megabyte in size. Also, it would be better to make the maximum payload a per-network configuration setting instead of a global one. Second, we make LNET_MAX_IOV equal to LNET_MAX_PAYLOAD divided by the PAGE_SIZE. This way packets will always be LNET_MAX_PAYLOAD in size, but the number of pages used, LNET_MAX_IOV, will vary depending on the platform the packets are created on. 
Change-Id: Ie1dcdb195e68b44e2fa2d9b24715216d8aca4c65 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/31559 Tested-by: Jenkins Reviewed-by: Wang Shilong Tested-by: Maloo Reviewed-by: Ruth Klundt Reviewed-by: Oleg Drokin --- diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index be38840..d3a302e 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -1,26 +1,4 @@ # -# LN_CONFIG_MAX_PAYLOAD -# -# configure maximum payload -# -AC_DEFUN([LN_CONFIG_MAX_PAYLOAD], [ -AC_MSG_CHECKING([for non-default maximum LNET payload]) -AC_ARG_WITH([max-payload-mb], - AC_HELP_STRING([--with-max-payload-mb=MBytes], - [set maximum lnet payload in MBytes]), - [ - AC_MSG_RESULT([$with_max_payload_mb]) - CONFIG_LNET_MAX_PAYLOAD_MB=$with_max_payload_mb - CONFIG_LNET_MAX_PAYLOAD="(($with_max_payload_mb)<<20)" - ], [ - AC_MSG_RESULT([no]) - CONFIG_LNET_MAX_PAYLOAD="LNET_MTU" - ]) -AC_DEFINE_UNQUOTED(CONFIG_LNET_MAX_PAYLOAD, $CONFIG_LNET_MAX_PAYLOAD, - [Max LNET payload]) -]) # LN_CONFIG_MAX_PAYLOAD - -# # LN_CHECK_GCC_VERSION # # Check compiler version @@ -805,7 +783,6 @@ AS_IF([test "$enable_efence" = yes], [ ]) AC_SUBST(LIBEFENCE) -LN_CONFIG_MAX_PAYLOAD LN_CONFIG_DLC ]) # LN_CONFIGURE diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 9538970..e96b544 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -51,16 +51,9 @@ #include /* Max payload size */ -#ifndef CONFIG_LNET_MAX_PAYLOAD -# error "CONFIG_LNET_MAX_PAYLOAD must be defined in config.h" -#endif +#define LNET_MAX_PAYLOAD LNET_MTU -#define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD -#if (LNET_MAX_PAYLOAD < LNET_MTU) -# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" -#elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) -# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" -#endif +#define LNET_MAX_IOV (LNET_MAX_PAYLOAD >> PAGE_SHIFT) /* forward refs */ 
struct lnet_libmd; diff --git a/lnet/include/uapi/linux/lnet/lnet-types.h b/lnet/include/uapi/linux/lnet/lnet-types.h index 4daa0be..f30f284 100644 --- a/lnet/include/uapi/linux/lnet/lnet-types.h +++ b/lnet/include/uapi/linux/lnet/lnet-types.h @@ -515,9 +515,6 @@ struct lnet_md { #define LNET_MTU_BITS 20 #define LNET_MTU (1 << LNET_MTU_BITS) -/** limit on the number of fragments in discontiguous MDs */ -#define LNET_MAX_IOV 256 - /** * Options for the MD structure. See struct lnet_md::options. */ diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index dc7981d..343cd2e 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1520,7 +1520,7 @@ kiblnd_fmr_flush_trigger(struct lnet_ioctl_config_o2iblnd_tunables *tunables, static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) { struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, + .max_pages_per_fmr = LNET_MAX_IOV, .page_shift = PAGE_SHIFT, .access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE), @@ -1567,7 +1567,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo, #ifndef HAVE_IB_MAP_MR_SG frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev, - LNET_MAX_PAYLOAD/PAGE_SIZE); + LNET_MAX_IOV); if (IS_ERR(frd->frd_frpl)) { rc = PTR_ERR(frd->frd_frpl); CERROR("Failed to allocate ib_fast_reg_page_list: %d\n", @@ -1579,7 +1579,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo, #ifdef HAVE_IB_ALLOC_FAST_REG_MR frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd, - LNET_MAX_PAYLOAD/PAGE_SIZE); + LNET_MAX_IOV); #else /* * it is expected to get here if this is an MLX-5 card. 
@@ -1597,7 +1597,7 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo, #else IB_MR_TYPE_MEM_REG, #endif - LNET_MAX_PAYLOAD/PAGE_SIZE); + LNET_MAX_IOV); if ((*kiblnd_tunables.kib_use_fastreg_gaps == 1) && (dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT)) CWARN("using IB_MR_TYPE_SG_GAPS, expect a performance drop\n"); diff --git a/lnet/utils/lst.c b/lnet/utils/lst.c index 99b0e69..0b6d40a 100644 --- a/lnet/utils/lst.c +++ b/lnet/utils/lst.c @@ -2952,8 +2952,6 @@ lst_get_bulk_param(int argc, char **argv, struct lst_test_bulk_param *bulk) } else if (strcasestr(argv[i], "size=") == argv[i] || strcasestr(argv[i], "s=") == argv[i]) { - int max_size = sysconf(_SC_PAGESIZE) * LNET_MAX_IOV; - tok = strchr(argv[i], '=') + 1; bulk->blk_size = strtol(tok, &end, 0); @@ -2970,7 +2968,7 @@ lst_get_bulk_param(int argc, char **argv, struct lst_test_bulk_param *bulk) else if (*end == 'm' || *end == 'M') bulk->blk_size *= 1024 * 1024; - if (bulk->blk_size > max_size) { + if (bulk->blk_size > LNET_MTU) { fprintf(stderr, "Size exceed limitation: %d bytes\n", bulk->blk_size); return -1; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index f9daeae..15f294a 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include