From 56097c490465cb67a87639192b1fee396acbfd24 Mon Sep 17 00:00:00 2001 From: Frank Sehr Date: Wed, 12 Apr 2023 12:31:33 -0700 Subject: [PATCH] LU-16548 lnet: report actual timeout used by lnd lnd_timeout value reported by lnetctl may be different from what is actually used. There's an lnd_timeout calculated as a function of transaction timeout and retry_count. This is the value displayed by "lnetctl global show". However, each LND may define its own timeout by setting timeout module parameter to a positive value, which overrides the higher-level lnd_timeout defined by LNet. lnetctl net show -v will show the timeout value in the lnd_tunables section. The timeout for socklnd, o2iblnd and gnilnd is implemented. A test for sock, ib and gni is included. Test-Parameters: trivial Signed-off-by: Frank Sehr Change-Id: I85a107ba6f1259c577f74945b89fd695f191d514 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50620 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: Serguei Smirnov Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/lnet-dlc.h | 8 +++++ lnet/klnds/gnilnd/gnilnd.c | 8 ++--- lnet/klnds/gnilnd/gnilnd.h | 7 ++++ lnet/klnds/gnilnd/gnilnd_modparams.c | 19 +++++++++++ lnet/klnds/o2iblnd/o2iblnd_modparams.c | 2 ++ lnet/klnds/socklnd/socklnd_modparams.c | 2 ++ lnet/utils/lnetconfig/liblnetconfig_lnd.c | 23 ++++++++++++- lustre/tests/sanity-lnet.sh | 56 +++++++++++++++++++++++++++++++ 8 files changed, 118 insertions(+), 7 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 969f0df..47ac566 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -105,6 +105,7 @@ struct lnet_ioctl_config_o2iblnd_tunables { __u32 lnd_fmr_cache; __u16 lnd_conns_per_peer; __u16 lnd_ntx; + __u32 lnd_timeout; }; struct lnet_ioctl_config_kfilnd_tunables { @@ -120,6 +121,12 @@ struct lnet_ioctl_config_socklnd_tunables { 
__u32 lnd_version; __u16 lnd_conns_per_peer; __u16 lnd_pad; + __u32 lnd_timeout; +}; + +struct lnet_ioctl_config_gnilnd_tunables { + __u32 lnd_version; + __u32 lnd_timeout; /* filled in from kgnilnd_timeout() */ }; struct lnet_lnd_tunables { @@ -127,6 +134,7 @@ struct lnet_lnd_tunables { struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib; struct lnet_ioctl_config_socklnd_tunables lnd_sock; struct lnet_ioctl_config_kfilnd_tunables lnd_kfi; + struct lnet_ioctl_config_gnilnd_tunables lnd_gni; } lnd_tun_u; }; diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index b785b13..62d578a 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -2609,12 +2609,8 @@ kgnilnd_startup(struct lnet_ni *ni) INIT_LIST_HEAD(&net->gnn_list); ni->ni_data = net; net->gnn_ni = ni; - if (!ni->ni_net->net_tunables_set) { - ni->ni_net->net_tunables.lct_max_tx_credits = - *kgnilnd_tunables.kgn_credits; - ni->ni_net->net_tunables.lct_peer_tx_credits = - *kgnilnd_tunables.kgn_peer_credits; - } + + kgnilnd_tunables_setup(ni); if (!ni->ni_interface) { rc = lnet_ni_add_interface(ni, "ipogif0"); diff --git a/lnet/klnds/gnilnd/gnilnd.h b/lnet/klnds/gnilnd/gnilnd.h index 7c335e2..bb1ccd7 100644 --- a/lnet/klnds/gnilnd/gnilnd.h +++ b/lnet/klnds/gnilnd/gnilnd.h @@ -898,6 +898,13 @@ extern void kgnilnd_destroy_conn(kgn_conn_t *conn); extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held); extern int _kgnilnd_schedule_delay_conn(kgn_conn_t *conn); +static inline int kgnilnd_timeout(void) +{ + return *kgnilnd_tunables.kgn_timeout ? /* non-zero tunable takes precedence */ + *kgnilnd_tunables.kgn_timeout : + lnet_get_lnd_timeout(); /* otherwise use the LNet-level value */ +} + /* Macro wrapper for _kgnilnd_schedule_conn. 
This will store the function * and the line of the calling function to allow us to debug problematic * schedule calls in the future without the programmer having to mark diff --git a/lnet/klnds/gnilnd/gnilnd_modparams.c b/lnet/klnds/gnilnd/gnilnd_modparams.c index 13db614..46a2b25 100644 --- a/lnet/klnds/gnilnd/gnilnd_modparams.c +++ b/lnet/klnds/gnilnd/gnilnd_modparams.c @@ -310,3 +310,22 @@ kgnilnd_tunables_init(void) out: return rc; } + +void +kgnilnd_tunables_setup(struct lnet_ni *ni) +{ + struct lnet_ioctl_config_gnilnd_tunables *tunables; + + if (!ni->ni_net->net_tunables_set) { + ni->ni_net->net_tunables.lct_max_tx_credits = + *kgnilnd_tunables.kgn_credits; + ni->ni_net->net_tunables.lct_peer_tx_credits = + *kgnilnd_tunables.kgn_peer_credits; + } + + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_gni; + /* Fill in the gni member of this NI's LND tunables. */ + tunables->lnd_version = CURRENT_LND_VERSION; + /* Timeout selected by kgnilnd_timeout(), shown by "lnetctl net show -v". */ + tunables->lnd_timeout = kgnilnd_timeout(); +} diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 9643fcb..cb6b7cc 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -316,6 +316,8 @@ kiblnd_tunables_setup(struct lnet_ni *ni) conns_per_peer : 1; } + tunables->lnd_timeout = kiblnd_timeout(); + return 0; } diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 68e2bea..bd97f1a 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -377,4 +377,6 @@ void ksocknal_tunables_setup(struct lnet_ni *ni) if (!tunables->lnd_conns_per_peer) tunables->lnd_conns_per_peer = ksocklnd_lookup_conns_per_peer(ni); + + tunables->lnd_timeout = ksocknal_timeout(); } diff --git a/lnet/utils/lnetconfig/liblnetconfig_lnd.c b/lnet/utils/lnetconfig/liblnetconfig_lnd.c index 251e7da..80673d9 100644 --- a/lnet/utils/lnetconfig/liblnetconfig_lnd.c +++ b/lnet/utils/lnetconfig/liblnetconfig_lnd.c @@ -70,6 +70,10 @@ lustre_o2iblnd_show_tun(struct cYAML *lndparams, 
lnd_cfg->lnd_conns_per_peer) == NULL) return LUSTRE_CFG_RC_OUT_OF_MEM; + if (cYAML_create_number(lndparams, "timeout", + lnd_cfg->lnd_timeout) == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; + return LUSTRE_CFG_RC_NO_ERR; } @@ -82,6 +86,10 @@ lustre_socklnd_show_tun(struct cYAML *lndparams, lnd_cfg->lnd_conns_per_peer) == NULL) return LUSTRE_CFG_RC_OUT_OF_MEM; + if (cYAML_create_number(lndparams, "timeout", + lnd_cfg->lnd_timeout) == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; + return LUSTRE_CFG_RC_NO_ERR; } @@ -116,6 +124,17 @@ lustre_kfilnd_show_tun(struct cYAML *lndparams, } #endif +static int +lustre_gnilnd_show_tun(struct cYAML *lndparams, + struct lnet_ioctl_config_gnilnd_tunables *lnd_cfg) +{ + if (cYAML_create_number(lndparams, "timeout", + lnd_cfg->lnd_timeout) == NULL) + return LUSTRE_CFG_RC_OUT_OF_MEM; /* "timeout" node not created */ + + return LUSTRE_CFG_RC_NO_ERR; +} + int lustre_net_show_tunables(struct cYAML *tunables, struct lnet_ioctl_config_lnd_cmn_tunables *cmn) @@ -167,7 +186,9 @@ lustre_ni_show_tunables(struct cYAML *lnd_tunables, &lnd->lnd_tun_u.lnd_kfi, backup); #endif - + else if (net_type == GNILND) /* gnilnd exposes only its timeout */ + rc = lustre_gnilnd_show_tun(lnd_tunables, + &lnd->lnd_tun_u.lnd_gni); return rc; } diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index d7baa3d..e3659f9 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -3494,6 +3494,62 @@ test_303() { } run_test 303 "Check peer NI health after link down" + +check_parameter() { + local para=$1 + local value=$2 + + echo "check parameter ${para} value ${value}" + + return $(( $(do_lnetctl net show -v | \ + tee /dev/stderr | \ + grep -c "^ \+${para}: ${value}$") != 1 )) +} + +static_config() { + local module=$1 + local setting=$2 + + cleanup_lnet || error "Failed to cleanup LNet" + + load_module ../libcfs/libcfs/libcfs || + error "Failed to load module libcfs rc = $?" + + load_module ../lnet/lnet/lnet || + error "Failed to load module lnet rc = $?" 
+ + echo "loading ${module} ${setting} type ${NETTYPE}" + load_module "${module}" "${setting}" || + error "Failed to load module ${module} rc = $?" + + do_lnetctl lnet configure --all || error "lnet configure failed rc = $?" + + return 0 +} + +test_310() { + local value=65 + + if [[ ${NETTYPE} == tcp* ]];then + static_config "../lnet/klnds/socklnd/ksocklnd" \ + "sock_timeout=${value}" + elif [[ ${NETTYPE} == o2ib* ]]; then + static_config "../lnet/klnds/o2iblnd/ko2iblnd" \ + "timeout=${value}" + elif [[ ${NETTYPE} == gni* ]]; then + static_config "../lnet/klnds/gnilnd/kgnilnd" \ + "timeout=${value}" + else + skip "NETTYPE ${NETTYPE} not supported" + fi + + check_parameter "timeout" $value + + return $? +} +run_test 310 "Set timeout and verify" + + check_udsp_prio() { local target_net="${1}" local target_nid="${2}" -- 1.8.3.1