From b8ff97611717d6f4cafd26d1e7e418fd745f5297 Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Mon, 22 Jun 2020 10:21:42 -0500
Subject: [PATCH] LU-13735 lnet: Loosen restrictions on LNet Health params

The functions that set various LNet Health related parameters
require that the parameters be set in a specific order depending on
whether health is enabled or disabled. This is not user-friendly.

- Don't overwrite lnet_transaction_timeout when health is being
  enabled or disabled.
- Don't overwrite lnet_retry_count when health is being enabled
  (still set it to zero when health is disabled).
- Allow lnet_retry_count to be set to 0 when health is disabled
- Correct off-by-one error in transaction_to_set() to ensure
  lnet_transaction_timeout is greater than lnet_retry_count

HPE-bug-id: LUS-8995
Signed-off-by: Chris Horn
Change-Id: Ic8ca7862543fc667fdf85844e05146c78bf48cd1
Reviewed-on: https://review.whamcloud.com/39228
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Neil Brown
Reviewed-by: Serguei Smirnov
Reviewed-by: Oleg Drokin
---
 lnet/lnet/api-ni.c | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c
index dd606e8..a7b3f8c 100644
--- a/lnet/lnet/api-ni.c
+++ b/lnet/lnet/api-ni.c
@@ -182,10 +182,8 @@ module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
 MODULE_PARM_DESC(lnet_drop_asym_route,
 		 "Set to 1 to drop asymmetrical route messages.");
 
-#define LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT 50
-#define LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT 50
-
-unsigned lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
+#define LNET_TRANSACTION_TIMEOUT_DEFAULT 50
+unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
 #ifdef HAVE_KERNEL_PARAM_OPS
 static struct kernel_param_ops param_ops_transaction_timeout = {
@@ -203,8 +201,8 @@ module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
 MODULE_PARM_DESC(lnet_transaction_timeout,
 		"Maximum number of seconds to wait for a peer response.");
 
-#define LNET_RETRY_COUNT_HEALTH_DEFAULT 2
-unsigned lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
+#define LNET_RETRY_COUNT_DEFAULT 2
+unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
 #ifdef HAVE_KERNEL_PARAM_OPS
 static struct kernel_param_ops param_ops_retry_count = {
@@ -241,8 +239,8 @@ module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
 MODULE_PARM_DESC(lnet_response_tracking,
 		 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
 
-#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT - 1) / \
-				  (LNET_RETRY_COUNT_HEALTH_DEFAULT + 1))
+#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
+				  (LNET_RETRY_COUNT_DEFAULT + 1))
 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
 static void lnet_set_lnd_timeout(void)
 {
@@ -293,21 +291,7 @@ sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
 		return -EINVAL;
 	}
 
-	/*
-	 * if we're turning on health then use the health timeout
-	 * defaults.
-	 */
-	if (*sensitivity == 0 && value != 0) {
-		lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_HEALTH_DEFAULT;
-		lnet_retry_count = LNET_RETRY_COUNT_HEALTH_DEFAULT;
-		lnet_set_lnd_timeout();
-	/*
-	 * if we're turning off health then use the no health timeout
-	 * default.
-	 */
-	} else if (*sensitivity != 0 && value == 0) {
-		lnet_transaction_timeout =
-			LNET_TRANSACTION_TIMEOUT_NO_HEALTH_DEFAULT;
+	if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
 		lnet_retry_count = 0;
 		lnet_set_lnd_timeout();
 	}
@@ -460,7 +444,7 @@ transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
 	 */
 	mutex_lock(&the_lnet.ln_api_mutex);
 
-	if (value < lnet_retry_count || value == 0) {
+	if (value <= lnet_retry_count || value == 0) {
 		mutex_unlock(&the_lnet.ln_api_mutex);
 		CERROR("Invalid value for lnet_transaction_timeout (%lu). "
 		       "Has to be greater than lnet_retry_count (%u)\n",
@@ -503,9 +487,9 @@ retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
 	 */
 	mutex_lock(&the_lnet.ln_api_mutex);
 
-	if (lnet_health_sensitivity == 0) {
+	if (lnet_health_sensitivity == 0 && value > 0) {
 		mutex_unlock(&the_lnet.ln_api_mutex);
-		CERROR("Can not set retry_count when health feature is turned off\n");
+		CERROR("Can not set lnet_retry_count when health feature is turned off\n");
 		return -EINVAL;
 	}
 
-- 
1.8.3.1