From 4027395fe463b6ea11084ff2af43ba0732ad0ddb Mon Sep 17 00:00:00 2001 From: Cyril Bordage Date: Wed, 15 Sep 2021 18:15:08 +0200 Subject: [PATCH] LU-14979 lnet: set max recovery interval duration Add a tunable parameter to limit the recovery ping interval which was previously statically set to 900. This can be done by using: lnetctl set max_recovery_ping_interval Modify sanity-lnet test 210/211 to validate this new functionality. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Cyril Bordage Signed-off-by: Chris Horn Change-Id: I766ceb6e03ffdab125005e16472b6f9eeadfb9d5 Reviewed-on: https://review.whamcloud.com/44927 Tested-by: jenkins Reviewed-by: Serguei Smirnov Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 9 ++-- lnet/lnet/api-ni.c | 54 +++++++++++++++++++ lnet/utils/lnetconfig/liblnetconfig.c | 60 +++++++++++++++++++++ lnet/utils/lnetconfig/liblnetconfig.h | 14 +++++ lnet/utils/lnetctl.c | 47 +++++++++++++++++ lustre/doc/lnetctl.8 | 7 +++ lustre/tests/sanity-lnet.sh | 99 +++++++++++++++++++++++++++++++++-- 7 files changed, 283 insertions(+), 7 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index b6dc74b..58f03fa 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -606,6 +606,8 @@ extern unsigned int lnet_recovery_interval; extern unsigned int lnet_recovery_limit; extern unsigned int lnet_peer_discovery_disabled; extern unsigned int lnet_drop_asym_route; +extern unsigned int lnet_max_recovery_ping_interval; +extern unsigned int lnet_max_recovery_ping_count; extern unsigned int router_sensitivity_percentage; extern int alive_router_check_interval; extern int live_router_check_interval; @@ -1086,15 +1088,14 @@ lnet_peer_needs_push(struct lnet_peer *lp) return false; } -#define LNET_RECOVERY_INTERVAL_MAX 900 static inline unsigned int lnet_get_next_recovery_ping(unsigned int ping_count, time64_t now) { unsigned int interval; - /* 2^9 
= 512, 2^10 = 1024 */ - if (ping_count > 9) - interval = LNET_RECOVERY_INTERVAL_MAX; + /* 2^lnet_max_recovery_ping_count <= lnet_max_recovery_ping_interval */ + if (ping_count > lnet_max_recovery_ping_count) + interval = lnet_max_recovery_ping_interval; else interval = 1 << ping_count; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index a4a1c78..ec5bf53 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -128,6 +128,27 @@ module_param(lnet_recovery_limit, uint, 0644); MODULE_PARM_DESC(lnet_recovery_limit, "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery"); +unsigned int lnet_max_recovery_ping_interval = 900; +unsigned int lnet_max_recovery_ping_count = 9; +static int max_recovery_ping_interval_set(const char *val, + cfs_kernel_param_arg_t *kp); + +#define param_check_max_recovery_ping_interval(name, p) \ + __param_check(name, p, int) + +#ifdef HAVE_KERNEL_PARAM_OPS +static struct kernel_param_ops param_ops_max_recovery_ping_interval = { + .set = max_recovery_ping_interval_set, + .get = param_get_int, +}; +module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644); +#else +module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval, + param_get_int, &lnet_max_recovery_ping_interval, 0644); +#endif +MODULE_PARM_DESC(lnet_max_recovery_ping_interval, + "The max interval between LNet recovery pings, in seconds"); + static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT; static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp); @@ -315,6 +336,39 @@ recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp) } static int +max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp) +{ + int rc; + unsigned long value; + + rc = kstrtoul(val, 0, &value); + if (rc) { + CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n"); + return rc; + } + + if (!value) { + CERROR("Invalid max ping timeout. 
Must be strictly positive\n"); + return -EINVAL; + } + + /* The purpose of locking the api_mutex here is to ensure that + * the correct value ends up stored properly. + */ + mutex_lock(&the_lnet.ln_api_mutex); + lnet_max_recovery_ping_interval = value; + lnet_max_recovery_ping_count = 0; + value >>= 1; + while (value) { + lnet_max_recovery_ping_count++; + value >>= 1; + } + mutex_unlock(&the_lnet.ln_api_mutex); + + return 0; +} + +static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp) { int rc; diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 9f0dabd..b561f9c 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2746,6 +2746,38 @@ out: return rc; } +int lustre_lnet_config_max_recovery_ping_interval(int interval, int seq_no, + struct cYAML **err_rc) +{ + int rc = LUSTRE_CFG_RC_NO_ERR; + char err_str[LNET_MAX_STR_LEN] = "\"success\""; + char interval_str[LNET_MAX_STR_LEN]; + + if (interval <= 0) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, sizeof(err_str), + "\"must be strictly positive\""); + + } else { + snprintf(interval_str, sizeof(interval_str), "%d", interval); + + rc = write_sysfs_file(modparam_path, + "lnet_max_recovery_ping_interval", + interval_str, 1, + strlen(interval_str) + 1); + if (rc) + snprintf(err_str, sizeof(err_str), + "\"cannot configure maximum recovery ping interval: %s\"", + strerror(errno)); + } + + cYAML_build_error(rc, seq_no, ADD_CMD, "maximum recovery ping interval", + err_str, err_rc); + + return rc; +} + + int lustre_lnet_show_routing(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc, bool backup) { @@ -3965,6 +3997,34 @@ int lustre_lnet_show_recovery_limit(int seq_no, struct cYAML **show_rc, show_rc, err_rc, l_errno); } +int lustre_lnet_show_max_recovery_ping_interval(int seq_no, + struct cYAML **show_rc, + struct cYAML **err_rc) +{ + int rc = LUSTRE_CFG_RC_OUT_OF_MEM; + char val[LNET_MAX_STR_LEN]; + int interval 
= -1, l_errno = 0; + char err_str[LNET_MAX_STR_LEN]; + + snprintf(err_str, sizeof(err_str), "\"out of memory\""); + + rc = read_sysfs_file(modparam_path, "lnet_max_recovery_ping_interval", + val, 1, sizeof(val)); + if (rc) { + l_errno = -errno; + snprintf(err_str, sizeof(err_str), + "\"cannot get lnet_max_recovery_ping_interval value: %d\"", + rc); + } else { + interval = atoi(val); + } + + return build_global_yaml_entry(err_str, sizeof(err_str), seq_no, + "max_recovery_ping_interval", interval, + show_rc, err_rc, l_errno); +} + + int lustre_lnet_show_max_intf(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc) { diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index 05d186f..f7bc7c1 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -498,6 +498,9 @@ int lustre_lnet_config_recovery_limit(int val, int seq_no, struct cYAML **err_rc); int lustre_lnet_show_recovery_limit(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc); +int lustre_lnet_show_max_recovery_ping_interval(int seq_no, + struct cYAML **show_rc, + struct cYAML **err_rc); /* * lustre_lnet_config_max_intf @@ -607,6 +610,17 @@ int lustre_lnet_config_buffers(int tiny, int small, int large, int seq_no, struct cYAML **err_rc); /* + * lustre_lnet_config_max_recovery_ping_interval + * Set the maximum recovery ping interval. + * + * interval - interval value in seconds + * seq_no - sequence number of the request + * err_rc - [OUT] struct cYAML tree describing the error. Freed by caller + */ +int lustre_lnet_config_max_recovery_ping_interval(int interval, int seq_no, + struct cYAML **err_rc); + +/* * lustre_lnet_show_routing * Send down an IOCTL to dump buffers and routing status * This function is used to dump buffers for all CPU partitions. 
diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 51dfab6..f9bc8d3 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -60,6 +60,7 @@ static int jt_set_transaction_to(int argc, char **argv); static int jt_set_recov_intrv(int argc, char **argv); static int jt_set_rtr_sensitivity(int argc, char **argv); static int jt_set_hsensitivity(int argc, char **argv); +static int jt_set_max_recovery_ping_interval(int argc, char **argv); static int jt_reset_stats(int argc, char **argv); static int jt_add_peer_nid(int argc, char **argv); static int jt_del_peer_nid(int argc, char **argv); @@ -250,6 +251,9 @@ command_t set_cmds[] = { "Set how long LNet will attempt to recover unhealthy interfaces.\n" "\t0 - Recover indefinitely (default)\n" "\t>0 - Recover for the specified number of seconds.\n"}, + {"max_recovery_ping_interval", jt_set_max_recovery_ping_interval, 0, + "maximum recovery ping interval\n" + "\t>0 - maximum recovery ping interval in seconds\n"}, { 0, 0, 0, NULL } }; @@ -891,6 +895,36 @@ static int jt_set_routing(int argc, char **argv) return rc; } +static int jt_set_max_recovery_ping_interval(int argc, char **argv) +{ + long int value; + int rc; + struct cYAML *err_rc = NULL; + + rc = check_cmd(set_cmds, "set", "maximum recovery_interval", 2, argc, argv); + if (rc) + return rc; + + rc = parse_long(argv[1], &value); + if (rc != 0) { + cYAML_build_error(-1, -1, "parser", "set", + "cannot parse maximum recovery interval value", + &err_rc); + cYAML_print_tree2file(stderr, err_rc); + cYAML_free_tree(err_rc); + return -1; + } + + rc = lustre_lnet_config_max_recovery_ping_interval(value, -1, &err_rc); + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + + cYAML_free_tree(err_rc); + + return rc; +} + + static int jt_config_lnet(int argc, char **argv) { struct cYAML *err_rc = NULL; @@ -1703,6 +1737,12 @@ static int jt_show_global(int argc, char **argv) goto out; } + rc = lustre_lnet_show_max_recovery_ping_interval(-1, &show_rc, 
&err_rc); + if (rc != LUSTRE_CFG_RC_NO_ERR) { + cYAML_print_tree2file(stderr, err_rc); + goto out; + } + if (show_rc) cYAML_print_tree(show_rc); @@ -2057,6 +2097,13 @@ static int jt_export(int argc, char **argv) err_rc = NULL; } + rc = lustre_lnet_show_max_recovery_ping_interval(-1, &show_rc, &err_rc); + if (rc != LUSTRE_CFG_RC_NO_ERR) { + cYAML_print_tree2file(stderr, err_rc); + cYAML_free_tree(err_rc); + err_rc = NULL; + } + rc = lustre_lnet_show_udsp(-1, -1, &show_rc, &err_rc); if (rc != LUSTRE_CFG_RC_NO_ERR) { cYAML_print_tree2file(stderr, err_rc); diff --git a/lustre/doc/lnetctl.8 b/lustre/doc/lnetctl.8 index f2a2793..d88cf18 100644 --- a/lustre/doc/lnetctl.8 +++ b/lustre/doc/lnetctl.8 @@ -267,6 +267,13 @@ Set how long LNet will attempt to recover unhealthy peer interfaces\. 0 - Recover indefinitely (default)\. >0 - Recover for the specified number of seconds\. . +.TP +\fBlnetctl set\fR max_recovery_ping_interval \fIvalue\fR +Set the maximum recovery ping interval. +The recovery ping mechanism increases the next scheduled recovery ping attempt +timeout exponentially (base 2) until it is equal to the value set. +The default value is 900. +. .SS "Import and Export YAML Configuration Files" LNet configuration can be represented in YAML format\. A YAML configuration file can be passed to the lnetctl utility via the \fBimport\fR command\. The diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index e45f152..97f7f36 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -1837,6 +1837,18 @@ check_nid_in_recovq() { # If the recovery limit is 10 seconds, then when the 5th enqueue happens # we expect the peer NI to have aged out, so it will not actually be # queued. +# If max_recovery_ping_interval is set to 2 then: +# First enqueue happens at time 0. +# 2nd at 0 + 2^0 = 1 +# 3rd at 1 + 2^1 = 3 +# 4th at 3 + 2^1 = 5 +# 5th at 5 + 2^1 = 7 +# 6th at 7 + 2^1 = 9 +# 7th at 9 + 2^1 = 11 +# e.g. 
after 4 seconds we would expect to have seen the 3rd enqueue, +# (2 pings sent, 3rd about to happen), and the 4th enqueue is yet to happen +# e.g. after 10 seconds we would expect to have seen the 6th enqueue, +# (5 pings sent, 6th about to happen), and the 7th enqueue is yet to happen check_ping_count() { local queue="$1" local expect="$2" @@ -1886,6 +1898,8 @@ test_210() { do_lnetctl discover $prim_nid || error "failed to discover myself" + local default=$($LNETCTL global show | + awk '/recovery_limit/{print $NF}') # Set recovery limit to 10 seconds. do_lnetctl set recovery_limit 10 || error "failed to set recovery_limit" @@ -1894,20 +1908,59 @@ test_210() { # Use local_error so LNet doesn't attempt to resend the discovery ping $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error - do_lnetctl discover $($LCTL list_nids | head -n 1) && + do_lnetctl discover $prim_nid && error "Expected discovery to fail" + # See comment for check_ping_count() sleep 5 - check_nid_in_recovq "-l" 1 + check_nid_in_recovq "-l" "1" check_ping_count "ni" "2" sleep 5 - check_nid_in_recovq "-l" 1 + check_nid_in_recovq "-l" "1" check_ping_count "ni" "3" $LCTL net_drop_del -a + reinit_dlc || return $? + add_net "tcp" "${INTERFACES[0]}" || return $? + add_net "tcp1" "${INTERFACES[0]}" || return $? 
+ + local prim_nid=$($LCTL list_nids | head -n 1) + + do_lnetctl discover $prim_nid || + error "failed to discover myself" + + do_lnetctl set recovery_limit $default || + error "failed to set recovery_limit" + + default=$($LNETCTL global show | + awk '/max_recovery_ping_interval/{print $NF}') + do_lnetctl set max_recovery_ping_interval 2 || + error "failed to set max_recovery_ping_interval" + + $LCTL set_param debug=+net + # Use local_error so LNet doesn't attempt to resend the discovery ping + $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error + $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error + do_lnetctl discover $prim_nid && + error "Expected discovery to fail" + + # See comment for check_ping_count() + sleep 4 + check_nid_in_recovq "-l" "1" + check_ping_count "ni" "2" + + sleep 6 + check_nid_in_recovq "-l" "1" + check_ping_count "ni" "5" + + $LCTL net_drop_del -a + + do_lnetctl set max_recovery_ping_interval $default || + error "failed to set max_recovery_ping_interval" + return 0 } run_test 210 "Local NI recovery checks" @@ -1922,6 +1975,8 @@ test_211() { do_lnetctl discover $prim_nid || error "failed to discover myself" + local default=$($LNETCTL global show | + awk '/recovery_limit/{print $NF}') # Set recovery limit to 10 seconds. do_lnetctl set recovery_limit 10 || error "failed to set recovery_limit" @@ -1966,6 +2021,44 @@ test_211() { check_nid_in_recovq "-p" 0 check_ping_count "peer_ni" "0" + reinit_dlc || return $? + add_net "tcp" "${INTERFACES[0]}" || return $? + add_net "tcp1" "${INTERFACES[0]}" || return $? 
+ + local prim_nid=$($LCTL list_nids | head -n 1) + + do_lnetctl discover $prim_nid || + error "failed to discover myself" + + do_lnetctl set recovery_limit $default || + error "failed to set recovery_limit" + + default=$($LNETCTL global show | + awk '/max_recovery_ping_interval/{print $NF}') + do_lnetctl set max_recovery_ping_interval 2 || + error "failed to set max_recovery_ping_interval" + + $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error + $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error + + # Set health to 0 on one interface. This forces it onto the recovery + # queue. + $LNETCTL peer set --nid $prim_nid --health 0 + + # See comment for check_ping_count() + sleep 4 + check_nid_in_recovq "-p" "1" + check_ping_count "peer_ni" "2" + + sleep 6 + check_nid_in_recovq "-p" "1" + check_ping_count "peer_ni" "5" + + $LCTL net_drop_del -a + + do_lnetctl set max_recovery_ping_interval $default || + error "failed to set max_recovery_ping_interval" + return 0 } run_test 211 "Remote NI recovery checks" -- 1.8.3.1