From: Amir Shehata
Date: Tue, 23 Oct 2018 04:25:33 +0000 (-0700)
Subject: LU-11300 lnet: configure lnet router sensitivity
X-Git-Tag: 2.12.55~25^2~23
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=aef3d58d585ee818b405b5ff197b7a98b6c5157d

LU-11300 lnet: configure lnet router sensitivity

Allow the configuration of router_sensitivity_percentage
from the user space utility lnetctl

Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata
Change-Id: If5440f30881361ebb06dafa9cadb7cbc2b934f93
Reviewed-on: https://review.whamcloud.com/33455
Reviewed-by: Olaf Weber
Reviewed-by: Sebastien Buisson
Reviewed-by: Chris Horn
Tested-by: Jenkins
---

diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c
index bdcda35..b9bc07d 100644
--- a/lnet/utils/lnetconfig/liblnetconfig.c
+++ b/lnet/utils/lnetconfig/liblnetconfig.c
@@ -2570,6 +2570,28 @@ int lustre_lnet_config_recov_intrv(int intrv, int seq_no, struct cYAML **err_rc)
 	return rc;
 }
 
+int lustre_lnet_config_rtr_sensitivity(int sen, int seq_no, struct cYAML **err_rc)
+{
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+	char err_str[LNET_MAX_STR_LEN];
+	char val[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	snprintf(val, sizeof(val), "%d", sen);
+
+	rc = write_sysfs_file(modparam_path, "router_sensitivity_percentage", val,
+			      1, strlen(val) + 1);
+	if (rc)
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot configure router health sensitivity: %s\"",
+			 strerror(errno));
+
+	cYAML_build_error(rc, seq_no, ADD_CMD, "router_sensitivity", err_str, err_rc);
+
+	return rc;
+}
+
 int lustre_lnet_config_hsensitivity(int sen, int seq_no, struct cYAML **err_rc)
 {
 	int rc = LUSTRE_CFG_RC_NO_ERR;
@@ -3498,6 +3520,31 @@ int lustre_lnet_show_hsensitivity(int seq_no, struct cYAML **show_rc,
 				       err_rc, l_errno);
 }
 
+int lustre_lnet_show_rtr_sensitivity(int seq_no, struct cYAML **show_rc,
+				     struct cYAML **err_rc)
+{
+	int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+	char val[LNET_MAX_STR_LEN];
+	int sen = -1, l_errno = 0;
+	char err_str[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+
+	rc = read_sysfs_file(modparam_path, "router_sensitivity_percentage", val,
+			     1, sizeof(val));
+	if (rc) {
+		l_errno = -errno;
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot get router sensitivity percentage: %d\"", rc);
+	} else {
+		sen = atoi(val);
+	}
+
+	return build_global_yaml_entry(err_str, sizeof(err_str), seq_no,
+				       "router_sensitivity", sen, show_rc,
+				       err_rc, l_errno);
+}
+
 int lustre_lnet_show_transaction_to(int seq_no, struct cYAML **show_rc,
 				    struct cYAML **err_rc)
 {
@@ -4626,7 +4673,7 @@ static int handle_yaml_config_global_settings(struct cYAML *tree,
 					      struct cYAML **err_rc)
 {
 	struct cYAML *max_intf, *numa, *discovery, *retry, *tto, *seq_no,
-		     *sen, *recov, *drop_asym_route;
+		     *sen, *recov, *rsen, *drop_asym_route;
 	int rc = 0;
 
 	seq_no = cYAML_get_object_item(tree, "seq_no");
@@ -4686,6 +4733,13 @@ static int handle_yaml_config_global_settings(struct cYAML *tree,
 							: -1,
 						     err_rc);
 
+	rsen = cYAML_get_object_item(tree, "router_sensitivity");
+	if (rsen)
+		rc = lustre_lnet_config_rtr_sensitivity(rsen->cy_valueint,
+						     seq_no ? seq_no->cy_valueint
+							: -1,
+						     err_rc);
+
 	return rc;
 }
 
@@ -4733,7 +4787,7 @@ static int handle_yaml_show_global_settings(struct cYAML *tree,
 					    struct cYAML **err_rc)
 {
 	struct cYAML *max_intf, *numa, *discovery, *retry, *tto, *seq_no,
-		     *sen, *recov, *drop_asym_route;
+		     *sen, *recov, *rsen, *drop_asym_route;
 	int rc = 0;
 
 	seq_no = cYAML_get_object_item(tree, "seq_no");
@@ -4785,6 +4839,12 @@ static int handle_yaml_show_global_settings(struct cYAML *tree,
 							: -1,
 						  show_rc, err_rc);
 
+	rsen = cYAML_get_object_item(tree, "router_sensitivity");
+	if (rsen)
+		rc = lustre_lnet_show_rtr_sensitivity(seq_no ?
+						      seq_no->cy_valueint : -1,
+						      show_rc, err_rc);
+
 	return rc;
 }
 
diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h
index 8a5e6ac..a564694 100644
--- a/lnet/utils/lnetconfig/liblnetconfig.h
+++ b/lnet/utils/lnetconfig/liblnetconfig.h
@@ -279,6 +279,18 @@ int lustre_lnet_show_recov_intrv(int seq_no, struct cYAML **show_rc,
 				 struct cYAML **err_rc);
 
 /*
+ * lustre_lnet_config_rtr_sensitivity
+ *   sets the router sensitivity percentage. If the health percentage
+ *   of a router interface drops below this value it is considered failed
+ *
+ *   sen - sensitivity percentage to configure
+ *   seq_no - sequence number of the request
+ *   err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *   caller
+ */
+int lustre_lnet_config_rtr_sensitivity(int sen, int seq_no, struct cYAML **err_rc);
+
+/*
  * lustre_lnet_config_hsensitivity
  *   sets the health sensitivity; the value by which to decrement the
  *   health value of a local or peer NI. If 0 then health is turned off
@@ -303,6 +315,18 @@ int lustre_lnet_show_hsensitivity(int seq_no, struct cYAML **show_rc,
 				  struct cYAML **err_rc);
 
 /*
+ * lustre_lnet_show_rtr_sensitivity
+ *   show the router sensitivity percentage configured in the system
+ *
+ *   seq_no - sequence number of the request
+ *   show_rc - [OUT] struct cYAML tree containing router sensitivity info
+ *   err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *   caller
+ */
+int lustre_lnet_show_rtr_sensitivity(int seq_no, struct cYAML **show_rc,
+				     struct cYAML **err_rc);
+
+/*
  * lustre_lnet_config_transaction_to
  *   sets the timeout after which a message expires or a timeout event is
  *   propagated for an expired response.
diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c
index c503223..00b3bc9 100644
--- a/lnet/utils/lnetctl.c
+++ b/lnet/utils/lnetctl.c
@@ -57,6 +57,7 @@ static int jt_set_numa(int argc, char **argv);
 static int jt_set_retry_count(int argc, char **argv);
 static int jt_set_transaction_to(int argc, char **argv);
 static int jt_set_recov_intrv(int argc, char **argv);
+static int jt_set_rtr_sensitivity(int argc, char **argv);
 static int jt_set_hsensitivity(int argc, char **argv);
 static int jt_add_peer_nid(int argc, char **argv);
 static int jt_del_peer_nid(int argc, char **argv);
@@ -208,6 +209,9 @@ command_t set_cmds[] = {
 	 "\t>0 - sensitivity value not more than 1000\n"},
 	{"recovery_interval", jt_set_recov_intrv, 0, "interval to ping in seconds (at least 1)\n"
 	 "\t>0 - time in seconds between pings\n"},
+	{"router_sensitivity", jt_set_rtr_sensitivity, 0, "router sensitivity %\n"
+	 "\t100 - router interfaces need to be fully healthy to be used\n"
+	 "\t<100 - router interfaces can be used even if not healthy\n"},
 	{ 0, 0, 0, NULL }
 };
 
@@ -394,6 +398,53 @@ static int jt_set_recov_intrv(int argc, char **argv)
 	return rc;
 }
 
+/*
+ * jt_set_rtr_sensitivity
+ *   "set router_sensitivity" handler: parses the percentage in argv[1]
+ *   and applies it via lustre_lnet_config_rtr_sensitivity(). Errors are
+ *   printed to stderr as a YAML tree.
+ */
+static int jt_set_rtr_sensitivity(int argc, char **argv)
+{
+	long int value;
+	int rc;
+	struct cYAML *err_rc = NULL;
+
+	rc = check_cmd(set_cmds, "set", "router_sensitivity", 2, argc, argv);
+	if (rc)
+		return rc;
+
+	rc = parse_long(argv[1], &value);
+	if (rc != 0) {
+		cYAML_build_error(-1, -1, "parser", "set",
+				  "cannot parse router sensitivity value", &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+		cYAML_free_tree(err_rc);
+		return -1;
+	}
+
+	/*
+	 * value is a long but the library call takes an int; reject anything
+	 * outside the percentage range instead of silently truncating it
+	 */
+	if (value < 0 || value > 100) {
+		cYAML_build_error(-1, -1, "parser", "set",
+				  "router sensitivity must be between 0 and 100",
+				  &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+		cYAML_free_tree(err_rc);
+		return -1;
+	}
+
+	rc = lustre_lnet_config_rtr_sensitivity(value, -1, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
 static int jt_set_hsensitivity(int argc, char **argv)
 {
 	long int value;
@@ -1299,6 +1350,12 @@ static int jt_show_global(int argc, char **argv)
 		goto out;
 	}
 
+	rc = lustre_lnet_show_rtr_sensitivity(-1, &show_rc, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR) {
+		cYAML_print_tree2file(stderr, err_rc);
+		goto out;
+	}
+
 	if (show_rc)
 		cYAML_print_tree(show_rc);
 