From 29f21e2c850a77676e44df0cea746bfa5c6f19ad Mon Sep 17 00:00:00 2001
From: Amir Shehata
Date: Tue, 3 Jul 2018 17:04:16 -0700
Subject: [PATCH] LU-9120 lnet: set retry count from lnetctl

Added an lnetctl command to set the retry_count from userspace.

lnetctl set retry_count [0|>0]
	0 - turns off retries in the system
	>0 - number of retries.

Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata
Change-Id: I2fd5c88a91590195cfdad52e6d177619ccbbc840
Reviewed-on: https://review.whamcloud.com/32777
Reviewed-by: Olaf Weber
Reviewed-by: Sonia Sharma
Tested-by: Jenkins
---
 lnet/utils/lnetconfig/liblnetconfig.c | 65 +++++++++++++++++++++++++++++++++--
 lnet/utils/lnetconfig/liblnetconfig.h | 23 +++++++++++++
 lnet/utils/lnetctl.c                  | 38 ++++++++++++++++++++
 3 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c
index 32ed598..4ae90e8 100644
--- a/lnet/utils/lnetconfig/liblnetconfig.c
+++ b/lnet/utils/lnetconfig/liblnetconfig.c
@@ -2429,6 +2429,28 @@ int ioctl_set_value(__u32 val, int ioc, char *name,
 	return rc;
 }
 
+int lustre_lnet_config_retry_count(int count, int seq_no, struct cYAML **err_rc)
+{
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+	char err_str[LNET_MAX_STR_LEN];
+	char val[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	snprintf(val, sizeof(val), "%d", count);
+
+	rc = write_sysfs_file(modparam_path, "lnet_retry_count", val,
+			      1, strlen(val) + 1);
+	if (rc)
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot configure retry count: %s\"",
+			 strerror(errno));
+
+	cYAML_build_error(rc, seq_no, ADD_CMD, "retry_count", err_str, err_rc);
+
+	return rc;
+}
+
 int lustre_lnet_config_max_intf(int max, int seq_no, struct cYAML **err_rc)
 {
 	int rc = LUSTRE_CFG_RC_NO_ERR;
@@ -3187,6 +3209,31 @@ static int ioctl_show_global_values(int ioc, int seq_no, char *name,
 				       data.sv_value, show_rc, err_rc, l_errno);
 }
 
+int lustre_lnet_show_retry_count(int seq_no, struct cYAML **show_rc,
+				 struct cYAML **err_rc)
+{
+	int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+	char val[LNET_MAX_STR_LEN];
+	int retry_count = -1, l_errno = 0;
+	char err_str[LNET_MAX_STR_LEN];
+
+	snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+
+	rc = read_sysfs_file(modparam_path, "lnet_retry_count", val,
+			     1, sizeof(val));
+	if (rc) {
+		l_errno = -errno;
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot get retry count: %d\"", rc);
+	} else {
+		retry_count = atoi(val);
+	}
+
+	return build_global_yaml_entry(err_str, sizeof(err_str), seq_no,
+				       "retry_count", retry_count, show_rc,
+				       err_rc, l_errno);
+}
+
 int lustre_lnet_show_max_intf(int seq_no, struct cYAML **show_rc,
 			      struct cYAML **err_rc)
 {
@@ -4083,7 +4130,7 @@ static int handle_yaml_config_global_settings(struct cYAML *tree,
 					      struct cYAML **show_rc,
 					      struct cYAML **err_rc)
 {
-	struct cYAML *max_intf, *numa, *discovery, *seq_no;
+	struct cYAML *max_intf, *numa, *discovery, *retry, *seq_no;
 	int rc = 0;
 
 	seq_no = cYAML_get_object_item(tree, "seq_no");
@@ -4108,6 +4155,13 @@ static int handle_yaml_config_global_settings(struct cYAML *tree,
 							: -1,
 						  err_rc);
 
+	retry = cYAML_get_object_item(tree, "retry_count");
+	if (retry)
+		rc = lustre_lnet_config_retry_count(retry->cy_valueint,
+						    seq_no ? seq_no->cy_valueint
+							: -1,
+						    err_rc);
+
 	return rc;
 }
 
@@ -4148,7 +4202,7 @@ static int handle_yaml_show_global_settings(struct cYAML *tree,
 					    struct cYAML **show_rc,
 					    struct cYAML **err_rc)
 {
-	struct cYAML *max_intf, *numa, *discovery, *seq_no;
+	struct cYAML *max_intf, *numa, *discovery, *retry, *seq_no;
 	int rc = 0;
 
 	seq_no = cYAML_get_object_item(tree, "seq_no");
@@ -4170,6 +4224,13 @@ static int handle_yaml_show_global_settings(struct cYAML *tree,
 							: -1,
 						show_rc, err_rc);
 
+	retry = cYAML_get_object_item(tree, "retry_count");
+	if (retry)
+		rc = lustre_lnet_show_retry_count(seq_no ? seq_no->cy_valueint
+							: -1,
+						  show_rc, err_rc);
+
+
 	return rc;
 }
 
diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h
index ce005d1..55348a0 100644
--- a/lnet/utils/lnetconfig/liblnetconfig.h
+++ b/lnet/utils/lnetconfig/liblnetconfig.h
@@ -225,6 +225,29 @@ int lustre_lnet_show_numa_range(int seq_no, struct cYAML **show_rc,
 				struct cYAML **err_rc);
 
 /*
+ * lustre_lnet_config_retry_count
+ *   sets the maximum number of retries to resend a message
+ *
+ *     count - maximum number of retries; 0 turns retries off
+ *     seq_no - sequence number of the request
+ *     err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *              caller
+ */
+int lustre_lnet_config_retry_count(int count, int seq_no, struct cYAML **err_rc);
+
+/*
+ * lustre_lnet_show_retry_count
+ *    show current maximum number of retries in the system
+ *
+ *     seq_no - sequence number of the request
+ *     show_rc - [OUT] struct cYAML tree containing retry count info
+ *     err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *              caller
+ */
+int lustre_lnet_show_retry_count(int seq_no, struct cYAML **show_rc,
+				 struct cYAML **err_rc);
+
+/*
  * lustre_lnet_config_max_intf
  *   Sets the maximum number of interfaces per node. this tunable is
  *   primarily useful for sanity checks prior to allocating memory.
diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c
index e14d43e..27ba97d 100644
--- a/lnet/utils/lnetctl.c
+++ b/lnet/utils/lnetctl.c
@@ -53,6 +53,7 @@ static int jt_set_tiny(int argc, char **argv);
 static int jt_set_small(int argc, char **argv);
 static int jt_set_large(int argc, char **argv);
 static int jt_set_numa(int argc, char **argv);
+static int jt_set_retry_count(int argc, char **argv);
 static int jt_add_peer_nid(int argc, char **argv);
 static int jt_del_peer_nid(int argc, char **argv);
 static int jt_set_max_intf(int argc, char **argv);
@@ -174,6 +175,9 @@ command_t set_cmds[] = {
 	{"discovery", jt_set_discovery, 0, "enable/disable peer discovery\n"
 	 "\t0 - disable peer discovery\n"
 	 "\t1 - enable peer discovery (default)\n"},
+	{"retry_count", jt_set_retry_count, 0, "number of retries\n"
+	 "\t0 - turn off retries\n"
+	 "\t>0 - number of retries\n"},
 	{ 0, 0, 0, NULL }
 };
 
@@ -328,6 +332,34 @@ static int jt_set_numa(int argc, char **argv)
 	return rc;
 }
 
+static int jt_set_retry_count(int argc, char **argv)
+{
+	long int value;
+	int rc;
+	struct cYAML *err_rc = NULL;
+
+	rc = check_cmd(set_cmds, "set", "retry_count", 2, argc, argv);
+	if (rc)
+		return rc;
+
+	rc = parse_long(argv[1], &value);
+	if (rc != 0) {
+		cYAML_build_error(-1, -1, "parser", "set",
+				  "cannot parse retry_count value", &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+		cYAML_free_tree(err_rc);
+		return -1;
+	}
+
+	rc = lustre_lnet_config_retry_count(value, -1, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
 static int jt_set_discovery(int argc, char **argv)
 {
 	long int value;
@@ -980,6 +1012,12 @@ static int jt_show_global(int argc, char **argv)
 		goto out;
 	}
 
+	rc = lustre_lnet_show_retry_count(-1, &show_rc, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR) {
+		cYAML_print_tree2file(stderr, err_rc);
+		goto out;
+	}
+
 	if (show_rc)
 		cYAML_print_tree(show_rc);
 
-- 
1.8.3.1