Allow setting/reading lnet_recovery_limit via lnetctl.
Test-Parameters: trivial
HPE-bug-id: LUS-9109
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I5aac297bad15e43a52d8b8531da08a1d3f559bea
Reviewed-on: https://review.whamcloud.com/39717
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
return rc;
}
+/*
+ * lustre_lnet_config_recovery_limit
+ *   Writes the lnet_recovery_limit module parameter through sysfs.
+ *
+ *   val - new limit; a negative value is rejected with
+ *         LUSTRE_CFG_RC_BAD_PARAM before anything is written
+ *   seq_no - sequence number forwarded to cYAML_build_error()
+ *   err_rc - [OUT] error block filled in by cYAML_build_error()
+ *
+ *   Returns LUSTRE_CFG_RC_BAD_PARAM for a negative val, otherwise the
+ *   return code of write_sysfs_file().
+ */
+int lustre_lnet_config_recovery_limit(int val, int seq_no,
+				      struct cYAML **err_rc)
+{
+	char limit_str[LNET_MAX_STR_LEN];
+	char msg[LNET_MAX_STR_LEN];
+	int rc;
+
+	if (val < 0) {
+		rc = LUSTRE_CFG_RC_BAD_PARAM;
+		snprintf(msg, sizeof(msg),
+			 "\"Must be greater than or equal to 0\"");
+		goto out;
+	}
+
+	/* assume success; the text is replaced below if the write fails */
+	snprintf(msg, sizeof(msg), "\"success\"");
+	snprintf(limit_str, sizeof(limit_str), "%d", val);
+
+	/* the terminating NUL is written together with the digits */
+	rc = write_sysfs_file(modparam_path, "lnet_recovery_limit",
+			      limit_str, 1, strlen(limit_str) + 1);
+	if (rc)
+		snprintf(msg, sizeof(msg),
+			 "\"cannot configure recovery limit: %s\"",
+			 strerror(errno));
+
+out:
+	cYAML_build_error(rc, seq_no, ADD_CMD, "recovery_limit", msg,
+			  err_rc);
+
+	return rc;
+}
+
int lustre_lnet_config_max_intf(int max, int seq_no, struct cYAML **err_rc)
{
int rc = LUSTRE_CFG_RC_NO_ERR;
show_rc, err_rc, l_errno);
}
+/*
+ * lustre_lnet_show_recovery_limit
+ *   Reads the lnet_recovery_limit module parameter from sysfs and passes
+ *   it to build_global_yaml_entry() under the "recovery_limit" key.
+ *
+ *   seq_no - sequence number of the request
+ *   show_rc - [OUT] forwarded to build_global_yaml_entry()
+ *   err_rc - [OUT] forwarded to build_global_yaml_entry()
+ *
+ *   Returns whatever build_global_yaml_entry() returns.
+ */
+int lustre_lnet_show_recovery_limit(int seq_no, struct cYAML **show_rc,
+				    struct cYAML **err_rc)
+{
+	char err_str[LNET_MAX_STR_LEN];
+	char buf[LNET_MAX_STR_LEN];
+	int saved_errno = 0;
+	int limit = -1;
+	int rc;
+
+	/* default message; replaced below if the sysfs read fails */
+	snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+
+	rc = read_sysfs_file(modparam_path, "lnet_recovery_limit", buf,
+			     1, sizeof(buf));
+	if (rc == 0) {
+		limit = atoi(buf);
+	} else {
+		/* capture errno before snprintf() gets a chance to alter it */
+		saved_errno = -errno;
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot get lnet_recovery_limit value: %d\"", rc);
+	}
+
+	return build_global_yaml_entry(err_str, sizeof(err_str), seq_no,
+				       "recovery_limit", limit,
+				       show_rc, err_rc, saved_errno);
+}
+
int lustre_lnet_show_max_intf(int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc)
{
struct cYAML **err_rc)
{
struct cYAML *max_intf, *numa, *discovery, *retry, *tto, *seq_no,
- *sen, *recov, *rsen, *drop_asym_route, *rsp_tracking;
+ *sen, *recov, *rsen, *drop_asym_route, *rsp_tracking,
+ *recov_limit;
int rc = 0;
seq_no = cYAML_get_object_item(tree, "seq_no");
: -1,
err_rc);
+ recov_limit = cYAML_get_object_item(tree, "recovery_limit");
+ if (recov_limit)
+ rc = lustre_lnet_config_recovery_limit(recov_limit->cy_valueint,
+ seq_no ? seq_no->cy_valueint
+ : -1,
+ err_rc);
+
return rc;
}
struct cYAML **err_rc)
{
struct cYAML *max_intf, *numa, *discovery, *retry, *tto, *seq_no,
- *sen, *recov, *rsen, *drop_asym_route, *rsp_tracking;
+ *sen, *recov, *rsen, *drop_asym_route, *rsp_tracking,
+ *recov_limit;
int rc = 0;
seq_no = cYAML_get_object_item(tree, "seq_no");
-1,
show_rc, err_rc);
+ recov_limit = cYAML_get_object_item(tree, "recovery_limit");
+ if (recov_limit)
+ rc = lustre_lnet_show_recovery_limit(seq_no ?
+ seq_no->cy_valueint :
+ -1,
+ show_rc, err_rc);
+
return rc;
}
struct cYAML **err_rc);
int lustre_lnet_show_response_tracking(int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc);
+int lustre_lnet_config_recovery_limit(int val, int seq_no,
+ struct cYAML **err_rc);
+int lustre_lnet_show_recovery_limit(int seq_no, struct cYAML **show_rc,
+ struct cYAML **err_rc);
/*
* lustre_lnet_config_max_intf
static int jt_set_peer_ni_value(int argc, char **argv);
static int jt_calc_service_id(int argc, char **argv);
static int jt_set_response_tracking(int argc, char **argv);
+static int jt_set_recovery_limit(int argc, char **argv);
command_t cmd_list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
{"routing", jt_routing, 0, "routing {show | help}"},
{"set", jt_set, 0, "set {tiny_buffers | small_buffers | large_buffers"
" | routing | numa_range | max_interfaces"
- " | discovery}"},
+ " | discovery | drop_asym_route | retry_count"
+ " | transaction_timeout | health_sensitivity"
+ " | recovery_interval | router_sensitivity"
+ " | response_tracking | recovery_limit}"},
{"import", jt_import, 0, "import FILE.yaml"},
{"export", jt_export, 0, "export FILE.yaml"},
{"stats", jt_stats, 0, "stats {show | help}"},
"\t3 - Both PUTs and GETs are eligible for response tracking (default)\n"
"\tNote: Regardless of the value of the response_tracking parameter LNet\n"
"\t pings and discovery pushes always utilize response tracking\n"},
+ {"recovery_limit", jt_set_recovery_limit, 0,
+ "Set how long LNet will attempt to recover unhealthy interfaces.\n"
+ "\t0 - Recover indefinitely (default)\n"
+ "\t>0 - Recover for the specified number of seconds.\n"},
{ 0, 0, 0, NULL }
};
return rc;
}
+/*
+ * Handler for "lnetctl set recovery_limit <value>".
+ *
+ * Validates the command line with check_cmd(), parses argv[1] as a long
+ * and passes it to lustre_lnet_config_recovery_limit(). Any error block
+ * is printed to stderr and freed here.
+ *
+ * Returns the check_cmd() result if that fails, -1 if the value cannot
+ * be parsed or does not fit in an int, otherwise the return code of
+ * lustre_lnet_config_recovery_limit().
+ */
+static int jt_set_recovery_limit(int argc, char **argv)
+{
+	/*
+	 * Largest int value, computed locally so this function does not
+	 * depend on <limits.h> being included by the file.
+	 */
+	const long int int_max = (long int)(~0U >> 1);
+	struct cYAML *err_rc = NULL;
+	long int value;
+	int rc;
+
+	rc = check_cmd(set_cmds, "set", "recovery_limit", 2, argc, argv);
+	if (rc)
+		return rc;
+
+	rc = parse_long(argv[1], &value);
+	/*
+	 * lustre_lnet_config_recovery_limit() takes an int. Reject values
+	 * that do not fit instead of letting the implicit conversion narrow
+	 * them: e.g. 4294967296 would otherwise end up as 0, which means
+	 * "recover indefinitely".
+	 */
+	if (rc != 0 || value > int_max || value < -int_max - 1) {
+		cYAML_build_error(-1, -1, "parser", "set",
+				  "cannot parse recovery_limit value",
+				  &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+		cYAML_free_tree(err_rc);
+		return -1;
+	}
+
+	rc = lustre_lnet_config_recovery_limit((int)value, -1, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
static int jt_set_max_intf(int argc, char **argv)
{
long int value;
goto out;
}
+ rc = lustre_lnet_show_recovery_limit(-1, &show_rc, &err_rc);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_print_tree2file(stderr, err_rc);
+ goto out;
+ }
+
if (show_rc)
cYAML_print_tree(show_rc);
err_rc = NULL;
}
+ rc = lustre_lnet_show_recovery_limit(-1, &show_rc, &err_rc);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_print_tree2file(stderr, err_rc);
+ cYAML_free_tree(err_rc);
+ err_rc = NULL;
+ }
+
if (show_rc != NULL) {
cYAML_print_tree2file(f, show_rc);
cYAML_free_tree(show_rc);
Note: Regardless of the value of the response_tracking parameter LNet
pings and discovery pushes always utilize response tracking\.
.
+.TP
+\fBlnetctl set\fR recovery_limit \fIvalue\fR
+Set how long LNet will attempt to recover unhealthy peer interfaces\.
+ 0 - Recover indefinitely (default)\.
+ >0 - Recover for the specified number of seconds\.
+.
.SS "Import and Export YAML Configuration Files"
LNet configuration can be represented in YAML format\. A YAML configuration
file can be passed to the lnetctl utility via the \fBimport\fR command\. The