Add commands to lnetctl to set the health value.
For local NIs:
lnetctl net set --nid <nid> --health <value>
For peer NIs:
lnetctl peer set --nid <nid> --health <value>
Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I06e1238df54c94bcfecadd84fbaa30cc1ce4dd68
Reviewed-on: https://review.whamcloud.com/32863
Tested-by: Jenkins
Reviewed-by: Sonia Sharma <sharmaso@whamcloud.com>
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
value = LNET_MAX_HEALTH_VALUE;
else
value = cfg->rh_value;
+ CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
+ value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
+ "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
mutex_lock(&the_lnet.ln_api_mutex);
if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
lnet_ni_set_healthv(cfg->rh_nid, value,
if (!all) {
lnet_net_lock(LNET_LOCK_EX);
lpni = lnet_find_peer_ni_locked(nid);
+ if (!lpni) {
+ lnet_net_unlock(LNET_LOCK_EX);
+ return;
+ }
atomic_set(&lpni->lpni_healthv, value);
lnet_peer_ni_add_to_recoveryq_locked(lpni);
lnet_peer_ni_decref_locked(lpni);
return rc;
}
+/*
+ * lustre_lnet_config_healthv
+ *   Fill a struct lnet_ioctl_reset_health_cfg from the arguments and submit
+ *   it through the IOC_LIBCFS_SET_HEALHV ioctl, then record the outcome in
+ *   the cYAML error tree.
+ *
+ *   value - health value stored in rh_value
+ *   all - stored in rh_all
+ *   nid - stored in rh_nid
+ *   type - stored in rh_type (local NI or peer NI request)
+ *   name - command name handed to cYAML_build_error()
+ *   seq_no - sequence number of the request
+ *   err_rc - [OUT] struct cYAML tree describing the result
+ *
+ *   Returns the l_ioctl() result when it is 0, -errno otherwise.
+ */
+static int
+lustre_lnet_config_healthv(int value, bool all, lnet_nid_t nid,
+			   enum lnet_health_type type, char *name,
+			   int seq_no, struct cYAML **err_rc)
+{
+	char err_str[LNET_MAX_STR_LEN];
+	struct lnet_ioctl_reset_health_cfg req;
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+
+	LIBCFS_IOC_INIT_V2(req, rh_hdr);
+	req.rh_nid = nid;
+	req.rh_value = value;
+	req.rh_all = all;
+	req.rh_type = type;
+
+	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_SET_HEALHV, &req);
+	if (rc == 0) {
+		snprintf(err_str, sizeof(err_str), "\"success\"");
+	} else {
+		/* capture errno before any other library call can change it */
+		rc = -errno;
+		snprintf(err_str,
+			 sizeof(err_str), "Can not configure health value");
+	}
+
+	cYAML_build_error(rc, seq_no, ADD_CMD, name, err_str, err_rc);
+
+	return rc;
+}
+
+/*
+ * Translate the optional NID string into an lnet_nid_t (LNET_NID_ANY when
+ * ni_nid is NULL) and forward the request as a local NI health update.
+ */
+int lustre_lnet_config_ni_healthv(int value, bool all, char *ni_nid, int seq_no,
+				  struct cYAML **err_rc)
+{
+	lnet_nid_t target = ni_nid ? libcfs_str2nid(ni_nid) : LNET_NID_ANY;
+
+	return lustre_lnet_config_healthv(value, all, target,
+					  LNET_HEALTH_TYPE_LOCAL_NI,
+					  "ni healthv", seq_no, err_rc);
+}
+
+/*
+ * Translate the optional NID string into an lnet_nid_t (LNET_NID_ANY when
+ * lpni_nid is NULL) and forward the request as a peer NI health update.
+ */
+int lustre_lnet_config_peer_ni_healthv(int value, bool all, char *lpni_nid,
+				       int seq_no, struct cYAML **err_rc)
+{
+	lnet_nid_t target = lpni_nid ? libcfs_str2nid(lpni_nid) : LNET_NID_ANY;
+
+	return lustre_lnet_config_healthv(value, all, target,
+					  LNET_HEALTH_TYPE_PEER_NI,
+					  "peer_ni healthv", seq_no, err_rc);
+}
+
static bool
add_msg_stats_to_yaml_blk(struct cYAML *yaml,
struct lnet_ioctl_comm_count *counts)
struct cYAML **err_rc);
/*
+ * lustre_lnet_config_ni_healthv
+ * set the health value of the NI. -1 resets the value to maximum.
+ *
+ * value: health value to set.
+ * all: true to set all local NIs to that value.
+ * ni_nid: NI NID to set its health value. all parameter always takes
+ * precedence
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_config_ni_healthv(int value, bool all, char *ni_nid,
+ int seq_no, struct cYAML **err_rc);
+
+/*
+ * lustre_lnet_config_peer_ni_healthv
+ * set the health value of the peer NI. -1 resets the value to maximum.
+ *
+ * value: health value to set.
+ * all: true to set all peer NIs to that value.
+ * pni_nid: Peer NI NID to set its health value. all parameter always takes
+ * precedence
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_config_peer_ni_healthv(int value, bool all, char *pni_nid,
+ int seq_no, struct cYAML **err_rc);
+
+/*
* lustre_lnet_config_hsensitivity
* sets the health sensitivity; the value by which to decrement the
* health value of a local or peer NI. If 0 then health is turned off
static int jt_stats(int argc, char **argv);
static int jt_global(int argc, char **argv);
static int jt_peers(int argc, char **argv);
-
+static int jt_set_ni_value(int argc, char **argv);
+static int jt_set_peer_ni_value(int argc, char **argv);
command_t cmd_list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
"\t--net: net name (e.g. tcp0) to filter on\n"
"\t--verbose: display detailed output per network."
" Optional argument of '2' outputs more stats\n"},
+ {"set", jt_set_ni_value, 0, "set local NI specific parameter\n"
+ "\t--nid: NI NID to set the\n"
+ "\t--health: specify health value to set\n"
+ "\t--all: set all NIs value to the one specified\n"},
{ 0, 0, 0, NULL }
};
"\t--verbose: display detailed output per peer."
" Optional argument of '2' outputs more stats\n"},
{"list", jt_list_peer, 0, "list all peers\n"},
+ {"set", jt_set_peer_ni_value, 0, "set peer ni specific parameter\n"
+ "\t--nid: Peer NI NID to set the\n"
+ "\t--health: specify health value to set\n"
+ "\t--all: set all peer_nis values to the one specified\n"},
{ 0, 0, 0, NULL }
};
return rc;
}
+/*
+ * set_value_helper
+ *   Parse the options of a "set" sub-command and pass them to cb.
+ *
+ *   Recognised options:
+ *     -n, --nid <nid>       NID string forwarded to cb (NULL when absent)
+ *     -h, --health <value>  health value; remains -1 when the option is
+ *                           absent or parse_long() rejects its argument
+ *     -a, --all             forwarded to cb as the "all" flag
+ *
+ *   argc, argv - command line of the sub-command
+ *   cb - invoked as cb(healthv, all, nid, -1, &err_rc)
+ *
+ *   Returns the check_cmd() result when it is non-zero, 0 when an
+ *   unrecognised option is seen, otherwise the value returned by cb.
+ *
+ *   NOTE(review): check_cmd() is always given net_cmds/"net", although this
+ *   helper also serves the peer "set" command - confirm whether the peer
+ *   command table should be used in that case.
+ */
+static int set_value_helper(int argc, char **argv,
+			    int (*cb)(int, bool, char*, int, struct cYAML**))
+{
+	char *nid = NULL;
+	long int healthv = -1;
+	bool all = false;
+	int rc, opt;
+	struct cYAML *err_rc = NULL;
+
+	const char *const short_options = "h:n:a";
+	static const struct option long_options[] = {
+		{ .name = "nid", .has_arg = required_argument, .val = 'n' },
+		{ .name = "health", .has_arg = required_argument, .val = 'h' },
+		{ .name = "all", .has_arg = no_argument, .val = 'a' },
+		{ .name = NULL } };
+
+	rc = check_cmd(net_cmds, "net", "set", 0, argc, argv);
+	if (rc)
+		return rc;
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'n':
+			nid = optarg;
+			break;
+		case 'h':
+			/*
+			 * The option argument is in optarg; getopt_long() has
+			 * already moved optind past it, so argv[optind] would
+			 * be the following token (or NULL).
+			 */
+			if (parse_long(optarg, &healthv) != 0)
+				healthv = -1;
+			break;
+		case 'a':
+			all = true;
+			/* must not fall into default, which aborts parsing */
+			break;
+		default:
+			return 0;
+		}
+	}
+
+	rc = cb(healthv, all, nid, -1, &err_rc);
+
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
+/* "set" handler for local NIs: parse the options, then apply them to NIs. */
+static int jt_set_ni_value(int argc, char **argv)
+{
+	int rc = set_value_helper(argc, argv, lustre_lnet_config_ni_healthv);
+
+	return rc;
+}
+
+/* "set" handler for peer NIs: parse the options, then apply them to peer NIs. */
+static int jt_set_peer_ni_value(int argc, char **argv)
+{
+	int rc = set_value_helper(argc, argv,
+				  lustre_lnet_config_peer_ni_healthv);
+
+	return rc;
+}
+
static int jt_show_net(int argc, char **argv)
{
char *network = NULL;