From c0ad398fd71610c42b7ed06f8d2ca722daa01391 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Mon, 23 Jul 2018 17:11:07 -0700 Subject: [PATCH] LU-9120 lnet: set health value from user space Add commands to lnetctl to set the health value. for local NIs: lnetctl net set --nid --health for peer NIs: lnetctl peer set --nid --health Test-Parameters: forbuildonly Signed-off-by: Amir Shehata Change-Id: I06e1238df54c94bcfecadd84fbaa30cc1ce4dd68 Reviewed-on: https://review.whamcloud.com/32863 Tested-by: Jenkins Reviewed-by: Sonia Sharma Reviewed-by: Olaf Weber --- lnet/lnet/api-ni.c | 3 ++ lnet/lnet/peer.c | 4 +++ lnet/utils/lnetconfig/liblnetconfig.c | 55 ++++++++++++++++++++++++++++ lnet/utils/lnetconfig/liblnetconfig.h | 30 ++++++++++++++++ lnet/utils/lnetctl.c | 68 ++++++++++++++++++++++++++++++++++- 5 files changed, 159 insertions(+), 1 deletion(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index a0fe3a5..e0b2a22 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3598,6 +3598,9 @@ LNetCtl(unsigned int cmd, void *arg) value = LNET_MAX_HEALTH_VALUE; else value = cfg->rh_value; + CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n", + value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ? 
+ "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all); mutex_lock(&the_lnet.ln_api_mutex); if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) lnet_ni_set_healthv(cfg->rh_nid, value, diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 0159101..6d32f19 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3495,6 +3495,10 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) if (!all) { lnet_net_lock(LNET_LOCK_EX); lpni = lnet_find_peer_ni_locked(nid); + if (!lpni) { + lnet_net_unlock(LNET_LOCK_EX); + return; + } atomic_set(&lpni->lpni_healthv, value); lnet_peer_ni_add_to_recoveryq_locked(lpni); lnet_peer_ni_decref_locked(lpni); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index c4a3cdc..e481cf7 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2028,6 +2028,61 @@ out: return rc; } +static int +lustre_lnet_config_healthv(int value, bool all, lnet_nid_t nid, + enum lnet_health_type type, char *name, + int seq_no, struct cYAML **err_rc) +{ + struct lnet_ioctl_reset_health_cfg data; + int rc = LUSTRE_CFG_RC_NO_ERR; + char err_str[LNET_MAX_STR_LEN]; + + snprintf(err_str, sizeof(err_str), "\"success\""); + + LIBCFS_IOC_INIT_V2(data, rh_hdr); + data.rh_type = type; + data.rh_all = all; + data.rh_value = value; + data.rh_nid = nid; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_SET_HEALHV, &data); + if (rc != 0) { + rc = -errno; + snprintf(err_str, + sizeof(err_str), "Can not configure health value"); + } + + cYAML_build_error(rc, seq_no, ADD_CMD, name, err_str, err_rc); + + return rc; +} + +int lustre_lnet_config_ni_healthv(int value, bool all, char *ni_nid, int seq_no, + struct cYAML **err_rc) +{ + lnet_nid_t nid; + if (ni_nid) + nid = libcfs_str2nid(ni_nid); + else + nid = LNET_NID_ANY; + return lustre_lnet_config_healthv(value, all, nid, + LNET_HEALTH_TYPE_LOCAL_NI, + "ni healthv", seq_no, err_rc); +} + +int lustre_lnet_config_peer_ni_healthv(int value, bool all, 
char *lpni_nid, + int seq_no, struct cYAML **err_rc) +{ + lnet_nid_t nid; + if (lpni_nid) + nid = libcfs_str2nid(lpni_nid); + else + nid = LNET_NID_ANY; + return lustre_lnet_config_healthv(value, all, nid, + LNET_HEALTH_TYPE_PEER_NI, + "peer_ni healthv", seq_no, err_rc); +} + static bool add_msg_stats_to_yaml_blk(struct cYAML *yaml, struct lnet_ioctl_comm_count *counts) diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index 6d67302..8ec4069 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -225,6 +225,36 @@ int lustre_lnet_show_numa_range(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc); /* + * lustre_lnet_config_ni_healthv + * set the health value of the NI. -1 resets the value to maximum. + * + * value: health value to set. + * all: true to set all local NIs to that value. + * ni_nid: NI NID to set its health value. all parameter always takes + * precedence + * seq_no - sequence number of the request + * err_rc - [OUT] struct cYAML tree describing the error. Freed by + * caller + */ +int lustre_lnet_config_ni_healthv(int value, bool all, char *ni_nid, + int seq_no, struct cYAML **err_rc); + +/* + * lustre_lnet_config_peer_ni_healthv + * set the health value of the peer NI. -1 resets the value to maximum. + * + * value: health value to set. + * all: true to set all local NIs to that value. + * pni_nid: Peer NI NID to set its health value. all parameter always takes + * precedence + * seq_no - sequence number of the request + * err_rc - [OUT] struct cYAML tree describing the error. Freed by + * caller + */ +int lustre_lnet_config_peer_ni_healthv(int value, bool all, char *pni_nid, + int seq_no, struct cYAML **err_rc); + +/* * lustre_lnet_config_hsensitivity * sets the health sensitivity; the value by which to decrement the * health value of a local or peer NI. 
If 0 then health is turned off diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 7633082..e6e4255 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -75,7 +75,8 @@ static int jt_set(int argc, char **argv); static int jt_stats(int argc, char **argv); static int jt_global(int argc, char **argv); static int jt_peers(int argc, char **argv); - +static int jt_set_ni_value(int argc, char **argv); +static int jt_set_peer_ni_value(int argc, char **argv); command_t cmd_list[] = { {"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"}, @@ -141,6 +142,10 @@ command_t net_cmds[] = { "\t--net: net name (e.g. tcp0) to filter on\n" "\t--verbose: display detailed output per network." " Optional argument of '2' outputs more stats\n"}, + {"set", jt_set_ni_value, 0, "set local NI specific parameter\n" + "\t--nid: NI NID to set the\n" + "\t--health: specify health value to set\n" + "\t--all: set all NIs value to the one specified\n"}, { 0, 0, 0, NULL } }; @@ -206,6 +211,10 @@ command_t peer_cmds[] = { "\t--verbose: display detailed output per peer." 
" Optional argument of '2' outputs more stats\n"}, {"list", jt_list_peer, 0, "list all peers\n"}, + {"set", jt_set_peer_ni_value, 0, "set peer ni specific parameter\n" + "\t--nid: Peer NI NID to set the\n" + "\t--health: specify health value to set\n" + "\t--all: set all peer_nis values to the one specified\n"}, { 0, 0, 0, NULL } }; @@ -953,6 +962,64 @@ static int jt_show_route(int argc, char **argv) return rc; } +static int set_value_helper(int argc, char **argv, + int (*cb)(int, bool, char*, int, struct cYAML**)) +{ + char *nid = NULL; + long int healthv = -1; + bool all = false; + int rc, opt; + struct cYAML *err_rc = NULL; + + const char *const short_options = "h:n:a"; + static const struct option long_options[] = { + { .name = "nid", .has_arg = required_argument, .val = 'n' }, + { .name = "health", .has_arg = required_argument, .val = 'h' }, + { .name = "all", .has_arg = no_argument, .val = 'a' }, + { .name = NULL } }; + + rc = check_cmd(net_cmds, "net", "set", 0, argc, argv); + if (rc) + return rc; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 'n': + nid = optarg; + break; + case 'h': + if (parse_long(argv[optind++], &healthv) != 0) + healthv = -1; + break; + case 'a': + all = true; + break; + default: + return 0; + } + } + + rc = cb(healthv, all, nid, -1, &err_rc); + + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + + cYAML_free_tree(err_rc); + + return rc; +} + +static int jt_set_ni_value(int argc, char **argv) +{ + return set_value_helper(argc, argv, lustre_lnet_config_ni_healthv); +} + +static int jt_set_peer_ni_value(int argc, char **argv) +{ + return set_value_helper(argc, argv, lustre_lnet_config_peer_ni_healthv); +} + static int jt_show_net(int argc, char **argv) { char *network = NULL; -- 1.8.3.1