From 8a0fdfa0b281b20b6acdd25ceb4c3fb5a9efacb3 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Mon, 7 Aug 2023 09:38:51 -0400 Subject: [PATCH] LU-10391 lnet: migrate peer NI control to Netlink Move peer creation and deletion to the Netlink API. This change enables the creation of peers with large NID addresses. Test-Parameters: trivial testlist=sanity-lnet Change-Id: I7f2f75e73e3f39856751f65e240f2172f703d0bc Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49574 Reviewed-by: Frank Sehr Reviewed-by: Chris Horn Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lnet/include/lnet/lib-lnet.h | 2 +- lnet/lnet/api-ni.c | 273 ++++++++++++++++++++++- lnet/lnet/nidstrings.c | 4 + lnet/lnet/peer.c | 6 +- lnet/utils/lnetconfig/liblnetconfig.c | 289 +++++++++++++++++++++++- lnet/utils/lnetconfig/liblnetconfig.h | 10 + lnet/utils/lnetconfig/liblnetconfig_netlink.c | 12 +- lnet/utils/lnetctl.c | 302 ++++++++++++++++++++++++-- lustre/tests/sanity-lnet.sh | 3 + 9 files changed, 876 insertions(+), 25 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index b95ac1d..d8fe4ff 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -651,7 +651,7 @@ int lnet_get_net_healthv_locked(struct lnet_net *net); extern int lnet_get_peer_list(__u32 *countp, __u32 *sizep, struct lnet_process_id __user *ids); -extern void lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all); +void lnet_peer_ni_set_healthv(struct lnet_nid *nid, int value, bool all); extern void lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni, struct list_head *queue, time64_t now); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index daf9cbf..267f4cc 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -4426,6 +4426,7 @@ LNetCtl(unsigned int cmd, void *arg) case IOC_LIBCFS_SET_HEALHV: { struct lnet_ioctl_reset_health_cfg *cfg = arg; int value; + if 
(cfg->rh_hdr.ioc_len < sizeof(*cfg)) return -EINVAL; if (cfg->rh_value < 0 || @@ -4436,13 +4437,13 @@ LNetCtl(unsigned int cmd, void *arg) CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n", value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ? "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all); + lnet_nid4_to_nid(cfg->rh_nid, &nid); mutex_lock(&the_lnet.ln_api_mutex); if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) lnet_ni_set_healthv(cfg->rh_nid, value, cfg->rh_all); else - lnet_peer_ni_set_healthv(cfg->rh_nid, value, - cfg->rh_all); + lnet_peer_ni_set_healthv(&nid, value, cfg->rh_all); mutex_unlock(&the_lnet.ln_api_mutex); return 0; } @@ -5913,6 +5914,272 @@ out: return rc; } +/* Called with ln_api_mutex */ +static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid, + bool mr, struct genl_info *info) +{ + struct lnet_nid snid = LNET_ANY_NID; + struct nlattr *props; + bool all = false; + int rem, rc = 0; + s64 num = -1; + + nla_for_each_nested(props, rlist, rem) { + if (nla_type(props) != LN_SCALAR_ATTR_VALUE) + continue; + + if (nla_strcmp(props, "nid") == 0) { + char nidstr[LNET_NIDSTR_SIZE]; + + props = nla_next(props, &rem); + if (nla_type(props) != LN_SCALAR_ATTR_VALUE) { + GENL_SET_ERR_MSG(info, + "invalid secondary NID"); + GOTO(report_err, rc = -EINVAL); + } + + rc = nla_strscpy(nidstr, props, sizeof(nidstr)); + if (rc < 0) { + GENL_SET_ERR_MSG(info, + "failed to get secondary NID"); + GOTO(report_err, rc); + } + + rc = libcfs_strnid(&snid, strim(nidstr)); + if (rc < 0) { + GENL_SET_ERR_MSG(info, "unsupported secondary NID"); + GOTO(report_err, rc); + } + + if (LNET_NID_IS_ANY(&snid)) + all = true; + } else if (nla_strcmp(props, "health stats") == 0) { + struct nlattr *health; + int rem2; + + props = nla_next(props, &rem); + if (nla_type(props) != + LN_SCALAR_ATTR_LIST) { + GENL_SET_ERR_MSG(info, + "invalid health configuration"); + GOTO(report_err, rc = -EINVAL); + } + + nla_for_each_nested(health, props, rem2) { 
+ if (nla_type(health) != LN_SCALAR_ATTR_VALUE || + nla_strcmp(health, "health value") != 0) { + GENL_SET_ERR_MSG(info, + "wrong health config format"); + GOTO(report_err, rc = -EINVAL); + } + + health = nla_next(health, &rem2); + if (nla_type(health) != + LN_SCALAR_ATTR_INT_VALUE) { + GENL_SET_ERR_MSG(info, + "invalid health config format"); + GOTO(report_err, rc = -EINVAL); + } + + num = nla_get_s64(health); + num = clamp_t(s64, num, 0, LNET_MAX_HEALTH_VALUE); /* clamp_t() only returns the clamped value */ + } + } + } + + if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) { + lnet_peer_ni_set_healthv(pnid, num, all); + } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) { + bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL; + + rc = lnet_user_add_peer_ni(pnid, &snid, mr, lock_prim); + if (rc < 0) + GENL_SET_ERR_MSG(info, + "failed to add peer"); + } else if (!(info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE))) { /* a set request without a health value must not delete */ + bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL; + + rc = lnet_del_peer_ni(pnid, &snid, force); + if (rc < 0) + GENL_SET_ERR_MSG(info, + "failed to del peer"); + } +report_err: + return rc; +} + +static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct genlmsghdr *gnlh = nlmsg_data(nlh); + struct nlattr *params = genlmsg_data(gnlh); + int msg_len, rem, rc = 0; + struct nlattr *attr; + + mutex_lock(&the_lnet.ln_api_mutex); + if (the_lnet.ln_state != LNET_STATE_RUNNING) { + GENL_SET_ERR_MSG(info, "Network is down"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -ENETDOWN; + } + + msg_len = genlmsg_len(gnlh); + if (!msg_len) { + GENL_SET_ERR_MSG(info, "no configuration"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -ENOMSG; + } + + if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) { + GENL_SET_ERR_MSG(info, "invalid configuration"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -EINVAL; + } + + nla_for_each_nested(attr, params, rem) { + struct lnet_nid pnid = LNET_ANY_NID; + bool parse_peer_nis = false; + struct nlattr 
*pnid_prop; + int rem2; + /* per peer entry: a "Multi-Rail" property must still be in effect when "peer ni" is parsed */ + bool mr = true; + + if (nla_type(attr) != LN_SCALAR_ATTR_LIST) + continue; + + nla_for_each_nested(pnid_prop, attr, rem2) { + if (nla_type(pnid_prop) != LN_SCALAR_ATTR_VALUE) + continue; + + if (nla_strcmp(pnid_prop, "primary nid") == 0) { + char nidstr[LNET_NIDSTR_SIZE]; + + pnid_prop = nla_next(pnid_prop, &rem2); + if (nla_type(pnid_prop) != + LN_SCALAR_ATTR_VALUE) { + GENL_SET_ERR_MSG(info, + "invalid primary NID type"); + GOTO(report_err, rc = -EINVAL); + } + + rc = nla_strscpy(nidstr, pnid_prop, + sizeof(nidstr)); + if (rc < 0) { + GENL_SET_ERR_MSG(info, + "failed to get primary NID"); + GOTO(report_err, rc); + } + + rc = libcfs_strnid(&pnid, strim(nidstr)); + if (rc < 0) { + GENL_SET_ERR_MSG(info, + "unsupported primary NID"); + GOTO(report_err, rc); + } + + /* we must create primary NID for peer ni + * creation + */ + if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) { + bool lock_prim; + + lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL; + rc = lnet_user_add_peer_ni(&pnid, + &LNET_ANY_NID, + true, lock_prim); + if (rc < 0) { + GENL_SET_ERR_MSG(info, + "failed to add primary peer"); + GOTO(report_err, rc); + } + } + } else if (nla_strcmp(pnid_prop, "Multi-Rail") == 0) { + pnid_prop = nla_next(pnid_prop, &rem2); + if (nla_type(pnid_prop) != + LN_SCALAR_ATTR_INT_VALUE) { + GENL_SET_ERR_MSG(info, + "invalid MR flag param"); + GOTO(report_err, rc = -EINVAL); + } + + if (nla_get_s64(pnid_prop) == 0) + mr = false; + } else if (nla_strcmp(pnid_prop, "peer state") == 0) { + struct lnet_peer_ni *lpni; + struct lnet_peer *lp; + + pnid_prop = nla_next(pnid_prop, &rem2); + if (nla_type(pnid_prop) != + LN_SCALAR_ATTR_INT_VALUE) { + GENL_SET_ERR_MSG(info, + "invalid peer state param"); + GOTO(report_err, rc = -EINVAL); + } + + lpni = lnet_peer_ni_find_locked(&pnid); + if (!lpni) { + GENL_SET_ERR_MSG(info, + "invalid peer state param"); + GOTO(report_err, rc = -ENOENT); + } + lnet_peer_ni_decref_locked(lpni); /* NOTE(review): lpni is still dereferenced below after this decref, and lnet_net_lock is not taken here -- confirm lifetime/locking */ + lp = lpni->lpni_peer_net->lpn_peer; + 
lp->lp_state = nla_get_s64(pnid_prop); + } else if (nla_strcmp(pnid_prop, "peer ni") == 0) { + struct nlattr *rlist; + int rem3; + + if (LNET_NID_IS_ANY(&pnid)) { + GENL_SET_ERR_MSG(info, + "missing required primary NID"); + GOTO(report_err, rc = -EINVAL); /* rc may still be 0 here */ + } + + pnid_prop = nla_next(pnid_prop, &rem2); + if (nla_type(pnid_prop) != + LN_SCALAR_ATTR_LIST) { + GENL_SET_ERR_MSG(info, + "invalid NIDs list"); + GOTO(report_err, rc = -EINVAL); + } + + nla_for_each_nested(rlist, pnid_prop, rem3) { + rc = lnet_parse_peer_nis(rlist, &pnid, + mr, info); + if (rc < 0) + GOTO(report_err, rc); + } + parse_peer_nis = true; + } + } + + /* If we have remote peer ni's we already add /del peers */ + if (parse_peer_nis) + continue; + + if (LNET_NID_IS_ANY(&pnid)) { + GENL_SET_ERR_MSG(info, "missing primary NID"); + GOTO(report_err, rc = -EINVAL); /* rc may still be 0 here */ + } + + if (!(info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE))) { /* only a plain request (neither create nor set) deletes the peer */ + bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL; + + rc = lnet_del_peer_ni(&pnid, &LNET_ANY_NID, + force); + if (rc < 0) { + GENL_SET_ERR_MSG(info, + "failed to del peer"); + GOTO(report_err, rc); + } + } + } +report_err: + mutex_unlock(&the_lnet.ln_api_mutex); + + return rc; +} + /** LNet route handling */ /* We can't use struct lnet_ioctl_config_data since it lacks @@ -7307,6 +7574,7 @@ static const struct genl_ops lnet_genl_ops[] = { }, { .cmd = LNET_CMD_PEERS, + .flags = GENL_ADMIN_PERM, #ifdef HAVE_NETLINK_CALLBACK_START .start = lnet_peer_ni_show_start, .dumpit = lnet_peer_ni_show_dump, @@ -7314,6 +7582,7 @@ static const struct genl_ops lnet_genl_ops[] = { .dumpit = lnet_old_peer_ni_show_dump, #endif .done = lnet_peer_ni_show_done, + .doit = lnet_peer_ni_cmd, }, { .cmd = LNET_CMD_ROUTES, diff --git a/lnet/lnet/nidstrings.c b/lnet/lnet/nidstrings.c index 3c822a4..02fb117 100644 --- a/lnet/lnet/nidstrings.c +++ b/lnet/lnet/nidstrings.c @@ -1170,6 +1170,10 @@ libcfs_strnid(struct lnet_nid *nid, const char *str) if (nf == NULL) return -EINVAL; } else { + if (strcmp(str, "") == 0) { + memcpy(nid, 
&LNET_ANY_NID, sizeof(*nid)); + return 0; + } sep = str + strlen(str); net = LNET_MKNET(SOCKLND, 0); nf = libcfs_lnd2netstrfns(SOCKLND); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 788ec10..236f278 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -4487,13 +4487,12 @@ lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni, /* Call with the ln_api_mutex held */ void -lnet_peer_ni_set_healthv(lnet_nid_t nid4, int value, bool all) +lnet_peer_ni_set_healthv(struct lnet_nid *nid, int value, bool all) { struct lnet_peer_table *ptable; struct lnet_peer *lp; struct lnet_peer_net *lpn; struct lnet_peer_ni *lpni; - struct lnet_nid nid; int lncpt; int cpt; time64_t now; @@ -4501,12 +4500,11 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid4, int value, bool all) if (the_lnet.ln_state != LNET_STATE_RUNNING) return; - lnet_nid4_to_nid(nid4, &nid); now = ktime_get_seconds(); if (!all) { lnet_net_lock(LNET_LOCK_EX); - lpni = lnet_peer_ni_find_locked(&nid); + lpni = lnet_peer_ni_find_locked(nid); if (!lpni) { lnet_net_unlock(LNET_LOCK_EX); return; diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 7ed9c05..24215be 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -32,7 +32,7 @@ * 2. 
APIs that take a YAML file and parses out the information there and * calls the APIs mentioned in 1 */ - +#include #include #include #include @@ -1497,6 +1497,293 @@ static int lustre_lnet_resolve_ip2nets_rule(struct lustre_lnet_ip2nets *ip2nets, return rc; } +void lustre_lnet_free_list(struct nid_node *head) +{ + struct nid_node *entry, *tmp; + + nl_list_for_each_entry_safe(entry, tmp, &head->children, list) { + nl_list_del(&entry->list); + free(entry); + } +} + +static int unroll_nid_range_scan(struct nid_node *list, const char *nid, + char *tmp, char *tmp2, char *nidstr, + const char **errmsg) +{ + unsigned int first = 0, last = 0, off = 0, inc = 1; + bool slash = false, hyphen = false; + char num[INT_STRING_LEN] = ""; + char *range = tmp ? tmp + 1 : NULL; /* tmp is validated below */ + int base = 10; + int rc; + + if (!tmp && !tmp2) { + *errmsg = "Unable to parse nidlist: [] are missing"; + return -ERANGE; + } + + if ((tmp && !tmp2) || (!tmp && tmp2)) { + *errmsg = "Unable to parse nidlist: incomplete bracket set"; + return -EINVAL; + } + + if (range > tmp2) { + *errmsg = "Unable to parse nidlist: improper bracket ordering"; + return -EINVAL; + } + + if (strchr(nid, ':')) + base = 16; + + while (range < tmp2) { + if (isxdigit(*range)) { + if (off >= sizeof(num) - 1) /* keep room for the terminating NUL */ + return -E2BIG; + num[off++] = *range++; + } else if (*range == ':') { + /* skip ':' for IPv6 and IB GUID */ + range++; + } else if (*range == '-') { + range++; + if (!isxdigit(*range)) { + *errmsg = "Unable to parse nidlist: range needs number after -"; + return -ERANGE; + } + + first = strtoul(num, NULL, base); + memset(num, 0, sizeof(num)); + hyphen = true; + off = 0; + } else if (*range == '/') { + range++; + if (!isdigit(*range)) { + *errmsg = "Unable to parse nidlist: range needs number after /"; + return -ERANGE; + } + + /* Don't lose last number. 
This should be the very last item */ + if (strlen(num)) { + last = strtoul(num, NULL, base); + memset(num, 0, sizeof(num)); + slash = true; + off = 0; + } + } else if (*range == ',') { + char *end = strchr(nidstr, ','); + int count = strlen(tmp2 + 1); + struct nid_node *item; + int len = strcspn(nid, "["); /* prefix before the first '['; tmp may point into a different buffer than nid */ + + range++; + if (!isxdigit(*range)) { + *errmsg = "Unable to parse nidlist: range needs number after ,"; + return -ERANGE; + } + + /* If hyphen is true then we have the '[first - last],' format. + * The other format is just x, y, z, ... format. + */ + if (hyphen) { + if (slash) + inc = strtoul(num, NULL, base); + else + last = strtoul(num, NULL, base); + + if (first > last) { + *errmsg = "Unable to parse nidlist: range is wrong order"; + return -ERANGE; + } + + while (last >= first) { + char hdr[LNET_MAX_STR_LEN], *next; + + snprintf(hdr, sizeof(hdr), "%.*s%u%.*s", + len, nid, first, count, + tmp2 + 1); + next = strchr(hdr, '['); + if (next && strchr(next, ']')) { + rc = unroll_nid_range_scan(list, hdr, next, + strchr(next, ']'), + nidstr, errmsg); + if (rc < 0) + return rc; + } else { + item = calloc(1, sizeof(struct nid_node)); + if (!item) { + *errmsg = "Unable to parse nidlist: allocation failed"; + return -ENOMEM; + } + snprintf(item->nidstr, sizeof(item->nidstr), + "%s@%.*s", hdr, end ? (int)(end - nidstr) : (int)strlen(nidstr), nidstr); /* net name stops at the next ',' */ + nl_init_list_head(&item->list); + nl_list_add_tail(&item->list, &list->children); + } + first += inc; + } + /* reset slash / hyphen handling */ + hyphen = false; + slash = false; + inc = 1; + } else { + int end_len = end ? 
end - nidstr : strlen(nidstr); + + item = calloc(1, sizeof(struct nid_node)); + if (!item) { + *errmsg = "Unable to parse nidlist: allocation failed"; + return -ENOMEM; + } + snprintf(item->nidstr, sizeof(item->nidstr), + "%.*s%s%.*s@%.*s", len, nid, num, + count, tmp2 + 1, end_len, nidstr); + nl_init_list_head(&item->list); + nl_list_add_tail(&item->list, &list->children); + } + + memset(num, 0, sizeof(num)); + off = 0; + } else { + *errmsg = "Unable to parse nidlist: invalid character in range"; + return -EINVAL; + } + } + + if (strlen(num)) { + if (slash) + inc = strtoul(num, NULL, base); + else + last = strtoul(num, NULL, base); + memset(num, 0, sizeof(num)); + } + + if (!hyphen) + first = last; + + /* This is the last set of [ first - last ]. We reach this point because + * the above loop reached the end of the range and no ',' was found so + * the final set of number range wasn't processed. Do that handling here. + */ + if (first || last) { + char *next = tmp2 + 1; + int len = strlen(nid); + + if (first > last) { + *errmsg = "Unable to parse nidlist: range is wrong order"; + return -ERANGE; + } + + if (strchr(nid, '[')) + len = strchr(nid, '[') - nid; + + tmp = strchr(tmp2, '['); + if (tmp) + tmp2 = strchr(tmp, ']'); + + while (last >= first) { + char str[LNET_MAX_STR_LEN]; + + snprintf(str, sizeof(str), "%.*s%u%.*s", + len, nid, first, tmp ? (int)(tmp - next) : (int)strlen(next), next); /* no further '[': keep the whole suffix */ + if (tmp && tmp2) { + rc = unroll_nid_range_scan(list, str, tmp, tmp2, + nidstr, errmsg); + if (rc < 0) + return rc; + } else { + char *end = strchr(nidstr, ','); + int count = end ? 
end - nidstr : strlen(nidstr); + struct nid_node *item; + + item = calloc(1, sizeof(struct nid_node)); + if (!item) { + *errmsg = "Unable to parse nidlist: allocation failed"; + return -ENOMEM; + } + snprintf(item->nidstr, sizeof(item->nidstr), + "%s@%.*s", str, count, nidstr); + nl_init_list_head(&item->list); + nl_list_add_tail(&item->list, &list->children); + } + first += inc; + } + } + + return 0; +} + +int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr, + const char **errmsg) +{ + int rc = 0; + char *nid; + + if (!nidstr) { + *errmsg = "supplied nidstr is NULL"; + return -EINVAL; + } + + if (strchr(nidstr, '*')) { + *errmsg = "asterisk not allowed in nidstring"; + return -EINVAL; + } + + if (strstr(nidstr, "<?>")) { /* "<?>" is the textual form of LNET_ANY_NID; an empty needle would match every string */ + *errmsg = "Unable to parse nidlist: LNET_ANY_NID is unsupported"; + return -EINVAL; + } + + if (strchr(nidstr, '@') == NULL) { + *errmsg = "Unable to parse nidlist: no valid NIDs in string"; + return -EINVAL; + } + + while ((nid = strsep(&nidstr, "@")) != NULL) { + char *tmp = NULL, *tmp2 = NULL, *end = NULL; + + if (!nidstr) { /* strsep() found no '@' in this token, so there is no net to pair it with */ + *errmsg = "Unable to parse nidlist: no proper NID string"; + return -ENOENT; + } + + tmp = strchr(nid, '['); + tmp2 = strchr(nid, ']'); + + if (!tmp && !tmp2) { + char *end = strchr(nidstr, ','); + struct nid_node *item; + int count; + + item = calloc(1, sizeof(struct nid_node)); + if (!item) { + *errmsg = "Unable to parse nidlist: allocation failed"; + rc = -ENOMEM; + goto err; + } + + count = end ? 
end - nidstr : strlen(nidstr); + snprintf(item->nidstr, sizeof(item->nidstr), + "%s@%.*s", nid, count, nidstr); + nl_init_list_head(&item->list); + nl_list_add_tail(&item->list, &head->children); + } else { + rc = unroll_nid_range_scan(head, nid, tmp, tmp2, + nidstr, errmsg); + if (rc < 0) + goto err; + } + + if (nidstr) { + end = strchr(nidstr, ','); + if (end) + nidstr = end + 1; + else + nidstr = NULL; + } + } +err: + return rc; +} + static int lustre_lnet_ioctl_config_ni(struct list_head *intf_list, struct lnet_ioctl_config_lnd_tunables *tunables, diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index d7c8acc..d33b92b 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -788,6 +788,16 @@ int lustre_yaml_show(char *f, struct cYAML **show_rc, int lustre_yaml_exec(char *f, struct cYAML **show_rc, struct cYAML **err_rc); +struct nid_node { + char nidstr[LNET_MAX_STR_LEN * 2]; + struct nl_list_head children; + struct nl_list_head list; +}; + +int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr, + const char **errmsg); +void lustre_lnet_free_list(struct nid_node *head); + /** * yaml_emitter_set_output_netlink * diff --git a/lnet/utils/lnetconfig/liblnetconfig_netlink.c b/lnet/utils/lnetconfig/liblnetconfig_netlink.c index 1fb25da..dc7aed4 100644 --- a/lnet/utils/lnetconfig/liblnetconfig_netlink.c +++ b/lnet/utils/lnetconfig/liblnetconfig_netlink.c @@ -1291,7 +1291,17 @@ static int yaml_fill_scalar_data(struct nl_msg *msg, if (!strlen(sep)) goto nla_put_failure; - if (strspn(sep, "-0123456789") == strlen(sep)) { + if (strcasecmp(sep, "yes") == 0 || + strcasecmp(sep, "true") == 0 || + strcasecmp(sep, "on") == 0 || + strcasecmp(sep, "y") == 0) { + NLA_PUT_S64(msg, LN_SCALAR_ATTR_INT_VALUE, 1); + } else if (strcasecmp(sep, "no") == 0 || + strcasecmp(sep, "false") == 0 || + strcasecmp(sep, "off") == 0 || + strcasecmp(sep, "n") == 0) { + NLA_PUT_S64(msg, 
LN_SCALAR_ATTR_INT_VALUE, 0); + } else if (strspn(sep, "-0123456789") == strlen(sep)) { num = strtoll(sep, NULL, 0); NLA_PUT_S64(msg, LN_SCALAR_ATTR_INT_VALUE, num); } else { diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index e96bd35..8ed5755 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -2442,7 +2442,7 @@ old_api: } static int set_value_helper(int argc, char **argv, - int (*cb)(int, bool, char*, int, struct cYAML**)) + int (*cb)(int, bool, char*, int, int, struct cYAML**)) { char *nid = NULL; long int healthv = -1; @@ -2450,21 +2450,17 @@ static int set_value_helper(int argc, char **argv, long int state = -1; int rc, opt; struct cYAML *err_rc = NULL; - const char *const short_options = "t:n:s:a"; static const struct option long_options[] = { { .name = "nid", .has_arg = required_argument, .val = 'n' }, { .name = "health", .has_arg = required_argument, .val = 't' }, { .name = "state", .has_arg = required_argument, .val = 's' }, { .name = "all", .has_arg = no_argument, .val = 'a' }, - { .name = NULL } }; - - rc = check_cmd(net_cmds, "net", "set", 0, argc, argv); - if (rc) - return rc; + { .name = NULL } + }; while ((opt = getopt_long(argc, argv, short_options, - long_options, NULL)) != -1) { + long_options, NULL)) != -1) { switch (opt) { case 'n': nid = optarg; @@ -2485,11 +2481,7 @@ static int set_value_helper(int argc, char **argv, } } - if (state > -1) - rc = lustre_lnet_set_peer_state(state, nid, -1, &err_rc); - else - rc = cb(healthv, all, nid, -1, &err_rc); - + rc = cb(healthv, all, nid, state, -1, &err_rc); if (rc != LUSTRE_CFG_RC_NO_ERR) cYAML_print_tree2file(stderr, err_rc); @@ -2765,6 +2757,241 @@ static int yaml_lnet_peer(char *prim_nid, char *nidstr, bool disable_mr, if (rc == 0) goto emitter_error; + if (disable_mr) { + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"Multi-Rail", + strlen("Multi-Rail"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + 
if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_BOOL_TAG, + (yaml_char_t *)"False", + strlen("False"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + } + + if (state != -1) { + char peer_state[INT_STRING_LEN]; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"peer state", + strlen("peer state"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + snprintf(peer_state, sizeof(peer_state), "%d", state); + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_INT_TAG, + (yaml_char_t *)peer_state, + strlen(peer_state), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + } + + if (!nidstr && health_value == -1) + goto skip_peer_nis; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"peer ni", + strlen("peer ni"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_sequence_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_SEQ_TAG, + 1, YAML_BLOCK_SEQUENCE_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + if (nidstr) { + struct nid_node head, *entry; + int count = 0; + + /* If we have LNET_ANY_NID and its NLM_F_REPLACE we + * treat it as the all flag case for lnetctl peer set + */ + if (strcmp(nidstr, "") == 0) { + yaml_mapping_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_MAP_TAG, + 1, YAML_BLOCK_MAPPING_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"nid", + strlen("nid"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if 
(rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)nidstr, + strlen(nidstr), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_mapping_end_event_initialize(&event); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + goto handle_health; + } + + NL_INIT_LIST_HEAD(&head.children); + nl_init_list_head(&head.list); + rc = lustre_lnet_parse_nid_range(&head, nidstr, &msg); + if (rc < 0) { + fprintf(stdout, "can't parse nidrange: \"%s\"\n", nidstr); + lustre_lnet_free_list(&head); + yaml_emitter_delete(&output); + errno = rc; + rc = 0; + goto free_reply; + } + + if (nl_list_empty(&head.children)) { + lustre_lnet_free_list(&head); + yaml_emitter_delete(&output); + msg = "Unable to parse nidlist: did not expand to any nids"; + errno = -ENOENT; + rc = 0; + goto free_reply; + } + rc = 1; /* one means its working */ + + nl_list_for_each_entry(entry, &head.children, list) { + char *nid = entry->nidstr; + + if (count++ > LNET_MAX_NIDS_PER_PEER) { + lustre_lnet_free_list(&head); + yaml_emitter_delete(&output); + msg = "Unable to parse nidlist: specifies more NIDs than allowed"; + errno = -E2BIG; + rc = 0; + goto free_reply; + } + + yaml_mapping_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_MAP_TAG, + 1, YAML_BLOCK_MAPPING_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"nid", + strlen("nid"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)nid, + strlen(nid), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + 
yaml_mapping_end_event_initialize(&event); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + } + lustre_lnet_free_list(&head); + } +handle_health: + if (health_value >= 0) { + char health[INT_STRING_LEN]; + + /* Create the mapping for 'health stats'. The value field for + * the mapping is not provided so its treated as a empty string. + */ + yaml_mapping_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_MAP_TAG, + 1, YAML_BLOCK_MAPPING_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"health stats", + strlen("health stats"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + /* Setup all mappings for data related to the 'health stats' */ + yaml_mapping_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_MAP_TAG, + 1, YAML_BLOCK_MAPPING_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"health value", + strlen("health value"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + snprintf(health, sizeof(health), "%d", health_value); + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_INT_TAG, + (yaml_char_t *)health, + strlen(health), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_mapping_end_event_initialize(&event); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + + yaml_mapping_end_event_initialize(&event); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto emitter_error; + } + + yaml_sequence_end_event_initialize(&event); + rc = yaml_emitter_emit(&output, &event); + if (rc == 0) + goto 
emitter_error; +skip_peer_nis: yaml_mapping_end_event_initialize(&event); rc = yaml_emitter_emit(&output, &event); if (rc == 0) @@ -2822,9 +3049,41 @@ free_reply: return rc == 1 ? 0 : rc; } +int yaml_lnet_config_peer_ni_healthv(int healthv, bool all, char *lpni_nid, + int state, int seq_no, struct cYAML **err_rc) +{ + int rc; + + rc = yaml_lnet_peer(lpni_nid ? lpni_nid : "", all ? "" : NULL, + false, healthv, state, false, LNET_GENL_VERSION, + NLM_F_REPLACE); + if (rc <= 0) { + if (rc == -EOPNOTSUPP) + goto old_api; + return rc; + } +old_api: + if (state == -1) + rc = lustre_lnet_config_peer_ni_healthv(healthv, all, lpni_nid, + seq_no, err_rc); + else + rc = lustre_lnet_set_peer_state(state, lpni_nid, -1, err_rc); + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, *err_rc); + + cYAML_free_tree(*err_rc); + + return rc; +} + static int jt_set_peer_ni_value(int argc, char **argv) { - return set_value_helper(argc, argv, lustre_lnet_config_peer_ni_healthv); + int rc = check_cmd(peer_cmds, "peer", "set", 0, argc, argv); + + if (rc < 0) + return rc; + + return set_value_helper(argc, argv, yaml_lnet_config_peer_ni_healthv); } static int jt_show_recovery(int argc, char **argv) @@ -3519,13 +3778,13 @@ static int jt_export(int argc, char **argv) static int jt_peer_nid_common(int argc, char **argv, int cmd) { + int flags = cmd == LNETCTL_ADD_CMD ? 
NLM_F_CREATE : 0; int rc = LUSTRE_CFG_RC_NO_ERR, opt; bool is_mr = true; char *prim_nid = NULL, *nidstr = NULL; char err_str[LNET_MAX_STR_LEN] = "Error"; struct cYAML *err_rc = NULL; int force_lock = 0; - const char *const short_opts = "k:m:n:f:l"; const struct option long_opts[] = { { .name = "prim_nid", .has_arg = required_argument, .val = 'k' }, @@ -3535,7 +3794,8 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) { .name = "lock_prim", .has_arg = no_argument, .val = 'l' }, { .name = NULL } }; - rc = check_cmd(peer_cmds, "peer", "add", 2, argc, argv); + rc = check_cmd(peer_cmds, "peer", + cmd == LNETCTL_ADD_CMD ? "add" : "del", 2, argc, argv); if (rc) return rc; @@ -3564,6 +3824,7 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) "Unrecognized option '-%c'", opt); } force_lock = 1; + flags |= NLM_F_EXCL; break; case 'l': if (cmd == LNETCTL_DEL_CMD) { @@ -3572,6 +3833,7 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) "Unrecognized option '-%c'", opt); } force_lock = 1; + flags |= NLM_F_EXCL; break; case '?': print_help(peer_cmds, "peer", @@ -3581,6 +3843,14 @@ static int jt_peer_nid_common(int argc, char **argv, int cmd) } } + rc = yaml_lnet_peer(prim_nid, nidstr, !is_mr, -1, -1, false, + LNET_GENL_VERSION, flags); + if (rc <= 0) { + if (rc == -EOPNOTSUPP) + goto old_api; + return rc; + } +old_api: rc = lustre_lnet_modify_peer(prim_nid, nidstr, is_mr, cmd, force_lock, -1, &err_rc); if (rc != LUSTRE_CFG_RC_NO_ERR) diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 7b52e30..74b10f2 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -346,6 +346,9 @@ EOF echo "Add peer with nidranage that overlaps primary nid (o2ib)" compare_peer_add "5.5.5.5@o2ib" "5.5.5.[1-4]@o2ib" + + echo "Add peer with nidranage that contain , plus primary nid (o2ib)" + compare_peer_add "5.5.5.5@o2ib" "5.5.5.[1,2,3-4]@o2ib" } run_test 5 "Add peer with nidrange (o2ib)" -- 1.8.3.1