From 5ddc05454019a7629fab796583f76c48cb529f8e Mon Sep 17 00:00:00 2001 From: James Simmons Date: Wed, 26 Jun 2024 13:43:35 -0400 Subject: [PATCH] LU-17629 utils: support hostname with lustre_lnet_parse_nid_range() A hostname may map to multiple IP addresses. In this case lnetctl commands that attempt to use the hostname can resolve to the wrong IP address. Update the function lustre_lnet_parse_nid_range() to work with hostnames and properly resolve the correct IP address. Update both lnetctl ping and lnetctl discover to work with lustre_lnet_parse_nid_range(). Test-Parameters: trivial testlist=sanity-lnet Change-Id: I670799edcb04a02380e96c289ba26854b057d978 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54894 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Timothy Day Reviewed-by: Chris Horn Reviewed-by: Oleg Drokin --- lnet/utils/lnetconfig/liblnetconfig.c | 18 ++++++++++++++-- lnet/utils/lnetctl.c | 39 ++++++++++++++++++++++++++++------- lustre/tests/sanity-lnet.sh | 21 +++++++++++++++++++ 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 5ed6ede..09cb45b 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2148,7 +2148,9 @@ int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr, if (!tmp && !tmp2) { char *end = strchr(nidstr, ','); + char orig[LNET_MAX_STR_LEN]; struct nid_node *item; + struct lnet_nid NID; int count; item = calloc(1, sizeof(struct nid_node)); @@ -2159,8 +2161,20 @@ int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr, } count = end ? end - nidstr : strlen(nidstr); - snprintf(item->nidstr, sizeof(item->nidstr), - "%s@%.*s", nid, count, nidstr); + snprintf(orig, sizeof(orig), "%s@%.*s", nid, count, + nidstr); + + /* Allow hostname version of the NID. We have to + * translate them to address type NID. 
+ */ + rc = libcfs_strnid(&NID, orig); + if (rc < 0) { + *errmsg = "Unable to parse nidlist: invalid NID in nidstr"; + goto err; + } + + libcfs_nidstr_r(&NID, item->nidstr, + sizeof(item->nidstr)); nl_init_list_head(&item->list); nl_list_add_tail(&item->list, &head->children); } else { diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 0856b0b..f381031 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -5081,9 +5081,10 @@ report_reply_error: return rc2 ? rc2 : rc; } -static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr, +static int yaml_lnet_ping(char *group, int timeout, struct lnet_nid *src_nid, int start, int end, char **nids, int flags) { + struct nid_node head, *entry; struct nl_sock *sk = NULL; const char *msg = NULL; yaml_emitter_t output; @@ -5151,8 +5152,10 @@ static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr, if (rc == 0) goto emitter_error; - if (timeout != 1000 || src_nidstr) { - if (src_nidstr) { + if (timeout != 1000 || (src_nid && nid_addr_is_set(src_nid))) { + if (src_nid) { + char *src_nidstr = libcfs_nidstr(src_nid); + yaml_scalar_event_initialize(&event, NULL, (yaml_char_t *)YAML_STR_TAG, (yaml_char_t *)"source", @@ -5212,17 +5215,33 @@ static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr, if (rc == 0) goto emitter_error; + NL_INIT_LIST_HEAD(&head.children); + nl_init_list_head(&head.list); for (i = start; i < end; i++) { + rc = lustre_lnet_parse_nid_range(&head, nids[i], &msg); + if (rc < 0) { + lustre_lnet_free_list(&head); + yaml_emitter_delete(&output); + errno = rc; + rc = 0; + goto free_reply; + } + } + + if (nl_list_empty(&head.children)) + goto skip_nids; + + nl_list_for_each_entry(entry, &head.children, list) { yaml_scalar_event_initialize(&event, NULL, (yaml_char_t *)YAML_STR_TAG, - (yaml_char_t *)nids[i], - strlen(nids[i]), 1, 0, + (yaml_char_t *)entry->nidstr, + strlen(entry->nidstr), 1, 0, YAML_PLAIN_SCALAR_STYLE); rc = yaml_emitter_emit(&output, &event); if 
(rc == 0) goto emitter_error; } - +skip_nids: yaml_sequence_end_event_initialize(&event); rc = yaml_emitter_emit(&output, &event); if (rc == 0) @@ -5270,6 +5289,7 @@ static int jt_ping(int argc, char **argv) { struct cYAML *err_rc = NULL; struct cYAML *show_rc = NULL; + struct lnet_nid src = { }; int timeout = 1000; int rc = 0, opt; char *src_nidstr = NULL; @@ -5286,6 +5306,7 @@ static int jt_ping(int argc, char **argv) switch (opt) { case 's': src_nidstr = optarg; + rc = libcfs_strnid(&src, src_nidstr); break; case 't': timeout = 1000 * atol(optarg); @@ -5300,9 +5321,11 @@ static int jt_ping(int argc, char **argv) return 0; } } + if (rc < 0) + return rc; - rc = yaml_lnet_ping("ping", timeout, src_nidstr, optind, argc, - argv, NLM_F_DUMP); + rc = yaml_lnet_ping("ping", timeout, &src, optind, argc, argv, + NLM_F_DUMP); if (rc <= 0) { if (rc != -EOPNOTSUPP) return rc; diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index d9ee55c..7590b12 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -4164,6 +4164,27 @@ EOF } run_test 304 "Check locked primary peer nid consolidation" +test_305() { + [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE" + + reinit_dlc || return $? + + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + + local nid=$($LCTL list_nids) + + do_lnetctl ping ${nid} || + error "pinging self failed $?" + + [[ "${nid%@*}" == "$(hostname -i | awk '{print $1}')" ]] || + skip "IP $(hostname -i) isn't NID $nid" + + nid="$(hostname -s)@${NETTYPE}" + do_lnetctl ping $nid || + error "pinging own hostname $nid failed $?" +} +run_test 305 "Resolve hostname before lnetctl ping" + check_parameter() { local para=$1 local value=$2 -- 1.8.3.1