Whamcloud - gitweb
LU-17629 utils: support hostname with lustre_lnet_parse_nid_range()
Gerrit change ref: 94/54894/9
author: James Simmons <jsimmons@infradead.org>
Wed, 26 Jun 2024 17:43:35 +0000 (13:43 -0400)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 13 Jul 2024 20:53:16 +0000 (20:53 +0000)

A hostname can map to multiple IP addresses. In that case,
lnetctl commands that use the hostname can resolve to the
wrong IP address. Update lustre_lnet_parse_nid_range() to
accept hostnames and resolve them to the correct IP
address. Update both lnetctl ping and lnetctl discover to
use lustre_lnet_parse_nid_range().

Test-Parameters: trivial testlist=sanity-lnet
Change-Id: I670799edcb04a02380e96c289ba26854b057d978
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54894
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetctl.c
lustre/tests/sanity-lnet.sh

index 5ed6ede..09cb45b 100644 (file)
@@ -2148,7 +2148,9 @@ int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr,
 
                if (!tmp && !tmp2) {
                        char *end = strchr(nidstr, ',');
+                       char orig[LNET_MAX_STR_LEN];
                        struct nid_node *item;
+                       struct lnet_nid NID;
                        int count;
 
                        item = calloc(1, sizeof(struct nid_node));
@@ -2159,8 +2161,20 @@ int lustre_lnet_parse_nid_range(struct nid_node *head, char *nidstr,
                        }
 
                        count = end ? end - nidstr : strlen(nidstr);
-                       snprintf(item->nidstr, sizeof(item->nidstr),
-                                "%s@%.*s", nid, count, nidstr);
+                       snprintf(orig, sizeof(orig), "%s@%.*s", nid, count,
+                                nidstr);
+
+                       /* Allow hostname version of the NID. We have to
+                        * translate them to address type NID.
+                        */
+                       rc = libcfs_strnid(&NID, orig);
+                       if (rc < 0) {
+                               *errmsg = "Unable to parse nidlist: invalid NID in nidstr";
+                               goto err;
+                       }
+
+                       libcfs_nidstr_r(&NID, item->nidstr,
+                                       sizeof(item->nidstr));
                        nl_init_list_head(&item->list);
                        nl_list_add_tail(&item->list, &head->children);
                } else {
index 0856b0b..f381031 100644 (file)
@@ -5081,9 +5081,10 @@ report_reply_error:
        return rc2 ? rc2 : rc;
 }
 
-static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr,
+static int yaml_lnet_ping(char *group, int timeout, struct lnet_nid *src_nid,
                          int start, int end, char **nids, int flags)
 {
+       struct nid_node head, *entry;
        struct nl_sock *sk = NULL;
        const char *msg = NULL;
        yaml_emitter_t output;
@@ -5151,8 +5152,10 @@ static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr,
        if (rc == 0)
                goto emitter_error;
 
-       if (timeout != 1000 || src_nidstr) {
-               if (src_nidstr) {
+       if (timeout != 1000 || (src_nid && nid_addr_is_set(src_nid))) {
+               if (src_nid) {
+                       char *src_nidstr = libcfs_nidstr(src_nid);
+
                        yaml_scalar_event_initialize(&event, NULL,
                                                     (yaml_char_t *)YAML_STR_TAG,
                                                     (yaml_char_t *)"source",
@@ -5212,17 +5215,33 @@ static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr,
        if (rc == 0)
                goto emitter_error;
 
+       NL_INIT_LIST_HEAD(&head.children);
+       nl_init_list_head(&head.list);
        for (i = start; i < end; i++) {
+               rc = lustre_lnet_parse_nid_range(&head, nids[i], &msg);
+               if (rc < 0) {
+                       lustre_lnet_free_list(&head);
+                       yaml_emitter_delete(&output);
+                       errno = rc;
+                       rc = 0;
+                       goto free_reply;
+               }
+       }
+
+       if (nl_list_empty(&head.children))
+               goto skip_nids;
+
+       nl_list_for_each_entry(entry, &head.children, list) {
                yaml_scalar_event_initialize(&event, NULL,
                                             (yaml_char_t *)YAML_STR_TAG,
-                                            (yaml_char_t *)nids[i],
-                                            strlen(nids[i]), 1, 0,
+                                            (yaml_char_t *)entry->nidstr,
+                                            strlen(entry->nidstr), 1, 0,
                                             YAML_PLAIN_SCALAR_STYLE);
                rc = yaml_emitter_emit(&output, &event);
                if (rc == 0)
                        goto emitter_error;
        }
-
+skip_nids:
        yaml_sequence_end_event_initialize(&event);
        rc = yaml_emitter_emit(&output, &event);
        if (rc == 0)
@@ -5270,6 +5289,7 @@ static int jt_ping(int argc, char **argv)
 {
        struct cYAML *err_rc = NULL;
        struct cYAML *show_rc = NULL;
+       struct lnet_nid src = { };
        int timeout = 1000;
        int rc = 0, opt;
        char *src_nidstr = NULL;
@@ -5286,6 +5306,7 @@ static int jt_ping(int argc, char **argv)
                switch (opt) {
                case 's':
                        src_nidstr = optarg;
+                       rc = libcfs_strnid(&src, src_nidstr);
                        break;
                case 't':
                        timeout = 1000 * atol(optarg);
@@ -5300,9 +5321,11 @@ static int jt_ping(int argc, char **argv)
                        return 0;
                }
        }
+       if (rc < 0)
+               return rc;
 
-       rc = yaml_lnet_ping("ping", timeout, src_nidstr, optind, argc,
-                           argv, NLM_F_DUMP);
+       rc = yaml_lnet_ping("ping", timeout, &src, optind, argc, argv,
+                           NLM_F_DUMP);
        if (rc <= 0) {
                if (rc != -EOPNOTSUPP)
                        return rc;
index d9ee55c..7590b12 100755 (executable)
@@ -4164,6 +4164,27 @@ EOF
 }
 run_test 304 "Check locked primary peer nid consolidation"
 
+test_305() {
+       [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE"
+
+       reinit_dlc || return $?
+
+       add_net "${NETTYPE}" "${INTERFACES[0]}" || return $?
+
+       local nid=$($LCTL list_nids)
+
+       do_lnetctl ping ${nid} ||
+               error "pinging self failed $?"
+
+       [[ "${nid%@*}" == "$(hostname -i | awk '{print $1}')" ]] ||
+               skip "IP $(hostname -i) isn't NID $nid"
+
+       nid="$(hostname -s)@${NETTYPE}"
+       do_lnetctl ping $nid ||
+               error "pinging own hostname $nid failed $?"
+}
+run_test 305 "Resolve hostname before lnetctl ping"
+
 check_parameter() {
        local para=$1
        local value=$2