Whamcloud - gitweb
LU-10003 lnet: implement Netlink version of lnet distance API. 56/53556/2
authorJames Simmons <jsimmons@infradead.org>
Sat, 23 Dec 2023 18:43:20 +0000 (13:43 -0500)
committerOleg Drokin <green@whamcloud.com>
Wed, 10 Jan 2024 07:28:18 +0000 (07:28 +0000)
Userland can query the distance of a peer using an ioctl. Move
this over to Netlink so we can support large NIDs for IPv6
handling.

Test-Parameters: trivial testlist=sanity-lnet
Change-Id: I090538e4cc55fd26bd61888de659b99bba85a111
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53556
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-types.h
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/lnet/api-ni.c
lustre/utils/portals.c

index 6083ecb..bc8a8d0 100644 (file)
@@ -1072,6 +1072,32 @@ enum lnet_cpt_of_nid_attr {
 
 #define LNET_CPT_OF_NID_ATTR_MAX (__LNET_CPT_OF_NID_ATTR_MAX_PLUS_ONE - 1)
 
+/** enum lnet_peer_dist_attr                 - Attributes to support
+ *                                             reporting distance for peers
+ *
+ * @LNET_PEER_DIST_ATTR_UNSPEC                 unspecified attribute to catch
+ *                                             errors
+ * @LNET_PEER_DIST_ATTR_HDR                    Grouping which we just use peer
+ *                                             (NLA_NUL_STRING)
+ * @LNET_PEER_DIST_ATTR_NID                    The NID we collect data for
+ *                                             (NLA_STRING)
+ * @LNET_PEER_DIST_ATTR_DIST                   The distance for the specified
+ *                                             NID (NLA_U32)
+ * @LNET_PEER_DIST_ATTR_ORDER                  The order for the specified NID
+ *                                             (NLA_U32)
+ */
+enum lnet_peer_dist_attr {
+       LNET_PEER_DIST_ATTR_UNSPEC = 0,
+
+       LNET_PEER_DIST_ATTR_HDR,
+       LNET_PEER_DIST_ATTR_NID,
+       LNET_PEER_DIST_ATTR_DIST,
+       LNET_PEER_DIST_ATTR_ORDER,
+       __LNET_PEER_DIST_ATTR_MAX_PLUS_ONE,
+};
+
+#define LNET_PEER_DIST_ATTR_MAX (__LNET_PEER_DIST_ATTR_MAX_PLUS_ONE - 1)
+
 struct lnet_ni {
        /* chain on the lnet_net structure */
        struct list_head        ni_netlist;
index aefcc88..8b51a99 100644 (file)
@@ -71,6 +71,7 @@ enum lnet_commands {
        LNET_CMD_CONNS          = 5,
        LNET_CMD_PING           = 6,
        LNET_CMD_CPT_OF_NID     = 7,
+       LNET_CMD_PEER_DIST      = 8,
 
        __LNET_CMD_MAX_PLUS_ONE
 };
index 60a2f93..d177a81 100644 (file)
@@ -8443,6 +8443,210 @@ report_err:
        return rc;
 }
 
+#define lnet_peer_dist_show_done       lnet_peer_ni_show_done
+
+static int lnet_peer_dist_show_start(struct netlink_callback *cb)
+{
+       struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+       struct netlink_ext_ack *extack = NULL;
+#endif
+       struct lnet_genl_processid_list *plist;
+       int msg_len = genlmsg_len(gnlh);
+       struct nlattr *params, *top;
+       int rem, rc = 0;
+
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+       extack = cb->extack;
+#endif
+       mutex_lock(&the_lnet.ln_api_mutex);
+       if (the_lnet.ln_state != LNET_STATE_RUNNING) {
+               NL_SET_ERR_MSG(extack, "Network is down");
+               mutex_unlock(&the_lnet.ln_api_mutex);
+               return -ENETDOWN;
+       }
+
+       msg_len = genlmsg_len(gnlh);
+       if (!msg_len) {
+               NL_SET_ERR_MSG(extack, "Missing NID argument(s)");
+               mutex_unlock(&the_lnet.ln_api_mutex);
+               return -ENOENT;
+       }
+
+       CFS_ALLOC_PTR(plist);
+       if (!plist) {
+               NL_SET_ERR_MSG(extack, "No memory for peer NID list");
+               mutex_unlock(&the_lnet.ln_api_mutex);
+               return -ENOMEM;
+       }
+
+       genradix_init(&plist->lgpl_list);
+       plist->lgpl_count = 0;
+       plist->lgpl_index = 0;
+       cb->args[0] = (long)plist;
+
+       params = genlmsg_data(gnlh);
+       nla_for_each_attr(top, params, msg_len, rem) {
+               struct nlattr *nids;
+               int rem2;
+
+               if (nla_type(top) != LN_SCALAR_ATTR_LIST)
+                       continue;
+
+               nla_for_each_nested(nids, top, rem2) {
+                       char nidstr[LNET_NIDSTR_SIZE + 1];
+                       struct lnet_processid *id;
+
+                       if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
+                               continue;
+
+                       memset(nidstr, 0, sizeof(nidstr));
+                       rc = nla_strscpy(nidstr, nids, sizeof(nidstr));
+                       if (rc < 0) {
+                               NL_SET_ERR_MSG(extack,
+                                              "failed to get NID");
+                               GOTO(report_err, rc);
+                       }
+
+                       id = genradix_ptr_alloc(&plist->lgpl_list,
+                                               plist->lgpl_count++,
+                                               GFP_KERNEL);
+                       if (!id) {
+                               NL_SET_ERR_MSG(extack, "failed to allocate NID");
+                               GOTO(report_err, rc = -ENOMEM);
+                       }
+
+                       rc = libcfs_strid(id, strim(nidstr));
+                       if (rc < 0) {
+                               NL_SET_ERR_MSG(extack, "invalid NID");
+                               GOTO(report_err, rc);
+                       }
+                       rc = 0;
+               }
+       }
+report_err:
+       mutex_unlock(&the_lnet.ln_api_mutex);
+
+       if (rc < 0)
+               lnet_peer_dist_show_done(cb);
+
+       return rc;
+}
+
+static const struct ln_key_list peer_dist_props_list = {
+       .lkl_maxattr                    = LNET_PEER_DIST_ATTR_MAX,
+       .lkl_list                       = {
+               [LNET_PEER_DIST_ATTR_HDR]       = {
+                       .lkp_value              = "peer",
+                       .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NUL_STRING,
+               },
+               [LNET_PEER_DIST_ATTR_NID]       = {
+                       .lkp_value              = "nid",
+                       .lkp_data_type          = NLA_STRING
+               },
+               [LNET_PEER_DIST_ATTR_DIST]      = {
+                       .lkp_value              = "distance",
+                       .lkp_data_type          = NLA_U32
+               },
+               [LNET_PEER_DIST_ATTR_ORDER]     = {
+                       .lkp_value              = "order",
+                       .lkp_data_type          = NLA_U32
+               },
+       },
+};
+
+static int lnet_peer_dist_show_dump(struct sk_buff *msg,
+                                   struct netlink_callback *cb)
+{
+       struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+       struct netlink_ext_ack *extack = NULL;
+#endif
+       int portid = NETLINK_CB(cb->skb).portid;
+       int seq = cb->nlh->nlmsg_seq;
+       int idx = plist->lgpl_index;
+       int rc = 0;
+
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+       extack = cb->extack;
+#endif
+       if (!idx) {
+               const struct ln_key_list *all[] = {
+                       &peer_dist_props_list, NULL
+               };
+
+               rc = lnet_genl_send_scalar_list(msg, portid, seq,
+                                               &lnet_family,
+                                               NLM_F_CREATE | NLM_F_MULTI,
+                                               LNET_CMD_PEER_DIST, all);
+               if (rc < 0) {
+                       NL_SET_ERR_MSG(extack, "failed to send key table");
+                       GOTO(send_error, rc);
+               }
+       }
+
+       while (idx < plist->lgpl_count) {
+               struct lnet_processid *id;
+               void *hdr;
+               u32 order;
+               int dist;
+
+               id = genradix_ptr(&plist->lgpl_list, idx++);
+               if (nid_is_lo0(&id->nid))
+                       continue;
+
+               dist = LNetDist(&id->nid, &id->nid, &order);
+               if (dist < 0) {
+                       if (dist == -EHOSTUNREACH)
+                               continue;
+
+                       rc = dist;
+                       return rc;
+               }
+
+               hdr = genlmsg_put(msg, portid, seq, &lnet_family,
+                                 NLM_F_MULTI, LNET_CMD_PEER_DIST);
+               if (!hdr) {
+                       NL_SET_ERR_MSG(extack, "failed to send values");
+                       genlmsg_cancel(msg, hdr);
+                       GOTO(send_error, rc = -EMSGSIZE);
+               }
+
+               if (idx == 1)
+                       nla_put_string(msg, LNET_PEER_DIST_ATTR_HDR, "");
+
+               nla_put_string(msg, LNET_PEER_DIST_ATTR_NID,
+                              libcfs_nidstr(&id->nid));
+               nla_put_u32(msg, LNET_PEER_DIST_ATTR_DIST, dist);
+               nla_put_u32(msg, LNET_PEER_DIST_ATTR_ORDER, order);
+
+               genlmsg_end(msg, hdr);
+       }
+
+       plist->lgpl_index = idx;
+send_error:
+       return lnet_nl_send_error(cb->skb, portid, seq, rc);
+}
+
+#ifndef HAVE_NETLINK_CALLBACK_START
+static int lnet_old_peer_dist_show_dump(struct sk_buff *msg,
+                                       struct netlink_callback *cb)
+{
+       if (!cb->args[0]) {
+               int rc = lnet_peer_dist_show_start(cb);
+
+               if (rc < 0)
+                       return lnet_nl_send_error(cb->skb,
+                                                 NETLINK_CB(cb->skb).portid,
+                                                 cb->nlh->nlmsg_seq,
+                                                 rc);
+       }
+
+       return lnet_peer_dist_show_dump(msg, cb);
+}
+#endif
+
 static const struct genl_multicast_group lnet_mcast_grps[] = {
        { .name =       "ip2net",       },
        { .name =       "net",          },
@@ -8512,6 +8716,16 @@ static const struct genl_ops lnet_genl_ops[] = {
 #endif
                .done           = lnet_cpt_of_nid_show_done,
        },
+       {
+               .cmd            = LNET_CMD_PEER_DIST,
+#ifdef HAVE_NETLINK_CALLBACK_START
+               .start          = lnet_peer_dist_show_start,
+               .dumpit         = lnet_peer_dist_show_dump,
+#else
+               .dumpit         = lnet_old_peer_dist_show_dump,
+#endif
+               .done           = lnet_peer_dist_show_done,
+       },
 };
 
 static struct genl_family lnet_family = {
index e9ebf3f..3f88d63 100644 (file)
@@ -584,13 +584,16 @@ old_api: {
 int
 jt_ptl_which_nid(int argc, char **argv)
 {
-       struct libcfs_ioctl_data data;
+       struct lnet_nid best_nid = LNET_ANY_NID;
+       yaml_emitter_t request;
+       yaml_parser_t reply;
+       yaml_event_t event;
+       struct nl_sock *sk;
        int best_dist = 0;
        int best_order = 0;
-       lnet_nid_t   best_nid = LNET_NID_ANY;
-       int dist;
-       int order;
-       lnet_nid_t nid;
+       bool done = false;
+       int dist = 0;
+       int order = 0;
        char *nidstr;
        int rc;
        int i;
@@ -600,16 +603,188 @@ jt_ptl_which_nid(int argc, char **argv)
                return 0;
        }
 
+       /* Create Netlink emitter to send request to kernel */
+       sk = nl_socket_alloc();
+       if (!sk)
+               goto old_api;
+
+       /* Setup parser to recieve Netlink packets */
+       rc = yaml_parser_initialize(&reply);
+       if (rc == 0)
+               goto old_api;
+
+       rc = yaml_parser_set_input_netlink(&reply, sk, false);
+       if (rc == 0)
+               goto free_reply;
+
+       /* Create Netlink emitter to send request to kernel */
+       rc = yaml_emitter_initialize(&request);
+       if (rc == 0)
+               goto free_reply;
+
+       rc = yaml_emitter_set_output_netlink(&request, sk, LNET_GENL_NAME,
+                                            LNET_GENL_VERSION,
+                                            LNET_CMD_PEER_DIST, NLM_F_DUMP);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_emitter_open(&request);
+       yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_start_event_initialize(&event, NULL,
+                                           (yaml_char_t *)YAML_MAP_TAG,
+                                           1, YAML_ANY_MAPPING_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_scalar_event_initialize(&event, NULL,
+                                    (yaml_char_t *)YAML_STR_TAG,
+                                    (yaml_char_t *)"peer",
+                                    strlen("peer"), 1, 0,
+                                    YAML_PLAIN_SCALAR_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_sequence_start_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_SEQ_TAG,
+                                            1, YAML_BLOCK_SEQUENCE_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
        for (i = 1; i < argc; i++) {
+               struct lnet_nid nid;
+
                nidstr = argv[i];
-               nid = libcfs_str2nid(nidstr);
-               if (nid == LNET_NID_ANY) {
+               if (strcmp(nidstr, "*") == 0)
+                       nidstr = "<?>";
+
+               rc = libcfs_strnid(&nid, nidstr);
+               if (rc < 0 || nid_same(&nid, &LNET_ANY_NID)) {
+                       fprintf(stderr, "Can't parse NID %s\n", nidstr);
+                       return -1;
+               }
+
+               yaml_scalar_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_STR_TAG,
+                                            (yaml_char_t *)nidstr,
+                                            strlen(nidstr), 1, 0,
+                                            YAML_PLAIN_SCALAR_STYLE);
+               rc = yaml_emitter_emit(&request, &event);
+               if (rc == 0)
+                       goto emitter_error;
+       }
+
+       yaml_sequence_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_document_end_event_initialize(&event, 0);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       rc = yaml_emitter_close(&request);
+emitter_error:
+       if (rc == 0) {
+               yaml_emitter_log_error(&request, stderr);
+               rc = -EINVAL;
+       }
+       yaml_emitter_delete(&request);
+
+       while (!done) {
+               rc = yaml_parser_parse(&reply, &event);
+               if (rc == 0)
+                       break;
+
+               if (event.type != YAML_SCALAR_EVENT)
+                       goto not_scalar;
+
+
+               if (strcmp((char *)event.data.scalar.value, "nid") == 0) {
+                       yaml_event_delete(&event);
+                       rc = yaml_parser_parse(&reply, &event);
+                       if (rc == 0) {
+                               yaml_event_delete(&event);
+                               break;
+                       }
+
+                       nidstr = (char *)event.data.scalar.value;
+
+                       if (nid_same(&best_nid, &LNET_ANY_NID) ||
+                           dist < best_dist ||
+                           (dist == best_dist && order < best_order)) {
+                               best_dist = dist;
+                               best_order = order;
+                               libcfs_strnid(&best_nid, nidstr);
+                       }
+               } else if (strcmp((char *)event.data.scalar.value,
+                                 "distance") == 0) {
+                       yaml_event_delete(&event);
+                       rc = yaml_parser_parse(&reply, &event);
+                       if (rc == 0) {
+                               yaml_event_delete(&event);
+                               break;
+                       }
+
+                       dist = strtol((char *)event.data.scalar.value, NULL, 10);
+               } else if (strcmp((char *)event.data.scalar.value,
+                                 "order") == 0) {
+                       yaml_event_delete(&event);
+                       rc = yaml_parser_parse(&reply, &event);
+                       if (rc == 0) {
+                               yaml_event_delete(&event);
+                               break;
+                       }
+
+                       order = strtol((char *)event.data.scalar.value, NULL, 10);
+               }
+not_scalar:
+               done = (event.type == YAML_STREAM_END_EVENT);
+               yaml_event_delete(&event);
+       }
+
+free_reply:
+       if (rc == 0) {
+               /* yaml_* functions return 0 for error */
+               const char *msg = yaml_parser_get_reader_error(&reply);
+
+               fprintf(stderr, "Unexpected distance: %s\n", msg);
+               rc = -1;
+       } else if (rc == 1) {
+               /* yaml_* functions return 1 for success */
+               rc = 0;
+       }
+
+       yaml_parser_delete(&reply);
+       nl_socket_free(sk);
+       goto finished;
+
+old_api:
+       for (i = 1; i < argc; i++) {
+               struct libcfs_ioctl_data data;
+               lnet_nid_t nid4;
+
+               nidstr = argv[i];
+               nid4 = libcfs_str2nid(nidstr);
+               if (nid4 == LNET_NID_ANY) {
                        fprintf(stderr, "Can't parse NID %s\n", nidstr);
                        return -1;
                }
 
                LIBCFS_IOC_INIT(data);
-               data.ioc_nid = nid;
+               data.ioc_nid = nid4;
 
                rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_DIST, &data);
                if (rc != 0) {
@@ -630,21 +805,21 @@ jt_ptl_which_nid(int argc, char **argv)
                        return -1;
                }
 
-               if (best_nid == LNET_NID_ANY ||
+               if (nid_same(&best_nid, &LNET_ANY_NID) ||
                    dist < best_dist ||
                    (dist == best_dist && order < best_order)) {
                        best_dist = dist;
                        best_order = order;
-                       best_nid = nid;
+                       lnet_nid4_to_nid(nid4, &best_nid);
                }
        }
-
-       if (best_nid == LNET_NID_ANY) {
+finished:
+       if (nid_same(&best_nid, &LNET_ANY_NID)) {
                fprintf(stderr, "No reachable NID\n");
                return -1;
        }
 
-       printf("%s\n", libcfs_nid2str(best_nid));
+       printf("%s\n", libcfs_nidstr(&best_nid));
        return 0;
 }