From b62c385e180fbdf85533e334fa63d6b9c6bb2452 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Sat, 23 Dec 2023 13:43:20 -0500 Subject: [PATCH] LU-10003 lnet: implement Netlink version of lnet distance API. Userland can query the distance of a peer using an ioctl. Move this over to Netlink so we can support large NIDs for IPv6 handling. Test-Parameters: trivial testlist=sanity-lnet Change-Id: I090538e4cc55fd26bd61888de659b99bba85a111 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53556 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Cyril Bordage Reviewed-by: Chris Horn Reviewed-by: Frank Sehr Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-types.h | 26 ++++ lnet/include/uapi/linux/lnet/lnet-dlc.h | 1 + lnet/lnet/api-ni.c | 214 ++++++++++++++++++++++++++++++++ lustre/utils/portals.c | 201 ++++++++++++++++++++++++++++-- 4 files changed, 429 insertions(+), 13 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 6083ecb..bc8a8d0 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -1072,6 +1072,32 @@ enum lnet_cpt_of_nid_attr { #define LNET_CPT_OF_NID_ATTR_MAX (__LNET_CPT_OF_NID_ATTR_MAX_PLUS_ONE - 1) +/** enum lnet_peer_dist_attr - Attributes to support + * reporting distance for peers + * + * @LNET_PEER_DIST_ATTR_UNSPEC unspecified attribute to catch + * errors + * @LNET_PEER_DIST_ATTR_HDR Grouping key, for which we just use peer + * (NLA_NUL_STRING) + * @LNET_PEER_DIST_ATTR_NID The NID we collect data for + * (NLA_STRING) + * @LNET_PEER_DIST_ATTR_DIST The distance for the specified + * NID (NLA_U32) + * @LNET_PEER_DIST_ATTR_ORDER The order for the specified NID + * (NLA_U32) + */ +enum lnet_peer_dist_attr { + LNET_PEER_DIST_ATTR_UNSPEC = 0, + + LNET_PEER_DIST_ATTR_HDR, + LNET_PEER_DIST_ATTR_NID, + LNET_PEER_DIST_ATTR_DIST, + LNET_PEER_DIST_ATTR_ORDER, + __LNET_PEER_DIST_ATTR_MAX_PLUS_ONE, +}; + +#define LNET_PEER_DIST_ATTR_MAX 
(__LNET_PEER_DIST_ATTR_MAX_PLUS_ONE - 1) + struct lnet_ni { /* chain on the lnet_net structure */ struct list_head ni_netlist; diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index aefcc88..8b51a99 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -71,6 +71,7 @@ enum lnet_commands { LNET_CMD_CONNS = 5, LNET_CMD_PING = 6, LNET_CMD_CPT_OF_NID = 7, + LNET_CMD_PEER_DIST = 8, __LNET_CMD_MAX_PLUS_ONE }; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 60a2f93..d177a81 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -8443,6 +8443,210 @@ report_err: return rc; } +#define lnet_peer_dist_show_done lnet_peer_ni_show_done + /* Dump start: collect the NIDs from the request into a list kept in cb->args[0] */ +static int lnet_peer_dist_show_start(struct netlink_callback *cb) +{ + struct genlmsghdr *gnlh = nlmsg_data(cb->nlh); +#ifdef HAVE_NL_PARSE_WITH_EXT_ACK + struct netlink_ext_ack *extack = NULL; +#endif + struct lnet_genl_processid_list *plist; + int msg_len = genlmsg_len(gnlh); + struct nlattr *params, *top; + int rem, rc = 0; + +#ifdef HAVE_NL_DUMP_WITH_EXT_ACK + extack = cb->extack; +#endif + mutex_lock(&the_lnet.ln_api_mutex); + if (the_lnet.ln_state != LNET_STATE_RUNNING) { + NL_SET_ERR_MSG(extack, "Network is down"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -ENETDOWN; + } + + msg_len = genlmsg_len(gnlh); /* NOTE(review): redundant, already set at declaration */ + if (!msg_len) { + NL_SET_ERR_MSG(extack, "Missing NID argument(s)"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -ENOENT; + } + + CFS_ALLOC_PTR(plist); + if (!plist) { + NL_SET_ERR_MSG(extack, "No memory for peer NID list"); + mutex_unlock(&the_lnet.ln_api_mutex); + return -ENOMEM; + } + + genradix_init(&plist->lgpl_list); + plist->lgpl_count = 0; + plist->lgpl_index = 0; + cb->args[0] = (long)plist; + + params = genlmsg_data(gnlh); + nla_for_each_attr(top, params, msg_len, rem) { + struct nlattr *nids; + int rem2; + + if (nla_type(top) != LN_SCALAR_ATTR_LIST) + continue; + + nla_for_each_nested(nids, top, rem2) { + 
char nidstr[LNET_NIDSTR_SIZE + 1]; + struct lnet_processid *id; + + if (nla_type(nids) != LN_SCALAR_ATTR_VALUE) + continue; + + memset(nidstr, 0, sizeof(nidstr)); + rc = nla_strscpy(nidstr, nids, sizeof(nidstr)); + if (rc < 0) { + NL_SET_ERR_MSG(extack, + "failed to get NID"); + GOTO(report_err, rc); + } + + id = genradix_ptr_alloc(&plist->lgpl_list, + plist->lgpl_count++, + GFP_KERNEL); + if (!id) { + NL_SET_ERR_MSG(extack, "failed to allocate NID"); + GOTO(report_err, rc = -ENOMEM); + } + + rc = libcfs_strid(id, strim(nidstr)); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "invalid NID"); + GOTO(report_err, rc); + } + rc = 0; /* only rc < 0 is an error; return 0 on success */ + } + } +report_err: + mutex_unlock(&the_lnet.ln_api_mutex); + + if (rc < 0) + lnet_peer_dist_show_done(cb); + + return rc; +} + /* Key table for LNET_CMD_PEER_DIST replies: peer group with nid, distance and order */ +static const struct ln_key_list peer_dist_props_list = { + .lkl_maxattr = LNET_PEER_DIST_ATTR_MAX, + .lkl_list = { + [LNET_PEER_DIST_ATTR_HDR] = { + .lkp_value = "peer", + .lkp_key_format = LNKF_SEQUENCE | LNKF_MAPPING, + .lkp_data_type = NLA_NUL_STRING, + }, + [LNET_PEER_DIST_ATTR_NID] = { + .lkp_value = "nid", + .lkp_data_type = NLA_STRING + }, + [LNET_PEER_DIST_ATTR_DIST] = { + .lkp_value = "distance", + .lkp_data_type = NLA_U32 + }, + [LNET_PEER_DIST_ATTR_ORDER] = { + .lkp_value = "order", + .lkp_data_type = NLA_U32 + }, + }, +}; + /* Dump: emit nid, distance and order from LNetDist() for each listed NID */ +static int lnet_peer_dist_show_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb); +#ifdef HAVE_NL_PARSE_WITH_EXT_ACK + struct netlink_ext_ack *extack = NULL; +#endif + int portid = NETLINK_CB(cb->skb).portid; + int seq = cb->nlh->nlmsg_seq; + int idx = plist->lgpl_index; + int rc = 0; + +#ifdef HAVE_NL_DUMP_WITH_EXT_ACK + extack = cb->extack; +#endif + if (!idx) { /* lgpl_index still 0: send the key table first */ + const struct ln_key_list *all[] = { + &peer_dist_props_list, NULL + }; + + rc = lnet_genl_send_scalar_list(msg, portid, seq, + &lnet_family, + NLM_F_CREATE | NLM_F_MULTI, + LNET_CMD_PEER_DIST, all); + if (rc < 0) { + NL_SET_ERR_MSG(extack, "failed to 
send key table"); + GOTO(send_error, rc); + } + } + + while (idx < plist->lgpl_count) { + struct lnet_processid *id; + void *hdr; + u32 order; + int dist; + + id = genradix_ptr(&plist->lgpl_list, idx++); + if (nid_is_lo0(&id->nid)) + continue; + + dist = LNetDist(&id->nid, &id->nid, &order); + if (dist < 0) { + if (dist == -EHOSTUNREACH) + continue; + + rc = dist; + return rc; /* NOTE(review): bypasses send_error and leaves lgpl_index unchanged - confirm intended */ + } + + hdr = genlmsg_put(msg, portid, seq, &lnet_family, + NLM_F_MULTI, LNET_CMD_PEER_DIST); + if (!hdr) { + NL_SET_ERR_MSG(extack, "failed to send values"); + genlmsg_cancel(msg, hdr); /* NOTE(review): hdr is NULL on this path */ + GOTO(send_error, rc = -EMSGSIZE); + } + + if (idx == 1) /* NOTE(review): idx was post-incremented above, so this only matches list entry 0; no header is sent if that entry was skipped */ + nla_put_string(msg, LNET_PEER_DIST_ATTR_HDR, ""); + + nla_put_string(msg, LNET_PEER_DIST_ATTR_NID, + libcfs_nidstr(&id->nid)); + nla_put_u32(msg, LNET_PEER_DIST_ATTR_DIST, dist); + nla_put_u32(msg, LNET_PEER_DIST_ATTR_ORDER, order); /* NOTE(review): nla_put_*() results are unchecked */ + + genlmsg_end(msg, hdr); + } + + plist->lgpl_index = idx; /* progress marker, read back at the top of this function */ +send_error: + return lnet_nl_send_error(cb->skb, portid, seq, rc); +} + /* When .start is unavailable, run the start step from dumpit while cb->args[0] is unset */ +#ifndef HAVE_NETLINK_CALLBACK_START +static int lnet_old_peer_dist_show_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + if (!cb->args[0]) { + int rc = lnet_peer_dist_show_start(cb); + + if (rc < 0) + return lnet_nl_send_error(cb->skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + rc); + } + + return lnet_peer_dist_show_dump(msg, cb); +} +#endif + static const struct genl_multicast_group lnet_mcast_grps[] = { { .name = "ip2net", }, { .name = "net", }, @@ -8512,6 +8716,16 @@ static const struct genl_ops lnet_genl_ops[] = { #endif .done = lnet_cpt_of_nid_show_done, }, + { + .cmd = LNET_CMD_PEER_DIST, +#ifdef HAVE_NETLINK_CALLBACK_START + .start = lnet_peer_dist_show_start, + .dumpit = lnet_peer_dist_show_dump, +#else + .dumpit = lnet_old_peer_dist_show_dump, +#endif + .done = lnet_peer_dist_show_done, + }, }; static struct genl_family lnet_family = { diff --git a/lustre/utils/portals.c b/lustre/utils/portals.c index e9ebf3f..3f88d63 100644 --- a/lustre/utils/portals.c +++ 
b/lustre/utils/portals.c @@ -584,13 +584,16 @@ old_api: { int jt_ptl_which_nid(int argc, char **argv) { - struct libcfs_ioctl_data data; + struct lnet_nid best_nid = LNET_ANY_NID; + yaml_emitter_t request; + yaml_parser_t reply; + yaml_event_t event; + struct nl_sock *sk; int best_dist = 0; int best_order = 0; - lnet_nid_t best_nid = LNET_NID_ANY; - int dist; - int order; - lnet_nid_t nid; + bool done = false; + int dist = 0; + int order = 0; char *nidstr; int rc; int i; @@ -600,16 +603,188 @@ jt_ptl_which_nid(int argc, char **argv) return 0; } + /* Create Netlink socket used for both the request and the reply */ + sk = nl_socket_alloc(); + if (!sk) + goto old_api; + + /* Set up parser to receive Netlink packets */ + rc = yaml_parser_initialize(&reply); + if (rc == 0) + goto old_api; /* NOTE(review): sk is not freed on this path */ + + rc = yaml_parser_set_input_netlink(&reply, sk, false); + if (rc == 0) + goto free_reply; + + /* Create Netlink emitter to send request to kernel */ + rc = yaml_emitter_initialize(&request); + if (rc == 0) + goto free_reply; + + rc = yaml_emitter_set_output_netlink(&request, sk, LNET_GENL_NAME, + LNET_GENL_VERSION, + LNET_CMD_PEER_DIST, NLM_F_DUMP); + if (rc == 0) + goto emitter_error; + + yaml_emitter_open(&request); + yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + yaml_mapping_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_MAP_TAG, + 1, YAML_ANY_MAPPING_STYLE); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)"peer", + strlen("peer"), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + yaml_sequence_start_event_initialize(&event, NULL, + (yaml_char_t *)YAML_SEQ_TAG, + 1, YAML_BLOCK_SEQUENCE_STYLE); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + for (i = 
1; i < argc; i++) { + struct lnet_nid nid; + nidstr = argv[i]; - nid = libcfs_str2nid(nidstr); - if (nid == LNET_NID_ANY) { + if (strcmp(nidstr, "*") == 0) + nidstr = ""; + + rc = libcfs_strnid(&nid, nidstr); + if (rc < 0 || nid_same(&nid, &LNET_ANY_NID)) { + fprintf(stderr, "Can't parse NID %s\n", nidstr); + return -1; /* NOTE(review): request, reply and sk are not released here */ + } + + yaml_scalar_event_initialize(&event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)nidstr, + strlen(nidstr), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + } + + yaml_sequence_end_event_initialize(&event); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + yaml_mapping_end_event_initialize(&event); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + yaml_document_end_event_initialize(&event, 0); + rc = yaml_emitter_emit(&request, &event); + if (rc == 0) + goto emitter_error; + + rc = yaml_emitter_close(&request); +emitter_error: + if (rc == 0) { + yaml_emitter_log_error(&request, stderr); + rc = -EINVAL; /* NOTE(review): the reply loop below still runs and overwrites rc */ + } + yaml_emitter_delete(&request); + + while (!done) { + rc = yaml_parser_parse(&reply, &event); + if (rc == 0) + break; + + if (event.type != YAML_SCALAR_EVENT) + goto not_scalar; + + /* A matching key scalar is followed by one more parsed event holding its value */ + + if (strcmp((char *)event.data.scalar.value, "nid") == 0) { + yaml_event_delete(&event); + rc = yaml_parser_parse(&reply, &event); + if (rc == 0) { + yaml_event_delete(&event); + break; + } + + nidstr = (char *)event.data.scalar.value; + /* NOTE(review): dist and order were parsed before this nid key, yet the kernel dump puts nid ahead of distance and order - verify the pairing */ + if (nid_same(&best_nid, &LNET_ANY_NID) || + dist < best_dist || + (dist == best_dist && order < best_order)) { + best_dist = dist; + best_order = order; + libcfs_strnid(&best_nid, nidstr); + } + } else if (strcmp((char *)event.data.scalar.value, + "distance") == 0) { + yaml_event_delete(&event); + rc = yaml_parser_parse(&reply, &event); + if (rc == 0) { + yaml_event_delete(&event); + break; + } + + dist = strtol((char *)event.data.scalar.value, NULL, 10); + } else if 
(strcmp((char *)event.data.scalar.value, + "order") == 0) { + yaml_event_delete(&event); + rc = yaml_parser_parse(&reply, &event); + if (rc == 0) { + yaml_event_delete(&event); + break; + } + + order = strtol((char *)event.data.scalar.value, NULL, 10); + } +not_scalar: + done = (event.type == YAML_STREAM_END_EVENT); + yaml_event_delete(&event); + } + +free_reply: + if (rc == 0) { + /* yaml_* functions return 0 for error */ + const char *msg = yaml_parser_get_reader_error(&reply); + + fprintf(stderr, "Unexpected distance: %s\n", msg); + rc = -1; + } else if (rc == 1) { + /* yaml_* functions return 1 for success */ + rc = 0; + } + + yaml_parser_delete(&reply); + nl_socket_free(sk); + goto finished; /* NOTE(review): rc set above is not consulted after finished */ + +old_api: + for (i = 1; i < argc; i++) { + struct libcfs_ioctl_data data; + lnet_nid_t nid4; + + nidstr = argv[i]; + nid4 = libcfs_str2nid(nidstr); + if (nid4 == LNET_NID_ANY) { fprintf(stderr, "Can't parse NID %s\n", nidstr); return -1; } LIBCFS_IOC_INIT(data); - data.ioc_nid = nid; + data.ioc_nid = nid4; rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_DIST, &data); if (rc != 0) { @@ -630,21 +805,21 @@ jt_ptl_which_nid(int argc, char **argv) return -1; } - if (best_nid == LNET_NID_ANY || + if (nid_same(&best_nid, &LNET_ANY_NID) || dist < best_dist || (dist == best_dist && order < best_order)) { best_dist = dist; best_order = order; - best_nid = nid; + lnet_nid4_to_nid(nid4, &best_nid); } } - - if (best_nid == LNET_NID_ANY) { +finished: + if (nid_same(&best_nid, &LNET_ANY_NID)) { fprintf(stderr, "No reachable NID\n"); return -1; } - printf("%s\n", libcfs_nid2str(best_nid)); + printf("%s\n", libcfs_nidstr(&best_nid)); return 0; } -- 1.8.3.1