Whamcloud - gitweb
LU-10003 lnet: migrate fail nid to Netlink 51/55051/5
author James Simmons <jsimmons@infradead.org>
Thu, 23 May 2024 21:25:30 +0000 (17:25 -0400)
committer Oleg Drokin <green@whamcloud.com>
Wed, 5 Jun 2024 04:44:20 +0000 (04:44 +0000)
We have the ability to make peers fail when they reach a specific
threshold, using an ioctl that currently only supports small NIDs.
Move to Netlink so that large NIDs can be used as well. The Netlink
code is written to support more than one peer at a time, even though
the original userland tool only supports setting one peer at a
time.

Test-Parameters: trivial testlist=sanity-lnet
Change-Id: I8e5b38fcb582624530d208fac731183488662138
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55051
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c
lustre/utils/portals.c

index 8c6fda0..18edaf7 100644 (file)
@@ -788,7 +788,7 @@ int lnet_msg_containers_create(void);
 
 char *lnet_health_error2str(enum lnet_msg_hstatus hstatus);
 char *lnet_msgtyp2str(int type);
-int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold);
+int lnet_fail_nid(struct lnet_nid *nid, unsigned int threshold);
 
 /** \addtogroup lnet_fault_simulation @{ */
 
index e5b7b03..34616a9 100644 (file)
  *
  * @LNET_CMD_CONFIGURE:                command to setup LNet stack
  * @LNET_CMD_NETS:             command to manage the LNet networks
+ * @LNET_CMD_PEERS:            command to manage LNet peers
  * @LNET_CMD_ROUTES:           command to manage LNet routes
  * @LNET_CMD_PING:             command to send pings to LNet connections
  * @LNET_CMD_CPT_OF_NID:       command to calculate the CPT of specified NIDs
+ * @LNET_CMD_PEER_DIST:                command to find distance between LNet peers
+ * @LNET_CMD_UDSP:             command to manage LNet UDSP rules
+ * @LNET_CMD_PEER_FAIL:                command to fail LNet peers
  */
 enum lnet_commands {
        LNET_CMD_UNSPEC         = 0,
@@ -51,6 +55,8 @@ enum lnet_commands {
        LNET_CMD_PING           = 6,
        LNET_CMD_CPT_OF_NID     = 7,
        LNET_CMD_PEER_DIST      = 8,
+       LNET_CMD_UDSP           = 9,
+       LNET_CMD_PEER_FAIL      = 10,
 
        __LNET_CMD_MAX_PLUS_ONE
 };
index 94cfaa4..e8307ce 100644 (file)
@@ -1423,9 +1423,9 @@ lnet_unprepare(void)
        /* NB no LNET_LOCK since this is the last reference.  All LND instances
         * have shut down already, so it is safe to unlink and free all
         * descriptors, even those that appear committed to a network op (eg MD
-        * with non-zero pending count) */
-
-       lnet_fail_nid(LNET_NID_ANY, 0);
+        * with non-zero pending count)
+        */
+       lnet_fail_nid(&LNET_ANY_NID, 0);
 
        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
@@ -2884,6 +2884,47 @@ canceled:
 }
 EXPORT_SYMBOL(lnet_genl_send_scalar_list);
 
+/**
+ * nla_extract_val() - step to the next attribute and copy out its value
+ *
+ * @attr:      in: the current attribute (the key); out: the attribute that
+ *             follows it, i.e. the one the value was read from
+ * @rem:       bytes remaining in the enclosing nest; updated by nla_next()
+ * @attr_type: type the following attribute is required to have
+ * @ret:       buffer receiving the value
+ * @ret_size:  size of @ret in bytes
+ * @extack:    extended ACK that receives a human readable error string
+ *
+ * For LN_SCALAR_ATTR_VALUE the payload is copied into @ret as a string with
+ * nla_strscpy(). For LN_SCALAR_ATTR_INT_VALUE the payload is read with
+ * nla_get_s64(), so @ret must point to a 64-bit integer.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int
+nla_extract_val(struct nlattr **attr, int *rem,
+               enum lnet_nl_scalar_attrs attr_type,
+               void *ret, int ret_size,
+               struct netlink_ext_ack *extack)
+{
+       int rc = -EINVAL;
+
+       ENTRY;
+       *attr = nla_next(*attr, rem);
+       /* A key that is the last attribute in its nest has no value; do not
+        * look at memory past the end of the nest.
+        */
+       if (!nla_ok(*attr, *rem)) {
+               NL_SET_ERR_MSG(extack, "Missing value for attribute");
+               RETURN(rc);
+       }
+
+       if (nla_type(*attr) != attr_type) {
+               CDEBUG(D_NET, "nla_type %d expect %d\n", nla_type(*attr),
+                      attr_type);
+               NL_SET_ERR_MSG(extack, "Invalid type for attribute");
+               RETURN(rc);
+       }
+
+       switch (attr_type) {
+       case LN_SCALAR_ATTR_VALUE:
+               rc = nla_strscpy(ret, *attr, ret_size);
+               if (rc < 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Failed to extract value from string attribute");
+               } else {
+                       /* nla_strscpy() returns a length; callers want 0 */
+                       rc = 0;
+               }
+               break;
+       case LN_SCALAR_ATTR_INT_VALUE:
+               if (ret_size == sizeof(s64)) {
+                       *(s64 *)ret = nla_get_s64(*attr);
+                       rc = 0;
+               } else {
+                       NL_SET_ERR_MSG(extack,
+                                      "Invalid buffer size for integer attribute");
+               }
+               break;
+       default:
+               NL_SET_ERR_MSG(extack, "Unrecognized attribute type");
+               break;
+       }
+       RETURN(rc);
+}
+
 static struct genl_family lnet_family;
 
 /**
@@ -4116,7 +4157,8 @@ LNetCtl(unsigned int cmd, void *arg)
                return rc;
        }
        case IOC_LIBCFS_FAIL_NID:
-               return lnet_fail_nid(data->ioc_nid, data->ioc_count);
+               lnet_nid4_to_nid(data->ioc_nid, &nid);
+               return lnet_fail_nid(&nid, data->ioc_count);
 
        case IOC_LIBCFS_ADD_ROUTE: {
                /* default router sensitivity to 1 */
@@ -8866,6 +8908,86 @@ static int lnet_old_peer_dist_show_dump(struct sk_buff *msg,
 }
 #endif
 
+static int lnet_peer_fail_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+       struct nlmsghdr *nlh = nlmsg_hdr(skb);
+       struct genlmsghdr *gnlh = nlmsg_data(nlh);
+       struct nlattr *params = genlmsg_data(gnlh);
+       struct netlink_ext_ack *extack = NULL;
+       int msg_len, rem, rc = 0;
+       struct nlattr *attr;
+
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+       extack = info->extack;
+#endif
+       msg_len = genlmsg_len(gnlh);
+       if (!msg_len) {
+               GENL_SET_ERR_MSG(info, "no configuration");
+               return -ENOMSG;
+       }
+
+       if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
+               GENL_SET_ERR_MSG(info, "invalid configuration");
+               return -EINVAL;
+       }
+
+       nla_for_each_nested(attr, params, rem) {
+               s64 threshold = LNET_MD_THRESH_INF;
+               struct lnet_nid pnid = {};
+               struct nlattr *peer;
+               int rem2;
+
+               if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
+                       continue;
+
+               nla_for_each_nested(peer, attr, rem2) {
+                       if (nla_type(peer) != LN_SCALAR_ATTR_VALUE)
+                               continue;
+
+                       if (nla_strcmp(peer, "nid") == 0) {
+                               char nidstr[LNET_NIDSTR_SIZE];
+
+                               rc = nla_extract_val(&peer, &rem2,
+                                                    LN_SCALAR_ATTR_VALUE,
+                                                    nidstr, sizeof(nidstr),
+                                                    extack);
+                               if (rc < 0)
+                                       GOTO(report_err, rc);
+
+                               rc = libcfs_strnid(&pnid, strim(nidstr));
+                               if (rc < 0) {
+                                       GENL_SET_ERR_MSG(info,
+                                                        "invalid peer NID");
+                                       GOTO(report_err, rc);
+                               }
+                               rc = 0;
+                       } else if (nla_strcmp(peer, "threshold") == 0) {
+                               rc = nla_extract_val(&peer, &rem2,
+                                                    LN_SCALAR_ATTR_INT_VALUE,
+                                                    &threshold, sizeof(threshold),
+                                                    extack);
+                               if (rc < 0) {
+                                       GOTO(report_err, rc);
+                               }
+                       }
+               }
+
+               if (!nid_addr_is_set(&pnid)) {
+                       GENL_SET_ERR_MSG(info, "peer NID missing");
+                       GOTO(report_err, rc);
+               }
+
+               rc = lnet_fail_nid(&pnid, threshold);
+               if (rc < 0) {
+                       GENL_SET_ERR_MSG(info,
+                                        "could not set threshoold for peer NID");
+                       GOTO(report_err, rc);
+               }
+       }
+report_err:
+       return rc;
+}
+
 static const struct genl_multicast_group lnet_mcast_grps[] = {
        { .name =       "ip2net",       },
        { .name =       "net",          },
@@ -8950,6 +9072,11 @@ static const struct genl_ops lnet_genl_ops[] = {
 #endif
                .done           = lnet_peer_dist_show_done,
        },
+       {
+               .cmd            = LNET_CMD_PEER_FAIL,
+               .flags          = GENL_ADMIN_PERM,
+               .doit           = lnet_peer_fail_cmd,
+       },
 };
 
 static struct genl_family lnet_family = {
index ad6504b..9804cac 100644 (file)
@@ -168,15 +168,13 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
 }
 
 int
-lnet_fail_nid(lnet_nid_t nid4, unsigned int threshold)
+lnet_fail_nid(struct lnet_nid *nid, unsigned int threshold)
 {
        struct lnet_test_peer *tp;
        struct list_head *el;
        struct list_head *next;
-       struct lnet_nid nid;
        LIST_HEAD(cull);
 
-       lnet_nid4_to_nid(nid4, &nid);
        /* NB: use lnet_net_lock(0) to serialize operations on test peers */
        if (threshold != 0) {
                /* Adding a new entry */
@@ -184,7 +182,7 @@ lnet_fail_nid(lnet_nid_t nid4, unsigned int threshold)
                if (tp == NULL)
                        return -ENOMEM;
 
-               tp->tp_nid = nid;
+               tp->tp_nid = *nid;
                tp->tp_threshold = threshold;
 
                lnet_net_lock(0);
@@ -199,8 +197,8 @@ lnet_fail_nid(lnet_nid_t nid4, unsigned int threshold)
                tp = list_entry(el, struct lnet_test_peer, tp_list);
 
                if (tp->tp_threshold == 0 ||    /* needs culling anyway */
-                   LNET_NID_IS_ANY(&nid) ||    /* removing all entries */
-                   nid_same(&tp->tp_nid, &nid)) {      /* matched this one */
+                   LNET_NID_IS_ANY(nid) ||     /* removing all entries */
+                   nid_same(&tp->tp_nid, nid)) {       /* matched this one */
                        list_move(&tp->tp_list, &cull);
                }
        }
index dce10e6..37ee203 100644 (file)
@@ -1621,12 +1621,176 @@ int jt_ptl_mynid(int argc, char **argv)
        return 0;
 }
 
+/**
+ * yaml_fail_nid() - ask the kernel over Netlink to fail a peer NID
+ *
+ * @nid:       peer NID placed in the request
+ * @threshold: value sent as "threshold"; left out of the request when it
+ *             equals LNET_MD_THRESH_INF
+ *
+ * Emits the YAML document
+ *
+ *   peer:
+ *     - nid: <nid>
+ *       threshold: <threshold>
+ *
+ * as an LNET_CMD_PEER_FAIL request and then loads the reply.
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the Netlink socket or the reply
+ * parser could not be created, -EINVAL on any other failure.
+ */
+int yaml_fail_nid(struct lnet_nid *nid, unsigned int threshold)
+{
+       const char *nidstr;
+       yaml_emitter_t request;
+       yaml_parser_t reply;
+       yaml_event_t event;
+       struct nl_sock *sk;
+       int rc;
+
+       /* Create Netlink emitter to send request to kernel */
+       sk = nl_socket_alloc();
+       if (!sk) {
+               return -EOPNOTSUPP;
+       }
+
+       /* Setup parser to receive Netlink packets */
+       rc = yaml_parser_initialize(&reply);
+       if (rc == 0) {
+               yaml_parser_log_error(&reply, stderr, NULL);
+               nl_socket_free(sk);
+               return -EOPNOTSUPP;
+       }
+
+       rc = yaml_parser_set_input_netlink(&reply, sk, false);
+       if (rc == 0)
+               goto free_reply;
+
+       rc = yaml_emitter_initialize(&request);
+       if (rc == 0)
+               goto emitter_error;
+
+       rc = yaml_emitter_set_output_netlink(&request, sk, LNET_GENL_NAME,
+                                            LNET_GENL_VERSION,
+                                            LNET_CMD_PEER_FAIL, 0);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_emitter_open(&request);
+       yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       /* Top level mapping: "peer" -> sequence of per-peer mappings */
+       yaml_mapping_start_event_initialize(&event, NULL,
+                                           (yaml_char_t *)YAML_MAP_TAG,
+                                           1, YAML_ANY_MAPPING_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_scalar_event_initialize(&event, NULL,
+                                    (yaml_char_t *)YAML_STR_TAG,
+                                    (yaml_char_t *)"peer",
+                                    strlen("peer"), 1, 0,
+                                    YAML_PLAIN_SCALAR_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_sequence_start_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_SEQ_TAG,
+                                            1, YAML_BLOCK_SEQUENCE_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_start_event_initialize(&event, NULL,
+                                           (yaml_char_t *)YAML_MAP_TAG,
+                                           1, YAML_BLOCK_MAPPING_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_scalar_event_initialize(&event, NULL,
+                                    (yaml_char_t *)YAML_STR_TAG,
+                                    (yaml_char_t *)"nid",
+                                    strlen("nid"), 1, 0,
+                                    YAML_PLAIN_SCALAR_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       nidstr = libcfs_nidstr(nid);
+       yaml_scalar_event_initialize(&event, NULL,
+                                    (yaml_char_t *)YAML_STR_TAG,
+                                    (yaml_char_t *)nidstr,
+                                    strlen(nidstr), 1, 0,
+                                    YAML_PLAIN_SCALAR_STYLE);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       if (threshold != LNET_MD_THRESH_INF) {
+               char numstr[INT_STRING_LEN];
+
+               yaml_scalar_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_STR_TAG,
+                                            (yaml_char_t *)"threshold",
+                                            strlen("threshold"), 1, 0,
+                                            YAML_PLAIN_SCALAR_STYLE);
+               rc = yaml_emitter_emit(&request, &event);
+               if (rc == 0)
+                       goto emitter_error;
+
+               /* threshold is unsigned, so it must be formatted with %u */
+               snprintf(numstr, sizeof(numstr), "%u", threshold);
+               yaml_scalar_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_INT_TAG,
+                                            (yaml_char_t *)numstr,
+                                            strlen(numstr), 1, 0,
+                                            YAML_PLAIN_SCALAR_STYLE);
+               rc = yaml_emitter_emit(&request, &event);
+               if (rc == 0)
+                       goto emitter_error;
+       }
+
+       yaml_mapping_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_sequence_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_document_end_event_initialize(&event, 0);
+       rc = yaml_emitter_emit(&request, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       rc = yaml_emitter_close(&request);
+emitter_error:
+       if (rc == 0) {
+               yaml_emitter_log_error(&request, stderr);
+               rc = -EINVAL;
+       } else {
+               yaml_document_t errmsg;
+
+               /* The request went out; the reply tells us if it worked */
+               rc = yaml_parser_load(&reply, &errmsg);
+               if (rc == 0) {
+                       const char *msg = yaml_parser_get_reader_error(&reply);
+
+                       fprintf(stderr, "IOC_LIBCFS_FAIL_NID failed: %s\n",
+                               msg);
+                       rc = -EINVAL;
+               }
+               yaml_document_delete(&errmsg);
+       }
+       yaml_emitter_delete(&request);
+free_reply:
+       if (rc == 0) {
+               /* Only reached with rc == 0 when setting up the reply parser
+                * failed; turn it into an errno so it is not returned as
+                * success.
+                */
+               yaml_parser_log_error(&reply, stderr, NULL);
+               rc = -EINVAL;
+       }
+       yaml_parser_delete(&reply);
+       nl_socket_free(sk);
+
+       /* libyaml reports success as 1; callers expect 0 */
+       return rc == 1 ? 0 : rc;
+}
+
 int
 jt_ptl_fail_nid(int argc, char **argv)
 {
        int rc;
-       lnet_nid_t nid;
+       lnet_nid_t nid4;
        int threshold;
+       struct lnet_nid nid;
        struct libcfs_ioctl_data data;
 
        if (argc < 2 || argc > 3) {
@@ -1635,7 +1799,7 @@ jt_ptl_fail_nid(int argc, char **argv)
                return 0;
        }
 
-       if (!libcfs_str2anynid(&nid, argv[1])) {
+       if (!libcfs_str2anynid(&nid4, argv[1])) {
                fprintf(stderr, "Can't parse nid \"%s\"\n", argv[1]);
                return -1;
        }
@@ -1647,8 +1811,13 @@ jt_ptl_fail_nid(int argc, char **argv)
                return -1;
        }
 
+       lnet_nid4_to_nid(nid4, &nid);
+       rc = yaml_fail_nid(&nid, threshold);
+       if (rc <= 0 && rc != -EOPNOTSUPP)
+               return rc;
+
        LIBCFS_IOC_INIT(data);
-       data.ioc_nid = nid;
+       data.ioc_nid = nid4;
        data.ioc_count = threshold;
 
        rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_FAIL_NID, &data);