Whamcloud - gitweb
LU-10391 lnet: handle discovery with Netlink 53/50253/3
authorJames Simmons <jsimmons@infradead.org>
Mon, 23 Oct 2023 14:18:51 +0000 (10:18 -0400)
committerOleg Drokin <green@whamcloud.com>
Wed, 8 Nov 2023 21:59:15 +0000 (21:59 +0000)
Move the LNet discover feature to the Netlink API. This change
enables the detection of remote LNet setups using large NID
addresses. We treat LNet discover as a ping doit function since
the output is nearly identical to pings. Returned are successes
as well as failed attempts to discover the requested NIDs.

Test-Parameters: trivial testlist=sanity-lnet
Change-Id: Id0eb4adcb4561cfae96040086aae85d6ff804259
Signed-off-by: James Simmons <jsimmons@infradead.org>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50253
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/utils/lnetctl.c

index 687e7ca..d68ee29 100644 (file)
@@ -457,6 +457,34 @@ struct lnet_net {
  * the differences.
  */
 
+/** enum lnet_err_attrs                      - LNet error netlink properties
+ *                                     For LNet request of multiple items
+ *                                     sometimes those items exist and
+ *                                     others don't. In the case the
+ *                                     item doesn't exist we return the
+ *                                     error state.
+ *
+ * @LNET_ERR_ATTR_UNSPEC:              unspecified attribute to catch errors
+ *
+ * @LNET_ERR_ATTR_HDR:                 Name of the error header
+ *                                     (NLA_NUL_STRING)
+ * @LNET_ERR_ATTR_TYPE:                        Which LNet function the error is for
+ *                                     (NLA_STRING)
+ * @LNET_ERR_ATTR_ERRNO:               Error code for failure (NLA_S16)
+ * @LNET_ERR_ATTR_DESCR:               Complete error message (NLA_STRING)
+ */
+enum lnet_err_attrs {
+       LNET_ERR_ATTR_UNSPEC = 0,
+
+       LNET_ERR_ATTR_HDR,
+       LNET_ERR_ATTR_TYPE,
+       LNET_ERR_ATTR_ERRNO,
+       LNET_ERR_ATTR_DESCR,
+       __LNET_ERR_ATTR_MAX_PLUS_ONE,
+};
+
+#define LNET_ERR_ATTR_MAX (__LNET_ERR_ATTR_MAX_PLUS_ONE - 1)
+
 /** enum lnet_net_attrs                      - LNet NI netlink properties
  *                                     attributes that describe LNet 'NI'
  *                                     These values are used to piece together
index 49dc3b7..3867c8a 100644 (file)
@@ -294,6 +294,7 @@ static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
 struct lnet_fail_ping {
        struct lnet_processid           lfp_id;
        int                             lfp_errno;
+       char                            lfp_msg[256];   /* failure text, sent as LNET_ERR_ATTR_DESCR */
 };
 
 struct lnet_genl_ping_list {
@@ -310,8 +311,8 @@ static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
                     signed long timeout, struct lnet_genl_ping_list *plist,
                     int n_ids);
 
-static int lnet_discover(struct lnet_process_id id, __u32 force,
-                        struct lnet_process_id __user *ids, int n_ids);
+static int lnet_discover(struct lnet_processid *id, u32 force,
+                        struct lnet_genl_ping_list *dlists);
 
 static int
 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
@@ -4612,17 +4613,50 @@ report_ping_err:
 
        case IOC_LIBCFS_DISCOVER: {
                struct lnet_ioctl_ping_data *discover = arg;
+               struct lnet_process_id __user *ids;
+               struct lnet_genl_ping_list dlists;
+               struct lnet_processid id;
                struct lnet_peer *lp;
+               int count, i;
+
+               if (discover->ping_count <= 0)
+                       return -EINVAL;
+
+               genradix_init(&dlists.lgpl_list);
+               /* If the user buffer has more space than the lnet_interfaces_max,
+                * then only fill it up to lnet_interfaces_max.
+                */
+               if (discover->ping_count > lnet_interfaces_max)
+                       discover->ping_count = lnet_interfaces_max;
 
-               rc = lnet_discover(discover->ping_id, discover->op_param,
-                                  discover->ping_buf,
-                                  discover->ping_count);
+               id.pid = discover->ping_id.pid;
+               lnet_nid4_to_nid(discover->ping_id.nid, &id.nid);
+               rc = lnet_discover(&id, discover->op_param, &dlists);
                if (rc < 0)
-                       return rc;
+                       goto report_discover_err;
+               count = rc;
+
+               ids = discover->ping_buf;
+               for (i = 0; i < count; i++) {
+                       struct lnet_processid *result;
+                       struct lnet_process_id tmpid;
+
+                       result = genradix_ptr(&dlists.lgpl_list, i);
+                       memset(&tmpid, 0, sizeof(tmpid));
+                       tmpid.pid = result->pid;
+                       tmpid.nid = lnet_nid_to_nid4(&result->nid);
+                       if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
+                               rc = -EFAULT;
+                               goto report_discover_err;
+                       }
+
+                       if (i >= discover->ping_count)
+                               break;
+               }
+               rc = 0;
 
                mutex_lock(&the_lnet.ln_api_mutex);
-               lnet_nid4_to_nid(discover->ping_id.nid, &nid);
-               lp = lnet_find_peer(&nid);
+               lp = lnet_find_peer(&id.nid);
                if (lp) {
                        discover->ping_id.nid =
                                lnet_nid_to_nid4(&lp->lp_primary_nid);
@@ -4631,8 +4665,10 @@ report_ping_err:
                }
                mutex_unlock(&the_lnet.ln_api_mutex);
 
-               discover->ping_count = rc;
-               return 0;
+               discover->ping_count = count;
+report_discover_err:
+               genradix_free(&dlists.lgpl_list);
+               return rc;
        }
 
        case IOC_LIBCFS_ADD_UDSP: {
@@ -7836,12 +7872,293 @@ static int lnet_old_ping_show_dump(struct sk_buff *msg,
 }
 #endif
 
+/* Key table for failed discovery requests, indexed by enum lnet_err_attrs.
+ * lnet_ping_cmd() sends it ahead of the per-NID failure records, and only
+ * when at least one requested NID could not be parsed or discovered.
+ */
+static const struct ln_key_list discover_err_props_list = {
+       .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
+       .lkl_list                       = {
+               [LNET_ERR_ATTR_HDR]             = {
+                       .lkp_value              = "manage",
+                       .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NUL_STRING,
+               },
+               [LNET_ERR_ATTR_TYPE]            = {
+                       .lkp_value              = "discover",
+                       .lkp_data_type          = NLA_STRING,
+               },
+               [LNET_ERR_ATTR_ERRNO]           = {
+                       .lkp_value              = "errno",
+                       .lkp_data_type          = NLA_S16,
+               },
+               [LNET_ERR_ATTR_DESCR]           = {
+                       .lkp_value              = "descr",
+                       .lkp_data_type          = NLA_STRING,
+               },
+       },
+};
+
+/* Key table for successful discovery results. It reuses the LNET_PING_ATTR_*
+ * attribute indices but labels the top level "discover". lnet_ping_cmd()
+ * sends it together with ping_peer_ni_list before any result records.
+ */
+static const struct ln_key_list discover_props_list = {
+       .lkl_maxattr                    = LNET_PING_ATTR_MAX,
+       .lkl_list                       = {
+               [LNET_PING_ATTR_HDR]            = {
+                       .lkp_value              = "discover",
+                       .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NUL_STRING,
+               },
+               [LNET_PING_ATTR_PRIMARY_NID]    = {
+                       .lkp_value              = "primary nid",
+                       .lkp_data_type          = NLA_STRING
+               },
+               [LNET_PING_ATTR_ERRNO]          = {
+                       .lkp_value              = "errno",
+                       .lkp_data_type          = NLA_S16
+               },
+               [LNET_PING_ATTR_MULTIRAIL]      = {
+                       .lkp_value              = "Multi-Rail",
+                       .lkp_data_type          = NLA_FLAG
+               },
+               [LNET_PING_ATTR_PEER_NI_LIST]   = {
+                       .lkp_value              = "peer_ni",
+                       .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
+                       .lkp_data_type          = NLA_NESTED
+               },
+       },
+};
+
+/**
+ * lnet_ping_cmd() - Netlink doit handler that runs LNet discovery
+ *
+ * @skb:       request message carrying the list of NIDs to discover
+ * @info:      genetlink request info; supplies the reply port/sequence and
+ *             receives the extended error string on failure
+ *
+ * Every LN_SCALAR_ATTR_VALUE nested in an LN_SCALAR_ATTR_LIST of the request
+ * is parsed as a NID and handed to lnet_discover(); NLM_F_EXCL in the request
+ * flags is passed through as the 'force' argument. Each successful discovery
+ * appends one message to the reply holding the primary NID, the Multi-Rail
+ * flag and the list of peer NIs. NIDs that cannot be parsed or discovered are
+ * collected in dlists.lgpl_failed and reported afterwards under the
+ * discover_err_props_list key table.
+ *
+ * Return: the result of genlmsg_reply() when the reply was built, otherwise
+ * a negative errno (the reply skb is freed in that case).
+ */
+static int lnet_ping_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+       const struct ln_key_list *all[] = {
+               &discover_props_list, &ping_peer_ni_list, NULL
+       };
+       struct nlmsghdr *nlh = nlmsg_hdr(skb);
+       struct genlmsghdr *gnlh = nlmsg_data(nlh);
+       struct nlattr *params = genlmsg_data(gnlh);
+       struct lnet_genl_ping_list dlists;
+       int msg_len, rem, rc = 0, i;
+       bool clear_hdr = false;
+       struct sk_buff *reply;
+       struct nlattr *attr;
+       void *hdr = NULL;
+
+       msg_len = genlmsg_len(gnlh);
+       if (!msg_len) {
+               GENL_SET_ERR_MSG(info, "no configuration");
+               return -ENOMSG;
+       }
+
+       if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
+               GENL_SET_ERR_MSG(info, "only NLM_F_CREATE setting is allowed");
+               return -EINVAL;
+       }
+
+       reply = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+       if (!reply) {
+               GENL_SET_ERR_MSG(info,
+                                "fail to allocate reply");
+               return -ENOMEM;
+       }
+
+       genradix_init(&dlists.lgpl_failed);
+       dlists.lgpl_failed_count = 0;
+       genradix_init(&dlists.lgpl_list);
+       dlists.lgpl_list_count = 0;
+
+       /* The key table for successful results always leads the reply */
+       rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
+                                       info->snd_seq, &lnet_family,
+                                       NLM_F_CREATE | NLM_F_MULTI,
+                                       LNET_CMD_PING, all);
+       if (rc < 0) {
+               GENL_SET_ERR_MSG(info,
+                                "failed to send key table");
+               GOTO(report_err, rc);
+       }
+
+       nla_for_each_attr(attr, params, msg_len, rem) {
+               struct nlattr *nids;
+               int rem2;
+
+               /* We only care about the NID list to discover with */
+               if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
+                       continue;
+
+               nla_for_each_nested(nids, attr, rem2) {
+                       char nid[LNET_NIDSTR_SIZE + 1];
+                       struct lnet_processid id;
+                       struct nlattr *nid_list;
+                       struct lnet_peer *lp;
+                       ssize_t len;
+
+                       if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
+                               continue;
+
+                       memset(nid, 0, sizeof(nid));
+                       rc = nla_strscpy(nid, nids, sizeof(nid));
+                       if (rc < 0) {
+                               GENL_SET_ERR_MSG(info,
+                                                "failed to get NID");
+                               GOTO(report_err, rc);
+                       }
+
+                       len = libcfs_strid(&id, strim(nid));
+                       if (len < 0) {
+                               struct lnet_fail_ping *fail;
+
+                               /* Unparsable NID: record it and keep going so
+                                * the remaining NIDs are still processed.
+                                */
+                               fail = genradix_ptr_alloc(&dlists.lgpl_failed,
+                                                         dlists.lgpl_failed_count++,
+                                                         GFP_KERNEL);
+                               if (!fail) {
+                                       GENL_SET_ERR_MSG(info,
+                                                        "failed to allocate improper NID");
+                                       GOTO(report_err, rc = -ENOMEM);
+                               }
+                               memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
+                               snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
+                                        "cannot parse NID '%s'", strim(nid));
+                               fail->lfp_id = id;
+                               fail->lfp_errno = len;
+                               continue;
+                       }
+
+                       if (LNET_NID_IS_ANY(&id.nid))
+                               continue;
+
+                       rc = lnet_discover(&id,
+                                          info->nlhdr->nlmsg_flags & NLM_F_EXCL,
+                                          &dlists);
+                       if (rc < 0) {
+                               struct lnet_fail_ping *fail;
+
+                               /* Discovery failed: record it for the error
+                                * section of the reply and move on.
+                                */
+                               fail = genradix_ptr_alloc(&dlists.lgpl_failed,
+                                                         dlists.lgpl_failed_count++,
+                                                         GFP_KERNEL);
+                               if (!fail) {
+                                       GENL_SET_ERR_MSG(info,
+                                                        "failed to allocate failed NID");
+                                       GOTO(report_err, rc = -ENOMEM);
+                               }
+                               memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
+                               snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
+                                        "failed to discover %s",
+                                        libcfs_nidstr(&id.nid));
+                               fail->lfp_id = id;
+                               fail->lfp_errno = rc;
+                               continue;
+                       }
+
+                       /* create the genetlink message header */
+                       hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
+                                         &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
+                       if (!hdr) {
+                               GENL_SET_ERR_MSG(info,
+                                                "failed to allocate hdr");
+                               GOTO(report_err, rc = -ENOMEM);
+                       }
+
+                       /* The header attribute is only sent with the first record */
+                       if (!clear_hdr) {
+                               nla_put_string(reply, LNET_PING_ATTR_HDR, "");
+                               clear_hdr = true;
+                       }
+
+                       /* NOTE(review): the ioctl path performs this lookup
+                        * under the_lnet.ln_api_mutex; confirm that calling
+                        * lnet_find_peer()/lnet_peer_decref_locked() here
+                        * without it is intended.
+                        */
+                       lp = lnet_find_peer(&id.nid);
+                       if (lp) {
+                               nla_put_string(reply, LNET_PING_ATTR_PRIMARY_NID,
+                                              libcfs_nidstr(&lp->lp_primary_nid));
+                               if (lnet_peer_is_multi_rail(lp))
+                                       nla_put_flag(reply, LNET_PING_ATTR_MULTIRAIL);
+                               lnet_peer_decref_locked(lp);
+                       }
+
+                       /* nla_nest_start() returns NULL once the reply skb is
+                        * full; nla_nest_end() must never see that NULL.
+                        */
+                       nid_list = nla_nest_start(reply, LNET_PING_ATTR_PEER_NI_LIST);
+                       if (!nid_list) {
+                               GENL_SET_ERR_MSG(info,
+                                                "failed to start peer NI list");
+                               GOTO(report_err, rc = -EMSGSIZE);
+                       }
+
+                       for (i = 0; i < dlists.lgpl_list_count; i++) {
+                               struct lnet_processid *found;
+                               struct nlattr *nid_attr;
+                               char *idstr;
+
+                               found = genradix_ptr(&dlists.lgpl_list, i);
+                               if (nid_is_lo0(&found->nid))
+                                       continue;
+
+                               nid_attr = nla_nest_start(reply, i + 1);
+                               if (!nid_attr) {
+                                       GENL_SET_ERR_MSG(info,
+                                                        "failed to start peer NI entry");
+                                       GOTO(report_err, rc = -EMSGSIZE);
+                               }
+
+                               if (id.pid == LNET_PID_LUSTRE)
+                                       idstr = libcfs_nidstr(&found->nid);
+                               else
+                                       idstr = libcfs_idstr(found);
+                               nla_put_string(reply, LNET_PING_PEER_NI_ATTR_NID, idstr);
+                               nla_nest_end(reply, nid_attr);
+                       }
+                       nla_nest_end(reply, nid_list);
+
+                       genlmsg_end(reply, hdr);
+               }
+       }
+
+       if (dlists.lgpl_failed_count) {
+               int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+               const struct ln_key_list *fail[] = {
+                       &discover_err_props_list, NULL
+               };
+
+               /* Switch the receiver over to the error key table */
+               rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
+                                               info->snd_seq, &lnet_family,
+                                               flags, LNET_CMD_PING, fail);
+               if (rc < 0) {
+                       GENL_SET_ERR_MSG(info,
+                                        "failed to send new key table");
+                       GOTO(report_err, rc);
+               }
+
+               for (i = 0; i < dlists.lgpl_failed_count; i++) {
+                       struct lnet_fail_ping *fail;
+
+                       hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
+                                         &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
+                       if (!hdr) {
+                               GENL_SET_ERR_MSG(info,
+                                                "failed to send failed values");
+                               GOTO(report_err, rc = -ENOMSG);
+                       }
+
+                       fail = genradix_ptr(&dlists.lgpl_failed, i);
+                       if (i == 0)
+                               nla_put_string(reply, LNET_ERR_ATTR_HDR, "");
+
+                       nla_put_string(reply, LNET_ERR_ATTR_TYPE, "\n");
+                       nla_put_s16(reply, LNET_ERR_ATTR_ERRNO,
+                                   fail->lfp_errno);
+                       nla_put_string(reply, LNET_ERR_ATTR_DESCR,
+                                      fail->lfp_msg);
+                       genlmsg_end(reply, hdr);
+               }
+       }
+
+       nlh = nlmsg_put(reply, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
+                       NLM_F_MULTI);
+       if (!nlh) {
+               /* report_err cancels the pending message and frees the reply */
+               GENL_SET_ERR_MSG(info,
+                                "failed to finish message");
+               GOTO(report_err, rc = -EMSGSIZE);
+       }
+
+report_err:
+       genradix_free(&dlists.lgpl_failed);
+       genradix_free(&dlists.lgpl_list);
+
+       if (rc < 0) {
+               genlmsg_cancel(reply, hdr);
+               nlmsg_free(reply);
+       } else {
+               rc = genlmsg_reply(reply, info);
+       }
+
+       return rc;
+}
+
 static const struct genl_multicast_group lnet_mcast_grps[] = {
        { .name =       "ip2net",       },
        { .name =       "net",          },
        { .name =       "peer",         },
        { .name =       "route",        },
        { .name =       "ping",         },
+       { .name =       "discover",     },
 };
 
 static const struct genl_ops lnet_genl_ops[] = {
@@ -7883,6 +8200,7 @@ static const struct genl_ops lnet_genl_ops[] = {
        },
        {
                .cmd            = LNET_CMD_PING,
+               .flags          = GENL_ADMIN_PERM,
 #ifdef HAVE_NETLINK_CALLBACK_START
                .start          = lnet_ping_show_start,
                .dumpit         = lnet_ping_show_dump,
@@ -7890,6 +8208,7 @@ static const struct genl_ops lnet_genl_ops[] = {
                .dumpit         = lnet_old_ping_show_dump,
 #endif
                .done           = lnet_ping_show_done,
+               .doit           = lnet_ping_cmd,
        },
 };
 
@@ -8159,39 +8478,23 @@ fail_ping_buffer_decref:
 }
 
 static int
-lnet_discover(struct lnet_process_id id4, __u32 force,
-             struct lnet_process_id __user *ids, int n_ids)
+lnet_discover(struct lnet_processid *pid, u32 force,
+             struct lnet_genl_ping_list *dlist)
 {
        struct lnet_peer_ni *lpni;
        struct lnet_peer_ni *p;
        struct lnet_peer *lp;
-       struct lnet_process_id *buf;
-       struct lnet_processid id;
        int cpt;
-       int i;
        int rc;
 
-       if (n_ids <= 0 ||
-           id4.nid == LNET_NID_ANY)
+       if (LNET_NID_IS_ANY(&pid->nid))
                return -EINVAL;
 
-       lnet_pid4_to_pid(id4, &id);
-       if (id.pid == LNET_PID_ANY)
-               id.pid = LNET_PID_LUSTRE;
-
-       /*
-        * If the user buffer has more space than the lnet_interfaces_max,
-        * then only fill it up to lnet_interfaces_max.
-        */
-       if (n_ids > lnet_interfaces_max)
-               n_ids = lnet_interfaces_max;
-
-       CFS_ALLOC_PTR_ARRAY(buf, n_ids);
-       if (!buf)
-               return -ENOMEM;
+       if (pid->pid == LNET_PID_ANY)
+               pid->pid = LNET_PID_LUSTRE;
 
        cpt = lnet_net_lock_current();
-       lpni = lnet_peerni_by_nid_locked(&id.nid, NULL, cpt);
+       lpni = lnet_peerni_by_nid_locked(&pid->nid, NULL, cpt);
        if (IS_ERR(lpni)) {
                rc = PTR_ERR(lpni);
                goto out;
@@ -8217,33 +8520,34 @@ lnet_discover(struct lnet_process_id id4, __u32 force,
         * and lookup the lpni again
         */
        lnet_peer_ni_decref_locked(lpni);
-       lpni = lnet_peer_ni_find_locked(&id.nid);
+       lpni = lnet_peer_ni_find_locked(&pid->nid);
        if (!lpni) {
                rc = -ENOENT;
                goto out;
        }
        lp = lpni->lpni_peer_net->lpn_peer;
 
-       i = 0;
+       dlist->lgpl_list_count = 0;
        p = NULL;
        while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
-               buf[i].pid = id.pid;
-               buf[i].nid = lnet_nid_to_nid4(&p->lpni_nid);
-               if (++i >= n_ids)
-                       break;
+               struct lnet_processid *id;
+
+               id = genradix_ptr_alloc(&dlist->lgpl_list,
+                                       dlist->lgpl_list_count++, GFP_KERNEL);
+               if (!id) {
+                       rc = -ENOMEM;
+                       goto out_decref;
+               }
+               id->pid = pid->pid;
+               id->nid = p->lpni_nid;
        }
-       rc = i;
+       rc = dlist->lgpl_list_count;
 
 out_decref:
        lnet_peer_ni_decref_locked(lpni);
 out:
        lnet_net_unlock(cpt);
 
-       if (rc >= 0)
-               if (copy_to_user(ids, buf, rc * sizeof(*buf)))
-                       rc = -EFAULT;
-       CFS_FREE_PTR_ARRAY(buf, n_ids);
-
        return rc;
 }
 
index 2088d5f..df1ac30 100644 (file)
@@ -966,6 +966,9 @@ static void yaml_lnet_print_error(int op, char *cmd, const char *errstr)
        case 0:
                flag = "del";
                break;
+       case -1:
+               flag = "manage";
+               break;
        case NLM_F_DUMP:
        default:
                flag = "show";
@@ -3979,6 +3982,283 @@ old_api:
        return rc;
 }
 
+/**
+ * yaml_lnet_ping_display() - copy a ping/discover reply to stdout as YAML
+ *
+ * @reply:     parser already attached to the Netlink reply stream
+ *
+ * Events are read from @reply and re-emitted on stdout until the document
+ * end event is seen. The value following an "errno" key is parsed and
+ * remembered; when the following "descr" value starts with "failed to " the
+ * matching strerror() text is appended to it before it is printed.
+ *
+ * Return: -1 if the reply contained at least one "errno" key; otherwise the
+ * status of the last libyaml/parse_long() call (1 when everything was
+ * emitted, 0 when libyaml reported a failure).
+ */
+static int yaml_lnet_ping_display(yaml_parser_t *reply)
+{
+       yaml_emitter_t debug;
+       bool done = false;
+       int rc, rc2 = 0;
+       long error = 0;
+
+       rc = yaml_emitter_initialize(&debug);
+       if (rc == 0)
+               goto emitter_error;
+       yaml_emitter_set_output_file(&debug, stdout);
+
+       while (!done) {
+               yaml_event_t event;
+
+               rc = yaml_parser_parse(reply, &event);
+               if (rc == 0)
+                       goto report_reply_error;
+
+               /* Structural events are passed through untouched */
+               if (event.type != YAML_SCALAR_EVENT) {
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+
+                       done = (event.type == YAML_DOCUMENT_END_EVENT);
+                       continue;
+               }
+
+               if (strcmp((char *)event.data.scalar.value, "errno") == 0) {
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+
+                       rc = yaml_parser_parse(reply, &event);
+                       if (rc == 0)
+                               goto report_reply_error;
+
+                       rc = parse_long((char *)event.data.scalar.value,
+                                       &error);
+                       if (rc != 0) {
+                               /* this event was never handed to the
+                                * emitter, so it is still ours to free
+                                */
+                               yaml_event_delete(&event);
+                               goto report_reply_error;
+                       }
+
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+
+                       /* any reported errno makes the whole command fail */
+                       rc2 = -1;
+               } else if (error != 0 &&
+                          strcmp((char *)event.data.scalar.value,
+                                 "descr") == 0) {
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+
+                       rc = yaml_parser_parse(reply, &event);
+                       if (rc == 0)
+                               goto report_reply_error;
+
+                       if (strncmp((char *)event.data.scalar.value,
+                                   "failed to ", strlen("failed to ")) == 0) {
+                               char err[256];
+
+                               snprintf(err, sizeof(err), "%s: %s",
+                                       (char *)event.data.scalar.value,
+                                       strerror(-error));
+                               /* release the parsed event before reusing
+                                * the structure, otherwise its buffers leak
+                                */
+                               yaml_event_delete(&event);
+                               yaml_scalar_event_initialize(&event, NULL,
+                                                            (yaml_char_t *)YAML_STR_TAG,
+                                                            (yaml_char_t *)err,
+                                                            strlen(err), 1, 0,
+                                                            YAML_PLAIN_SCALAR_STYLE);
+                       }
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+
+                       /* this error code has been consumed; do not attach
+                        * it to a later, unrelated description
+                        */
+                       error = 0;
+               } else {
+                       rc = yaml_emitter_emit(&debug, &event);
+                       if (rc == 0)
+                               goto emitter_error;
+               }
+       }
+emitter_error:
+       if (rc == 0)
+               yaml_emitter_log_error(&debug, stderr);
+report_reply_error:
+       yaml_emitter_delete(&debug);
+
+       return rc2 ? rc2 : rc;
+}
+
+/* Emit one plain-style scalar carrying @tag and @value through @output.
+ * Returns the yaml_emitter_emit() status, which callers treat as 0 on
+ * failure.
+ */
+static int yaml_lnet_ping_emit_scalar(yaml_emitter_t *output,
+                                      const char *tag, const char *value)
+{
+       yaml_event_t event;
+
+       yaml_scalar_event_initialize(&event, NULL, (yaml_char_t *)tag,
+                                    (yaml_char_t *)value, strlen(value),
+                                    1, 0, YAML_PLAIN_SCALAR_STYLE);
+       return yaml_emitter_emit(output, &event);
+}
+
+/**
+ * yaml_lnet_ping() - send a ping/discover request over Netlink and print
+ *                   the reply
+ *
+ * @group:     top-level key of the request ("discover" when called from
+ *             jt_discover()); also used when reporting errors
+ * @timeout:   emitted as "timeout" only when it differs from 1000
+ * @src_nidstr: emitted as "source" when not NULL
+ * @start:     index of the first entry of @nids to send
+ * @end:       index one past the last entry of @nids to send
+ * @nids:      array of NID strings
+ * @flags:     Netlink message flags for the request
+ *
+ * The request is the YAML document
+ *     <group>: { [source: ...,] [timeout: ...,] nids: [ ... ] }
+ * and the reply is rendered by yaml_lnet_ping_display().
+ *
+ * Return: 0 on success, -EOPNOTSUPP when the Netlink socket or the parser
+ * cannot be created, -EINVAL when the request cannot be emitted or the reply
+ * cannot be read, otherwise the negative value from
+ * yaml_lnet_ping_display().
+ */
+static int yaml_lnet_ping(char *group, int timeout, char *src_nidstr,
+                         int start, int end, char **nids, int flags)
+{
+       struct nl_sock *sk = NULL;
+       const char *msg = NULL;
+       yaml_emitter_t output;
+       yaml_parser_t reply;
+       yaml_event_t event;
+       int rc, i;
+
+       /* Create the Netlink socket shared by the emitter and the parser */
+       sk = nl_socket_alloc();
+       if (!sk)
+               return -EOPNOTSUPP;
+
+       /* Setup parser to receive Netlink packets */
+       rc = yaml_parser_initialize(&reply);
+       if (rc == 0) {
+               nl_socket_free(sk);
+               return -EOPNOTSUPP;
+       }
+
+       rc = yaml_parser_set_input_netlink(&reply, sk, false);
+       if (rc == 0) {
+               msg = yaml_parser_get_reader_error(&reply);
+               goto free_reply;
+       }
+
+       /* Create Netlink emitter to send request to kernel */
+       rc = yaml_emitter_initialize(&output);
+       if (rc == 0) {
+               msg = "failed to initialize emitter";
+               goto free_reply;
+       }
+
+       rc = yaml_emitter_set_output_netlink(&output, sk, LNET_GENL_NAME,
+                                            LNET_GENL_VERSION, LNET_CMD_PING,
+                                            flags);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_emitter_open(&output);
+       yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_start_event_initialize(&event, NULL,
+                                           (yaml_char_t *)YAML_MAP_TAG,
+                                           1, YAML_ANY_MAPPING_STYLE);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG, group);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_mapping_start_event_initialize(&event, NULL,
+                                           (yaml_char_t *)YAML_MAP_TAG,
+                                           1, YAML_ANY_MAPPING_STYLE);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       if (src_nidstr) {
+               rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG,
+                                               "source");
+               if (rc == 0)
+                       goto emitter_error;
+
+               rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG,
+                                               src_nidstr);
+               if (rc == 0)
+                       goto emitter_error;
+       }
+
+       /* 1000 is the default; only a non-default timeout is sent */
+       if (timeout != 1000) {
+               char timeout_str[23];
+
+               rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG,
+                                               "timeout");
+               if (rc == 0)
+                       goto emitter_error;
+
+               /* timeout is a signed int, so format it as one */
+               snprintf(timeout_str, sizeof(timeout_str), "%d", timeout);
+               rc = yaml_lnet_ping_emit_scalar(&output, YAML_INT_TAG,
+                                               timeout_str);
+               if (rc == 0)
+                       goto emitter_error;
+       }
+
+       rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG, "nids");
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_sequence_start_event_initialize(&event, NULL,
+                                            (yaml_char_t *)YAML_SEQ_TAG,
+                                            1, YAML_FLOW_SEQUENCE_STYLE);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       for (i = start; i < end; i++) {
+               rc = yaml_lnet_ping_emit_scalar(&output, YAML_STR_TAG,
+                                               nids[i]);
+               if (rc == 0)
+                       goto emitter_error;
+       }
+
+       yaml_sequence_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       /* close the inner (parameters) mapping */
+       yaml_mapping_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       /* close the outer (group) mapping */
+       yaml_mapping_end_event_initialize(&event);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       yaml_document_end_event_initialize(&event, 0);
+       rc = yaml_emitter_emit(&output, &event);
+       if (rc == 0)
+               goto emitter_error;
+
+       rc = yaml_emitter_close(&output);
+emitter_error:
+       if (rc == 0) {
+               yaml_emitter_log_error(&output, stderr);
+               rc = -EINVAL;
+       } else {
+               rc = yaml_lnet_ping_display(&reply);
+               if (rc == 0)
+                       msg = yaml_parser_get_reader_error(&reply);
+       }
+       yaml_emitter_delete(&output);
+free_reply:
+       if (rc == 0) {
+               yaml_lnet_print_error(-1, group, msg);
+               rc = -EINVAL;
+       }
+
+       yaml_parser_delete(&reply);
+       nl_socket_free(sk);
+
+       return rc == 1 ? 0 : rc;
+}
+
 static int jt_ping(int argc, char **argv)
 {
        struct cYAML *err_rc = NULL;
@@ -4034,19 +4314,22 @@ static int jt_discover(int argc, char **argv)
 {
        struct cYAML *err_rc = NULL;
        struct cYAML *show_rc = NULL;
+       int flags = NLM_F_CREATE;
        int force = 0;
        int rc = 0, opt;
-
        const char *const short_options = "fh";
        const struct option long_options[] = {
                { .name = "force",      .has_arg = no_argument, .val = 'f' },
                { .name = "help",       .has_arg = no_argument, .val = 'h' },
-               { .name = NULL } };
+               { .name = NULL }
+       };
 
        while ((opt = getopt_long(argc, argv, short_options,
                                  long_options, NULL)) != -1) {
                switch (opt) {
                case 'f':
+                       /* BSD treats NLM_F_CREATE | NLM_F_EXCL as an add */
+                       flags |= NLM_F_EXCL;
                        force = 1;
                        break;
                case 'h':
@@ -4064,6 +4347,13 @@ static int jt_discover(int argc, char **argv)
                return -1;
        }
 
+       rc = yaml_lnet_ping("discover", 1000, NULL, optind, argc, argv,
+                           flags);
+       if (rc <= 0) {
+               if (rc != -EOPNOTSUPP)
+                       return rc;
+       }
+
        for (; optind < argc; optind++)
                rc = lustre_lnet_discover_nid(argv[optind], force, -1, &show_rc,
                                              &err_rc);