return rc;
}
+struct lnet_genl_debug_recovery_list { /* per-dump state kept in cb->args[0] */
+ unsigned int lgdrl_index; /* next NID slot the dump handler will emit */
+ unsigned int lgdrl_count; /* number of NIDs collected by the start handler */
+ unsigned int lgdrl_len; /* byte size of the lgdrl_keys allocation */
+ struct ln_key_list *lgdrl_keys; /* key table built on the first dump pass */
+ enum lnet_health_type lgdrl_type; /* recovery queue selected by the request */
+ GENRADIX(struct lnet_nid) lgdrl_nids; /* NIDs copied from that recovery queue */
+};
+
+/* Fetch the recovery-dump context that the start handler parked in
+ * cb->args[0]; the result is NULL when that slot holds 0.
+ */
+static inline struct lnet_genl_debug_recovery_list *
+lnet_debug_recovery_dump_ctx(struct netlink_callback *cb)
+{
+	long ctx = cb->args[0];
+
+	return (struct lnet_genl_debug_recovery_list *)ctx;
+}
+
+/* Netlink "done" handler: release everything that hangs off the dump
+ * context (generated key names, the key table, the NID radix array and
+ * the context itself), then clear cb->args[0].  Does nothing but the
+ * clearing when no context is attached.  Always returns 0.
+ */
+static int lnet_debug_recovery_show_done(struct netlink_callback *cb)
+{
+	struct lnet_genl_debug_recovery_list *drlist;
+	struct ln_key_props *props;
+	int n;
+
+	ENTRY;
+	drlist = lnet_debug_recovery_dump_ctx(cb);
+	if (!drlist)
+		goto clear_slot;
+
+	if (drlist->lgdrl_keys) {
+		/* "nid-0" comes from the static template; only the names
+		 * for slots 1..count-1 were allocated by the dump handler
+		 */
+		for (n = 1; n < drlist->lgdrl_count; n++) {
+			props = drlist->lgdrl_keys->lkl_list +
+				(n + LNET_DBG_RECOV_ATTR_MAX);
+			kfree(props->lkp_value);
+		}
+		LIBCFS_FREE(drlist->lgdrl_keys,
+			    drlist->lgdrl_len);
+	}
+	genradix_free(&drlist->lgdrl_nids);
+	CFS_FREE_PTR(drlist);
+
+clear_slot:
+	cb->args[0] = 0;
+
+	RETURN(0);
+}
+
+/* Netlink "start" handler for LNET_CMD_DBG_RECOV dumps.
+ *
+ * Reads the "queue_type" value from the request, allocates the dump
+ * context, parks it in cb->args[0] and, while holding the exclusive net
+ * lock, copies every NID found on the selected recovery queue into the
+ * context's radix array.
+ *
+ * Returns 0 once at least one NID has been collected.  Otherwise:
+ * -ENOMSG for an empty request, -EINVAL for a malformed one, -ENOMEM
+ * on allocation failure, -ENOENT when nothing was collected, or the
+ * error reported by nla_extract_val().  Exits through the report_error
+ * label run lnet_debug_recovery_show_done() when rc is negative.
+ */
+static int lnet_debug_recovery_show_start(struct netlink_callback *cb)
+{
+	struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
+	struct lnet_genl_debug_recovery_list *drlist;
+	struct netlink_ext_ack *extack = NULL;
+	enum lnet_health_type type = -1;
+	struct nlattr *head, *attr;
+	struct lnet_nid *dst;
+	unsigned int slot;
+	int payload, left;
+	int rc = 0;
+
+	ENTRY;
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+	extack = cb->extack;
+#endif
+	payload = genlmsg_len(gnlh);
+	if (payload == 0) {
+		NL_SET_ERR_MSG(extack, "No configuration");
+		RETURN(-ENOMSG);
+	}
+
+	head = genlmsg_data(gnlh);
+	if ((nla_type(head) & LN_SCALAR_ATTR_LIST) == 0) {
+		NL_SET_ERR_MSG(extack, "invalid configuration");
+		RETURN(-EINVAL);
+	}
+
+	/* only the value that follows a "queue_type" key is of interest */
+	nla_for_each_attr(attr, head, payload, left) {
+		s64 val;
+
+		if (nla_type(attr) == LN_SCALAR_ATTR_VALUE &&
+		    nla_strcmp(attr, "queue_type") == 0) {
+			rc = nla_extract_val(&attr, &left,
+					     LN_SCALAR_ATTR_INT_VALUE,
+					     (void *)&val, sizeof(val),
+					     extack);
+			if (rc < 0)
+				GOTO(report_error, rc);
+			type = val;
+		}
+	}
+	CDEBUG(D_NET, "Got queue_type: %d\n", type);
+
+	CFS_ALLOC_PTR(drlist);
+	if (!drlist) {
+		NL_SET_ERR_MSG(extack, "No memory for recovery list");
+		RETURN(-ENOMEM);
+	}
+
+	genradix_init(&drlist->lgdrl_nids);
+	drlist->lgdrl_type = type;
+	drlist->lgdrl_count = 0;
+	drlist->lgdrl_index = 0;
+	cb->args[0] = (long)drlist;
+
+	/* stays -ENOENT unless some NID gets copied below */
+	rc = -ENOENT;
+	lnet_net_lock(LNET_LOCK_EX);
+	switch (type) {
+	case LNET_HEALTH_TYPE_LOCAL_NI: {
+		struct lnet_ni *ni;
+
+		list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq,
+				    ni_recovery) {
+			CDEBUG(D_NET, "nid: %s\n", libcfs_nidstr(&ni->ni_nid));
+			slot = drlist->lgdrl_count++;
+			dst = genradix_ptr_alloc(&drlist->lgdrl_nids, slot,
+						 GFP_ATOMIC);
+			if (!dst)
+				GOTO(report_error_unlock, rc = -ENOMEM);
+
+			*dst = ni->ni_nid;
+			rc = 0;
+		}
+		break;
+	}
+	case LNET_HEALTH_TYPE_PEER_NI: {
+		struct lnet_peer_ni *lpni;
+
+		list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq,
+				    lpni_recovery) {
+			CDEBUG(D_NET, "nid: %s\n",
+			       libcfs_nidstr(&lpni->lpni_nid));
+			slot = drlist->lgdrl_count++;
+			dst = genradix_ptr_alloc(&drlist->lgdrl_nids, slot,
+						 GFP_ATOMIC);
+			if (!dst)
+				GOTO(report_error_unlock, rc = -ENOMEM);
+
+			*dst = lpni->lpni_nid;
+			rc = 0;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+report_error_unlock:
+	lnet_net_unlock(LNET_LOCK_EX);
+report_error:
+	if (rc < 0)
+		lnet_debug_recovery_show_done(cb);
+
+	RETURN(rc);
+}
+
+static const struct ln_key_list debug_recovery_attr_list = { /* key-table template for LNET_CMD_DBG_RECOV replies */
+ .lkl_maxattr = LNET_DBG_RECOV_ATTR_MAX,
+ .lkl_list = {
+ [LNET_DBG_RECOV_ATTR_HDR] = { /* lkp_value is chosen per request by the dump handler */
+ .lkp_key_format = LNKF_MAPPING,
+ .lkp_data_type = NLA_NUL_STRING,
+ },
+ [LNET_DBG_RECOV_ATTR_NID] = { /* key for the first NID; the dump handler adds "nid-<n>" keys for the rest */
+ .lkp_value = "nid-0",
+ .lkp_data_type = NLA_STRING,
+ },
+ },
+};
+
+/* Netlink dump handler for LNET_CMD_DBG_RECOV.
+ *
+ * On the first pass (lgdrl_index == 0) a key table is built from
+ * debug_recovery_attr_list, extended with one "nid-<n>" key per extra
+ * NID, and sent.  The collected NIDs are then emitted as string
+ * attributes, preceded by the (empty) mapping header.  The key table and
+ * the generated key names belong to the dump context and are released
+ * by lnet_debug_recovery_show_done().
+ *
+ * Returns the result of lnet_nl_send_error() for the final status:
+ * 0 on success, -ENOENT when no NIDs were collected, -ENOMEM on
+ * allocation failure, -EMSGSIZE when the message header does not fit,
+ * or the error from lnet_genl_send_scalar_list().
+ */
+static int lnet_debug_recovery_show_dump(struct sk_buff *msg,
+					 struct netlink_callback *cb)
+{
+	struct lnet_genl_debug_recovery_list *drlist;
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+	struct netlink_ext_ack *extack = NULL;
+#endif
+	int portid = NETLINK_CB(cb->skb).portid;
+	int seq = cb->nlh->nlmsg_seq;
+	int rc = 0;
+	void *hdr;
+	int idx;
+
+	ENTRY;
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+	extack = cb->extack;
+#endif
+	drlist = lnet_debug_recovery_dump_ctx(cb);
+	if (!drlist->lgdrl_count) {
+		NL_SET_ERR_MSG(extack, "No NIDs in recovery");
+		GOTO(send_error, rc = -ENOENT);
+	}
+
+	idx = drlist->lgdrl_index;
+	if (!idx) {
+		unsigned int count = debug_recovery_attr_list.lkl_maxattr;
+		const struct ln_key_list *all[] = { NULL, NULL };
+		size_t len = sizeof(struct ln_key_list);
+		struct ln_key_list *keys;
+		unsigned int i;
+
+		count += drlist->lgdrl_count - 1;
+		len += sizeof(struct ln_key_props) * count;
+		LIBCFS_ALLOC(keys, len);
+		if (!keys) {
+			NL_SET_ERR_MSG(extack,
+				       "key list allocation failure");
+			GOTO(send_error, rc = -ENOMEM);
+		}
+		/* Attach the table to the context before anything else can
+		 * fail, so lnet_debug_recovery_show_done() frees it together
+		 * with whatever key names exist by then.  This assumes
+		 * LIBCFS_ALLOC hands back zeroed memory, so unfilled
+		 * lkp_value slots are NULL -- TODO confirm.
+		 */
+		drlist->lgdrl_keys = keys;
+		drlist->lgdrl_len = len;
+
+		/* Set initial values */
+		*keys = debug_recovery_attr_list;
+		if (drlist->lgdrl_type == LNET_HEALTH_TYPE_LOCAL_NI) {
+			keys->lkl_list[LNET_DBG_RECOV_ATTR_HDR].lkp_value =
+				"Local NI recovery";
+		} else {
+			keys->lkl_list[LNET_DBG_RECOV_ATTR_HDR].lkp_value =
+				"Peer NI recovery";
+		}
+		keys->lkl_maxattr = count;
+
+		for (i = 1; i < drlist->lgdrl_count; i++) {
+			struct ln_key_props *props;
+
+			props = &keys->lkl_list[LNET_DBG_RECOV_ATTR_MAX + i];
+			props->lkp_data_type = NLA_STRING;
+			props->lkp_value = kasprintf(GFP_ATOMIC, "nid-%u", i);
+			if (!props->lkp_value) {
+				NL_SET_ERR_MSG(extack,
+					       "key name allocation failure");
+				GOTO(send_error, rc = -ENOMEM);
+			}
+		}
+
+		all[0] = keys;
+		rc = lnet_genl_send_scalar_list(msg, portid, seq,
+						&lnet_family,
+						NLM_F_CREATE | NLM_F_MULTI,
+						LNET_CMD_DBG_RECOV, all);
+		if (rc < 0) {
+			NL_SET_ERR_MSG(extack, "failed to send key table");
+			GOTO(send_error, rc);
+		}
+	}
+
+	hdr = genlmsg_put(msg, portid, seq, &lnet_family,
+			  NLM_F_MULTI, LNET_CMD_DBG_RECOV);
+	if (!hdr) {
+		/* nothing was added to msg, so there is nothing to cancel */
+		NL_SET_ERR_MSG(extack, "failed to send values");
+		GOTO(send_error, rc = -EMSGSIZE);
+	}
+
+	/* NOTE(review): nla_put_string() results are not checked here, as
+	 * in the code this replaces; a NID that does not fit is dropped.
+	 */
+	while (idx < drlist->lgdrl_count) {
+		struct lnet_nid *nid;
+		const char *nidstr;
+
+		/* the mapping header has to precede the very first NID */
+		if (idx == 0)
+			nla_put_string(msg, LNET_DBG_RECOV_ATTR_HDR, "");
+
+		nid = genradix_ptr(&drlist->lgdrl_nids, idx);
+		nidstr = libcfs_nidstr(nid);
+		CDEBUG(D_NET, "nid: %s\n", nidstr);
+		nla_put_string(msg, LNET_DBG_RECOV_ATTR_NID + idx, nidstr);
+		idx++;
+	}
+	genlmsg_end(msg, hdr);
+
+	drlist->lgdrl_index = idx;
+send_error:
+	RETURN(lnet_nl_send_error(cb->skb, portid, seq, rc));
+}
+
+#ifndef HAVE_NETLINK_CALLBACK_START
+/* Dump entry point used when HAVE_NETLINK_CALLBACK_START is not defined:
+ * run the start handler by hand the first time through (no context in
+ * cb->args[0] yet), report its failure to the requester if it has one,
+ * and otherwise continue with the regular dump handler.
+ */
+static int lnet_old_debug_recovery_show_dump(struct sk_buff *msg,
+					     struct netlink_callback *cb)
+{
+	int rc;
+
+	if (cb->args[0])
+		return lnet_debug_recovery_show_dump(msg, cb);
+
+	rc = lnet_debug_recovery_show_start(cb);
+	if (rc >= 0)
+		return lnet_debug_recovery_show_dump(msg, cb);
+
+	return lnet_nl_send_error(cb->skb, NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, rc);
+}
+#endif
+
static const struct genl_multicast_group lnet_mcast_grps[] = { /* generic netlink multicast group names */
{ .name = "ip2net", },
{ .name = "net", },
{ .name = "ping", },
{ .name = "discover", },
{ .name = "cpt-of-nid", },
+ { .name = "dbg-recov", }, /* presumably the group for LNET_CMD_DBG_RECOV replies; verify against the userspace side */
};
static const struct genl_ops lnet_genl_ops[] = {
.flags = GENL_ADMIN_PERM,
.doit = lnet_peer_fail_cmd,
},
+ {
+ .cmd = LNET_CMD_DBG_RECOV,
+ .flags = GENL_ADMIN_PERM,
+#ifdef HAVE_NETLINK_CALLBACK_START
+ .start = lnet_debug_recovery_show_start,
+ .dumpit = lnet_debug_recovery_show_dump,
+#else
+ .dumpit = lnet_old_debug_recovery_show_dump,
+#endif
+ .done = lnet_debug_recovery_show_done,
+ },
};
static struct genl_family lnet_family = {
yaml_lnet_config_peer_ni_healthv);
}
+/* Ask the kernel over Netlink for the NIDs on one of the LNet recovery
+ * queues and print the YAML reply on stdout.
+ *
+ * type selects the queue: LNET_HEALTH_TYPE_LOCAL_NI or
+ * LNET_HEALTH_TYPE_PEER_NI.  Any other value yields -EINVAL.
+ *
+ * Returns 0 when the reply was printed, and also when a failure has
+ * already been reported as "Operation failed: ..." on stdout.
+ * Returns -EOPNOTSUPP when a parser or the socket could not be set up
+ * and -EINVAL when the request could not be emitted.
+ */
+static int yaml_debug_recovery(enum lnet_health_type type)
+{
+	yaml_parser_t setup, reply;
+	yaml_document_t results;
+	yaml_emitter_t output;
+	const char *msg = NULL;
+	struct nl_sock *sk;
+	const char *config;
+	int rc;
+
+	switch (type) {
+	case LNET_HEALTH_TYPE_LOCAL_NI:
+		config = "dbg-recov:\n queue_type: 0\n";
+		break;
+	case LNET_HEALTH_TYPE_PEER_NI:
+		config = "dbg-recov:\n queue_type: 1\n";
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Initialize configuration parser */
+	rc = yaml_parser_initialize(&setup);
+	if (rc == 0) {
+		yaml_parser_log_error(&setup, stderr, NULL);
+		yaml_parser_delete(&setup);
+		return -EOPNOTSUPP;
+	}
+
+	yaml_parser_set_input_string(&setup, (const unsigned char *)config,
+				     strlen(config));
+	rc = yaml_parser_load(&setup, &results);
+	if (rc == 0) {
+		yaml_parser_log_error(&setup, stderr, NULL);
+		yaml_parser_delete(&setup);
+		return -EOPNOTSUPP;
+	}
+	yaml_parser_delete(&setup);
+
+	/* Create Netlink emitter to send request to kernel */
+	sk = nl_socket_alloc();
+	if (!sk) {
+		yaml_document_delete(&results);
+		return -EOPNOTSUPP;
+	}
+
+	/* Setup parser to receive Netlink packets */
+	rc = yaml_parser_initialize(&reply);
+	if (rc == 0) {
+		yaml_document_delete(&results);
+		nl_socket_free(sk);
+		return -EOPNOTSUPP;
+	}
+
+	rc = yaml_parser_set_input_netlink(&reply, sk, false);
+	if (rc == 0) {
+		/* the request document never reaches an emitter here */
+		yaml_document_delete(&results);
+		goto free_reply;
+	}
+
+	rc = yaml_emitter_initialize(&output);
+	if (rc == 1) /* 1 is success */
+		rc = yaml_emitter_set_output_netlink(&output, sk,
+						     LNET_GENL_NAME,
+						     LNET_GENL_VERSION,
+						     LNET_CMD_DBG_RECOV,
+						     NLM_F_DUMP);
+	if (rc == 1) {
+		/* yaml_emitter_dump() destroys the document, pass or fail */
+		rc = yaml_emitter_dump(&output, &results);
+	} else {
+		yaml_document_delete(&results);
+	}
+	if (rc == 0) {
+		yaml_emitter_log_error(&output, stderr);
+		rc = -EINVAL;
+	} else {
+		yaml_document_t errmsg;
+
+		rc = yaml_parser_load(&reply, &errmsg);
+		if (rc == 1) {
+			yaml_emitter_t debug;
+
+			rc = yaml_emitter_initialize(&debug);
+			if (rc == 1) {
+				yaml_emitter_set_indent(&debug,
+							LNET_DEFAULT_INDENT);
+				yaml_emitter_set_output_file(&debug,
+							     stdout);
+				rc = yaml_emitter_dump(&debug, &errmsg);
+			}
+			yaml_emitter_delete(&debug);
+		} else {
+			msg = yaml_parser_get_reader_error(&reply);
+			/* NOTE(review): errno is compared with a negative
+			 * value; presumably the Netlink reader stores the
+			 * kernel's negative error code there -- confirm.
+			 */
+			if (errno == -ENOENT)
+				rc = 1;
+		}
+		yaml_document_delete(&errmsg);
+	}
+	yaml_emitter_delete(&output);
+free_reply:
+	if (rc == 0) {
+		if (!msg)
+			msg = yaml_parser_get_reader_error(&reply);
+
+		fprintf(stdout, "Operation failed: %s\n", msg);
+	}
+	yaml_parser_delete(&reply);
+	nl_socket_free(sk);
+
+	return rc == 1 ? 0 : rc;
+}
+
static int jt_show_recovery(int argc, char **argv)
{
int rc, opt;
static const struct option long_options[] = {
{ .name = "local", .has_arg = no_argument, .val = 'l' },
{ .name = "peer", .has_arg = no_argument, .val = 'p' },
- { .name = NULL } };
+ { .name = NULL }
+ };
+ enum lnet_health_type type = -1;
rc = check_cmd(debug_cmds, "debug", "recovery", 0, argc, argv);
if (rc)
long_options, NULL)) != -1) {
switch (opt) {
case 'l':
- rc = lustre_lnet_show_local_ni_recovq(-1, &show_rc, &err_rc);
+ type = LNET_HEALTH_TYPE_LOCAL_NI;
break;
case 'p':
- rc = lustre_lnet_show_peer_ni_recovq(-1, &show_rc, &err_rc);
+ type = LNET_HEALTH_TYPE_PEER_NI;
break;
default:
return 0;
}
}
+ rc = yaml_debug_recovery(type);
+ if (rc <= 0) {
+ if (rc == -EOPNOTSUPP)
+ goto old_api;
+ return rc;
+ }
+old_api:
+ switch (type) {
+ case LNET_HEALTH_TYPE_LOCAL_NI:
+ rc = lustre_lnet_show_local_ni_recovq(-1, &show_rc, &err_rc);
+ break;
+ case LNET_HEALTH_TYPE_PEER_NI:
+ rc = lustre_lnet_show_peer_ni_recovq(-1, &show_rc, &err_rc);
+ break;
+ default:
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ break;
+ }
+
if (rc != LUSTRE_CFG_RC_NO_ERR)
cYAML_print_tree2file(stderr, err_rc);
else if (show_rc)