Convert the fault injection list ioctls to a netlink implementation.
The sanity-lnet tests that use fault injection can now be enabled for
large NIDs.
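
As an illustration, the configured rules can now be listed with the
new lnetctl "fault" command added by this patch (option names are the
ones defined in the new fault_cmds table):

  lnetctl fault show --rule_type drop
  lnetctl fault show --rule_type delay

The existing fault_simul_rule_list() helper also uses the netlink API
and only falls back to the old ioctl path when the netlink interface
is not available (-EOPNOTSUPP).
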
Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Ieaf9c01401fc0841c1e5805667531ba3455e8110
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53733
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
]) # LIBCFS_REFCOUNT_T
#
+# HAVE_NLA_PUT_U64_64BIT
+#
+# Kernel version 4.10 commit 73520786b0793c612ef4de3e9addb2ec411bea20
+# added nla_put_u64_64bit
+#
+AC_DEFUN([LIBCFS_SRC_NLA_PUT_U64_64BIT], [
+ LB2_LINUX_TEST_SRC([nla_put_u64_64bit], [
+ #include <net/genetlink.h>
+ ],[
+ nla_put_u64_64bit(NULL, 0, 0, 0)
+ ])
+])
+AC_DEFUN([LIBCFS_NLA_PUT_U64_64BIT], [
+ LB2_MSG_LINUX_TEST_RESULT([if 'nla_put_u64_64bit()' exists],
+ [nla_put_u64_64bit], [
+ AC_DEFINE(HAVE_NLA_PUT_U64_64BIT, 1,
+ ['nla_put_u64_64bit' is available])
+ ])
+]) # LIBCFS_NLA_PUT_U64_64BIT
+
+#
# Kernel version 4.12 commit 499118e966f1d2150bd66647c8932343c4e9a0b8
# introduce memalloc_noreclaim_{save,restore}
#
LIBCFS_SRC_RHASHTABLE_WALK_ENTER
# 4.10
LIBCFS_SRC_HOTPLUG_STATE_MACHINE
+ LIBCFS_SRC_NLA_PUT_U64_64BIT
# 4.11
LIBCFS_SRC_NL_EXT_ACK
LIBCFS_SRC_RHASHTABLE_LOOKUP_GET_INSERT_FAST
LIBCFS_RHASHTABLE_WALK_ENTER
# 4.10
LIBCFS_HOTPLUG_STATE_MACHINE
+ LIBCFS_NLA_PUT_U64_64BIT
# 4.11
LIBCFS_NL_EXT_ACK
LIBCFS_RHASHTABLE_LOOKUP_GET_INSERT_FAST
#define nla_strscpy nla_strlcpy
#endif /* HAVE_NLA_STRLCPY */
+#ifndef HAVE_NLA_PUT_U64_64BIT
+#define nla_put_u64_64bit(skb, type, value, padattr) \
+ nla_put_u64(skb, type, value)
+#endif
+
#ifndef HAVE_NL_PARSE_WITH_EXT_ACK
#define NL_SET_BAD_ATTR(extack, attr)
#define CFS_FAIL_MATCH_MD_NID 0xe001
#define CFS_FAIL_DELAY_MSG_FORWARD 0xe002
+#include <linux/generic-radix-tree.h>
#include <linux/netdevice.h>
#include <libcfs/libcfs.h>
} u;
};
+struct lnet_rule_properties {
+ struct lnet_fault_large_attr attr;
+ struct lnet_fault_stat stat;
+};
+
+struct lnet_genl_fault_rule_list {
+ unsigned int lgfrl_index;
+ unsigned int lgfrl_count;
+ u32 lgfrl_opc;
+ GENRADIX(struct lnet_rule_properties) lgfrl_list;
+};
+
int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data);
int lnet_fault_init(void);
void lnet_fault_fini(void);
bool lnet_drop_rule_match(struct lnet_hdr *hdr, struct lnet_nid *local_nid,
enum lnet_msg_hstatus *hstatus);
-
+int lnet_drop_rule_collect(struct lnet_genl_fault_rule_list *rlist);
int lnet_delay_rule_add(struct lnet_fault_large_attr *attr);
int lnet_delay_rule_del(struct lnet_nid *src, struct lnet_nid *dst,
bool shutdown);
int lnet_delay_rule_list(int pos, struct lnet_fault_large_attr *attr,
struct lnet_fault_stat *stat);
+int lnet_delay_rule_collect(struct lnet_genl_fault_rule_list *rlist);
void lnet_delay_rule_reset(void);
void lnet_delay_rule_check(void);
bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg);
/** enum lnet_debug_recovery_attr Attributes to report contents of
* the LNet health recovery queues
*
- * @LNET_DBG_RECOV_ATTR_UNSPEC Unspecified attribyute to catch
+ * @LNET_DBG_RECOV_ATTR_UNSPEC Unspecified attribute to catch
* errors
* @LNET_DBG_RECOV_ATTR_HDR Grouping for NI recovery queue
* (NLA_NUL_STRING)
#define LNET_DBG_RECOV_ATTR_MAX (__LNET_DBG_RECOV_ATTR_MAX_PLUS_ONE - 1)
+
+/** enum lnet_fault_rule_attr Attributes to report LNet fault
+ * injection.
+ *
+ * @LNET_FAULT_ATTR_UNSPEC Unspecified attribute to catch errors
+ * @LNET_FAULT_ATTR_PAD Pad attribute for 64b alignment
+ *
+ * @LNET_FAULT_ATTR_HDR Grouping for "fault"
+ * @LNET_FAULT_ATTR_FA_TYPE	The type of fault injection rule, i.e.
+ * either a "drop" rule or a "delay" rule.
+ * @LNET_FAULT_ATTR_FA_SRC For a description of this field, and
+ * the ones below, refer to
+ * struct lnet_fault_attr
+ * @LNET_FAULT_ATTR_FA_DST
+ * @LNET_FAULT_ATTR_FA_PTL_MASK
+ * @LNET_FAULT_ATTR_FA_MSG_MASK
+ * @LNET_FAULT_ATTR_DA_RATE
+ * @LNET_FAULT_ATTR_DA_INTERVAL
+ * @LNET_FAULT_ATTR_DS_DROPPED
+ * @LNET_FAULT_ATTR_LA_RATE
+ * @LNET_FAULT_ATTR_LA_INTERVAL
+ * @LNET_FAULT_ATTR_LA_LATENCY
+ * @LNET_FAULT_ATTR_LS_DELAYED
+ * @LNET_FAULT_ATTR_FS_COUNT
+ * @LNET_FAULT_ATTR_FS_PUT
+ * @LNET_FAULT_ATTR_FS_ACK
+ * @LNET_FAULT_ATTR_FS_GET
+ * @LNET_FAULT_ATTR_FS_REPLY
+ */
+enum lnet_fault_rule_attr {
+ LNET_FAULT_ATTR_UNSPEC = 0,
+ LNET_FAULT_ATTR_PAD = LNET_FAULT_ATTR_UNSPEC,
+
+ LNET_FAULT_ATTR_HDR,
+ LNET_FAULT_ATTR_FA_TYPE,
+ LNET_FAULT_ATTR_FA_SRC,
+ LNET_FAULT_ATTR_FA_DST,
+ LNET_FAULT_ATTR_FA_PTL_MASK,
+ LNET_FAULT_ATTR_FA_MSG_MASK,
+ LNET_FAULT_ATTR_DA_RATE,
+ LNET_FAULT_ATTR_DA_INTERVAL,
+ LNET_FAULT_ATTR_DS_DROPPED,
+ LNET_FAULT_ATTR_LA_RATE,
+ LNET_FAULT_ATTR_LA_INTERVAL,
+ LNET_FAULT_ATTR_LA_LATENCY,
+ LNET_FAULT_ATTR_LS_DELAYED,
+ LNET_FAULT_ATTR_FS_COUNT,
+ LNET_FAULT_ATTR_FS_PUT,
+ LNET_FAULT_ATTR_FS_ACK,
+ LNET_FAULT_ATTR_FS_GET,
+ LNET_FAULT_ATTR_FS_REPLY,
+ __LNET_FAULT_ATTR_MAX_PLUS_ONE,
+};
+
+#define LNET_FAULT_ATTR_MAX (__LNET_FAULT_ATTR_MAX_PLUS_ONE - 1)
+
struct lnet_ni {
/* chain on the lnet_net structure */
struct list_head ni_netlist;
* @LNET_CMD_PEER_DIST: command to find distance between LNet peers
* @LNET_CMD_UDSP: command to manage LNet UDSP rules
* @LNET_CMD_PEER_FAIL: command to fail LNet peers
- * @LNET_CMD_DBG_RECOV: command to debug peers
+ * @LNET_CMD_DBG_RECOV: command to print recovery queues
+ * @LNET_CMD_FAULT: command to inject LNet message failures
*/
enum lnet_commands {
LNET_CMD_UNSPEC = 0,
LNET_CMD_UDSP = 9,
LNET_CMD_PEER_FAIL = 10,
LNET_CMD_DBG_RECOV = 11,
+ LNET_CMD_FAULT = 12,
__LNET_CMD_MAX_PLUS_ONE
};
}
#endif
+static inline struct lnet_genl_fault_rule_list *
+lnet_fault_dump_ctx(struct netlink_callback *cb)
+{
+ return (struct lnet_genl_fault_rule_list *)cb->args[0];
+}
+
+int lnet_fault_show_done(struct netlink_callback *cb)
+{
+ struct lnet_genl_fault_rule_list *rlist = lnet_fault_dump_ctx(cb);
+
+ ENTRY;
+ if (rlist) {
+ genradix_free(&rlist->lgfrl_list);
+ CFS_FREE_PTR(rlist);
+ }
+ cb->args[0] = 0;
+
+ RETURN(0);
+}
+
+int lnet_fault_show_start(struct netlink_callback *cb)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
+ struct netlink_ext_ack *extack = NULL;
+ struct nlattr *params = genlmsg_data(gnlh);
+ struct lnet_genl_fault_rule_list *rlist;
+ int msg_len, rem, rc = 0;
+ struct nlattr *entry;
+ s64 opc = 0;
+
+ ENTRY;
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+ extack = cb->extack;
+#endif
+ msg_len = genlmsg_len(gnlh);
+ if (!msg_len) {
+ NL_SET_ERR_MSG(extack, "no configuration");
+ RETURN(-ENOMSG);
+ }
+
+ if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
+ NL_SET_ERR_MSG(extack, "invalid configuration");
+ RETURN(-EINVAL);
+ }
+
+ nla_for_each_attr(entry, params, msg_len, rem) {
+ if (nla_type(entry) != LN_SCALAR_ATTR_VALUE)
+ continue;
+
+ if (nla_strcmp(entry, "rule_type") == 0) {
+ rc = nla_extract_val(&entry, &rem,
+ LN_SCALAR_ATTR_INT_VALUE,
+ (void *)&opc, sizeof(opc), extack);
+ if (rc < 0)
+ GOTO(report_error, rc);
+ }
+ }
+
+ CDEBUG(D_NET, "Got opc %lld\n", opc);
+
+ if (opc != LNET_CTL_DROP_LIST && opc != LNET_CTL_DELAY_LIST) {
+ NL_SET_ERR_MSG(extack, "invalid operation");
+ GOTO(report_error, rc = -EINVAL);
+ }
+
+ CFS_ALLOC_PTR(rlist);
+ if (!rlist) {
+ NL_SET_ERR_MSG(extack, "No memory for rule list");
+ RETURN(-ENOMEM);
+ }
+
+ genradix_init(&rlist->lgfrl_list);
+ rlist->lgfrl_count = 0;
+ rlist->lgfrl_index = 0;
+ rlist->lgfrl_opc = opc;
+ cb->args[0] = (long)rlist;
+
+ rc = -ENOENT;
+ if (opc == LNET_CTL_DROP_LIST)
+ rc = lnet_drop_rule_collect(rlist);
+ else if (opc == LNET_CTL_DELAY_LIST)
+ rc = lnet_delay_rule_collect(rlist);
+report_error:
+ if (rc < 0)
+ lnet_fault_show_done(cb);
+
+ RETURN(rc);
+}
+
+static const struct ln_key_list fault_attr_list = {
+ .lkl_maxattr = LNET_FAULT_ATTR_MAX,
+ .lkl_list = {
+ [LNET_FAULT_ATTR_HDR] = {
+ .lkp_value = "fault",
+ .lkp_key_format = LNKF_SEQUENCE | LNKF_MAPPING,
+ .lkp_data_type = NLA_NUL_STRING,
+ },
+ [LNET_FAULT_ATTR_FA_TYPE] = {
+ .lkp_value = "rule_type",
+ .lkp_data_type = NLA_STRING
+ },
+ [LNET_FAULT_ATTR_FA_SRC] = {
+ .lkp_value = "fa_src",
+ .lkp_data_type = NLA_STRING
+ },
+ [LNET_FAULT_ATTR_FA_DST] = {
+ .lkp_value = "fa_dst",
+ .lkp_data_type = NLA_STRING
+ },
+ [LNET_FAULT_ATTR_FA_PTL_MASK] = {
+ .lkp_value = "fa_ptl_mask",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FA_MSG_MASK] = {
+ .lkp_value = "fa_msg_mask",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_DA_RATE] = {
+ .lkp_value = "da_rate",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_DA_INTERVAL] = {
+ .lkp_value = "da_interval",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_DS_DROPPED] = {
+ .lkp_value = "ds_dropped",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_LA_RATE] = {
+ .lkp_value = "la_rate",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_LA_INTERVAL] = {
+ .lkp_value = "la_interval",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_LA_LATENCY] = {
+ .lkp_value = "la_latency",
+ .lkp_data_type = NLA_U32
+ },
+ [LNET_FAULT_ATTR_LS_DELAYED] = {
+ .lkp_value = "ls_delayed",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FS_COUNT] = {
+ .lkp_value = "fs_count",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FS_PUT] = {
+ .lkp_value = "fs_put",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FS_ACK] = {
+ .lkp_value = "fs_ack",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FS_GET] = {
+ .lkp_value = "fs_get",
+ .lkp_data_type = NLA_U64
+ },
+ [LNET_FAULT_ATTR_FS_REPLY] = {
+ .lkp_value = "fs_reply",
+ .lkp_data_type = NLA_U64
+ },
+ },
+};
+
+int lnet_fault_show_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ struct lnet_genl_fault_rule_list *rlist = lnet_fault_dump_ctx(cb);
+#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
+ struct netlink_ext_ack *extack = NULL;
+#endif
+ int portid = NETLINK_CB(cb->skb).portid;
+ int seq = cb->nlh->nlmsg_seq;
+ int idx, rc = 0;
+ u32 opc;
+
+ ENTRY;
+#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
+ extack = cb->extack;
+#endif
+ if (!rlist->lgfrl_count) {
+		NL_SET_ERR_MSG(extack, "No fault rules found");
+ GOTO(send_error, rc = -ENOENT);
+ }
+
+ idx = rlist->lgfrl_index;
+ if (!idx) {
+ const struct ln_key_list *all[] = {
+ &fault_attr_list, NULL
+ };
+
+ rc = lnet_genl_send_scalar_list(msg, portid, seq,
+ &lnet_family,
+ NLM_F_CREATE | NLM_F_MULTI,
+ LNET_CMD_FAULT, all);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "failed to send key table");
+ GOTO(send_error, rc);
+ }
+ }
+ opc = rlist->lgfrl_opc;
+
+ while (idx < rlist->lgfrl_count) {
+ struct lnet_rule_properties *prop;
+ void *hdr;
+
+ prop = genradix_ptr(&rlist->lgfrl_list, idx++);
+
+ hdr = genlmsg_put(msg, portid, seq, &lnet_family,
+ NLM_F_MULTI, LNET_CMD_FAULT);
+ if (!hdr) {
+ NL_SET_ERR_MSG(extack, "failed to send values");
+ genlmsg_cancel(msg, hdr);
+ GOTO(send_error, rc = -EMSGSIZE);
+ }
+
+ if (idx == 1)
+ nla_put_string(msg, LNET_FAULT_ATTR_HDR, "");
+
+ nla_put_string(msg, LNET_FAULT_ATTR_FA_TYPE,
+ opc == LNET_CTL_DROP_LIST ? "drop" : "delay");
+
+ nla_put_string(msg, LNET_FAULT_ATTR_FA_SRC,
+ libcfs_nidstr(&prop->attr.fa_src));
+ nla_put_string(msg, LNET_FAULT_ATTR_FA_DST,
+ libcfs_nidstr(&prop->attr.fa_dst));
+
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FA_PTL_MASK,
+ prop->attr.fa_ptl_mask,
+ LNET_FAULT_ATTR_PAD);
+ nla_put_u32(msg, LNET_FAULT_ATTR_FA_MSG_MASK,
+ prop->attr.fa_msg_mask);
+
+ if (opc == LNET_CTL_DROP_LIST) {
+ nla_put_u32(msg, LNET_FAULT_ATTR_DA_RATE,
+ prop->attr.u.drop.da_rate);
+ nla_put_u32(msg, LNET_FAULT_ATTR_DA_INTERVAL,
+ prop->attr.u.drop.da_interval);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_DS_DROPPED,
+ prop->stat.u.drop.ds_dropped,
+ LNET_FAULT_ATTR_PAD);
+ } else if (opc == LNET_CTL_DELAY_LIST) {
+ nla_put_u32(msg, LNET_FAULT_ATTR_LA_RATE,
+ prop->attr.u.delay.la_rate);
+ nla_put_u32(msg, LNET_FAULT_ATTR_LA_INTERVAL,
+ prop->attr.u.delay.la_interval);
+ nla_put_u32(msg, LNET_FAULT_ATTR_LA_LATENCY,
+ prop->attr.u.delay.la_latency);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_LS_DELAYED,
+ prop->stat.u.delay.ls_delayed,
+ LNET_FAULT_ATTR_PAD);
+ }
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FS_COUNT,
+ prop->stat.fs_count,
+ LNET_FAULT_ATTR_PAD);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FS_PUT,
+ prop->stat.fs_put,
+ LNET_FAULT_ATTR_PAD);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FS_ACK,
+ prop->stat.fs_ack,
+ LNET_FAULT_ATTR_PAD);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FS_GET,
+ prop->stat.fs_get,
+ LNET_FAULT_ATTR_PAD);
+ nla_put_u64_64bit(msg, LNET_FAULT_ATTR_FS_REPLY,
+ prop->stat.fs_reply,
+ LNET_FAULT_ATTR_PAD);
+ genlmsg_end(msg, hdr);
+ }
+ rlist->lgfrl_index = idx;
+send_error:
+ return lnet_nl_send_error(cb->skb, portid, seq, rc);
+}
+
+#ifndef HAVE_NETLINK_CALLBACK_START
+int lnet_old_fault_show_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+ if (!cb->args[0]) {
+ int rc = lnet_fault_show_start(cb);
+
+ if (rc < 0)
+ return lnet_nl_send_error(cb->skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ rc);
+ }
+
+ return lnet_fault_show_dump(msg, cb);
+}
+#endif
+
static const struct genl_multicast_group lnet_mcast_grps[] = {
{ .name = "ip2net", },
{ .name = "net", },
{ .name = "discover", },
{ .name = "cpt-of-nid", },
{ .name = "dbg-recov", },
+ { .name = "fault", },
};
static const struct genl_ops lnet_genl_ops[] = {
#endif
.done = lnet_debug_recovery_show_done,
},
+ {
+ .cmd = LNET_CMD_FAULT,
+ .flags = GENL_ADMIN_PERM,
+#ifdef HAVE_NETLINK_CALLBACK_START
+ .start = lnet_fault_show_start,
+ .dumpit = lnet_fault_show_dump,
+#else
+ .dumpit = lnet_old_fault_show_dump,
+#endif
+ .done = lnet_fault_show_done,
+ },
};
static struct genl_family lnet_family = {
RETURN(rc);
}
+int lnet_drop_rule_collect(struct lnet_genl_fault_rule_list *rlist)
+{
+ struct lnet_drop_rule *rule;
+ int cpt, rc = 0;
+
+ ENTRY;
+ cpt = lnet_net_lock_current();
+ list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
+ struct lnet_rule_properties *prop;
+
+		/* cannot sleep while holding lnet_net_lock */
+		prop = genradix_ptr_alloc(&rlist->lgfrl_list,
+					  rlist->lgfrl_count++,
+					  GFP_ATOMIC);
+ if (!prop) {
+ rc = -ENOMEM;
+ break;
+ }
+ spin_lock(&rule->dr_lock);
+ prop->attr = rule->dr_attr;
+ prop->stat = rule->dr_stat;
+ spin_unlock(&rule->dr_lock);
+ }
+
+ lnet_net_unlock(cpt);
+ RETURN(rc);
+}
+
/**
* reset counters for all drop rules
*/
RETURN(rc);
}
+int lnet_delay_rule_collect(struct lnet_genl_fault_rule_list *rlist)
+{
+ struct lnet_delay_rule *rule;
+ int cpt, rc = 0;
+
+ ENTRY;
+ cpt = lnet_net_lock_current();
+ list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
+ struct lnet_rule_properties *prop;
+
+		/* cannot sleep while holding lnet_net_lock */
+		prop = genradix_ptr_alloc(&rlist->lgfrl_list,
+					  rlist->lgfrl_count++,
+					  GFP_ATOMIC);
+ if (!prop) {
+ rc = -ENOMEM;
+ break;
+ }
+ spin_lock(&rule->dl_lock);
+ prop->attr = rule->dl_attr;
+ prop->stat = rule->dl_stat;
+ spin_unlock(&rule->dl_lock);
+ }
+
+ lnet_net_unlock(cpt);
+ RETURN(rc);
+}
+
/**
* reset counters for all Delay Rules
*/
goto emitter_error;
yaml_emitter_open(output);
- yaml_emitter_set_indent(output, 6);
yaml_document_start_event_initialize(&hdr, NULL, NULL, NULL, 0);
rc = yaml_emitter_emit(output, &hdr);
if (rc == 0)
return rc == 1 ? 0 : rc;
}
+static char *
+fault_opc_to_str(__u32 opc)
+{
+ switch (opc) {
+ case LNET_CTL_DROP_LIST:
+		return "list drop rules";
+ case LNET_CTL_DELAY_LIST:
+ return "list delay rules";
+ default:
+ return "unrecognized command";
+ }
+}
+
+int yaml_lnet_fault_rule(yaml_document_t *results, __u32 opc, char *src,
+ char *dst, char *local_nid,
+ struct lnet_fault_attr *attr)
+{
+ struct nl_sock *sk = NULL;
+ char num[INT_STRING_LEN];
+ const char *msg = NULL;
+ int flags = NLM_F_DUMP;
+ yaml_emitter_t output;
+ yaml_parser_t reply;
+ yaml_event_t event;
+ int rc;
+
+ /* Create Netlink emitter to send request to kernel */
+ sk = nl_socket_alloc();
+ if (!sk)
+ return -EOPNOTSUPP;
+
+ /* Setup parser to receive Netlink packets */
+ rc = yaml_parser_initialize(&reply);
+ if (rc == 0) {
+ nl_socket_free(sk);
+ return -EOPNOTSUPP;
+ }
+
+ rc = yaml_parser_set_input_netlink(&reply, sk, false);
+ if (rc == 0)
+ goto free_reply;
+
+ rc = yaml_netlink_setup_emitter(&output, sk, LNET_GENL_NAME,
+ LNET_GENL_VERSION, flags,
+ LNET_CMD_FAULT, false);
+ if (rc == 0)
+ goto emitter_error;
+
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_STR_TAG,
+ (yaml_char_t *)"fault",
+ strlen("fault"), 1, 0,
+ YAML_PLAIN_SCALAR_STYLE);
+ rc = yaml_emitter_emit(&output, &event);
+ if (rc == 0)
+ goto emitter_error;
+
+ yaml_mapping_start_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_MAP_TAG,
+ 1, YAML_ANY_MAPPING_STYLE);
+ rc = yaml_emitter_emit(&output, &event);
+ if (rc == 0)
+ goto emitter_error;
+
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_STR_TAG,
+ (yaml_char_t *)"rule_type",
+ strlen("rule_type"), 1, 0,
+ YAML_PLAIN_SCALAR_STYLE);
+ rc = yaml_emitter_emit(&output, &event);
+ if (rc == 0)
+ goto emitter_error;
+
+	snprintf(num, sizeof(num), "%u", opc);
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_INT_TAG,
+ (yaml_char_t *)num,
+ strlen(num), 1, 0,
+ YAML_PLAIN_SCALAR_STYLE);
+ rc = yaml_emitter_emit(&output, &event);
+ if (rc == 0)
+ goto emitter_error;
+
+ yaml_mapping_end_event_initialize(&event);
+ rc = yaml_emitter_emit(&output, &event);
+ if (rc == 0)
+ goto emitter_error;
+
+ rc = yaml_netlink_complete_emitter(&output);
+emitter_error:
+ if (rc == 0) {
+ yaml_emitter_log_error(&output, stderr);
+ rc = -EINVAL;
+ } else {
+ rc = yaml_parser_load(&reply, results);
+ if (rc == 0) {
+ msg = yaml_parser_get_reader_error(&reply);
+ /* dump routine returns ENOENT if the rule list is
+ * empty. This is not an error condition.
+ */
+ if (errno == -ENOENT && (flags & NLM_F_DUMP))
+ rc = 1;
+ }
+ }
+ yaml_emitter_delete(&output);
+free_reply:
+ if (rc == 0) {
+ if (!msg)
+ msg = yaml_parser_get_reader_error(&reply);
+
+ if (strcmp(msg, "Unspecific failure") != 0) {
+ fprintf(stdout, "failed to %s: %s\n",
+ fault_opc_to_str(opc), msg);
+ } else {
+ fprintf(stdout, "failed to %s: %s\n",
+ fault_opc_to_str(opc), strerror(errno));
+ }
+ rc = errno;
+ }
+ yaml_parser_delete(&reply);
+ nl_socket_free(sk);
+
+ return rc == 1 ? 0 : rc;
+}
+
static int dispatch_peer_ni_cmd(__u32 cmd, struct lnet_ioctl_peer_cfg *data,
char *err_str, char *cmd_str)
{
#include <libcfs/util/ioctl.h>
#include <libcfs/util/string.h>
#include <linux/lnet/lnet-dlc.h>
+#include <linux/lnet/lnetctl.h>
#include <linux/lnet/nidstr.h>
#define LUSTRE_CFG_RC_NO_ERR 0 /* success */
*/
int yaml_lnet_configure(int flags, const char **msg);
+int yaml_lnet_fault_rule(yaml_document_t *results, __u32 opc, char *src,
+ char *dst, char *local_nid,
+ struct lnet_fault_attr *attr);
+
/**
* yaml_emitter_set_output_netlink
*
static int jt_show_recovery(int argc, char **argv);
static int jt_show_global(int argc, char **argv);
static int jt_show_udsp(int argc, char **argv);
+static int jt_show_fault(int argc, char **argv);
static int jt_set_tiny(int argc, char **argv);
static int jt_set_small(int argc, char **argv);
static int jt_set_large(int argc, char **argv);
static int jt_list_peer(int argc, char **argv);
static int jt_add_udsp(int argc, char **argv);
static int jt_del_udsp(int argc, char **argv);
-/*static int jt_show_peer(int argc, char **argv);*/
static int jt_import(int argc, char **argv);
static int jt_export(int argc, char **argv);
static int jt_ping(int argc, char **argv);
static int jt_set_response_tracking(int argc, char **argv);
static int jt_set_recovery_limit(int argc, char **argv);
static int jt_udsp(int argc, char **argv);
+static int jt_fault(int argc, char **argv);
static int jt_setup_mrrouting(int argc, char **argv);
static int jt_calc_cpt_of_nid(int argc, char **argv);
static int jt_show_peer_debug_info(int argc, char **argv);
{"discover", jt_discover, 0, "discover nid[,nid,...]"},
{"service-id", jt_calc_service_id, 0, "Calculate IB Lustre service ID\n"},
{"udsp", jt_udsp, 0, "udsp {add | del | help}"},
+	{"fault", jt_fault, 0, "fault {show | help}"},
{"setup-mrrouting", jt_setup_mrrouting, 0,
"setup linux routing tables\n"},
{"cpt-of-nid", jt_calc_cpt_of_nid, 0,
{ 0, 0, 0, NULL }
};
+command_t fault_cmds[] = {
+ {"show", jt_show_fault, 0, "show fault rules\n"
+	 "\t--rule_type t: Show LNet fault rules of type t (drop or delay)\n"},
+ { 0, 0, 0, NULL }
+};
+
static int parse_long(const char *number, long int *value)
{
char *end;
return cfs_parser(argc, argv, udsp_cmds);
}
+static int jt_fault(int argc, char **argv)
+{
+ int rc;
+
+ rc = check_cmd(fault_cmds, "fault", NULL, 2, argc, argv);
+ if (rc)
+ return rc;
+
+ return cfs_parser(argc, argv, fault_cmds);
+}
+
static int yaml_import_global_settings(char *key, unsigned long value,
char cmd, struct cYAML *show_rc,
struct cYAML *err_rc)
return rc;
}
+static int jt_show_fault(int argc, char **argv)
+{
+ const char *const short_options = "t:";
+ static const struct option long_options[] = {
+ { .name = "rule_type", .has_arg = required_argument, .val = 't' },
+ { .name = NULL }
+ };
+ yaml_document_t results;
+ yaml_emitter_t debug;
+ int opc = 0, opt, rc;
+
+ rc = check_cmd(fault_cmds, "fault", "show", 2, argc, argv);
+ if (rc < 0)
+ return rc;
+
+ while ((opt = getopt_long(argc, argv, short_options,
+ long_options, NULL)) != -1) {
+ switch (opt) {
+ case 't':
+ if (strcmp(optarg, "delay") == 0)
+ opc = LNET_CTL_DELAY_LIST;
+ else if (strcmp(optarg, "drop") == 0)
+ opc = LNET_CTL_DROP_LIST;
+ else
+ return -EINVAL;
+ break;
+ default:
+ return 0;
+ }
+ }
+ if (rc < 0)
+ return rc;
+
+ rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL);
+ if (rc < 0)
+ return rc;
+
+ rc = yaml_emitter_initialize(&debug);
+ if (rc == 0)
+ return -EINVAL;
+
+ yaml_emitter_set_indent(&debug, LNET_DEFAULT_INDENT);
+ yaml_emitter_set_output_file(&debug, stdout);
+ rc = yaml_emitter_dump(&debug, &results);
+
+ yaml_emitter_delete(&debug);
+ yaml_document_delete(&results);
+
+ return rc == 0 ? -EINVAL : 0;
+}
+
int main(int argc, char **argv)
{
int rc = 0;
argc, argv);
}
+static void print_fault_rules(__u32 opc, struct lnet_nid *src,
+ struct lnet_nid *dst,
+ struct lnet_fault_attr *attr,
+ struct lnet_fault_stat *stat)
+{
+ if (opc == LNET_CTL_DROP_LIST) {
+ printf("%s->%s (1/%d | %d) ptl %#jx, msg %x, %ju/%ju, PUT %ju, ACK %ju, GET %ju, REP %ju\n",
+ libcfs_nidstr(src),
+ libcfs_nidstr(dst),
+ attr->u.drop.da_rate, attr->u.drop.da_interval,
+ (uintmax_t)attr->fa_ptl_mask, attr->fa_msg_mask,
+ (uintmax_t)stat->u.drop.ds_dropped,
+ (uintmax_t)stat->fs_count,
+ (uintmax_t)stat->fs_put,
+ (uintmax_t)stat->fs_ack,
+ (uintmax_t)stat->fs_get,
+ (uintmax_t)stat->fs_reply);
+ } else if (opc == LNET_CTL_DELAY_LIST) {
+ printf("%s->%s (1/%d | %d, latency %d) ptl %#jx, msg %x, %ju/%ju, PUT %ju, ACK %ju, GET %ju, REP %ju\n",
+ libcfs_nidstr(src),
+ libcfs_nidstr(dst),
+ attr->u.delay.la_rate, attr->u.delay.la_interval,
+ attr->u.delay.la_latency,
+ (uintmax_t)attr->fa_ptl_mask, attr->fa_msg_mask,
+ (uintmax_t)stat->u.delay.ls_delayed,
+ (uintmax_t)stat->fs_count,
+ (uintmax_t)stat->fs_put,
+ (uintmax_t)stat->fs_ack,
+ (uintmax_t)stat->fs_get,
+ (uintmax_t)stat->fs_reply);
+ }
+}
+
static int
fault_simul_rule_list(__u32 opc, char *name, int argc, char **argv)
{
struct libcfs_ioctl_data data = { { 0 } };
- struct lnet_fault_attr attr;
- struct lnet_fault_stat stat;
- int pos;
+ struct lnet_nid src = {}, dst = {};
+ struct lnet_fault_attr attr;
+ struct lnet_fault_stat stat;
+ yaml_document_t results;
+ yaml_node_t *node;
+ bool first = true;
+ int pos, rc;
+ int i = 2;
+
+ rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL);
+ if (rc < 0) {
+ if (rc == -EOPNOTSUPP)
+ goto old_api;
+ return rc;
+ }
+ memset(&attr, 0, sizeof(attr));
+ memset(&stat, 0, sizeof(stat));
+ pos = 0;
+
+ while ((node = yaml_document_get_node(&results, i++)) != NULL) {
+ yaml_node_t *next;
+ char *tmp;
+
+ if (node->type == YAML_MAPPING_NODE) {
+ if (first) {
+ fprintf(stderr, "LNet %s rules:\n",
+ opc == LNET_CTL_DELAY_LIST ?
+ "delay" : "drop");
+ first = false;
+ } else {
+ print_fault_rules(opc, &src, &dst,
+ &attr, &stat);
+ }
+ pos++;
+ }
+
+ if (node->type != YAML_SCALAR_NODE)
+ continue;
+
+ tmp = (char *)node->data.scalar.value;
+ if (strcmp("fa_src", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ libcfs_strnid(&src, tmp);
+ } else if (strcmp("fa_dst", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ libcfs_strnid(&dst, tmp);
+ } else if (strcmp("fa_ptl_mask", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.fa_ptl_mask = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fa_msg_mask", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.fa_msg_mask = strtoul(tmp, NULL, 0);
+ } else if (strcmp("la_rate", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.u.delay.la_rate = strtoul(tmp, NULL, 0);
+ } else if (strcmp("la_interval", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.u.delay.la_interval = strtoul(tmp, NULL, 0);
+ } else if (strcmp("la_latency", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.u.delay.la_latency = strtoul(tmp, NULL, 0);
+ } else if (strcmp("da_rate", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.u.drop.da_rate = strtoul(tmp, NULL, 0);
+ } else if (strcmp("da_interval", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ attr.u.drop.da_interval = strtoul(tmp, NULL, 0);
+ } else if (strcmp("ds_dropped", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.u.drop.ds_dropped = strtoul(tmp, NULL, 0);
+ } else if (strcmp("ls_delayed", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.u.delay.ls_delayed = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fs_count", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.fs_count = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fs_put", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.fs_put = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fs_ack", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.fs_ack = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fs_get", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.fs_get = strtoul(tmp, NULL, 0);
+ } else if (strcmp("fs_reply", tmp) == 0) {
+ next = yaml_document_get_node(&results, i);
+ tmp = (char *)next->data.scalar.value;
+ stat.fs_reply = strtoul(tmp, NULL, 0);
+ }
+ }
+
+	if (!first)
+		print_fault_rules(opc, &src, &dst, &attr, &stat);
+ printf("found total %d\n", pos);
+	yaml_document_delete(&results);
+	return rc;
+old_api:
+ rc = 0;
printf("LNet %s rules:\n", name);
for (pos = 0;; pos++) {
- int rc;
-
memset(&attr, 0, sizeof(attr));
memset(&stat, 0, sizeof(stat));
libcfs_ioctl_unpack(&data, ioc_buf);
- if (opc == LNET_CTL_DROP_LIST) {
- printf("%s->%s (1/%d | %d) ptl %#jx, msg %x, %ju/%ju, PUT %ju, ACK %ju, GET %ju, REP %ju\n",
- libcfs_nid2str(attr.fa_src),
- libcfs_nid2str(attr.fa_dst),
- attr.u.drop.da_rate, attr.u.drop.da_interval,
- (uintmax_t)attr.fa_ptl_mask, attr.fa_msg_mask,
- (uintmax_t)stat.u.drop.ds_dropped,
- (uintmax_t)stat.fs_count,
- (uintmax_t)stat.fs_put,
- (uintmax_t)stat.fs_ack,
- (uintmax_t)stat.fs_get,
- (uintmax_t)stat.fs_reply);
-
- } else if (opc == LNET_CTL_DELAY_LIST) {
- printf("%s->%s (1/%d | %d, latency %d) ptl %#jx, msg %x, %ju/%ju, PUT %ju, ACK %ju, GET %ju, REP %ju\n",
- libcfs_nid2str(attr.fa_src),
- libcfs_nid2str(attr.fa_dst),
- attr.u.delay.la_rate, attr.u.delay.la_interval,
- attr.u.delay.la_latency,
- (uintmax_t)attr.fa_ptl_mask, attr.fa_msg_mask,
- (uintmax_t)stat.u.delay.ls_delayed,
- (uintmax_t)stat.fs_count,
- (uintmax_t)stat.fs_put,
- (uintmax_t)stat.fs_ack,
- (uintmax_t)stat.fs_get,
- (uintmax_t)stat.fs_reply);
- }
+ lnet_nid4_to_nid(attr.fa_src, &src);
+ lnet_nid4_to_nid(attr.fa_dst, &dst);
+
+ print_fault_rules(opc, &src, &dst, &attr, &stat);
}
printf("found total %d\n", pos);
- return 0;
+ return rc;
}
int