From f1c66236c6869a83f57d07206d845bba42d7cd9f Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Sun, 16 Jun 2024 09:28:20 -0400 Subject: [PATCH] LU-10391 lnet: Fault injection add/del ioctls to netlink Convert the fault injection add/del ioctls to a netlink implementation. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Chris Horn Change-Id: I20f38d4e7c0215a1b19772c6253c617174c0b00c Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53732 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Serguei Smirnov Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- libcfs/libcfs/util/nidstrings.c | 14 ++ lnet/include/lnet/lib-lnet.h | 3 + lnet/include/uapi/linux/lnet/nidstr.h | 1 + lnet/lnet/api-ni.c | 191 ++++++++++++++++++++ lnet/lnet/net_fault.c | 18 +- lnet/lnet/nidstrings.c | 15 ++ lnet/utils/lnetconfig/liblnetconfig.c | 317 +++++++++++++++++++++++++++++++-- lnet/utils/lnetconfig/liblnetconfig.h | 13 ++ lnet/utils/lnetctl.c | 325 ++++++++++++++++++++++++++++++++++ lustre/tests/sanity-lnet.sh | 11 -- lustre/utils/portals.c | 107 ++++++++--- 11 files changed, 950 insertions(+), 65 deletions(-) diff --git a/libcfs/libcfs/util/nidstrings.c b/libcfs/libcfs/util/nidstrings.c index c35d30d..2b11313 100644 --- a/libcfs/libcfs/util/nidstrings.c +++ b/libcfs/libcfs/util/nidstrings.c @@ -1029,6 +1029,20 @@ libcfs_str2anynid(lnet_nid_t *nidp, const char *str) return *nidp != LNET_NID_ANY; } +int +libcfs_stranynid(struct lnet_nid *nid, const char *str) +{ + if (!strcmp(str, "*")) { + *nid = LNET_ANY_NID; + return 1; + } + + if (libcfs_strnid(nid, str) < 0) + *nid = LNET_ANY_NID; + + return !LNET_NID_IS_ANY(nid); +} + /** * Nid range list syntax. 
* \verbatim diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 88037e2..e2d251b 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -830,12 +830,15 @@ struct lnet_genl_fault_rule_list { }; int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data); +int lnet_drop_rule_add(struct lnet_fault_large_attr *attr); +int lnet_drop_rule_del(struct lnet_nid *fa_src, struct lnet_nid *fa_dst); int lnet_fault_init(void); void lnet_fault_fini(void); bool lnet_drop_rule_match(struct lnet_hdr *hdr, struct lnet_nid *local_nid, enum lnet_msg_hstatus *hstatus); int lnet_drop_rule_collect(struct lnet_genl_fault_rule_list *rlist); +void lnet_drop_rule_reset(void); int lnet_delay_rule_add(struct lnet_fault_large_attr *attr); int lnet_delay_rule_del(struct lnet_nid *src, struct lnet_nid *dst, bool shutdown); diff --git a/lnet/include/uapi/linux/lnet/nidstr.h b/lnet/include/uapi/linux/lnet/nidstr.h index 6bc44ef..869f91a 100644 --- a/lnet/include/uapi/linux/lnet/nidstr.h +++ b/lnet/include/uapi/linux/lnet/nidstr.h @@ -99,6 +99,7 @@ char *libcfs_idstr(struct lnet_processid *id); __u32 libcfs_str2net(const char *str); lnet_nid_t libcfs_str2nid(const char *str); int libcfs_str2anynid(lnet_nid_t *nid, const char *str); +int libcfs_stranynid(struct lnet_nid *nid, const char *str); int libcfs_num_parse(char *str, int len, struct list_head *list); char *libcfs_id2str(struct lnet_process_id id); void cfs_free_nidlist(struct list_head *list); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 300887c..f62b13f 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2925,6 +2925,33 @@ nla_extract_val(struct nlattr **attr, int *rem, RETURN(rc); } +int +nla_strnid(struct nlattr **attr, struct lnet_nid *nid, int *rem, + struct netlink_ext_ack *extack) +{ + char nidstr[LNET_NIDSTR_SIZE]; + int rc; + + ENTRY; + rc = nla_extract_val(attr, rem, LN_SCALAR_ATTR_VALUE, + nidstr, sizeof(nidstr), extack); + if (rc < 0) { + 
NL_SET_ERR_MSG(extack, "failed to copy nidstring attribute"); + RETURN(rc); + } + + rc = libcfs_strnid(nid, strim(nidstr)); + if (rc < 0) { + CDEBUG(D_NET, "Invalid nidstr \"%s\"\n", nidstr); + NL_SET_ERR_MSG(extack, "failed to convert nidstring to NID"); + RETURN(rc); + } + + CDEBUG(D_NET, "%s -> %s\n", nidstr, libcfs_nidstr(nid)); + + RETURN(0); +} + static struct genl_family lnet_family; /** @@ -9555,6 +9582,169 @@ int lnet_old_fault_show_dump(struct sk_buff *msg, struct netlink_callback *cb) } #endif +static int lnet_fault_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct genlmsghdr *gnlh = nlmsg_data(nlh); + struct nlattr *params = genlmsg_data(gnlh); + struct netlink_ext_ack *extack = NULL; + struct lnet_fault_large_attr fattr; + int msg_len, rem, rc = 0; + struct nlattr *entry; + s64 opc = 0; + + ENTRY; +#ifdef HAVE_NL_PARSE_WITH_EXT_ACK + extack = info->extack; +#endif + msg_len = genlmsg_len(gnlh); + if (!msg_len) { + GENL_SET_ERR_MSG(info, "no configuration"); + RETURN(-ENOMSG); + } + + if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) { + GENL_SET_ERR_MSG(info, "invalid configuration"); + RETURN(-EINVAL); + } + + fattr.fa_src = LNET_ANY_NID; + fattr.fa_dst = LNET_ANY_NID; + + nla_for_each_attr(entry, params, msg_len, rem) { + u64 tmp; + + CDEBUG(D_NET, "attr type: %d\n", nla_type(entry)); + if (nla_type(entry) != LN_SCALAR_ATTR_VALUE) + continue; + + if (nla_strcmp(entry, "rule_type") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &opc, sizeof(opc), extack); + if (rc < 0) + GOTO(report_error, rc); + } else if (nla_strcmp(entry, "fa_src") == 0) { + rc = nla_strnid(&entry, &fattr.fa_src, &rem, extack); + if (rc < 0) + GOTO(report_error, rc); + } else if (nla_strcmp(entry, "fa_dst") == 0) { + rc = nla_strnid(&entry, &fattr.fa_dst, &rem, extack); + if (rc < 0) + GOTO(report_error, rc); + } else if (nla_strcmp(entry, "fa_local_nid") == 0) { + rc = nla_strnid(&entry, 
&fattr.fa_local_nid, &rem, + extack); + if (rc < 0) + GOTO(report_error, rc); + } else if (nla_strcmp(entry, "fa_ptl_mask") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.fa_ptl_mask = tmp; + } else if (nla_strcmp(entry, "fa_msg_mask") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.fa_msg_mask = tmp; + } else if (nla_strcmp(entry, "da_rate") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.drop.da_rate = tmp; + } else if (nla_strcmp(entry, "da_interval") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.drop.da_interval = tmp; + } else if (nla_strcmp(entry, "da_health_error_mask") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.drop.da_health_error_mask = tmp; + } else if (nla_strcmp(entry, "da_random") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.drop.da_random = !!tmp; + } else if (nla_strcmp(entry, "da_drop_all") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.drop.da_drop_all = !!tmp; + } else if (nla_strcmp(entry, "la_rate") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.delay.la_rate = tmp; + } else if (nla_strcmp(entry, "la_interval") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, 
sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.delay.la_interval = tmp; + } else if (nla_strcmp(entry, "la_latency") == 0) { + rc = nla_extract_val(&entry, &rem, + LN_SCALAR_ATTR_INT_VALUE, + &tmp, sizeof(tmp), extack); + if (rc < 0) + GOTO(report_error, rc); + + fattr.u.delay.la_latency = tmp; + } + } + + if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) { + if (opc == LNET_CTL_DROP_ADD) + rc = lnet_drop_rule_add(&fattr); + else + rc = lnet_delay_rule_add(&fattr); + } else if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) { + if (opc == LNET_CTL_DROP_RESET) + lnet_drop_rule_reset(); + else + lnet_delay_rule_reset(); + } else if (!(info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE))) { + if (opc == LNET_CTL_DROP_DEL) + rc = lnet_drop_rule_del(&fattr.fa_src, &fattr.fa_dst); + else + rc = lnet_delay_rule_del(&fattr.fa_src, &fattr.fa_dst, + false); + if (rc == 0) + rc = -ENOENT; + else + rc = 0; + } +report_error: + RETURN(rc); +} + static const struct genl_multicast_group lnet_mcast_grps[] = { { .name = "ip2net", }, { .name = "net", }, @@ -9667,6 +9857,7 @@ static const struct genl_ops lnet_genl_ops[] = { .dumpit = lnet_old_fault_show_dump, #endif .done = lnet_fault_show_done, + .doit = lnet_fault_cmd, }, }; diff --git a/lnet/lnet/net_fault.c b/lnet/lnet/net_fault.c index a611d40..4736885 100644 --- a/lnet/lnet/net_fault.c +++ b/lnet/lnet/net_fault.c @@ -173,8 +173,7 @@ lnet_fault_stat_inc(struct lnet_fault_stat *stat, unsigned int type) * There is no check for duplicated drop rule, all rules will be checked for * incoming message. 
*/ -static int -lnet_drop_rule_add(struct lnet_fault_large_attr *attr) +int lnet_drop_rule_add(struct lnet_fault_large_attr *attr) { struct lnet_drop_rule *rule; ENTRY; @@ -222,8 +221,7 @@ lnet_drop_rule_add(struct lnet_fault_large_attr *attr) * If \a dst is zero, then all rules have \a src as source will be removed * If both of them are zero, all rules will be removed */ -static int -lnet_drop_rule_del(struct lnet_nid *src, struct lnet_nid *dst) +int lnet_drop_rule_del(struct lnet_nid *src, struct lnet_nid *dst) { struct lnet_drop_rule *rule; struct lnet_drop_rule *tmp; @@ -231,6 +229,8 @@ lnet_drop_rule_del(struct lnet_nid *src, struct lnet_nid *dst) int n = 0; ENTRY; + CDEBUG(D_NET, "src %s dst %s\n", libcfs_nidstr(src), + libcfs_nidstr(dst)); lnet_net_lock(LNET_LOCK_EX); list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) { if (!(LNET_NID_IS_ANY(src) || nid_same(&rule->dr_attr.fa_src, src))) @@ -318,8 +318,7 @@ int lnet_drop_rule_collect(struct lnet_genl_fault_rule_list *rlist) /** * reset counters for all drop rules */ -static void -lnet_drop_rule_reset(void) +void lnet_drop_rule_reset(void) { struct lnet_drop_rule *rule; int cpt; @@ -958,13 +957,14 @@ lnet_delay_rule_del(struct lnet_nid *src, struct lnet_nid *dst, bool shutdown) bool cleanup; ENTRY; - if (shutdown) - src = dst = 0; - mutex_lock(&delay_dd.dd_mutex); lnet_net_lock(LNET_LOCK_EX); list_for_each_entry_safe(rule, tmp, &the_lnet.ln_delay_rules, dl_link) { + CDEBUG(D_NET, "src %s dst %s fa_src %s fa_dst %s\n", + libcfs_nidstr(src), libcfs_nidstr(dst), + libcfs_nidstr(&rule->dl_attr.fa_src), + libcfs_nidstr(&rule->dl_attr.fa_dst)); if (!(LNET_NID_IS_ANY(src) || nid_same(&rule->dl_attr.fa_src, src))) continue; diff --git a/lnet/lnet/nidstrings.c b/lnet/lnet/nidstrings.c index e360467..6e409ad 100644 --- a/lnet/lnet/nidstrings.c +++ b/lnet/lnet/nidstrings.c @@ -1513,3 +1513,18 @@ libcfs_str2anynid(lnet_nid_t *nidp, const char *str) return *nidp != LNET_NID_ANY; } 
EXPORT_SYMBOL(libcfs_str2anynid); + +int +libcfs_stranynid(struct lnet_nid *nid, const char *str) +{ + if (!strcmp(str, "*")) { + *nid = LNET_ANY_NID; + return 1; + } + + if (libcfs_strnid(nid, str)) + *nid = LNET_ANY_NID; + + return !LNET_NID_IS_ANY(nid); +} +EXPORT_SYMBOL(libcfs_stranynid); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 6df3100..5ed6ede 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -528,14 +528,58 @@ free_reply: return rc == 1 ? 0 : rc; } +static int +fault_attr_parse_nid(char *str, struct lnet_nid *nid) +{ + int rc = 0; + __u32 net; + + if (!str) { + *nid = LNET_ANY_NID; + return 0; + } + + if (strlen(str) > 2 && str[0] == '*' && str[1] == '@') { + net = libcfs_str2net(str + 2); + if (net == LNET_NET_ANY) { + rc = -EINVAL; + goto failed; + } + + lnet_nid4_to_nid(LNET_MKNID(net, LNET_NIDADDR(LNET_NID_ANY)), + nid); + } else { + rc = libcfs_stranynid(nid, str); + if (rc != 1) + rc = -EINVAL; + } +failed: + if (rc < 0) + fprintf(stderr, "Invalid NID: %s\n", str); + + return rc; +} + static char * fault_opc_to_str(__u32 opc) { switch (opc) { + case LNET_CTL_DROP_ADD: + return "add drop rule"; + case LNET_CTL_DELAY_ADD: + return "add delay rule"; case LNET_CTL_DROP_LIST: return "list drop rule"; case LNET_CTL_DELAY_LIST: return "list delay rules"; + case LNET_CTL_DROP_DEL: + return "delete drop rule"; + case LNET_CTL_DELAY_DEL: + return "delete delay rule"; + case LNET_CTL_DROP_RESET: + return "drop_reset rule"; + case LNET_CTL_DELAY_RESET: + return "delay reset_rule"; default: return "unrecognized command"; } @@ -546,14 +590,56 @@ int yaml_lnet_fault_rule(yaml_document_t *results, __u32 opc, char *src, struct lnet_fault_attr *attr) { struct nl_sock *sk = NULL; - char num[INT_STRING_LEN]; + struct lnet_nid fa_local_nid; + struct lnet_nid fa_src; + struct lnet_nid fa_dst; const char *msg = NULL; - int flags = NLM_F_DUMP; yaml_emitter_t output; 
yaml_parser_t reply; yaml_event_t event; - int rc; + int flags, rc; + switch (opc) { + case LNET_CTL_DROP_ADD: + case LNET_CTL_DELAY_ADD: + flags = NLM_F_CREATE; + break; + case LNET_CTL_DROP_RESET: + case LNET_CTL_DELAY_RESET: + flags = NLM_F_REPLACE; + break; + case LNET_CTL_DROP_LIST: + case LNET_CTL_DELAY_LIST: + flags = NLM_F_DUMP; + break; + case LNET_CTL_DROP_DEL: + case LNET_CTL_DELAY_DEL: + default: + flags = 0; + break; + } + + if (opc == LNET_CTL_DROP_LIST || opc == LNET_CTL_DELAY_LIST || + opc == LNET_CTL_DROP_RESET || opc == LNET_CTL_DELAY_RESET) + goto skip_options; + + rc = fault_attr_parse_nid(src, &fa_src); + if (rc < 0) + return rc; + + rc = fault_attr_parse_nid(dst, &fa_dst); + if (rc < 0) + return rc; + + if (local_nid) { + rc = fault_attr_parse_nid(local_nid, &fa_local_nid); + if (rc < 0) + return rc; + } else { + fa_local_nid = LNET_ANY_NID; + } + +skip_options: /* Create Netlink emitter to send request to kernel */ sk = nl_socket_alloc(); if (!sk) @@ -592,25 +678,97 @@ int yaml_lnet_fault_rule(yaml_document_t *results, __u32 opc, char *src, if (rc == 0) goto emitter_error; - yaml_scalar_event_initialize(&event, NULL, - (yaml_char_t *)YAML_STR_TAG, - (yaml_char_t *)"rule_type", - strlen("rule_type"), 1, 0, - YAML_PLAIN_SCALAR_STYLE); - rc = yaml_emitter_emit(&output, &event); + rc = lnet_yaml_uint_mapping(&event, &output, "rule_type", &opc, + sizeof(__u32)); if (rc == 0) goto emitter_error; - snprintf(num, sizeof(num), "%d", opc); - yaml_scalar_event_initialize(&event, NULL, - (yaml_char_t *)YAML_INT_TAG, - (yaml_char_t *)num, - strlen(num), 1, 0, - YAML_PLAIN_SCALAR_STYLE); - rc = yaml_emitter_emit(&output, &event); + if (opc == LNET_CTL_DROP_LIST || opc == LNET_CTL_DELAY_LIST || + opc == LNET_CTL_DROP_RESET || opc == LNET_CTL_DELAY_RESET) + goto yaml_mapping_end_event; + + rc = lnet_yaml_str_mapping(&event, &output, "fa_src", + libcfs_nidstr(&fa_src)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_str_mapping(&event, &output, 
"fa_dst", + libcfs_nidstr(&fa_dst)); + if (rc == 0) + goto emitter_error; + + if (opc == LNET_CTL_DROP_DEL || opc == LNET_CTL_DELAY_DEL) + goto yaml_mapping_end_event; + + rc = lnet_yaml_str_mapping(&event, &output, "fa_local_nid", + libcfs_nidstr(&fa_local_nid)); if (rc == 0) goto emitter_error; + rc = lnet_yaml_uint_mapping(&event, &output, "fa_ptl_mask", + &attr->fa_ptl_mask, + sizeof(attr->fa_ptl_mask)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_uint_mapping(&event, &output, "fa_msg_mask", + &attr->fa_msg_mask, + sizeof(attr->fa_msg_mask)); + if (rc == 0) + goto emitter_error; + + if (opc == LNET_CTL_DROP_ADD) { + rc = lnet_yaml_uint_mapping(&event, &output, "da_rate", + &attr->u.drop.da_rate, + sizeof(attr->u.drop.da_rate)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_uint_mapping(&event, &output, "da_interval", + &attr->u.drop.da_interval, + sizeof(attr->u.drop.da_interval)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_uint_mapping(&event, &output, + "da_health_error_mask", + &attr->u.drop.da_health_error_mask, + sizeof(attr->u.drop.da_health_error_mask)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_bool_mapping(&event, &output, "da_random", + attr->u.drop.da_random ? "True" : + "False"); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_bool_mapping(&event, &output, "da_drop_all", + attr->u.drop.da_drop_all ? 
"True" : + "False"); + if (rc == 0) + goto emitter_error; + } else if (opc == LNET_CTL_DELAY_ADD) { + rc = lnet_yaml_uint_mapping(&event, &output, "la_rate", + &attr->u.delay.la_rate, + sizeof(attr->u.delay.la_rate)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_uint_mapping(&event, &output, "la_interval", + &attr->u.delay.la_interval, + sizeof(attr->u.delay.la_interval)); + if (rc == 0) + goto emitter_error; + + rc = lnet_yaml_uint_mapping(&event, &output, "la_latency", + &attr->u.delay.la_latency, + sizeof(attr->u.delay.la_latency)); + if (rc == 0) + goto emitter_error; + } + +yaml_mapping_end_event: yaml_mapping_end_event_initialize(&event); rc = yaml_emitter_emit(&output, &event); if (rc == 0) @@ -6115,3 +6273,130 @@ int lustre_yaml_exec(char *f, int len, struct cYAML **show_rc, return lustre_yaml_cb_helper(f, len, lookup_exec_tbl, show_rc, err_rc); } + +static int +val_to_str(char *buf, int buf_size, const void *val, int size, + bool is_unsigned) +{ + int rc = 0; + + if (is_unsigned) { + switch (size) { + case sizeof(__u8): + rc = snprintf(buf, buf_size, "%u", *((__u8 *)val)); + break; + case sizeof(__u16): + rc = snprintf(buf, buf_size, "%u", *((__u16 *)val)); + break; + case sizeof(__u32): + rc = snprintf(buf, buf_size, "%u", *((__u32 *)val)); + break; + case sizeof(__u64): + rc = snprintf(buf, buf_size, "%llu", *((__u64 *)val)); + break; + default: + return -EINVAL; + } + } else { + switch (size) { + case sizeof(__s8): + rc = snprintf(buf, buf_size, "%d", *((__s8 *)val)); + break; + case sizeof(__s16): + rc = snprintf(buf, buf_size, "%d", *((__s16 *)val)); + break; + case sizeof(__s32): + rc = snprintf(buf, buf_size, "%d", *((__s32 *)val)); + break; + case sizeof(__s64): + rc = snprintf(buf, buf_size, "%lld", *((__s64 *)val)); + break; + default: + return -EINVAL; + } + } + + return (rc >= 0 && rc < buf_size) ? 
0 : -EOVERFLOW; +} + +static int +lnet_yaml_event_helper(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, yaml_char_t *tag, const void *val, + int size, bool is_unsigned) +{ + int rc; + + yaml_scalar_event_initialize(event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)key, + strlen(key), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + rc = yaml_emitter_emit(emitter, event); + if (rc == 0) + return rc; + + if (strcmp((char *)tag, YAML_INT_TAG) == 0) { + char num[INT_STRING_LEN]; + + if (val_to_str(num, INT_STRING_LEN, val, size, is_unsigned)) + return 0; + + yaml_scalar_event_initialize(event, NULL, + (yaml_char_t *)YAML_INT_TAG, + (yaml_char_t *)num, + strlen(num), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + } else if (strcmp((char *)tag, YAML_STR_TAG) == 0) { + yaml_scalar_event_initialize(event, NULL, + (yaml_char_t *)YAML_STR_TAG, + (yaml_char_t *)val, + strlen((char *)val), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + } else if (strcmp((char *)tag, YAML_BOOL_TAG) == 0) { + yaml_scalar_event_initialize(event, NULL, + (yaml_char_t *)YAML_BOOL_TAG, + (yaml_char_t *)val, + strlen((char *)val), 1, 0, + YAML_PLAIN_SCALAR_STYLE); + } else { + return 0; + } + + return yaml_emitter_emit(emitter, event); +} + +int +lnet_yaml_int_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const void *val, int size) +{ + return lnet_yaml_event_helper(event, emitter, key, + (yaml_char_t *)YAML_INT_TAG, val, size, + false); +} + +int +lnet_yaml_uint_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const void *val, int size) +{ + return lnet_yaml_event_helper(event, emitter, key, + (yaml_char_t *)YAML_INT_TAG, val, size, + true); +} + +int +lnet_yaml_bool_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const char *val) +{ + return lnet_yaml_event_helper(event, emitter, key, + (yaml_char_t *)YAML_BOOL_TAG, val, 0, + false); +} + +int +lnet_yaml_str_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, 
const char *val) +{ + return lnet_yaml_event_helper(event, emitter, key, + (yaml_char_t *)YAML_STR_TAG, val, 0, + false); +} diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index e9e2198..613e1de 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -1014,4 +1014,17 @@ int lustre_lnet_show_peer_debug_info(char *peer_nid, int seq_no, int lustre_lnet_set_peer_state(int state, char *lpni_nid, int seq_no, struct cYAML **err_rc); +int +lnet_yaml_int_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const void *val, int size); +int +lnet_yaml_uint_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const void *val, int size); +int +lnet_yaml_bool_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const char *val); +int +lnet_yaml_str_mapping(yaml_event_t *event, yaml_emitter_t *emitter, + const char *key, const char *val); + #endif /* LIB_LNET_CONFIG_API_H */ diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 7b59686..0856b0b 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -32,9 +32,11 @@ static int jt_config_lnet(int argc, char **argv); static int jt_unconfig_lnet(int argc, char **argv); static int jt_add_route(int argc, char **argv); static int jt_add_ni(int argc, char **argv); +static int jt_add_fault(int argc, char **argv); static int jt_set_routing(int argc, char **argv); static int jt_del_route(int argc, char **argv); static int jt_del_ni(int argc, char **argv); +static int jt_del_fault(int argc, char **argv); static int jt_show_route(int argc, char **argv); static int jt_show_net(int argc, char **argv); static int jt_show_routing(int argc, char **argv); @@ -55,6 +57,7 @@ static int jt_set_rtr_sensitivity(int argc, char **argv); static int jt_set_hsensitivity(int argc, char **argv); static int jt_set_max_recovery_ping_interval(int argc, char **argv); static int jt_reset_stats(int argc, 
char **argv); +static int jt_reset_fault(int argc, char **argv); static int jt_add_peer_nid(int argc, char **argv); static int jt_del_peer_nid(int argc, char **argv); static int jt_set_max_intf(int argc, char **argv); @@ -299,6 +302,22 @@ command_t udsp_cmds[] = { }; command_t fault_cmds[] = { + {"add", jt_add_fault, 0, "add LNet fault rule\n" + "\t--rule_type: Add rules of type t.\n" + "\t--source nid: Add rule entry with source NID.\n" + "\t--dest nid: Add rule entry with destination NID.\n" + "\t--rate: Rule entry rate.\n" + "\t--interval: How long to run the rule.\n" + "\t--portal: Rule portal.\n" + "\t--message: Type of message .\n" + "\t--health_error: Act on a specific health error (drop only).\n" + "\t--latency: Delay sending a message (delay only).\n"}, + {"del", jt_del_fault, 0, "delete LNet fault rule\n" + "\t--rule_type: Delete all rules of type t.\n" + "\t--source nid: Delete rule entry with source NID\n" + "\t--dest nid: Delete rule entry with destination NID\n"}, + {"reset", jt_reset_fault, 0, "reset_fault\n" + "\t--rule_type t: Reset the LNet rule t.\n"}, {"show", jt_show_fault, 0, "show fault rules\n" "\t--rule_type t: Show LNet fault rules of type t.\n"}, { 0, 0, 0, NULL } @@ -5494,6 +5513,312 @@ static int jt_del_udsp(int argc, char **argv) return rc; } +static int +fault_attr_nid_parse(char *str, lnet_nid_t *nid_p) +{ + lnet_nid_t nid; + __u32 net; + int rc = 0; + + /* NB: can't support range ipaddress except * and *@net */ + if (strlen(str) > 2 && str[0] == '*' && str[1] == '@') { + net = libcfs_str2net(str + 2); + if (net == LNET_NET_ANY) + goto failed; + + nid = LNET_MKNID(net, LNET_NIDADDR(LNET_NID_ANY)); + } else { + rc = libcfs_str2anynid(&nid, str); + if (!rc) + goto failed; + } + + *nid_p = nid; + return 0; +failed: + fprintf(stderr, "Invalid NID : %s\n", str); + return -1; +} + +static int +fault_attr_health_error_parse(char *error, __u32 *mask) +{ + if (!strcasecmp(error, "local_interrupt")) { + *mask |= HSTATUS_LOCAL_INTERRUPT_BIT; + 
return 0; + } + if (!strcasecmp(error, "local_dropped")) { + *mask |= HSTATUS_LOCAL_DROPPED_BIT; + return 0; + } + if (!strcasecmp(error, "local_aborted")) { + *mask |= HSTATUS_LOCAL_ABORTED_BIT; + return 0; + } + if (!strcasecmp(error, "local_no_route")) { + *mask |= HSTATUS_LOCAL_NO_ROUTE_BIT; + return 0; + } + if (!strcasecmp(error, "local_error")) { + *mask |= HSTATUS_LOCAL_ERROR_BIT; + return 0; + } + if (!strcasecmp(error, "local_timeout")) { + *mask |= HSTATUS_LOCAL_TIMEOUT_BIT; + return 0; + } + if (!strcasecmp(error, "remote_error")) { + *mask |= HSTATUS_REMOTE_ERROR_BIT; + return 0; + } + if (!strcasecmp(error, "remote_dropped")) { + *mask |= HSTATUS_REMOTE_DROPPED_BIT; + return 0; + } + if (!strcasecmp(error, "remote_timeout")) { + *mask |= HSTATUS_REMOTE_TIMEOUT_BIT; + return 0; + } + if (!strcasecmp(error, "network_timeout")) { + *mask |= HSTATUS_NETWORK_TIMEOUT_BIT; + return 0; + } + if (!strcasecmp(error, "random")) { + *mask = HSTATUS_RANDOM; + return 0; + } + + return -1; +} + +static int jt_add_fault(int argc, char **argv) +{ + const char *const short_options = "r:s:d:o:r:i:l:p:m:e:nx"; + static const struct option long_options[] = { + { .name = "rule_type", .has_arg = required_argument, .val = 't' }, + { .name = "source", .has_arg = required_argument, .val = 's' }, + { .name = "dest", .has_arg = required_argument, .val = 'd' }, + { .name = "rate", .has_arg = required_argument, .val = 'r' }, + { .name = "interval", .has_arg = required_argument, .val = 'i' }, + { .name = "random", .has_arg = no_argument, .val = 'n' }, + { .name = "latency", .has_arg = required_argument, .val = 'l' }, + { .name = "portal", .has_arg = required_argument, .val = 'p' }, + { .name = "message", .has_arg = required_argument, .val = 'm' }, + { .name = "health_error", .has_arg = required_argument, .val = 'e' }, + { .name = "local_nid", .has_arg = required_argument, .val = 'o' }, + { .name = "drop_all", .has_arg = no_argument, .val = 'x' }, + { .name = NULL } + }; + int opc = 
0, opt, rc2, rc = 0; + struct lnet_fault_attr attr; + yaml_document_t results; + yaml_emitter_t debug; + + rc = check_cmd(fault_cmds, "fault", "add", 2, argc, argv); + if (rc < 0) + return rc; + + attr.fa_local_nid = LNET_NID_ANY; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 't': + if (strcmp(optarg, "delay") == 0) + opc = LNET_CTL_DELAY_ADD; + else if (strcmp(optarg, "drop") == 0) + opc = LNET_CTL_DROP_ADD; + else + rc = -EINVAL; + break; + + case 'o': + rc2 = fault_attr_nid_parse(optarg, &attr.fa_local_nid); + if (rc2 < 0 && !rc) + rc = rc2; + break; + + case 's': /* source NID/NET */ + rc2 = fault_attr_nid_parse(optarg, &attr.fa_src); + if (rc2 < 0 && !rc) + rc = rc2; + break; + + case 'd': /* dest NID/NET */ + rc2 = fault_attr_nid_parse(optarg, &attr.fa_dst); + if (rc2 < 0 && !rc) + rc = rc2; + break; + + case 'r': /* drop rate */ + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_rate = strtoul(optarg, NULL, 0); + else + attr.u.delay.la_rate = strtoul(optarg, NULL, 0); + break; + + case 'e': + if (opc == LNET_CTL_DROP_ADD) { + rc2 = fault_attr_health_error_parse(optarg, + &attr.u.drop.da_health_error_mask); + if (rc2 < 0 && !rc) + rc = rc2; + } + break; + + case 'x': + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_drop_all = true; + break; + + case 'n': + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_random = true; + break; + + case 'i': /* time interval (# seconds) for message drop */ + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_interval = strtoul(optarg, + NULL, 0); + else + attr.u.delay.la_interval = strtoul(optarg, + NULL, 0); + break; + default: + return 0; + } + } + if (rc < 0) + return rc; + + rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL); + if (rc < 0) + return rc; + + rc = yaml_emitter_initialize(&debug); + if (rc == 0) + return -EINVAL; + + yaml_emitter_set_indent(&debug, LNET_DEFAULT_INDENT); + yaml_emitter_set_output_file(&debug, stdout); + rc = 
yaml_emitter_dump(&debug, &results); + + yaml_emitter_delete(&debug); + yaml_document_delete(&results); + + return rc == 0 ? -EINVAL : 0; +} + +static int jt_del_fault(int argc, char **argv) +{ + const char *const short_options = "t:"; + static const struct option long_options[] = { + { .name = "rule_type", .has_arg = required_argument, .val = 't' }, + { .name = "source", .has_arg = required_argument, .val = 's' }, + { .name = "dest", .has_arg = required_argument, .val = 'd' }, + { .name = NULL } + }; + struct lnet_fault_attr attr; + yaml_document_t results; + yaml_emitter_t debug; + int opc = 0, opt, rc; + + rc = check_cmd(fault_cmds, "fault", "del", 2, argc, argv); + if (rc < 0) + return rc; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 't': + if (strcmp(optarg, "delay") == 0) + opc = LNET_CTL_DELAY_DEL; + else if (strcmp(optarg, "drop") == 0) + opc = LNET_CTL_DROP_DEL; + else + rc = -EINVAL; + break; + case 's': /* source NID/NET */ + rc = fault_attr_nid_parse(optarg, &attr.fa_src); + break; + + case 'd': /* dest NID/NET */ + rc = fault_attr_nid_parse(optarg, &attr.fa_dst); + break; + default: + return 0; + } + } + if (rc < 0) + return rc; + + rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL); + if (rc < 0) + return rc; + + rc = yaml_emitter_initialize(&debug); + if (rc == 0) + return -EINVAL; + + yaml_emitter_set_indent(&debug, LNET_DEFAULT_INDENT); + yaml_emitter_set_output_file(&debug, stdout); + rc = yaml_emitter_dump(&debug, &results); + yaml_emitter_delete(&debug); + yaml_document_delete(&results); + + return rc == 0 ? 
-EINVAL : 0; +} + +static int jt_reset_fault(int argc, char **argv) +{ + const char *const short_options = "r:"; + static const struct option long_options[] = { + { .name = "rule_type", .has_arg = required_argument, .val = 't' }, + { .name = NULL } + }; + yaml_document_t results; + yaml_emitter_t debug; + int opc = 0, opt, rc; + + rc = check_cmd(fault_cmds, "fault", "reset", 2, argc, argv); + if (rc < 0) + return rc; + + while ((opt = getopt_long(argc, argv, short_options, + long_options, NULL)) != -1) { + switch (opt) { + case 't': + if (strcmp(optarg, "delay") == 0) + opc = LNET_CTL_DELAY_RESET; + else if (strcmp(optarg, "drop") == 0) + opc = LNET_CTL_DROP_RESET; + else + rc = -EINVAL; + break; + default: + return 0; + } + } + if (rc < 0) + return rc; + + rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL); + if (rc < 0) + return rc; + + rc = yaml_emitter_initialize(&debug); + if (rc == 0) + return -EINVAL; + + yaml_emitter_set_indent(&debug, LNET_DEFAULT_INDENT); + yaml_emitter_set_output_file(&debug, stdout); + rc = yaml_emitter_dump(&debug, &results); + yaml_emitter_delete(&debug); + yaml_document_delete(&results); + + return rc == 0 ? -EINVAL : 0; +} + int jt_show_fault(int argc, char **argv) { const char *const short_options = "t:"; diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 546c562..f3f4988 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -289,20 +289,9 @@ fi if [[ $NETTYPE =~ (tcp|o2ib)[0-9]* ]]; then if ! 
intf_has_ipv4 "${INTERFACES[0]}"; then always_except LU-5960 230 - always_except LU-9680 204 - always_except LU-9680 205 - always_except LU-9680 206 - always_except LU-9680 207 - always_except LU-9680 209 - always_except LU-9680 210 - always_except LU-9680 211 - always_except LU-9680 212 always_except LU-9680 213 - always_except LU-9680 216 - always_except LU-9680 218 always_except LU-9680 231 always_except LU-9680 302 - always_except LU-9680 500 always_except LU-14288 101 always_except LU-14288 103 always_except LU-17458 220 diff --git a/lustre/utils/portals.c b/lustre/utils/portals.c index 5475d98..2f063fd 100644 --- a/lustre/utils/portals.c +++ b/lustre/utils/portals.c @@ -2503,11 +2503,12 @@ fault_attr_health_error_parse(char *error, __u32 *mask) static int fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) { - struct libcfs_ioctl_data data = { { 0 } }; - struct lnet_fault_attr attr; + char *fa_src = NULL, *fa_dst = NULL, *fa_local_nid = NULL; + struct libcfs_ioctl_data data = { { 0 } }; + struct lnet_fault_attr attr; + yaml_document_t results; char *optstr; int rc; - static const struct option opts[] = { { .name = "source", .has_arg = required_argument, .val = 's' }, { .name = "dest", .has_arg = required_argument, .val = 'd' }, @@ -2520,7 +2521,8 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) { .name = "health_error", .has_arg = required_argument, .val = 'e' }, { .name = "local_nid", .has_arg = required_argument, .val = 'o' }, { .name = "drop_all", .has_arg = no_argument, .val = 'x' }, - { .name = NULL } }; + { .name = NULL } + }; if (argc == 1) { fprintf(stderr, @@ -2538,20 +2540,14 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) switch (c) { case 'o': - rc = fault_attr_nid_parse(optarg, &attr.fa_local_nid); - if (rc != 0) - goto getopt_failed; + fa_local_nid = optarg; break; case 's': /* source NID/NET */ - rc = fault_attr_nid_parse(optarg, &attr.fa_src); - if (rc != 0) - goto getopt_failed; + 
fa_src = optarg; break; case 'd': /* dest NID/NET */ - rc = fault_attr_nid_parse(optarg, &attr.fa_dst); - if (rc != 0) - goto getopt_failed; + fa_dst = optarg; break; case 'r': /* drop rate */ @@ -2642,15 +2638,36 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) } } - if (attr.fa_src == 0 || attr.fa_dst == 0) { + if (!(fa_src && fa_dst)) { fprintf(stderr, "Please provide both source and destination of %s rule\n", name); return -1; } - if (attr.fa_local_nid == 0) + rc = yaml_lnet_fault_rule(&results, opc, fa_src, fa_dst, fa_local_nid, + &attr); + if (rc <= 0) { + if (rc == -EOPNOTSUPP) + goto old_api; + return rc; + } +old_api: + rc = fault_attr_nid_parse(fa_src, &attr.fa_src); + if (rc) + goto getopt_failed; + + rc = fault_attr_nid_parse(fa_dst, &attr.fa_dst); + if (rc) + goto getopt_failed; + + if (fa_local_nid) { + rc = fault_attr_nid_parse(fa_local_nid, &attr.fa_local_nid); + if (rc) + goto getopt_failed; + } else { attr.fa_local_nid = LNET_NID_ANY; + } data.ioc_flags = opc; data.ioc_inllen1 = sizeof(attr); @@ -2695,15 +2712,18 @@ static int fault_simul_rule_del(__u32 opc, char *name, int argc, char **argv) { struct libcfs_ioctl_data data = { { 0 } }; - struct lnet_fault_attr attr; + struct lnet_fault_attr attr; + yaml_document_t results; + char *fa_src = NULL; + char *fa_dst = NULL; bool all = false; int rc; - static const struct option opts[] = { { .name = "source", .has_arg = required_argument, .val = 's' }, { .name = "dest", .has_arg = required_argument, .val = 'd' }, { .name = "all", .has_arg = no_argument, .val = 'a' }, - { .name = NULL } }; + { .name = NULL } + }; if (argc == 1) { fprintf(stderr, @@ -2720,20 +2740,13 @@ fault_simul_rule_del(__u32 opc, char *name, int argc, char **argv) switch (c) { case 's': - rc = fault_attr_nid_parse(optarg, &attr.fa_src); - if (rc != 0) - goto getopt_failed; + fa_src = optarg; break; case 'd': - rc = fault_attr_nid_parse(optarg, &attr.fa_dst); - if (rc != 0) - goto getopt_failed; + fa_dst = optarg; 
break; case 'a': - attr.fa_src = 0; - attr.fa_dst = 0; all = true; - break; default: fprintf(stderr, "error: %s: option '%s' unrecognized\n", @@ -2743,6 +2756,34 @@ fault_simul_rule_del(__u32 opc, char *name, int argc, char **argv) } optind = 1; + if (!all && !(fa_src && fa_dst)) { + fprintf(stderr, + "Failed, please provide source and destination of rule\n"); + return -1; + } else if (all && (fa_src || fa_dst)) { + fprintf(stderr, "'-s' or '-d' cannot be combined with '-a'\n"); + return -1; + } + + rc = yaml_lnet_fault_rule(&results, opc, fa_src, fa_dst, NULL, NULL); + if (rc <= 0) { + if (rc == -EOPNOTSUPP) + goto old_api; + return rc; + } +old_api: + if (fa_src) { + rc = fault_attr_nid_parse(fa_src, &attr.fa_src); + if (rc != 0) + goto getopt_failed; + } + + if (fa_dst) { + rc = fault_attr_nid_parse(fa_dst, &attr.fa_dst); + if (rc != 0) + goto getopt_failed; + } + data.ioc_flags = opc; data.ioc_inllen1 = sizeof(attr); data.ioc_inlbuf1 = (char *)&attr; @@ -2784,9 +2825,17 @@ jt_ptl_delay_del(int argc, char **argv) static int fault_simul_rule_reset(__u32 opc, char *name, int argc, char **argv) { - struct libcfs_ioctl_data data = { { 0 } }; - int rc; + struct libcfs_ioctl_data data = { { 0 } }; + yaml_document_t results; + int rc; + rc = yaml_lnet_fault_rule(&results, opc, NULL, NULL, NULL, NULL); + if (rc <= 0) { + if (rc == -EOPNOTSUPP) + goto old_api; + return rc; + } +old_api: LIBCFS_IOC_INIT(data); data.ioc_flags = opc; -- 1.8.3.1