From: Amir Shehata Date: Thu, 4 Oct 2018 00:36:45 +0000 (-0700) Subject: LU-11470 lnet: drop all rule X-Git-Tag: 2.12.55~25^2~8 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=deb31c2ffad581a98a7bcff7ed3900e05eabb7fb LU-11470 lnet: drop all rule Add a rule to drop all messages arriving on a specific interface. This is useful for simulating failures on a specific router interface. Test-Parameters: forbuildonly Signed-off-by: Amir Shehata Change-Id: Ic69f683fb2caf7a69a1d85428878c89b7b1ee3ad Reviewed-on: https://review.whamcloud.com/33305 Reviewed-by: Olaf Weber Tested-by: Jenkins --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index f1dbbff..b421990 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -731,7 +731,8 @@ int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data); int lnet_fault_init(void); void lnet_fault_fini(void); -bool lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus); +bool lnet_drop_rule_match(struct lnet_hdr *hdr, lnet_nid_t local_nid, + enum lnet_msg_hstatus *hstatus); int lnet_delay_rule_add(struct lnet_fault_attr *attr); int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown); diff --git a/lnet/include/uapi/linux/lnet/lnetctl.h b/lnet/include/uapi/linux/lnet/lnetctl.h index cb4f153..cdf5849 100644 --- a/lnet/include/uapi/linux/lnet/lnetctl.h +++ b/lnet/include/uapi/linux/lnet/lnetctl.h @@ -77,6 +77,10 @@ struct lnet_fault_attr { lnet_nid_t fa_src; /** destination NID of drop rule, see \a dr_src for details */ lnet_nid_t fa_dst; + /** local NID. 
In case of router this is the NID we're receiving + * messages on + */ + lnet_nid_t fa_local_nid; /** * Portal mask to drop, -1 means all portals, for example: * fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) | @@ -108,6 +112,8 @@ struct lnet_fault_attr { __u32 da_health_error_mask; /** randomize error generation */ bool da_random; + /** drop all messages if flag is set */ + bool da_drop_all; } drop; /** message latency simulation */ struct { diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index d4f1bd9..ffd41f6 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -4235,7 +4235,7 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, } if (!list_empty(&the_lnet.ln_drop_rules) && - lnet_drop_rule_match(hdr, NULL)) { + lnet_drop_rule_match(hdr, ni->ni_nid, NULL)) { CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate" "silent message loss\n", libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index ffaaad8..6541811 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -906,11 +906,13 @@ lnet_send_error_simulation(struct lnet_msg *msg, return false; /* match only health rules */ - if (!lnet_drop_rule_match(&msg->msg_hdr, hstatus)) + if (!lnet_drop_rule_match(&msg->msg_hdr, LNET_NID_ANY, + hstatus)) return false; - CDEBUG(D_NET, "src %s, dst %s: %s simulate health error: %s\n", + CDEBUG(D_NET, "src %s(%s)->dst %s: %s simulate health error: %s\n", libcfs_nid2str(msg->msg_hdr.src_nid), + libcfs_nid2str(msg->msg_txni->ni_nid), libcfs_nid2str(msg->msg_hdr.dest_nid), lnet_msgtyp2str(msg->msg_type), lnet_health_error2str(*hstatus)); diff --git a/lnet/lnet/net_fault.c b/lnet/lnet/net_fault.c index 04c98d5..7fdd8df 100644 --- a/lnet/lnet/net_fault.c +++ b/lnet/lnet/net_fault.c @@ -79,10 +79,12 @@ lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid) static bool lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, 
unsigned int portal) + lnet_nid_t local_nid, lnet_nid_t dst, + unsigned int type, unsigned int portal) { if (!lnet_fault_nid_match(attr->fa_src, src) || - !lnet_fault_nid_match(attr->fa_dst, dst)) + !lnet_fault_nid_match(attr->fa_dst, dst) || + !lnet_fault_nid_match(attr->fa_local_nid, local_nid)) return false; if (!(attr->fa_msg_mask & (1 << type))) @@ -344,15 +346,22 @@ lnet_fault_match_health(enum lnet_msg_hstatus *hstatus, __u32 mask) */ static bool drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, - lnet_nid_t dst, unsigned int type, unsigned int portal, + lnet_nid_t local_nid, lnet_nid_t dst, + unsigned int type, unsigned int portal, enum lnet_msg_hstatus *hstatus) { struct lnet_fault_attr *attr = &rule->dr_attr; bool drop; - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) + if (!lnet_fault_attr_match(attr, src, local_nid, dst, type, portal)) return false; + if (attr->u.drop.da_drop_all) { + CDEBUG(D_NET, "set to drop all messages\n"); + drop = true; + goto drop_matched; + } + /* * if we're trying to match a health status error but it hasn't * been set in the rule, then don't match @@ -402,6 +411,8 @@ drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, } } +drop_matched: + if (drop) { /* drop this message, update counters */ if (hstatus) lnet_fault_match_health(hstatus, @@ -418,7 +429,9 @@ drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src, * Check if message from \a src to \a dst can match any existed drop rule */ bool -lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus) +lnet_drop_rule_match(struct lnet_hdr *hdr, + lnet_nid_t local_nid, + enum lnet_msg_hstatus *hstatus) { lnet_nid_t src = le64_to_cpu(hdr->src_nid); lnet_nid_t dst = le64_to_cpu(hdr->dest_nid); @@ -437,7 +450,7 @@ lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus) cpt = lnet_net_lock_current(); list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) { - drop = drop_rule_match(rule, src, dst, typ, ptl, + 
drop = drop_rule_match(rule, src, local_nid, dst, typ, ptl, hstatus); if (drop) break; @@ -528,7 +541,8 @@ delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src, struct lnet_fault_attr *attr = &rule->dl_attr; bool delay; - if (!lnet_fault_attr_match(attr, src, dst, type, portal)) + if (!lnet_fault_attr_match(attr, src, LNET_NID_ANY, + dst, type, portal)) return false; /* match this rule, check delay rate now */ diff --git a/lustre/utils/portals.c b/lustre/utils/portals.c index 9466ea4..ca2eb34 100644 --- a/lustre/utils/portals.c +++ b/lustre/utils/portals.c @@ -1383,6 +1383,8 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) { .name = "portal", .has_arg = required_argument, .val = 'p' }, { .name = "message", .has_arg = required_argument, .val = 'm' }, { .name = "health_error", .has_arg = required_argument, .val = 'e' }, + { .name = "local_nid", .has_arg = required_argument, .val = 'o' }, + { .name = "drop_all", .has_arg = no_argument, .val = 'x' }, { .name = NULL } }; if (argc == 1) { @@ -1391,7 +1393,7 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) return -1; } - optstr = opc == LNET_CTL_DROP_ADD ? "s:d:r:i:p:m:e:n" : "s:d:r:l:p:m:"; + optstr = opc == LNET_CTL_DROP_ADD ? 
"s:d:o:r:i:p:m:e:nx" : "s:d:o:r:l:p:m:"; memset(&attr, 0, sizeof(attr)); while (1) { char c = getopt_long(argc, argv, optstr, opts, NULL); @@ -1400,6 +1402,11 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) break; switch (c) { + case 'o': + rc = fault_attr_nid_parse(optarg, &attr.fa_local_nid); + if (rc != 0) + goto getopt_failed; + break; case 's': /* source NID/NET */ rc = fault_attr_nid_parse(optarg, &attr.fa_src); if (rc != 0) @@ -1428,6 +1435,11 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) } break; + case 'x': + if (opc == LNET_CTL_DROP_ADD) + attr.u.drop.da_drop_all = true; + break; + case 'n': if (opc == LNET_CTL_DROP_ADD) attr.u.drop.da_random = true; @@ -1502,6 +1514,9 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv) return -1; } + if (attr.fa_local_nid == 0) + attr.fa_local_nid = LNET_NID_ANY; + data.ioc_flags = opc; data.ioc_inllen1 = sizeof(attr); data.ioc_inlbuf1 = (char *)&attr;