Add a rule to drop all messages arriving on a specific interface.
This is useful for simulating failures on a specific router interface.
Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ic69f683fb2caf7a69a1d85428878c89b7b1ee3ad
Reviewed-on: https://review.whamcloud.com/33305
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Tested-by: Jenkins
int lnet_fault_init(void);
void lnet_fault_fini(void);
-bool lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus);
+bool lnet_drop_rule_match(struct lnet_hdr *hdr, lnet_nid_t local_nid,
+ enum lnet_msg_hstatus *hstatus);
int lnet_delay_rule_add(struct lnet_fault_attr *attr);
int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
lnet_nid_t fa_src;
/** destination NID of drop rule, see \a dr_src for details */
lnet_nid_t fa_dst;
+ /** local NID. In case of a router this is the NID we're receiving
+ * messages on
+ */
+ lnet_nid_t fa_local_nid;
/**
* Portal mask to drop, -1 means all portals, for example:
* fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) |
__u32 da_health_error_mask;
/** randomize error generation */
bool da_random;
+ /** drop all messages if flag is set */
+ bool da_drop_all;
} drop;
/** message latency simulation */
struct {
}
if (!list_empty(&the_lnet.ln_drop_rules) &&
- lnet_drop_rule_match(hdr, NULL)) {
+ lnet_drop_rule_match(hdr, ni->ni_nid, NULL)) {
CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate"
"silent message loss\n",
libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
return false;
/* match only health rules */
- if (!lnet_drop_rule_match(&msg->msg_hdr, hstatus))
+ if (!lnet_drop_rule_match(&msg->msg_hdr, LNET_NID_ANY,
+ hstatus))
return false;
- CDEBUG(D_NET, "src %s, dst %s: %s simulate health error: %s\n",
+ CDEBUG(D_NET, "src %s(%s)->dst %s: %s simulate health error: %s\n",
libcfs_nid2str(msg->msg_hdr.src_nid),
+ libcfs_nid2str(msg->msg_txni->ni_nid),
libcfs_nid2str(msg->msg_hdr.dest_nid),
lnet_msgtyp2str(msg->msg_type),
lnet_health_error2str(*hstatus));
static bool
lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
+ lnet_nid_t local_nid, lnet_nid_t dst,
+ unsigned int type, unsigned int portal)
{
if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
+ !lnet_fault_nid_match(attr->fa_dst, dst) ||
+ !lnet_fault_nid_match(attr->fa_local_nid, local_nid))
return false;
if (!(attr->fa_msg_mask & (1 << type)))
*/
static bool
drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal,
+ lnet_nid_t local_nid, lnet_nid_t dst,
+ unsigned int type, unsigned int portal,
enum lnet_msg_hstatus *hstatus)
{
struct lnet_fault_attr *attr = &rule->dr_attr;
bool drop;
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, local_nid, dst, type, portal))
return false;
+ if (attr->u.drop.da_drop_all) {
+ CDEBUG(D_NET, "set to drop all messages\n");
+ drop = true;
+ goto drop_matched;
+ }
+
/*
* if we're trying to match a health status error but it hasn't
* been set in the rule, then don't match
}
}
+drop_matched:
+
if (drop) { /* drop this message, update counters */
if (hstatus)
lnet_fault_match_health(hstatus,
* Check if message from \a src to \a dst can match any existed drop rule
*/
bool
-lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus)
+lnet_drop_rule_match(struct lnet_hdr *hdr,
+ lnet_nid_t local_nid,
+ enum lnet_msg_hstatus *hstatus)
{
lnet_nid_t src = le64_to_cpu(hdr->src_nid);
lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
cpt = lnet_net_lock_current();
list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl,
+ drop = drop_rule_match(rule, src, local_nid, dst, typ, ptl,
hstatus);
if (drop)
break;
struct lnet_fault_attr *attr = &rule->dl_attr;
bool delay;
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, LNET_NID_ANY,
+ dst, type, portal))
return false;
/* match this rule, check delay rate now */
{ .name = "portal", .has_arg = required_argument, .val = 'p' },
{ .name = "message", .has_arg = required_argument, .val = 'm' },
{ .name = "health_error", .has_arg = required_argument, .val = 'e' },
+ { .name = "local_nid", .has_arg = required_argument, .val = 'o' },
+ { .name = "drop_all", .has_arg = no_argument, .val = 'x' },
{ .name = NULL } };
if (argc == 1) {
return -1;
}
- optstr = opc == LNET_CTL_DROP_ADD ? "s:d:r:i:p:m:e:n" : "s:d:r:l:p:m:";
+ optstr = opc == LNET_CTL_DROP_ADD ? "s:d:o:r:i:p:m:e:nx" : "s:d:o:r:l:p:m:";
memset(&attr, 0, sizeof(attr));
while (1) {
char c = getopt_long(argc, argv, optstr, opts, NULL);
break;
switch (c) {
+ case 'o':
+ rc = fault_attr_nid_parse(optarg, &attr.fa_local_nid);
+ if (rc != 0)
+ goto getopt_failed;
+ break;
case 's': /* source NID/NET */
rc = fault_attr_nid_parse(optarg, &attr.fa_src);
if (rc != 0)
}
break;
+ case 'x':
+ if (opc == LNET_CTL_DROP_ADD)
+ attr.u.drop.da_drop_all = true;
+ break;
+
case 'n':
if (opc == LNET_CTL_DROP_ADD)
attr.u.drop.da_random = true;
return -1;
}
+ if (attr.fa_local_nid == 0)
+ attr.fa_local_nid = LNET_NID_ANY;
+
data.ioc_flags = opc;
data.ioc_inllen1 = sizeof(attr);
data.ioc_inlbuf1 = (char *)&attr;