Whamcloud - gitweb
LU-11470 lnet: drop all rule 05/33305/36
author: Amir Shehata <ashehata@whamcloud.com>
Thu, 4 Oct 2018 00:36:45 +0000 (17:36 -0700)
committer: Amir Shehata <ashehata@whamcloud.com>
Fri, 7 Jun 2019 18:19:15 +0000 (18:19 +0000)
Add a rule to drop all messages arriving on a specific interface.
This is useful for simulating failures on a specific router interface.

Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ic69f683fb2caf7a69a1d85428878c89b7b1ee3ad
Reviewed-on: https://review.whamcloud.com/33305
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Tested-by: Jenkins
lnet/include/lnet/lib-lnet.h
lnet/include/uapi/linux/lnet/lnetctl.h
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lnet/lnet/net_fault.c
lustre/utils/portals.c

index f1dbbff..b421990 100644 (file)
@@ -731,7 +731,8 @@ int lnet_fault_ctl(int cmd, struct libcfs_ioctl_data *data);
 int lnet_fault_init(void);
 void lnet_fault_fini(void);
 
-bool lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus);
+bool lnet_drop_rule_match(struct lnet_hdr *hdr, lnet_nid_t local_nid,
+                         enum lnet_msg_hstatus *hstatus);
 
 int lnet_delay_rule_add(struct lnet_fault_attr *attr);
 int lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown);
index cb4f153..cdf5849 100644 (file)
@@ -77,6 +77,10 @@ struct lnet_fault_attr {
        lnet_nid_t                      fa_src;
        /** destination NID of drop rule, see \a dr_src for details */
        lnet_nid_t                      fa_dst;
+       /** local NID. In case of router this is the NID we're receiving
+        * messages on
+        */
+       lnet_nid_t                      fa_local_nid;
        /**
         * Portal mask to drop, -1 means all portals, for example:
         * fa_ptl_mask = (1 << _LDLM_CB_REQUEST_PORTAL ) |
@@ -108,6 +112,8 @@ struct lnet_fault_attr {
                        __u32                   da_health_error_mask;
                        /** randomize error generation */
                        bool                    da_random;
+                       /** drop all messages if flag is set */
+                       bool                    da_drop_all;
                } drop;
                /** message latency simulation */
                struct {
index d4f1bd9..ffd41f6 100644 (file)
@@ -4235,7 +4235,7 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
        }
 
        if (!list_empty(&the_lnet.ln_drop_rules) &&
-           lnet_drop_rule_match(hdr, NULL)) {
+           lnet_drop_rule_match(hdr, ni->ni_nid, NULL)) {
                CDEBUG(D_NET, "%s, src %s, dst %s: Dropping %s to simulate"
                              "silent message loss\n",
                       libcfs_nid2str(from_nid), libcfs_nid2str(src_nid),
index ffaaad8..6541811 100644 (file)
@@ -906,11 +906,13 @@ lnet_send_error_simulation(struct lnet_msg *msg,
            return false;
 
        /* match only health rules */
-       if (!lnet_drop_rule_match(&msg->msg_hdr, hstatus))
+       if (!lnet_drop_rule_match(&msg->msg_hdr, LNET_NID_ANY,
+                                 hstatus))
                return false;
 
-       CDEBUG(D_NET, "src %sdst %s: %s simulate health error: %s\n",
+       CDEBUG(D_NET, "src %s(%s)->dst %s: %s simulate health error: %s\n",
                libcfs_nid2str(msg->msg_hdr.src_nid),
+               libcfs_nid2str(msg->msg_txni->ni_nid),
                libcfs_nid2str(msg->msg_hdr.dest_nid),
                lnet_msgtyp2str(msg->msg_type),
                lnet_health_error2str(*hstatus));
index 04c98d5..7fdd8df 100644 (file)
@@ -79,10 +79,12 @@ lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
 
 static bool
 lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
-                     lnet_nid_t dst, unsigned int type, unsigned int portal)
+                     lnet_nid_t local_nid, lnet_nid_t dst,
+                     unsigned int type, unsigned int portal)
 {
        if (!lnet_fault_nid_match(attr->fa_src, src) ||
-           !lnet_fault_nid_match(attr->fa_dst, dst))
+           !lnet_fault_nid_match(attr->fa_dst, dst) ||
+           !lnet_fault_nid_match(attr->fa_local_nid, local_nid))
                return false;
 
        if (!(attr->fa_msg_mask & (1 << type)))
@@ -344,15 +346,22 @@ lnet_fault_match_health(enum lnet_msg_hstatus *hstatus, __u32 mask)
  */
 static bool
 drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
-               lnet_nid_t dst, unsigned int type, unsigned int portal,
+               lnet_nid_t local_nid, lnet_nid_t dst,
+               unsigned int type, unsigned int portal,
                enum lnet_msg_hstatus *hstatus)
 {
        struct lnet_fault_attr  *attr = &rule->dr_attr;
        bool                     drop;
 
-       if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+       if (!lnet_fault_attr_match(attr, src, local_nid, dst, type, portal))
                return false;
 
+       if (attr->u.drop.da_drop_all) {
+               CDEBUG(D_NET, "set to drop all messages\n");
+               drop = true;
+               goto drop_matched;
+       }
+
        /*
         * if we're trying to match a health status error but it hasn't
         * been set in the rule, then don't match
@@ -402,6 +411,8 @@ drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
                }
        }
 
+drop_matched:
+
        if (drop) { /* drop this message, update counters */
                if (hstatus)
                        lnet_fault_match_health(hstatus,
@@ -418,7 +429,9 @@ drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
  * Check if message from \a src to \a dst can match any existing drop rule
  */
 bool
-lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus)
+lnet_drop_rule_match(struct lnet_hdr *hdr,
+                    lnet_nid_t local_nid,
+                    enum lnet_msg_hstatus *hstatus)
 {
        lnet_nid_t src = le64_to_cpu(hdr->src_nid);
        lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
@@ -437,7 +450,7 @@ lnet_drop_rule_match(struct lnet_hdr *hdr, enum lnet_msg_hstatus *hstatus)
 
        cpt = lnet_net_lock_current();
        list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
-               drop = drop_rule_match(rule, src, dst, typ, ptl,
+               drop = drop_rule_match(rule, src, local_nid, dst, typ, ptl,
                                       hstatus);
                if (drop)
                        break;
@@ -528,7 +541,8 @@ delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
        struct lnet_fault_attr  *attr = &rule->dl_attr;
        bool                     delay;
 
-       if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+       if (!lnet_fault_attr_match(attr, src, LNET_NID_ANY,
+                                  dst, type, portal))
                return false;
 
        /* match this rule, check delay rate now */
index 9466ea4..ca2eb34 100644 (file)
@@ -1383,6 +1383,8 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv)
        { .name = "portal",   .has_arg = required_argument, .val = 'p' },
        { .name = "message",  .has_arg = required_argument, .val = 'm' },
        { .name = "health_error",  .has_arg = required_argument, .val = 'e' },
+       { .name = "local_nid",  .has_arg = required_argument, .val = 'o' },
+       { .name = "drop_all",  .has_arg = no_argument, .val = 'x' },
        { .name = NULL } };
 
        if (argc == 1) {
@@ -1391,7 +1393,7 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv)
                return -1;
        }
 
-       optstr = opc == LNET_CTL_DROP_ADD ? "s:d:r:i:p:m:e:n" : "s:d:r:l:p:m:";
+       optstr = opc == LNET_CTL_DROP_ADD ? "s:d:o:r:i:p:m:e:nx" : "s:d:o:r:l:p:m:";
        memset(&attr, 0, sizeof(attr));
        while (1) {
                char c = getopt_long(argc, argv, optstr, opts, NULL);
@@ -1400,6 +1402,11 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv)
                        break;
 
                switch (c) {
+               case 'o':
+                       rc = fault_attr_nid_parse(optarg, &attr.fa_local_nid);
+                       if (rc != 0)
+                               goto getopt_failed;
+                       break;
                case 's': /* source NID/NET */
                        rc = fault_attr_nid_parse(optarg, &attr.fa_src);
                        if (rc != 0)
@@ -1428,6 +1435,11 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv)
                        }
                        break;
 
+               case 'x':
+                       if (opc == LNET_CTL_DROP_ADD)
+                               attr.u.drop.da_drop_all = true;
+                       break;
+
                case 'n':
                        if (opc == LNET_CTL_DROP_ADD)
                                attr.u.drop.da_random = true;
@@ -1502,6 +1514,9 @@ fault_simul_rule_add(__u32 opc, char *name, int argc, char **argv)
                return -1;
        }
 
+       if (attr.fa_local_nid == 0)
+               attr.fa_local_nid = LNET_NID_ANY;
+
        data.ioc_flags = opc;
        data.ioc_inllen1 = sizeof(attr);
        data.ioc_inlbuf1 = (char *)&attr;