Whamcloud - gitweb
LU-13606 lnet: Allow router to forward to healthier NID 98/38798/6
author Chris Horn <chris.horn@hpe.com>
Tue, 26 May 2020 18:47:50 +0000 (13:47 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 10 Jul 2020 16:51:32 +0000 (16:51 +0000)
When a final-hop router (aka edge router) is forwarding a message,
if both the originator and destination of the message are multi-rail
capable, then allow the router to choose a new destination lpni if
the one selected by the message originator is unhealthy or down.

Test-Parameters: trivial
HPE-bug-id: LUS-8905
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I4676edc0395584c9a8c396930f2db3d6ffd99eba
Reviewed-on: https://review.whamcloud.com/38798
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-move.c

index 1c690cb..f4adc2b 100644 (file)
@@ -891,8 +891,8 @@ lnet_peer_needs_push(struct lnet_peer *lp)
 }
 
 /*
- * A peer is alive if it satisfies the following two conditions:
- *  1. peer health >= LNET_MAX_HEALTH_VALUE * router_sensitivity_percentage
+ * A peer NI is alive if it satisfies the following two conditions:
+ *  1. peer NI health >= LNET_MAX_HEALTH_VALUE * router_sensitivity_percentage
  *  2. the cached NI status received when we discover the peer is UP
  */
 static inline bool
index 9cf30c2..7887494 100644 (file)
@@ -2614,6 +2614,8 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid,
        int cpt, rc;
        int md_cpt;
        __u32 send_case = 0;
+       bool final_hop;
+       bool mr_forwarding_allowed;
 
        memset(&send_data, 0, sizeof(send_data));
 
@@ -2697,17 +2699,49 @@ again:
        else
                send_case |= REMOTE_DST;
 
+       final_hop = false;
+       if (msg->msg_routing && (send_case & LOCAL_DST))
+               final_hop = true;
+
+       /* Determine whether to allow MR forwarding for this message.
+        * NB: MR forwarding is allowed if the message originator and the
+        * destination are both MR capable, and the destination lpni that was
+        * originally chosen by the originator is unhealthy or down.
+        * We check the MR capability of the destination further below
+        */
+       mr_forwarding_allowed = false;
+       if (final_hop) {
+               struct lnet_peer *src_lp;
+               struct lnet_peer_ni *src_lpni;
+
+               src_lpni = lnet_nid2peerni_locked(msg->msg_hdr.src_nid,
+                                                 LNET_NID_ANY, cpt);
+               /* We don't fail the send if we hit any errors here. We'll just
+                * try to send it via non-multi-rail criteria
+                */
+               if (!IS_ERR(src_lpni)) {
+                       src_lp = lpni->lpni_peer_net->lpn_peer;
+                       if (lnet_peer_is_multi_rail(src_lp) &&
+                           !lnet_is_peer_ni_alive(lpni))
+                               mr_forwarding_allowed = true;
+
+               }
+               CDEBUG(D_NET, "msg %p MR forwarding %s\n", msg,
+                      mr_forwarding_allowed ? "allowed" : "not allowed");
+       }
+
        /*
         * Deal with the peer as NMR in the following cases:
         * 1. the peer is NMR
         * 2. We're trying to recover a specific peer NI
-        * 3. I'm a router sending to the final destination
+        * 3. I'm a router sending to the final destination and MR forwarding is
+        *    not allowed for this message (as determined above).
         *    In this case the source of the message would've
         *    already selected the final destination so my job
         *    is to honor the selection.
         */
        if (!lnet_peer_is_multi_rail(peer) || msg->msg_recovery ||
-           (msg->msg_routing && (send_case & LOCAL_DST)))
+           (final_hop && !mr_forwarding_allowed))
                send_case |= NMR_DST;
        else
                send_case |= MR_DST;