Whamcloud - gitweb
LU-13713 lnet: check rtr_nid is a gateway 75/39175/2
author Amir Shehata <ashehata@whamcloud.com>
Wed, 24 Jun 2020 23:46:58 +0000 (16:46 -0700)
committer Oleg Drokin <green@whamcloud.com>
Fri, 10 Jul 2020 16:53:06 +0000 (16:53 +0000)
The rtr_nid is specified for all REPLY/ACK. However, it is possible
for the route through the gateway specified by rtr_nid to be removed.
In this case we don't want to use it. We should look up alternative
paths.

This patch checks if the peer looked up is indeed a gateway. If it's
not a gateway then we attempt to find another path. There is no need
to fail right away. It's not a hard requirement to fail if the default
rtr_nid is not valid.

Test-Parameters: trivial
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ic1c93b7c6c2c8060e2cfeb8fb1cb875dbc3010f7
Reviewed-on: https://review.whamcloud.com/39175
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c

index 7887494..7baef67 100644 (file)
@@ -1981,6 +1981,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        struct lnet_route *last_route = NULL;
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_peer_ni *gwni = NULL;
+       bool route_found = false;
        lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
                (sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid :
                LNET_NID_ANY;
@@ -1994,15 +1995,20 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
         */
        if (sd->sd_rtr_nid != LNET_NID_ANY) {
                gwni = lnet_find_peer_ni_locked(sd->sd_rtr_nid);
-               if (!gwni) {
-                       CERROR("No peer NI for gateway %s\n",
+               if (gwni) {
+                       gw = gwni->lpni_peer_net->lpn_peer;
+                       lnet_peer_ni_decref_locked(gwni);
+                       if (gw->lp_rtr_refcount) {
+                               local_lnet = LNET_NIDNET(sd->sd_rtr_nid);
+                               route_found = true;
+                       }
+               } else {
+                       CWARN("No peer NI for gateway %s. Attempting to find an alternative route.\n",
                               libcfs_nid2str(sd->sd_rtr_nid));
-                       return -EHOSTUNREACH;
                }
-               gw = gwni->lpni_peer_net->lpn_peer;
-               lnet_peer_ni_decref_locked(gwni);
-               local_lnet = LNET_NIDNET(sd->sd_rtr_nid);
-       } else {
+       }
+
+       if (!route_found) {
                /* we've already looked up the initial lpni using dst_nid */
                lpni = sd->sd_best_lpni;
                /* the peer tree must be in existence */