Whamcloud - gitweb
LU-14206 lnet: Router ping timeout with discovery disabled
[fs/lustre-release.git] / lnet / lnet / router.c
index 35f8c5d..e5dd77b 100644 (file)
@@ -309,7 +309,7 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * enabled.
         */
        if (lnet_is_discovery_disabled(gw))
-               return route->lr_alive;
+               return atomic_read(&route->lr_alive) == 1;
 
        /*
         * check the gateway's interfaces on the local network
@@ -326,7 +326,8 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * that the remote net must exist on the gateway. For multi-hop
         * routes the next-hop will not have the remote net.
         */
-       if (avoid_asym_router_failure && route->lr_single_hop) {
+       if (avoid_asym_router_failure &&
+           (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) {
                rlpn = lnet_peer_get_net_locked(gw, route->lr_net);
                if (!rlpn)
                        return false;
@@ -377,7 +378,8 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp,
 static inline void
 lnet_check_route_inconsistency(struct lnet_route *route)
 {
-       if (!route->lr_single_hop && (int)route->lr_hops <= 1) {
+       if (!route->lr_single_hop &&
+           (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) {
                CWARN("route %s->%s is detected to be multi-hop but hop count is set to %d\n",
                        libcfs_net2str(route->lr_net),
                        libcfs_nid2str(route->lr_gateway->lp_primary_nid),
@@ -401,20 +403,7 @@ lnet_set_route_hop_type(struct lnet_peer *gw, struct lnet_route *route)
        lnet_check_route_inconsistency(route);
 }
 
-static inline void
-lnet_set_route_aliveness(struct lnet_route *route, bool alive)
-{
-       /* Log when there's a state change */
-       if (route->lr_alive != alive) {
-               CERROR("route to %s through %s has gone from %s to %s\n",
-                      libcfs_net2str(route->lr_net),
-                      libcfs_nid2str(route->lr_gateway->lp_primary_nid),
-                      (route->lr_alive) ? "up" : "down",
-                      alive ? "up" : "down");
-               route->lr_alive = alive;
-       }
-}
-
+/* Must hold net_lock/EX */
 void
 lnet_router_discovery_ping_reply(struct lnet_peer *lp)
 {
@@ -491,7 +480,9 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
                }
 
                route->lr_single_hop = single_hop;
-               if (avoid_asym_router_failure && single_hop)
+               if (avoid_asym_router_failure &&
+                   (route->lr_hops == 1 ||
+                    route->lr_hops == LNET_UNDEFINED_HOPS))
                        lnet_set_route_aliveness(route, net_up);
                else
                        lnet_set_route_aliveness(route, true);
@@ -516,11 +507,11 @@ lnet_router_discovery_complete(struct lnet_peer *lp)
        lp->lp_alive = lp->lp_dc_error == 0;
        spin_unlock(&lp->lp_lock);
 
-       /* ping replies are being handled when discovery is disabled */
-       if (lnet_is_discovery_disabled_locked(lp))
-               return;
-
        if (!lp->lp_dc_error) {
+               /* ping replies are being handled when discovery is disabled */
+               if (lnet_is_discovery_disabled_locked(lp))
+                       return;
+
                /*
                * mark single-hop routes.  If the remote net is not configured on
                * the gateway we assume this is intentional and we mark the
@@ -682,7 +673,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
 
        if (gateway == LNET_NID_ANY ||
            gateway == LNET_NID_LO_0 ||
-           net == LNET_NIDNET(LNET_NID_ANY) ||
+           net == LNET_NET_ANY ||
            LNET_NETTYP(net) == LOLND ||
            LNET_NIDNET(gateway) == net ||
            (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
@@ -720,6 +711,10 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        route->lr_nid = gateway;
        route->lr_priority = priority;
        route->lr_hops = hops;
+       if (lnet_peers_start_down())
+               atomic_set(&route->lr_alive, 0);
+       else
+               atomic_set(&route->lr_alive, 1);
 
        lnet_net_lock(LNET_LOCK_EX);
 
@@ -865,7 +860,7 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid)
                lnet_peer_ni_decref_locked(lpni);
        }
 
-       if (net != LNET_NIDNET(LNET_NID_ANY)) {
+       if (net != LNET_NET_ANY) {
                rnet = lnet_find_rnet_locked(net);
                if (!rnet) {
                        lnet_net_unlock(LNET_LOCK_EX);
@@ -923,7 +918,7 @@ delete_zombies:
 void
 lnet_destroy_routes (void)
 {
-       lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
+       lnet_del_route(LNET_NET_ANY, LNET_NID_ANY);
 }
 
 int lnet_get_rtr_pool_cfg(int cpt, struct lnet_ioctl_pool_cfg *pool_cfg)
@@ -1049,15 +1044,9 @@ lnet_net_set_status_locked(struct lnet_net *net, __u32 status)
        struct lnet_ni *ni;
        bool update = false;
 
-       list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
-               lnet_ni_lock(ni);
-               if (ni->ni_status &&
-                   ni->ni_status->ns_status != status) {
-                   ni->ni_status->ns_status = status;
-                   update = true;
-               }
-               lnet_ni_unlock(ni);
-       }
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+               if (lnet_ni_set_status(ni, status))
+                       update = true;
 
        return update;
 }
@@ -1066,6 +1055,7 @@ static bool
 lnet_update_ni_status_locked(void)
 {
        struct lnet_net *net;
+       struct lnet_ni *ni;
        bool push = false;
        time64_t now;
        time64_t timeout;
@@ -1080,13 +1070,13 @@ lnet_update_ni_status_locked(void)
                        continue;
 
                if (now < net->net_last_alive + timeout)
-                       continue;
+                       goto check_ni_fatal;
 
                spin_lock(&net->net_lock);
                /* re-check with lock */
                if (now < net->net_last_alive + timeout) {
                        spin_unlock(&net->net_lock);
-                       continue;
+                       goto check_ni_fatal;
                }
                spin_unlock(&net->net_lock);
 
@@ -1095,7 +1085,25 @@ lnet_update_ni_status_locked(void)
                 * timeout on any of its constituent NIs, then mark all
                 * the NIs down.
                 */
-               push = lnet_net_set_status_locked(net, LNET_NI_STATUS_DOWN);
+               if (lnet_net_set_status_locked(net, LNET_NI_STATUS_DOWN)) {
+                       push = true;
+                       continue;
+               }
+
+check_ni_fatal:
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       /* lnet_ni_set_status() will perform the same check of
+                        * ni_status while holding the ni lock. We can safely
+                        * check ni_status without that lock because it is only
+                        * written to under net_lock/EX and our caller is
+                        * holding a net lock.
+                        */
+                       if (atomic_read(&ni->ni_fatal_error_on) &&
+                           ni->ni_status &&
+                           ni->ni_status->ns_status != LNET_NI_STATUS_DOWN &&
+                           lnet_ni_set_status(ni, LNET_NI_STATUS_DOWN))
+                               push = true;
+               }
        }
 
        return push;
@@ -1115,8 +1123,7 @@ void lnet_wait_router_start(void)
  * This function is called from the monitor thread to check if there are
  * any active routers that need to be checked.
  */
-inline bool
-lnet_router_checker_active(void)
+bool lnet_router_checker_active(void)
 {
        /* Router Checker thread needs to run when routing is enabled in
         * order to call lnet_update_ni_status_locked() */
@@ -1758,6 +1765,31 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
 
        /* recalculate aliveness */
        alive = lnet_is_peer_ni_alive(lpni);
+
+       lp = lpni->lpni_peer_net->lpn_peer;
+       /* If this is an LNet router then update route aliveness */
+       if (lp->lp_rtr_refcount) {
+               if (reset)
+                       /* reset flag indicates gateway peer went up or down */
+                       lp->lp_alive = alive;
+
+               /* If discovery is disabled, locally or on the gateway, then
+                * any routes using lpni as next-hop need to be updated
+                *
+                * NB: We can get many notifications while a route is down, so
+                * we try to avoid the expensive net_lock/EX here for the
+                * common case of receiving duplicate lnet_notify() calls (i.e.
+                * only grab EX lock when we actually need to update the route
+                * aliveness).
+                */
+               if (lnet_is_discovery_disabled(lp)) {
+                       list_for_each_entry(route, &lp->lp_routes, lr_gwlist) {
+                               if (route->lr_nid == lpni->lpni_nid)
+                                       lnet_set_route_aliveness(route, alive);
+                       }
+               }
+       }
+
        lnet_net_unlock(0);
 
        if (ni != NULL && !alive)
@@ -1766,12 +1798,6 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
        cpt = lpni->lpni_cpt;
        lnet_net_lock(cpt);
        lnet_peer_ni_decref_locked(lpni);
-       if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) {
-               lp = lpni->lpni_peer_net->lpn_peer;
-               lp->lp_alive = alive;
-               list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
-                       lnet_set_route_aliveness(route, alive);
-       }
        lnet_net_unlock(cpt);
 
        return 0;