Whamcloud - gitweb
LU-6142 lnet: use list_first_entry() in lnet/lnet subdirectory.
[fs/lustre-release.git] / lnet / lnet / router.c
index 3e90c30..d581fa2 100644 (file)
@@ -120,7 +120,8 @@ MODULE_PARM_DESC(router_sensitivity_percentage,
 
 static void lnet_add_route_to_rnet(struct lnet_remotenet *rnet,
                                   struct lnet_route *route);
-static void lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
+static void lnet_del_route_from_rnet(struct lnet_nid *gw_nid,
+                                    struct list_head *route_list,
                                     struct list_head *zombies);
 
 static int
@@ -174,24 +175,25 @@ lnet_move_route(struct lnet_route *route, struct lnet_peer *lp,
 
        CDEBUG(D_NET, "deleting route %s->%s\n",
               libcfs_net2str(route->lr_net),
-              libcfs_nid2str(route->lr_nid));
+              libcfs_nidstr(&route->lr_nid));
 
        /*
         * use the gateway's lp_primary_nid to delete the route as the
         * lr_nid can be a constituent NID of the peer
         */
-       lnet_del_route_from_rnet(route->lr_gateway->lp_primary_nid,
-                                &rnet->lrn_routes, l);
+       lnet_del_route_from_rnet(
+               &route->lr_gateway->lp_primary_nid,
+               &rnet->lrn_routes, l);
 
        if (lp) {
                route = list_first_entry(l, struct lnet_route,
-                                       lr_list);
+                                        lr_list);
                route->lr_gateway = lp;
                lnet_add_route_to_rnet(rnet, route);
        } else {
                while (!list_empty(l) && !rt_list) {
                        route = list_first_entry(l, struct lnet_route,
-                                lr_list);
+                                                lr_list);
                        list_del(&route->lr_list);
                        LIBCFS_FREE(route, sizeof(*route));
                }
@@ -206,12 +208,13 @@ lnet_rtr_transfer_to_peer(struct lnet_peer *src, struct lnet_peer *target)
 
        lnet_net_lock(LNET_LOCK_EX);
        CDEBUG(D_NET, "transfering routes from %s -> %s\n",
-              libcfs_nid2str(src->lp_primary_nid),
-              libcfs_nid2str(target->lp_primary_nid));
+              libcfs_nidstr(&src->lp_primary_nid),
+              libcfs_nidstr(&target->lp_primary_nid));
        list_for_each_entry(route, &src->lp_routes, lr_gwlist) {
-               CDEBUG(D_NET, "%s: %s->%s\n", libcfs_nid2str(src->lp_primary_nid),
+               CDEBUG(D_NET, "%s: %s->%s\n",
+                      libcfs_nidstr(&src->lp_primary_nid),
                       libcfs_net2str(route->lr_net),
-                      libcfs_nid2str(route->lr_nid));
+                      libcfs_nidstr(&route->lr_nid));
        }
        list_splice_init(&src->lp_rtrq, &target->lp_rtrq);
        list_for_each_entry_safe(route, tmp, &src->lp_routes, lr_gwlist) {
@@ -309,7 +312,7 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * enabled.
         */
        if (lnet_is_discovery_disabled(gw))
-               return route->lr_alive;
+               return atomic_read(&route->lr_alive) == 1;
 
        /*
         * check the gateway's interfaces on the local network
@@ -326,7 +329,8 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * that the remote net must exist on the gateway. For multi-hop
         * routes the next-hop will not have the remote net.
         */
-       if (avoid_asym_router_failure && route->lr_single_hop) {
+       if (avoid_asym_router_failure &&
+           (route->lr_hops == 1 || route->lr_single_hop)) {
                rlpn = lnet_peer_get_net_locked(gw, route->lr_net);
                if (!rlpn)
                        return false;
@@ -339,7 +343,7 @@ bool lnet_is_route_alive(struct lnet_route *route)
                spin_unlock(&gw->lp_lock);
                if (gw->lp_rtr_refcount > 0)
                        CERROR("peer %s is being used as a gateway but routing feature is not turned on\n",
-                              libcfs_nid2str(gw->lp_primary_nid));
+                              libcfs_nidstr(&gw->lp_primary_nid));
                return false;
        }
        spin_unlock(&gw->lp_lock);
@@ -365,7 +369,7 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp,
         * intent here is not to confuse the user who added the route.
         */
        list_for_each_entry(route, &orig_lp->lp_routes, lr_gwlist) {
-               lpni = lnet_peer_get_ni_locked(orig_lp, route->lr_nid);
+               lpni = lnet_peer_ni_get_locked(orig_lp, &route->lr_nid);
                if (!lpni) {
                        lnet_net_lock(LNET_LOCK_EX);
                        list_move(&route->lr_gwlist, &new_lp->lp_routes);
@@ -377,10 +381,11 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp,
 static inline void
 lnet_check_route_inconsistency(struct lnet_route *route)
 {
-       if (!route->lr_single_hop && (int)route->lr_hops <= 1) {
+       if (!route->lr_single_hop &&
+           (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) {
                CWARN("route %s->%s is detected to be multi-hop but hop count is set to %d\n",
                        libcfs_net2str(route->lr_net),
-                       libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+                       libcfs_nidstr(&route->lr_gateway->lp_primary_nid),
                        (int) route->lr_hops);
        }
 }
@@ -401,20 +406,7 @@ lnet_set_route_hop_type(struct lnet_peer *gw, struct lnet_route *route)
        lnet_check_route_inconsistency(route);
 }
 
-static inline void
-lnet_set_route_aliveness(struct lnet_route *route, bool alive)
-{
-       /* Log when there's a state change */
-       if (route->lr_alive != alive) {
-               CERROR("route to %s through %s has gone from %s to %s\n",
-                      libcfs_net2str(route->lr_net),
-                      libcfs_nid2str(route->lr_gateway->lp_primary_nid),
-                      (route->lr_alive) ? "up" : "down",
-                      alive ? "up" : "down");
-               route->lr_alive = alive;
-       }
-}
-
+/* Must hold net_lock/EX */
 void
 lnet_router_discovery_ping_reply(struct lnet_peer *lp)
 {
@@ -442,7 +434,7 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
        if (lp_state & LNET_PEER_PING_FAILED ||
            pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED) {
                CDEBUG(D_NET, "Set routes down for gw %s because %s %d\n",
-                      libcfs_nid2str(lp->lp_primary_nid),
+                      libcfs_nidstr(&lp->lp_primary_nid),
                       lp_state & LNET_PEER_PING_FAILED ? "ping failed" :
                       "route feature is disabled", lp->lp_ping_error);
                /* If the ping failed or the peer has routing disabled then
@@ -454,7 +446,7 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
        }
 
        CDEBUG(D_NET, "Discovery is disabled. Processing reply for gw: %s:%d\n",
-              libcfs_nid2str(lp->lp_primary_nid), pbuf->pb_info.pi_nnis);
+              libcfs_nidstr(&lp->lp_primary_nid), pbuf->pb_info.pi_nnis);
 
        /*
         * examine the ping response to determine if the routes on that
@@ -491,7 +483,8 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
                }
 
                route->lr_single_hop = single_hop;
-               if (avoid_asym_router_failure && single_hop)
+               if (avoid_asym_router_failure &&
+                   (route->lr_hops == 1 || route->lr_single_hop))
                        lnet_set_route_aliveness(route, net_up);
                else
                        lnet_set_route_aliveness(route, true);
@@ -516,18 +509,20 @@ lnet_router_discovery_complete(struct lnet_peer *lp)
        lp->lp_alive = lp->lp_dc_error == 0;
        spin_unlock(&lp->lp_lock);
 
-       /* ping replies are being handled when discovery is disabled */
-       if (lnet_is_discovery_disabled_locked(lp))
-               return;
-
        if (!lp->lp_dc_error) {
+               /* ping replies are being handled when discovery is disabled */
+               if (lnet_is_discovery_disabled_locked(lp))
+                       return;
+
                /*
                * mark single-hop routes.  If the remote net is not configured on
                * the gateway we assume this is intentional and we mark the
                * gateway as multi-hop
                */
-               list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
+               list_for_each_entry(route, &lp->lp_routes, lr_gwlist) {
+                       lnet_set_route_aliveness(route, true);
                        lnet_set_route_hop_type(lp, route);
+               }
 
                return;
        }
@@ -542,7 +537,7 @@ lnet_router_discovery_complete(struct lnet_peer *lp)
         * determine otherwise.
         */
        CDEBUG(D_NET, "%s: Router discovery failed %d\n",
-              libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error);
+              libcfs_nidstr(&lp->lp_primary_nid), lp->lp_dc_error);
        while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL)
                lpni->lpni_ns_status = LNET_NI_STATUS_DOWN;
 
@@ -587,15 +582,12 @@ struct lnet_remotenet *
 lnet_find_rnet_locked(__u32 net)
 {
        struct lnet_remotenet *rnet;
-       struct list_head *tmp;
        struct list_head *rn_list;
 
        LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
 
        rn_list = lnet_net2rnethash(net);
-       list_for_each(tmp, rn_list) {
-               rnet = list_entry(tmp, struct lnet_remotenet, lrn_list);
-
+       list_for_each_entry(rnet, rn_list, lrn_list) {
                if (rnet->lrn_net == net)
                        return rnet;
        }
@@ -626,6 +618,7 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
        unsigned int offset = 0;
        unsigned int len = 0;
        struct list_head *e;
+       time64_t now;
 
        lnet_shuffle_seed();
 
@@ -648,9 +641,10 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
         * force a router check on the gateway to make sure the route is
         * alive
         */
+       now = ktime_get_real_seconds();
        list_for_each_entry(lpn, &route->lr_gateway->lp_peer_nets,
                            lpn_peer_nets) {
-               lpn->lpn_rtrcheck_timestamp = 0;
+               lpn->lpn_next_ping = now;
        }
 
        the_lnet.ln_remote_nets_version++;
@@ -663,7 +657,7 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route)
 }
 
 int
-lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
+lnet_add_route(__u32 net, __u32 hops, struct lnet_nid *gateway,
               __u32 priority, __u32 sensitivity)
 {
        struct list_head *route_entry;
@@ -676,13 +670,13 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        int rc;
 
        CDEBUG(D_NET, "Add route: remote net %s hops %d priority %u gw %s\n",
-              libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
+              libcfs_net2str(net), hops, priority, libcfs_nidstr(gateway));
 
-       if (gateway == LNET_NID_ANY ||
-           gateway == LNET_NID_LO_0 ||
-           net == LNET_NIDNET(LNET_NID_ANY) ||
+       if (LNET_NID_IS_ANY(gateway) ||
+           nid_is_lo0(gateway) ||
+           net == LNET_NET_ANY ||
            LNET_NETTYP(net) == LOLND ||
-           LNET_NIDNET(gateway) == net ||
+           LNET_NID_NET(gateway) == net ||
            (hops != LNET_UNDEFINED_HOPS && (hops < 1 || hops > 255)))
                return -EINVAL;
 
@@ -690,10 +684,10 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        if (lnet_islocalnet(net))
                return -EEXIST;
 
-       if (!lnet_islocalnet(LNET_NIDNET(gateway))) {
+       if (!lnet_islocalnet(LNET_NID_NET(gateway))) {
                CERROR("Cannot add route with gateway %s. There is no local interface configured on LNet %s\n",
-                      libcfs_nid2str(gateway),
-                      libcfs_net2str(LNET_NIDNET(gateway)));
+                      libcfs_nidstr(gateway),
+                      libcfs_net2str(LNET_NID_NET(gateway)));
                return -EHOSTUNREACH;
        }
 
@@ -702,7 +696,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        LIBCFS_ALLOC(rnet, sizeof(*rnet));
        if (route == NULL || rnet == NULL) {
                CERROR("Out of memory creating route %s %d %s\n",
-                      libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+                      libcfs_net2str(net), hops, libcfs_nidstr(gateway));
                if (route != NULL)
                        LIBCFS_FREE(route, sizeof(*route));
                if (rnet != NULL)
@@ -713,11 +707,15 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        INIT_LIST_HEAD(&rnet->lrn_routes);
        rnet->lrn_net = net;
        /* store the local and remote net that the route represents */
-       route->lr_lnet = LNET_NIDNET(gateway);
+       route->lr_lnet = LNET_NID_NET(gateway);
        route->lr_net = net;
-       route->lr_nid = gateway;
+       route->lr_nid = *gateway;
        route->lr_priority = priority;
        route->lr_hops = hops;
+       if (lnet_peers_start_down())
+               atomic_set(&route->lr_alive, 0);
+       else
+               atomic_set(&route->lr_alive, 1);
 
        lnet_net_lock(LNET_LOCK_EX);
 
@@ -725,7 +723,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
         * lnet_nid2peerni_ex() grabs a ref on the lpni. We will need to
         * lose that once we're done
         */
-       lpni = lnet_nid2peerni_ex(gateway, LNET_LOCK_EX);
+       lpni = lnet_nid2peerni_ex(gateway);
        if (IS_ERR(lpni)) {
                lnet_net_unlock(LNET_LOCK_EX);
 
@@ -735,11 +733,13 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                rc = PTR_ERR(lpni);
                CERROR("Error %d creating route %s %d %s\n", rc,
                        libcfs_net2str(net), hops,
-                       libcfs_nid2str(gateway));
+                       libcfs_nidstr(gateway));
                return rc;
        }
 
-       LASSERT(lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer);
+       LASSERT(lpni);
+       LASSERT(lpni->lpni_peer_net);
+       LASSERT(lpni->lpni_peer_net->lpn_peer);
        gw = lpni->lpni_peer_net->lpn_peer;
 
        route->lr_gateway = gw;
@@ -763,7 +763,8 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                }
 
                /* our lookups must be true */
-               LASSERT(route2->lr_gateway->lp_primary_nid != gateway);
+               LASSERT(!nid_same(&route2->lr_gateway->lp_primary_nid,
+                                 gateway));
        }
 
        /*
@@ -778,8 +779,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                gw->lp_health_sensitivity = sensitivity;
                lnet_add_route_to_rnet(rnet2, route);
                if (lnet_peer_discovery_disabled)
-                       CWARN("Consider turning discovery on to enable full "
-                             "Multi-Rail routing functionality\n");
+                       CWARN("Consider turning discovery on to enable full Multi-Rail routing functionality\n");
        }
 
        /*
@@ -788,6 +788,14 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        lnet_peer_ni_decref_locked(lpni);
        lnet_net_unlock(LNET_LOCK_EX);
 
+       /* If avoid_asym_router_failure is enabled and hop count is not
+        * set to 1 for a route that is actually single-hop, then the
+        * feature will fail to prevent the router from being selected
+        * if it is missing an NI on the remote network due to misconfiguration.
+        */
+       if (avoid_asym_router_failure && hops == LNET_UNDEFINED_HOPS)
+               CWARN("Use hops = 1 for a single-hop route when avoid_asym_router_failure feature is enabled\n");
+
        rc = 0;
 
        if (!add_route) {
@@ -805,7 +813,8 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
 }
 
 void
-lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
+lnet_del_route_from_rnet(struct lnet_nid *gw_nid,
+                        struct list_head *route_list,
                         struct list_head *zombies)
 {
        struct lnet_peer *gateway;
@@ -814,8 +823,7 @@ lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
 
        list_for_each_entry_safe(route, tmp, route_list, lr_list) {
                gateway = route->lr_gateway;
-               if (gw_nid != LNET_NID_ANY &&
-                   gw_nid != gateway->lp_primary_nid)
+               if (gw_nid && !nid_same(gw_nid, &gateway->lp_primary_nid))
                        continue;
 
                /*
@@ -835,7 +843,7 @@ lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
 }
 
 int
-lnet_del_route(__u32 net, lnet_nid_t gw_nid)
+lnet_del_route(__u32 net, struct lnet_nid *gw)
 {
        LIST_HEAD(rnet_zombies);
        struct lnet_remotenet *rnet;
@@ -843,33 +851,38 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid)
        struct list_head *rn_list;
        struct lnet_peer_ni *lpni;
        struct lnet_route *route;
+       struct lnet_nid gw_nid;
        LIST_HEAD(zombies);
        struct lnet_peer *lp = NULL;
        int i = 0;
 
        CDEBUG(D_NET, "Del route: net %s : gw %s\n",
-              libcfs_net2str(net), libcfs_nid2str(gw_nid));
+              libcfs_net2str(net), libcfs_nidstr(gw));
 
        /* NB Caller may specify either all routes via the given gateway
         * or a specific route entry actual NIDs) */
 
        lnet_net_lock(LNET_LOCK_EX);
 
-       lpni = lnet_find_peer_ni_locked(gw_nid);
+       if (gw)
+               lpni = lnet_peer_ni_find_locked(gw);
+       else
+               lpni = NULL;
        if (lpni) {
                lp = lpni->lpni_peer_net->lpn_peer;
                LASSERT(lp);
                gw_nid = lp->lp_primary_nid;
+               gw = &gw_nid;
                lnet_peer_ni_decref_locked(lpni);
        }
 
-       if (net != LNET_NIDNET(LNET_NID_ANY)) {
+       if (net != LNET_NET_ANY) {
                rnet = lnet_find_rnet_locked(net);
                if (!rnet) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        return -ENOENT;
                }
-               lnet_del_route_from_rnet(gw_nid, &rnet->lrn_routes,
+               lnet_del_route_from_rnet(gw, &rnet->lrn_routes,
                                         &zombies);
                if (list_empty(&rnet->lrn_routes))
                        list_move(&rnet->lrn_list, &rnet_zombies);
@@ -880,7 +893,7 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid)
                rn_list = &the_lnet.ln_remote_nets_hash[i];
 
                list_for_each_entry_safe(rnet, tmp, rn_list, lrn_list) {
-                       lnet_del_route_from_rnet(gw_nid, &rnet->lrn_routes,
+                       lnet_del_route_from_rnet(gw, &rnet->lrn_routes,
                                                 &zombies);
                        if (list_empty(&rnet->lrn_routes))
                                list_move(&rnet->lrn_list, &rnet_zombies);
@@ -919,9 +932,9 @@ delete_zombies:
 }
 
 void
-lnet_destroy_routes (void)
+lnet_destroy_routes(void)
 {
-       lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
+       lnet_del_route(LNET_NET_ANY, NULL);
 }
 
 int lnet_get_rtr_pool_cfg(int cpt, struct lnet_ioctl_pool_cfg *pool_cfg)
@@ -957,14 +970,12 @@ int lnet_get_rtr_pool_cfg(int cpt, struct lnet_ioctl_pool_cfg *pool_cfg)
 }
 
 int
-lnet_get_route(int idx, __u32 *net, __u32 *hops,
-              lnet_nid_t *gateway, __u32 *flags, __u32 *priority, __u32 *sensitivity)
+lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway,
+              __u32 *flags, __u32 *priority, __u32 *sensitivity)
 {
        struct lnet_remotenet *rnet;
        struct list_head *rn_list;
        struct lnet_route *route;
-       struct list_head *e1;
-       struct list_head *e2;
        int cpt;
        int i;
 
@@ -972,16 +983,11 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops,
 
        for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
                rn_list = &the_lnet.ln_remote_nets_hash[i];
-               list_for_each(e1, rn_list) {
-                       rnet = list_entry(e1, struct lnet_remotenet, lrn_list);
-
-                       list_for_each(e2, &rnet->lrn_routes) {
-                               route = list_entry(e2, struct lnet_route,
-                                                  lr_list);
-
+               list_for_each_entry(rnet, rn_list, lrn_list) {
+                       list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
                                if (idx-- == 0) {
                                        *net      = rnet->lrn_net;
-                                       *gateway  = route->lr_nid;
+                                       *gateway  = lnet_nid_to_nid4(&route->lr_nid);
                                        *hops     = route->lr_hops;
                                        *priority = route->lr_priority;
                                        *sensitivity = route->lr_gateway->
@@ -1009,7 +1015,6 @@ static void
 lnet_wait_known_routerstate(void)
 {
        struct lnet_peer *rtr;
-       struct list_head *entry;
        int all_known;
 
        LASSERT(the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING);
@@ -1018,10 +1023,7 @@ lnet_wait_known_routerstate(void)
                int cpt = lnet_net_lock_current();
 
                all_known = 1;
-               list_for_each(entry, &the_lnet.ln_routers) {
-                       rtr = list_entry(entry, struct lnet_peer,
-                                        lp_rtr_list);
-
+               list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
                        spin_lock(&rtr->lp_lock);
 
                        if ((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) {
@@ -1047,15 +1049,9 @@ lnet_net_set_status_locked(struct lnet_net *net, __u32 status)
        struct lnet_ni *ni;
        bool update = false;
 
-       list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
-               lnet_ni_lock(ni);
-               if (ni->ni_status &&
-                   ni->ni_status->ns_status != status) {
-                   ni->ni_status->ns_status = status;
-                   update = true;
-               }
-               lnet_ni_unlock(ni);
-       }
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+               if (lnet_ni_set_status(ni, status))
+                       update = true;
 
        return update;
 }
@@ -1064,6 +1060,7 @@ static bool
 lnet_update_ni_status_locked(void)
 {
        struct lnet_net *net;
+       struct lnet_ni *ni;
        bool push = false;
        time64_t now;
        time64_t timeout;
@@ -1072,19 +1069,19 @@ lnet_update_ni_status_locked(void)
 
        timeout = router_ping_timeout + alive_router_check_interval;
 
-       now = ktime_get_real_seconds();
+       now = ktime_get_seconds();
        list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
                if (net->net_lnd->lnd_type == LOLND)
                        continue;
 
                if (now < net->net_last_alive + timeout)
-                       continue;
+                       goto check_ni_fatal;
 
                spin_lock(&net->net_lock);
                /* re-check with lock */
                if (now < net->net_last_alive + timeout) {
                        spin_unlock(&net->net_lock);
-                       continue;
+                       goto check_ni_fatal;
                }
                spin_unlock(&net->net_lock);
 
@@ -1093,7 +1090,25 @@ lnet_update_ni_status_locked(void)
                 * timeout on any of its constituent NIs, then mark all
                 * the NIs down.
                 */
-               push = lnet_net_set_status_locked(net, LNET_NI_STATUS_DOWN);
+               if (lnet_net_set_status_locked(net, LNET_NI_STATUS_DOWN)) {
+                       push = true;
+                       continue;
+               }
+
+check_ni_fatal:
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       /* lnet_ni_set_status() will perform the same check of
+                        * ni_status while holding the ni lock. We can safely
+                        * check ni_status without that lock because it is only
+                        * written to under net_lock/EX and our caller is
+                        * holding a net lock.
+                        */
+                       if (atomic_read(&ni->ni_fatal_error_on) &&
+                           ni->ni_status &&
+                           ni->ni_status->ns_status != LNET_NI_STATUS_DOWN &&
+                           lnet_ni_set_status(ni, LNET_NI_STATUS_DOWN))
+                               push = true;
+               }
        }
 
        return push;
@@ -1113,8 +1128,7 @@ void lnet_wait_router_start(void)
  * This function is called from the monitor thread to check if there are
  * any active routers that need to be checked.
  */
-inline bool
-lnet_router_checker_active(void)
+bool lnet_router_checker_active(void)
 {
        /* Router Checker thread needs to run when routing is enabled in
         * order to call lnet_update_ni_status_locked() */
@@ -1128,12 +1142,12 @@ lnet_router_checker_active(void)
 void
 lnet_check_routers(void)
 {
-       struct lnet_peer_net *first_lpn = NULL;
+       struct lnet_peer_net *first_lpn;
        struct lnet_peer_net *lpn;
        struct lnet_peer_ni *lpni;
-       struct list_head *entry;
        struct lnet_peer *rtr;
        bool push = false;
+       bool needs_ping;
        bool found_lpn;
        __u64 version;
        __u32 net_id;
@@ -1145,54 +1159,59 @@ lnet_check_routers(void)
 rescan:
        version = the_lnet.ln_routers_version;
 
-       list_for_each(entry, &the_lnet.ln_routers) {
-               rtr = list_entry(entry, struct lnet_peer,
-                                lp_rtr_list);
+       list_for_each_entry(rtr, &the_lnet.ln_routers, lp_rtr_list) {
+               /* If we're currently discovering the peer then don't
+                * issue another discovery
+                */
+               if (rtr->lp_state & LNET_PEER_RTR_DISCOVERY)
+                       continue;
 
                now = ktime_get_real_seconds();
 
-               /*
-                * only discover the router if we've passed
-                * alive_router_check_interval seconds. Some of the router
-                * interfaces could be down and in that case they would be
-                * undergoing recovery separately from this discovery.
-                */
-               /* find next peer net which is also local */
+               /* find the next local peer net which needs to be ping'd */
+               needs_ping = false;
+               first_lpn = NULL;
+               found_lpn = false;
                net_id = rtr->lp_disc_net_id;
                do {
                        lpn = lnet_get_next_peer_net_locked(rtr, net_id);
                        if (!lpn) {
                                CERROR("gateway %s has no networks\n",
-                               libcfs_nid2str(rtr->lp_primary_nid));
+                               libcfs_nidstr(&rtr->lp_primary_nid));
                                break;
                        }
+
+                       /* We looped back to the first peer net */
                        if (first_lpn == lpn)
                                break;
                        if (!first_lpn)
                                first_lpn = lpn;
-                       found_lpn = lnet_islocalnet_locked(lpn->lpn_net_id);
+
                        net_id = lpn->lpn_net_id;
-               } while (!found_lpn);
+                       if (!lnet_islocalnet_locked(net_id))
+                               continue;
+
+                       found_lpn = true;
+
+                       CDEBUG(D_NET, "rtr %s(%p) %s(%p) next ping %lld\n",
+                              libcfs_nidstr(&rtr->lp_primary_nid), rtr,
+                              libcfs_net2str(net_id), lpn,
+                              lpn->lpn_next_ping);
+
+                       needs_ping = now >= lpn->lpn_next_ping;
+
+               } while (!needs_ping);
 
                if (!found_lpn || !lpn) {
                        CERROR("no local network found for gateway %s\n",
-                              libcfs_nid2str(rtr->lp_primary_nid));
+                              libcfs_nidstr(&rtr->lp_primary_nid));
                        continue;
                }
 
-               if (now - lpn->lpn_rtrcheck_timestamp <
-                   alive_router_check_interval / lnet_current_net_count)
-                      continue;
+               if (!needs_ping)
+                       continue;
 
-               /*
-                * If we're currently discovering the peer then don't
-                * issue another discovery
-                */
                spin_lock(&rtr->lp_lock);
-               if (rtr->lp_state & LNET_PEER_RTR_DISCOVERY) {
-                       spin_unlock(&rtr->lp_lock);
-                       continue;
-               }
                /* make sure we fully discover the router */
                rtr->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
                rtr->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH |
@@ -1200,10 +1219,10 @@ rescan:
                spin_unlock(&rtr->lp_lock);
 
                /* find the peer_ni associated with the primary NID */
-               lpni = lnet_peer_get_ni_locked(rtr, rtr->lp_primary_nid);
+               lpni = lnet_peer_ni_get_locked(rtr, &rtr->lp_primary_nid);
                if (!lpni) {
                        CDEBUG(D_NET, "Expected to find an lpni for %s, but non found\n",
-                              libcfs_nid2str(rtr->lp_primary_nid));
+                              libcfs_nidstr(&rtr->lp_primary_nid));
                        continue;
                }
                lnet_peer_ni_addref_locked(lpni);
@@ -1213,19 +1232,19 @@ rescan:
 
                /* discover the router */
                CDEBUG(D_NET, "discover %s, cpt = %d\n",
-                      libcfs_nid2str(lpni->lpni_nid), cpt);
+                      libcfs_nidstr(&lpni->lpni_nid), cpt);
                rc = lnet_discover_peer_locked(lpni, cpt, false);
 
-               /* decrement ref count acquired by find_peer_ni_locked() */
+               /* drop ref taken above */
                lnet_peer_ni_decref_locked(lpni);
 
                if (!rc)
-                       lpn->lpn_rtrcheck_timestamp = now;
+                       lpn->lpn_next_ping = now + alive_router_check_interval;
                else
                        CERROR("Failed to discover router %s\n",
-                              libcfs_nid2str(rtr->lp_primary_nid));
+                              libcfs_nidstr(&rtr->lp_primary_nid));
 
-               /* NB dropped lock */
+               /* NB cpt lock was dropped in lnet_discover_peer_locked() */
                if (version != the_lnet.ln_routers_version) {
                        /* the routers list has changed */
                        goto rescan;
@@ -1269,8 +1288,8 @@ lnet_new_rtrbuf(struct lnet_rtrbufpool *rbp, int cpt)
        rb->rb_pool = rbp;
 
        for (i = 0; i < npages; i++) {
-               page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
-                                         GFP_KERNEL | __GFP_ZERO);
+               page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL |
+                                         __GFP_ZERO | __GFP_NORETRY);
                if (page == NULL) {
                        while (--i >= 0)
                                __free_page(rb->rb_kiov[i].bv_page);
@@ -1308,7 +1327,7 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 
        /* Free buffers on the free list. */
        while (!list_empty(&tmp)) {
-               rb = list_entry(tmp.next, struct lnet_rtrbuf, rb_list);
+               rb = list_first_entry(&tmp, struct lnet_rtrbuf, rb_list);
                list_del(&rb->rb_list);
                lnet_destroy_rtrbuf(rb, npages);
        }
@@ -1351,8 +1370,8 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        while (num_rb-- > 0) {
                rb = lnet_new_rtrbuf(rbp, cpt);
                if (rb == NULL) {
-                       CERROR("Failed to allocate %d route bufs of %d pages\n",
-                              nbufs, npages);
+                       CERROR("lnet: error allocating %ux%u page router buffers on CPT %u: rc = %d\n",
+                              nbufs, npages, cpt, -ENOMEM);
 
                        lnet_net_lock(cpt);
                        rbp->rbp_req_nbuffers = old_req_nbufs;
@@ -1382,8 +1401,9 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        return 0;
 
 failed:
-       while (!list_empty(&rb_list)) {
-               rb = list_entry(rb_list.next, struct lnet_rtrbuf, rb_list);
+       while ((rb = list_first_entry_or_null(&rb_list,
+                                             struct lnet_rtrbuf,
+                                             rb_list)) != NULL) {
                list_del(&rb->rb_list);
                lnet_destroy_rtrbuf(rb, npages);
        }
@@ -1500,9 +1520,11 @@ lnet_rtrpools_alloc(int im_a_router)
        } else if (!strcmp(forwarding, "enabled")) {
                /* explicitly enabled */
        } else {
-               LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either "
-                                  "'enabled' or 'disabled'\n");
-               return -EINVAL;
+               rc = -EINVAL;
+               LCONSOLE_ERROR_MSG(0x10b,
+                                  "lnet: forwarding='%s' not set to either 'enabled' or 'disabled': rc = %d\n",
+                                  forwarding, rc);
+               return rc;
        }
 
        nrb_tiny = lnet_nrb_tiny_calculate();
@@ -1521,30 +1543,32 @@ lnet_rtrpools_alloc(int im_a_router)
                                                LNET_NRBPOOLS *
                                                sizeof(struct lnet_rtrbufpool));
        if (the_lnet.ln_rtrpools == NULL) {
+               rc = -ENOMEM;
                LCONSOLE_ERROR_MSG(0x10c,
-                                  "Failed to initialize router buffe pool\n");
-               return -ENOMEM;
+                       "lnet: error allocating router buffer pool: rc = %d\n",
+                       rc);
+               return rc;
        }
 
        cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
                lnet_rtrpool_init(&rtrp[LNET_TINY_BUF_IDX], 0);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_TINY_BUF_IDX],
                                              nrb_tiny, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
 
                lnet_rtrpool_init(&rtrp[LNET_SMALL_BUF_IDX],
                                  LNET_NRB_SMALL_PAGES);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_SMALL_BUF_IDX],
                                              nrb_small, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
 
                lnet_rtrpool_init(&rtrp[LNET_LARGE_BUF_IDX],
                                  LNET_NRB_LARGE_PAGES);
                rc = lnet_rtrpool_adjust_bufs(&rtrp[LNET_LARGE_BUF_IDX],
                                              nrb_large, i);
-               if (rc != 0)
+               if (rc)
                        goto failed;
        }
 
@@ -1669,7 +1693,7 @@ lnet_rtrpools_disable(void)
 }
 
 static inline void
-lnet_notify_peer_down(struct lnet_ni *ni, lnet_nid_t nid)
+lnet_notify_peer_down(struct lnet_ni *ni, struct lnet_nid *nid)
 {
        if (ni->ni_net->net_lnd->lnd_notify_peer_down != NULL)
                (ni->ni_net->net_lnd->lnd_notify_peer_down)(nid);
@@ -1683,36 +1707,36 @@ lnet_notify_peer_down(struct lnet_ni *ni, lnet_nid_t nid)
  * when: notification time.
  */
 int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
+lnet_notify(struct lnet_ni *ni, lnet_nid_t nid4, bool alive, bool reset,
            time64_t when)
 {
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_route *route;
        struct lnet_peer *lp;
        time64_t now = ktime_get_seconds();
+       struct lnet_nid nid;
        int cpt;
 
-       LASSERT (!in_interrupt ());
+       lnet_nid4_to_nid(nid4, &nid);
+       LASSERT(!in_interrupt());
 
-       CDEBUG (D_NET, "%s notifying %s: %s\n",
-               (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
-               libcfs_nid2str(nid),
-               alive ? "up" : "down");
+       CDEBUG(D_NET, "%s notifying %s: %s\n",
+              (ni == NULL) ? "userspace" : libcfs_nidstr(&ni->ni_nid),
+              libcfs_nidstr(&nid), alive ? "up" : "down");
 
        if (ni != NULL &&
-           LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
+           LNET_NID_NET(&ni->ni_nid) != LNET_NID_NET(&nid)) {
                CWARN("Ignoring notification of %s %s by %s (different net)\n",
-                     libcfs_nid2str(nid), alive ? "birth" : "death",
-                     libcfs_nid2str(ni->ni_nid));
+                     libcfs_nidstr(&nid), alive ? "birth" : "death",
+                     libcfs_nidstr(&ni->ni_nid));
                return -EINVAL;
        }
 
        /* can't do predictions... */
        if (when > now) {
-               CWARN("Ignoring prediction from %s of %s %s "
-                     "%lld seconds in the future\n",
-                     (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
-                     libcfs_nid2str(nid), alive ? "up" : "down", when - now);
+               CWARN("Ignoring prediction from %s of %s %s %lld seconds in the future\n",
+                       ni ? libcfs_nidstr(&ni->ni_nid) :  "userspace",
+                       libcfs_nidstr(&nid), alive ? "up" : "down", when - now);
                return -EINVAL;
        }
 
@@ -1730,16 +1754,17 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
                return -ESHUTDOWN;
        }
 
-       lpni = lnet_find_peer_ni_locked(nid);
+       lpni = lnet_peer_ni_find_locked(&nid);
        if (lpni == NULL) {
                /* nid not found */
                lnet_net_unlock(0);
-               CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
+               CDEBUG(D_NET, "%s not found\n", libcfs_nidstr(&nid));
                return 0;
        }
 
        if (alive) {
                if (reset) {
+                       lpni->lpni_ns_status = LNET_NI_STATUS_UP;
                        lnet_set_lpni_healthv_locked(lpni,
                                                     LNET_MAX_HEALTH_VALUE);
                } else {
@@ -1750,24 +1775,45 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
                                        (sensitivity) ? sensitivity :
                                        lnet_health_sensitivity);
                }
+       } else if (reset) {
+               lpni->lpni_ns_status = LNET_NI_STATUS_DOWN;
        }
 
        /* recalculate aliveness */
        alive = lnet_is_peer_ni_alive(lpni);
+
+       lp = lpni->lpni_peer_net->lpn_peer;
+       /* If this is an LNet router then update route aliveness */
+       if (lp->lp_rtr_refcount) {
+               if (reset)
+                       /* reset flag indicates gateway peer went up or down */
+                       lp->lp_alive = alive;
+
+               /* If discovery is disabled, locally or on the gateway, then
+                * any routes using lpni as next-hop need to be updated
+                *
+                * NB: We can get many notifications while a route is down, so
+                * we try and avoid the expensive net_lock/EX here for the
+                * common case of receiving duplicate lnet_notify() calls (i.e.
+                * only grab EX lock when we actually need to update the route
+                * aliveness).
+                */
+               if (lnet_is_discovery_disabled(lp)) {
+                       list_for_each_entry(route, &lp->lp_routes, lr_gwlist) {
+                               if (nid_same(&route->lr_nid, &lpni->lpni_nid))
+                                       lnet_set_route_aliveness(route, alive);
+                       }
+               }
+       }
+
        lnet_net_unlock(0);
 
        if (ni != NULL && !alive)
-               lnet_notify_peer_down(ni, lpni->lpni_nid);
+               lnet_notify_peer_down(ni, &lpni->lpni_nid);
 
        cpt = lpni->lpni_cpt;
        lnet_net_lock(cpt);
        lnet_peer_ni_decref_locked(lpni);
-       if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) {
-               lp = lpni->lpni_peer_net->lpn_peer;
-               lp->lp_alive = alive;
-               list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
-                       lnet_set_route_aliveness(route, alive);
-       }
        lnet_net_unlock(cpt);
 
        return 0;