#define LNET_NRB_LARGE_PAGES ((LNET_MTU + PAGE_SIZE - 1) >> \
PAGE_SHIFT)
-extern unsigned int lnet_current_net_count;
-
static char *forwarding = "";
module_param(forwarding, charp, 0444);
MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
}
/*
- * A net is alive if at least one gateway NI on the network is alive.
+ * The peer_net of a gateway is alive if at least one of the peer_ni's on
+ * that peer_net is alive.
*/
static bool
lnet_is_gateway_net_alive(struct lnet_peer_net *lpn)
{
struct lnet_peer_net *lpn;
+ if (!gw->lp_alive)
+ return false;
+
list_for_each_entry(lpn, &gw->lp_peer_nets, lpn_peer_nets) {
if (!lnet_is_gateway_net_alive(lpn))
return false;
struct lnet_peer *gw = route->lr_gateway;
struct lnet_peer_net *llpn;
struct lnet_peer_net *rlpn;
- bool route_alive;
+
+ /* If the gateway is down then all routes are considered down */
+ if (!gw->lp_alive)
+ return false;
/*
* if discovery is disabled then rely on the cached aliveness
* aliveness information can only be obtained when discovery is
* enabled.
*/
- if (lnet_peer_discovery_disabled)
+ if (lnet_is_discovery_disabled(gw))
return route->lr_alive;
/*
- * check the gateway's interfaces on the route rnet to make sure
- * that the gateway is viable.
+ * check the gateway's interfaces on the local network
*/
llpn = lnet_peer_get_net_locked(gw, route->lr_lnet);
if (!llpn)
return false;
- route_alive = lnet_is_gateway_net_alive(llpn);
+ if (!lnet_is_gateway_net_alive(llpn))
+ return false;
if (avoid_asym_router_failure) {
+ /* Check the gateway's interfaces on the remote network */
rlpn = lnet_peer_get_net_locked(gw, route->lr_net);
if (!rlpn)
return false;
- route_alive = route_alive &&
- lnet_is_gateway_net_alive(rlpn);
+ if (!lnet_is_gateway_net_alive(rlpn))
+ return false;
}
- if (!route_alive)
- return route_alive;
-
spin_lock(&gw->lp_lock);
if (!(gw->lp_state & LNET_PEER_ROUTER_ENABLED)) {
+ spin_unlock(&gw->lp_lock);
if (gw->lp_rtr_refcount > 0)
CERROR("peer %s is being used as a gateway but routing feature is not turned on\n",
libcfs_nid2str(gw->lp_primary_nid));
- route_alive = false;
+ return false;
}
spin_unlock(&gw->lp_lock);
- return route_alive;
+ return true;
}
void
spin_lock(&lp->lp_lock);
lp_state = lp->lp_state;
- spin_unlock(&lp->lp_lock);
/* only handle replies if discovery is disabled. */
- if (!lnet_peer_discovery_disabled)
+ if (!lnet_is_discovery_disabled_locked(lp)) {
+ spin_unlock(&lp->lp_lock);
return;
+ }
- if (lp_state & LNET_PEER_PING_FAILED) {
- CDEBUG(D_NET,
- "Ping failed with %d. Set routes down for gw %s\n",
- lp->lp_ping_error, libcfs_nid2str(lp->lp_primary_nid));
- /* If the ping failed then mark the routes served by this
- * peer down
+ spin_unlock(&lp->lp_lock);
+
+ if (lp_state & LNET_PEER_PING_FAILED ||
+ pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED) {
+ CDEBUG(D_NET, "Set routes down for gw %s because %s %d\n",
+ libcfs_nid2str(lp->lp_primary_nid),
+ lp_state & LNET_PEER_PING_FAILED ? "ping failed" :
+ "route feature is disabled", lp->lp_ping_error);
+ /* If the ping failed or the peer has routing disabled then
+ * mark the routes served by this peer down
*/
list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
lnet_set_route_aliveness(route, false);
route->lr_gateway->lp_primary_nid)
continue;
- /* gateway has the routing feature disabled */
- if (pbuf->pb_info.pi_features &
- LNET_PING_FEAT_RTE_DISABLED) {
- lnet_set_route_aliveness(route, false);
- continue;
- }
-
llpn = lnet_peer_get_net_locked(lp, route->lr_lnet);
if (!llpn) {
lnet_set_route_aliveness(route, false);
spin_lock(&lp->lp_lock);
lp->lp_state &= ~LNET_PEER_RTR_DISCOVERY;
+ lp->lp_state |= LNET_PEER_RTR_DISCOVERED;
+ lp->lp_alive = lp->lp_dc_error == 0;
spin_unlock(&lp->lp_lock);
/*
* Router discovery successful? All peer information would've been
* updated already. No need to do any more processing
*/
- if (!lp->lp_dc_error)
+ if (lp->lp_alive)
return;
+
/*
- * discovery failed? then we need to set the status of each lpni
- * to DOWN. It will be updated the next time we discover the
- * router. For router peer NIs not on local networks, we never send
- * messages directly to them, so their health will always remain
- * at maximum. We can only tell if they are up or down from the
- * status returned in the PING response. If we fail to get that
- * status in our scheduled router discovery, then we'll assume
- * it's down until we're told otherwise.
+ * We do not send messages directly to the remote interfaces
+ * of an LNet router. As such, we rely on the PING response
+ * to determine the up/down status of these interfaces. If
+ * a PING response is not receieved, or some other problem with
+ * discovery occurs that prevents us from getting this status,
+ * we assume all interfaces are down until we're able to
+ * determine otherwise.
*/
CDEBUG(D_NET, "%s: Router discovery failed %d\n",
libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error);
add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
seeded = 1;
- return;
}
/* NB expects LNET_LOCK held */
struct lnet_peer_ni *lpni;
struct lnet_route *route;
struct list_head zombies;
- struct lnet_peer *lp;
+ struct lnet_peer *lp = NULL;
int i = 0;
INIT_LIST_HEAD(&rnet_zombies);
spin_lock(&rtr->lp_lock);
- if ((rtr->lp_state & LNET_PEER_DISCOVERED) == 0) {
+ if ((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) {
all_known = 0;
spin_unlock(&rtr->lp_lock);
break;
lnet_peer_ni_decref_locked(lpni);
if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) {
lp = lpni->lpni_peer_net->lpn_peer;
+ lp->lp_alive = alive;
list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
lnet_set_route_aliveness(route, alive);
}