From 2e07619477684f287a2399ccdbbde0a71289574b Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Mon, 13 Jul 2020 23:08:28 -0500 Subject: [PATCH] LU-13785 lnet: Use lr_hops for avoid_asym_router_failure For the asymmetric route failure avoidance feature to work properly, it needs to know what the hop count of a route should be. This information is given by the lr_hops field of the lnet_route. The lr_single_hop field, by contrast, records what discovery determined the hop count actually is (single or multi) based on the last ping reply. If a remote interface on a router goes missing, the route may be classified as multi-hop by discovery, but it should still be considered single-hop for the purposes of avoiding asymmetric route failure. HPE-bug-id: LUS-9099 Signed-off-by: Chris Horn Change-Id: I9c255f9a2175d964661850277808dae96ff7735c Reviewed-on: https://review.whamcloud.com/39362 Reviewed-by: Serguei Smirnov Reviewed-by: Neil Brown Reviewed-by: James Simmons Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/lnet/router.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 3d06b45..fb60c56 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -326,7 +326,8 @@ bool lnet_is_route_alive(struct lnet_route *route) * that the remote net must exist on the gateway. For multi-hop * routes the next-hop will not have the remote net. 
*/ - if (avoid_asym_router_failure && route->lr_single_hop) { + if (avoid_asym_router_failure && + (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) { rlpn = lnet_peer_get_net_locked(gw, route->lr_net); if (!rlpn) return false; @@ -377,7 +378,8 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp, static inline void lnet_check_route_inconsistency(struct lnet_route *route) { - if (!route->lr_single_hop && (int)route->lr_hops <= 1) { + if (!route->lr_single_hop && + (route->lr_hops == 1 || route->lr_hops == LNET_UNDEFINED_HOPS)) { CWARN("route %s->%s is detected to be multi-hop but hop count is set to %d\n", libcfs_net2str(route->lr_net), libcfs_nid2str(route->lr_gateway->lp_primary_nid), @@ -493,7 +495,9 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp) } route->lr_single_hop = single_hop; - if (avoid_asym_router_failure && single_hop) + if (avoid_asym_router_failure && + (route->lr_hops == 1 || + route->lr_hops == LNET_UNDEFINED_HOPS)) lnet_set_route_aliveness(route, net_up); else lnet_set_route_aliveness(route, true); -- 1.8.3.1