From: Chris Horn Date: Fri, 13 Sep 2019 21:23:43 +0000 (-0500) Subject: LU-12763 lnet: Use alternate ping processing for non-mr peers X-Git-Tag: 2.12.90~94 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=010f6b1819b9009745abda8d6119589dc336bd95 LU-12763 lnet: Use alternate ping processing for non-mr peers Router peers without multi-rail capabilities (i.e. older Lustre versions) or router peers that have discovery disabled need to use the alternate ping processing introduced by LU-12422. Otherwise, these peers go through the normal discovery processing, but their remote network interfaces are never added to the peer object. This causes routes through these peers to be considered down when avoid_asym_router_failure is enabled. Cray-bug-id: LUS-7866 Signed-off-by: Chris Horn Change-Id: Ib567b66c871abdad9b39b4f29b38eca424d4cd8d Reviewed-on: https://review.whamcloud.com/36182 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexandr Boyko Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 428ae91..4d1f0bf 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -994,6 +994,7 @@ lnet_peer_ni_is_primary(struct lnet_peer_ni *lpni) bool lnet_peer_is_uptodate(struct lnet_peer *lp); bool lnet_peer_is_uptodate_locked(struct lnet_peer *lp); bool lnet_is_discovery_disabled(struct lnet_peer *lp); +bool lnet_is_discovery_disabled_locked(struct lnet_peer *lp); bool lnet_peer_gw_discovery(struct lnet_peer *lp); static inline bool diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index e9505fd..1425bd8 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -1152,6 +1152,7 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid) bool lnet_is_discovery_disabled_locked(struct lnet_peer *lp) +__must_hold(&lp->lp_lock) { if (lnet_peer_discovery_disabled) return true; diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 6daca44..380f5ea 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -239,7 +239,7 @@ bool lnet_is_route_alive(struct lnet_route *route) * aliveness information can only be obtained when discovery is * enabled. */ - if (lnet_peer_discovery_disabled) + if (lnet_is_discovery_disabled(gw)) return route->lr_alive; /* @@ -332,11 +332,14 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp) spin_lock(&lp->lp_lock); lp_state = lp->lp_state; - spin_unlock(&lp->lp_lock); /* only handle replies if discovery is disabled. */ - if (!lnet_peer_discovery_disabled) + if (!lnet_is_discovery_disabled_locked(lp)) { + spin_unlock(&lp->lp_lock); return; + } + + spin_unlock(&lp->lp_lock); if (lp_state & LNET_PEER_PING_FAILED) { CDEBUG(D_NET,