Whamcloud - gitweb
LU-12763 lnet: Use alternate ping processing for non-mr peers 82/36182/2
author Chris Horn <hornc@cray.com>
Fri, 13 Sep 2019 21:23:43 +0000 (16:23 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 27 Sep 2019 23:12:31 +0000 (23:12 +0000)
Router peers without multi-rail capabilities (i.e. older Lustre
versions) or router peers that have discovery disabled need to use
the alternate ping processing introduced by LU-12422. Otherwise,
these peers go through the normal discovery processing, but their
remote network interfaces are never added to the peer object. This
causes routes through these peers to be considered down when
avoid_asym_router_failure is enabled.

Cray-bug-id: LUS-7866
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Ib567b66c871abdad9b39b4f29b38eca424d4cd8d
Reviewed-on: https://review.whamcloud.com/36182
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/peer.c
lnet/lnet/router.c

index 428ae91..4d1f0bf 100644 (file)
@@ -994,6 +994,7 @@ lnet_peer_ni_is_primary(struct lnet_peer_ni *lpni)
 bool lnet_peer_is_uptodate(struct lnet_peer *lp);
 bool lnet_peer_is_uptodate_locked(struct lnet_peer *lp);
 bool lnet_is_discovery_disabled(struct lnet_peer *lp);
 bool lnet_peer_is_uptodate(struct lnet_peer *lp);
 bool lnet_peer_is_uptodate_locked(struct lnet_peer *lp);
 bool lnet_is_discovery_disabled(struct lnet_peer *lp);
+bool lnet_is_discovery_disabled_locked(struct lnet_peer *lp);
 bool lnet_peer_gw_discovery(struct lnet_peer *lp);
 
 static inline bool
 bool lnet_peer_gw_discovery(struct lnet_peer *lp);
 
 static inline bool
index e9505fd..1425bd8 100644 (file)
@@ -1152,6 +1152,7 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid)
 
 bool
 lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
 
 bool
 lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
+__must_hold(&lp->lp_lock)
 {
        if (lnet_peer_discovery_disabled)
                return true;
 {
        if (lnet_peer_discovery_disabled)
                return true;
index 6daca44..380f5ea 100644 (file)
@@ -239,7 +239,7 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * aliveness information can only be obtained when discovery is
         * enabled.
         */
         * aliveness information can only be obtained when discovery is
         * enabled.
         */
-       if (lnet_peer_discovery_disabled)
+       if (lnet_is_discovery_disabled(gw))
                return route->lr_alive;
 
        /*
                return route->lr_alive;
 
        /*
@@ -332,11 +332,14 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
 
        spin_lock(&lp->lp_lock);
        lp_state = lp->lp_state;
 
        spin_lock(&lp->lp_lock);
        lp_state = lp->lp_state;
-       spin_unlock(&lp->lp_lock);
 
        /* only handle replies if discovery is disabled. */
 
        /* only handle replies if discovery is disabled. */
-       if (!lnet_peer_discovery_disabled)
+       if (!lnet_is_discovery_disabled_locked(lp)) {
+               spin_unlock(&lp->lp_lock);
                return;
                return;
+       }
+
+       spin_unlock(&lp->lp_lock);
 
        if (lp_state & LNET_PEER_PING_FAILED) {
                CDEBUG(D_NET,
 
        if (lp_state & LNET_PEER_PING_FAILED) {
                CDEBUG(D_NET,