Whamcloud - gitweb
LU-3679 lnet: reflect down routes in /proc/sys/lnet/routes 57/7857/4
author: Chris Horn <hornc@cray.com>
Wed, 23 Oct 2013 17:12:40 +0000 (12:12 -0500)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sat, 26 Oct 2013 22:40:32 +0000 (22:40 +0000)
We consider a route "down" if its router is down or if the router's
NI for the target network is down. This should be reflected
in the output of /proc/sys/lnet/routes.

Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: I82ee769d88aec92f1690ad9c095e32c9a9f9e282
Reviewed-on: http://review.whamcloud.com/7857
Reviewed-by: Cory Spitz <spitzcor@cray.com>
Reviewed-by: Isaac Huang <he.huang@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-move.c
lnet/lnet/router_proc.c

index 8f9710e..9cebc3a 100644 (file)
@@ -87,6 +87,17 @@ extern lnet_t  the_lnet;                        /* THE network */
 /** exclusive lock */
 #define LNET_LOCK_EX            CFS_PERCPT_LOCK_EX
 
 /** exclusive lock */
 #define LNET_LOCK_EX            CFS_PERCPT_LOCK_EX
 
+static inline int lnet_is_route_alive(lnet_route_t *route)
+{
+       if (!route->lr_gateway->lp_alive)
+               return 0; /* gateway is down, so the route is down */
+       if ((route->lr_gateway->lp_ping_feats &
+            LNET_PING_FEAT_NI_STATUS) == 0)
+               return 1; /* gateway reports no NI status, assume alive */
+       /* gateway reports NI status: alive only if no NIs are down */
+       return route->lr_downis == 0;
+}
+
 static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
 {
         return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
 static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh)
 {
         return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_COOKIE_NONE &&
index ec03257..ce7f926 100644 (file)
@@ -1158,9 +1158,9 @@ static lnet_peer_t *
 lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
 {
        lnet_remotenet_t        *rnet;
 lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
 {
        lnet_remotenet_t        *rnet;
-       lnet_route_t            *rtr;
-       lnet_route_t            *rtr_best;
-       lnet_route_t            *rtr_last;
+       lnet_route_t            *route;
+       lnet_route_t            *best_route;
+       lnet_route_t            *last_route;
        struct lnet_peer        *lp_best;
        struct lnet_peer        *lp;
        int                     rc;
        struct lnet_peer        *lp_best;
        struct lnet_peer        *lp;
        int                     rc;
@@ -1173,13 +1173,11 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
                return NULL;
 
        lp_best = NULL;
                return NULL;
 
        lp_best = NULL;
-       rtr_best = rtr_last = NULL;
-       cfs_list_for_each_entry(rtr, &rnet->lrn_routes, lr_list) {
-               lp = rtr->lr_gateway;
+       best_route = last_route = NULL;
+       cfs_list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
+               lp = route->lr_gateway;
 
 
-               if (!lp->lp_alive || /* gateway is down */
-                   ((lp->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0 &&
-                    rtr->lr_downis != 0)) /* NI to target is down */
+               if (!lnet_is_route_alive(route))
                        continue;
 
                if (ni != NULL && lp->lp_ni != ni)
                        continue;
 
                if (ni != NULL && lp->lp_ni != ni)
@@ -1189,28 +1187,28 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
                        return lp;
 
                if (lp_best == NULL) {
                        return lp;
 
                if (lp_best == NULL) {
-                       rtr_best = rtr_last = rtr;
+                       best_route = last_route = route;
                        lp_best = lp;
                        continue;
                }
 
                /* no protection on below fields, but it's harmless */
                        lp_best = lp;
                        continue;
                }
 
                /* no protection on below fields, but it's harmless */
-               if (rtr_last->lr_seq - rtr->lr_seq < 0)
-                       rtr_last = rtr;
+               if (last_route->lr_seq - route->lr_seq < 0)
+                       last_route = route;
 
 
-               rc = lnet_compare_routes(rtr, rtr_best);
+               rc = lnet_compare_routes(route, best_route);
                if (rc < 0)
                        continue;
 
                if (rc < 0)
                        continue;
 
-               rtr_best = rtr;
+               best_route = route;
                lp_best = lp;
        }
 
        /* set sequence number on the best router to the latest sequence + 1
         * so we can round-robin all routers, it's race and inaccurate but
         * harmless and functional  */
                lp_best = lp;
        }
 
        /* set sequence number on the best router to the latest sequence + 1
         * so we can round-robin all routers, it's race and inaccurate but
         * harmless and functional  */
-       if (rtr_best != NULL)
-               rtr_best->lr_seq = rtr_last->lr_seq + 1;
+       if (best_route != NULL)
+               best_route->lr_seq = last_route->lr_seq + 1;
        return lp_best;
 }
 
        return lp_best;
 }
 
index cb19004..5bbcbb7 100644 (file)
@@ -242,7 +242,7 @@ int LL_PROC_PROTO(proc_lnet_routes)
                        unsigned int hops       = route->lr_hops;
                        unsigned int priority   = route->lr_priority;
                        lnet_nid_t   nid        = route->lr_gateway->lp_nid;
                        unsigned int hops       = route->lr_hops;
                        unsigned int priority   = route->lr_priority;
                        lnet_nid_t   nid        = route->lr_gateway->lp_nid;
-                       int          alive      = route->lr_gateway->lp_alive;
+                       int          alive      = lnet_is_route_alive(route);
 
                        s += snprintf(s, tmpstr + tmpsiz - s,
                                      "%-8s %4u %8u %7s %s\n",
 
                        s += snprintf(s, tmpstr + tmpsiz - s,
                                      "%-8s %4u %8u %7s %s\n",