Whamcloud - gitweb
LU-12930 various: use schedule_timeout_*interruptible
[fs/lustre-release.git] / lnet / lnet / router.c
index 599a8a1..4ab65f7 100644 (file)
@@ -35,8 +35,6 @@
 #define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_SIZE - 1) >> \
                                  PAGE_SHIFT)
 
-extern unsigned int lnet_current_net_count;
-
 static char *forwarding = "";
 module_param(forwarding, charp, 0444);
 MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
@@ -80,6 +78,14 @@ int avoid_asym_router_failure = 1;
 module_param(avoid_asym_router_failure, int, 0644);
 MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
 
+int dead_router_check_interval = INT_MIN;
+module_param(dead_router_check_interval, int, 0444);
+MODULE_PARM_DESC(dead_router_check_interval, "(DEPRECATED - Use alive_router_check_interval)");
+
+int live_router_check_interval = INT_MIN;
+module_param(live_router_check_interval, int, 0444);
+MODULE_PARM_DESC(live_router_check_interval, "(DEPRECATED - Use alive_router_check_interval)");
+
 int alive_router_check_interval = 60;
 module_param(alive_router_check_interval, int, 0644);
 MODULE_PARM_DESC(alive_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
@@ -179,7 +185,8 @@ lnet_peers_start_down(void)
 }
 
 /*
- * A net is alive if at least one gateway NI on the network is alive.
+ * The peer_net of a gateway is alive if at least one of the peer_ni's on
+ * that peer_net is alive.
  */
 static bool
 lnet_is_gateway_net_alive(struct lnet_peer_net *lpn)
@@ -202,6 +209,9 @@ bool lnet_is_gateway_alive(struct lnet_peer *gw)
 {
        struct lnet_peer_net *lpn;
 
+       if (!gw->lp_alive)
+               return false;
+
        list_for_each_entry(lpn, &gw->lp_peer_nets, lpn_peer_nets) {
                if (!lnet_is_gateway_net_alive(lpn))
                        return false;
@@ -222,39 +232,51 @@ bool lnet_is_route_alive(struct lnet_route *route)
        struct lnet_peer *gw = route->lr_gateway;
        struct lnet_peer_net *llpn;
        struct lnet_peer_net *rlpn;
-       bool route_alive;
+
+       /* If the gateway is down then all routes are considered down */
+       if (!gw->lp_alive)
+               return false;
 
        /*
-        * check the gateway's interfaces on the route rnet to make sure
-        * that the gateway is viable.
+        * if discovery is disabled then rely on the cached aliveness
+        * information. This is limited information which we log when
+        * we receive the discovery ping response. The most up-to-date
+        * aliveness information can only be obtained when discovery is
+        * enabled.
+        */
+       if (lnet_is_discovery_disabled(gw))
+               return route->lr_alive;
+
+       /*
+        * check the gateway's interfaces on the local network
         */
        llpn = lnet_peer_get_net_locked(gw, route->lr_lnet);
        if (!llpn)
                return false;
 
-       route_alive = lnet_is_gateway_net_alive(llpn);
+       if (!lnet_is_gateway_net_alive(llpn))
+               return false;
 
        if (avoid_asym_router_failure) {
+               /* Check the gateway's interfaces on the remote network */
                rlpn = lnet_peer_get_net_locked(gw, route->lr_net);
                if (!rlpn)
                        return false;
-               route_alive = route_alive &&
-                             lnet_is_gateway_net_alive(rlpn);
+               if (!lnet_is_gateway_net_alive(rlpn))
+                       return false;
        }
 
-       if (!route_alive)
-               return route_alive;
-
        spin_lock(&gw->lp_lock);
        if (!(gw->lp_state & LNET_PEER_ROUTER_ENABLED)) {
+               spin_unlock(&gw->lp_lock);
                if (gw->lp_rtr_refcount > 0)
                        CERROR("peer %s is being used as a gateway but routing feature is not turned on\n",
                               libcfs_nid2str(gw->lp_primary_nid));
-               route_alive = false;
+               return false;
        }
        spin_unlock(&gw->lp_lock);
 
-       return route_alive;
+       return true;
 }
 
 void
@@ -285,35 +307,157 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp,
 
 }
 
+static inline void
+lnet_set_route_aliveness(struct lnet_route *route, bool alive)
+{
+       /* Log when there's a state change */
+       if (route->lr_alive != alive) {
+               CERROR("route to %s through %s has gone from %s to %s\n",
+                      libcfs_net2str(route->lr_net),
+                      libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+                      (route->lr_alive) ? "up" : "down",
+                      alive ? "up" : "down");
+               route->lr_alive = alive;
+       }
+}
+
+void
+lnet_router_discovery_ping_reply(struct lnet_peer *lp)
+{
+       struct lnet_ping_buffer *pbuf = lp->lp_data;
+       struct lnet_remotenet *rnet;
+       struct lnet_peer_net *llpn;
+       struct lnet_route *route;
+       bool net_up = false;
+       unsigned lp_state;
+       __u32 net, net2;
+       int i, j;
+
+
+       spin_lock(&lp->lp_lock);
+       lp_state = lp->lp_state;
+
+       /* only handle replies if discovery is disabled. */
+       if (!lnet_is_discovery_disabled_locked(lp)) {
+               spin_unlock(&lp->lp_lock);
+               return;
+       }
+
+       spin_unlock(&lp->lp_lock);
+
+       if (lp_state & LNET_PEER_PING_FAILED ||
+           pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED) {
+               CDEBUG(D_NET, "Set routes down for gw %s because %s %d\n",
+                      libcfs_nid2str(lp->lp_primary_nid),
+                      lp_state & LNET_PEER_PING_FAILED ? "ping failed" :
+                      "route feature is disabled", lp->lp_ping_error);
+               /* If the ping failed or the peer has routing disabled then
+                * mark the routes served by this peer down
+                */
+               list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
+                       lnet_set_route_aliveness(route, false);
+               return;
+       }
+
+       CDEBUG(D_NET, "Discovery is disabled. Processing reply for gw: %s\n",
+              libcfs_nid2str(lp->lp_primary_nid));
+
+       /*
+        * examine the ping response:
+        * For each NID in the ping response, extract the net
+        * if the net exists on our remote net list then
+        * iterate over the routes on the rnet and if:
+        *      The route's local net is healthy and
+        *      The remote net status is UP, then mark the route up
+        * otherwise mark the route down
+        */
+       for (i = 1; i < pbuf->pb_info.pi_nnis; i++) {
+               net = LNET_NIDNET(pbuf->pb_info.pi_ni[i].ns_nid);
+               rnet = lnet_find_rnet_locked(net);
+               if (!rnet)
+                       continue;
+               list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
+                       /* check if this is the route's gateway */
+                       if (lp->lp_primary_nid !=
+                           route->lr_gateway->lp_primary_nid)
+                               continue;
+
+                       llpn = lnet_peer_get_net_locked(lp, route->lr_lnet);
+                       if (!llpn) {
+                               lnet_set_route_aliveness(route, false);
+                               continue;
+                       }
+
+                       if (!lnet_is_gateway_net_alive(llpn)) {
+                               lnet_set_route_aliveness(route, false);
+                               continue;
+                       }
+
+                       if (avoid_asym_router_failure &&
+                           pbuf->pb_info.pi_ni[i].ns_status !=
+                               LNET_NI_STATUS_UP) {
+                               net_up = false;
+
+                               /*
+                                * revisit all previous NIDs and check if
+                                * any on the network we're examining is
+                                * up. If at least one is up then we consider
+                                * the route to be alive.
+                                */
+                               for (j = 1; j < i; j++) {
+                                       net2 = LNET_NIDNET(pbuf->pb_info.
+                                                          pi_ni[j].ns_nid);
+                                       if (net2 == net &&
+                                           pbuf->pb_info.pi_ni[j].ns_status ==
+                                               LNET_NI_STATUS_UP)
+                                               net_up = true;
+                               }
+                               if (!net_up) {
+                                       lnet_set_route_aliveness(route, false);
+                                       continue;
+                               }
+                       }
+
+                       lnet_set_route_aliveness(route, true);
+               }
+       }
+}
+
 void
 lnet_router_discovery_complete(struct lnet_peer *lp)
 {
        struct lnet_peer_ni *lpni = NULL;
+       struct lnet_route *route;
 
        spin_lock(&lp->lp_lock);
        lp->lp_state &= ~LNET_PEER_RTR_DISCOVERY;
+       lp->lp_state |= LNET_PEER_RTR_DISCOVERED;
+       lp->lp_alive = lp->lp_dc_error == 0;
        spin_unlock(&lp->lp_lock);
 
        /*
         * Router discovery successful? All peer information would've been
         * updated already. No need to do any more processing
         */
-       if (!lp->lp_dc_error)
+       if (lp->lp_alive)
                return;
+
        /*
-        * discovery failed? then we need to set the status of each lpni
-        * to DOWN. It will be updated the next time we discover the
-        * router. For router peer NIs not on local networks, we never send
-        * messages directly to them, so their health will always remain
-        * at maximum. We can only tell if they are up or down from the
-        * status returned in the PING response. If we fail to get that
-        * status in our scheduled router discovery, then we'll assume
-        * it's down until we're told otherwise.
+        * We do not send messages directly to the remote interfaces
+        * of an LNet router. As such, we rely on the PING response
+        * to determine the up/down status of these interfaces. If
+        * a PING response is not received, or some other problem with
+        * discovery occurs that prevents us from getting this status,
+        * we assume all interfaces are down until we're able to
+        * determine otherwise.
         */
        CDEBUG(D_NET, "%s: Router discovery failed %d\n",
               libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error);
        while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL)
                lpni->lpni_ns_status = LNET_NI_STATUS_DOWN;
+
+       list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
+               lnet_set_route_aliveness(route, false);
 }
 
 static void
@@ -382,7 +526,6 @@ static void lnet_shuffle_seed(void)
                add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
 
        seeded = 1;
-       return;
 }
 
 /* NB expects LNET_LOCK held */
@@ -461,7 +604,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                CERROR("Cannot add route with gateway %s. There is no local interface configured on LNet %s\n",
                       libcfs_nid2str(gateway),
                       libcfs_net2str(LNET_NIDNET(gateway)));
-               return -EINVAL;
+               return -EHOSTUNREACH;
        }
 
        /* Assume net, route, all new */
@@ -544,6 +687,9 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
        if (add_route) {
                gw->lp_health_sensitivity = sensitivity;
                lnet_add_route_to_rnet(rnet2, route);
+               if (lnet_peer_discovery_disabled)
+                       CWARN("Consider turning discovery on to enable full "
+                             "Multi-Rail routing functionality\n");
        }
 
        /*
@@ -563,7 +709,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                LIBCFS_FREE(rnet, sizeof(*rnet));
 
        /* kick start the monitor thread to handle the added route */
-       wake_up(&the_lnet.ln_mt_waitq);
+       complete(&the_lnet.ln_mt_wait_complete);
 
        return rc;
 }
@@ -601,19 +747,16 @@ lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
 int
 lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 {
-       struct list_head rnet_zombies;
+       LIST_HEAD(rnet_zombies);
        struct lnet_remotenet *rnet;
        struct lnet_remotenet *tmp;
        struct list_head *rn_list;
        struct lnet_peer_ni *lpni;
        struct lnet_route *route;
-       struct list_head zombies;
-       struct lnet_peer *lp;
+       LIST_HEAD(zombies);
+       struct lnet_peer *lp = NULL;
        int i = 0;
 
-       INIT_LIST_HEAD(&rnet_zombies);
-       INIT_LIST_HEAD(&zombies);
-
        CDEBUG(D_NET, "Del route: net %s : gw %s\n",
               libcfs_net2str(net), libcfs_nid2str(gw_nid));
 
@@ -784,7 +927,7 @@ lnet_wait_known_routerstate(void)
 
                        spin_lock(&rtr->lp_lock);
 
-                       if ((rtr->lp_state & LNET_PEER_DISCOVERED) == 0) {
+                       if ((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) {
                                all_known = 0;
                                spin_unlock(&rtr->lp_lock);
                                break;
@@ -797,8 +940,7 @@ lnet_wait_known_routerstate(void)
                if (all_known)
                        return;
 
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1));
+               schedule_timeout_uninterruptible(cfs_time_seconds(1));
        }
 }
 
@@ -1052,13 +1194,11 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 {
        int npages = rbp->rbp_npages;
        struct lnet_rtrbuf *rb;
-       struct list_head tmp;
+       LIST_HEAD(tmp);
 
        if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */
                return;
 
-       INIT_LIST_HEAD(&tmp);
-
        lnet_net_lock(cpt);
        list_splice_init(&rbp->rbp_msgs, &tmp);
        lnet_drop_routed_msgs_locked(&tmp, cpt);
@@ -1079,7 +1219,7 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 static int
 lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
 {
-       struct list_head rb_list;
+       LIST_HEAD(rb_list);
        struct lnet_rtrbuf *rb;
        int             num_rb;
        int             num_buffers = 0;
@@ -1107,8 +1247,6 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        rbp->rbp_req_nbuffers = nbufs;
        lnet_net_unlock(cpt);
 
-       INIT_LIST_HEAD(&rb_list);
-
        /* allocate the buffers on a local list first.  If all buffers are
         * allocated successfully then join this list to the rbp buffer
         * list.  If not then free all allocated buffers. */
@@ -1315,7 +1453,7 @@ lnet_rtrpools_alloc(int im_a_router)
        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_routing = 1;
        lnet_net_unlock(LNET_LOCK_EX);
-       wake_up(&the_lnet.ln_mt_waitq);
+       complete(&the_lnet.ln_mt_wait_complete);
        return 0;
 
  failed:
@@ -1407,6 +1545,10 @@ lnet_rtrpools_enable(void)
                ~LNET_PING_FEAT_RTE_DISABLED;
        lnet_net_unlock(LNET_LOCK_EX);
 
+       if (lnet_peer_discovery_disabled)
+               CWARN("Consider turning discovery on to enable full "
+                     "Multi-Rail routing functionality\n");
+
        return rc;
 }
 
@@ -1447,6 +1589,8 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
            time64_t when)
 {
        struct lnet_peer_ni *lpni = NULL;
+       struct lnet_route *route;
+       struct lnet_peer *lp;
        time64_t now = ktime_get_seconds();
        int cpt;
 
@@ -1516,6 +1660,12 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
        cpt = lpni->lpni_cpt;
        lnet_net_lock(cpt);
        lnet_peer_ni_decref_locked(lpni);
+       if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) {
+               lp = lpni->lpni_peer_net->lpn_peer;
+               lp->lp_alive = alive;
+               list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
+                       lnet_set_route_aliveness(route, alive);
+       }
        lnet_net_unlock(cpt);
 
        return 0;