Whamcloud - gitweb
LU-9679 lnet: use LIST_HEAD() for local lists.
[fs/lustre-release.git] / lnet / lnet / router.c
index 9fdfc2a..cae8da6 100644 (file)
@@ -35,8 +35,6 @@
 #define LNET_NRB_LARGE_PAGES   ((LNET_MTU + PAGE_SIZE - 1) >> \
                                  PAGE_SHIFT)
 
-extern unsigned int lnet_current_net_count;
-
 static char *forwarding = "";
 module_param(forwarding, charp, 0444);
 MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
@@ -187,7 +185,8 @@ lnet_peers_start_down(void)
 }
 
 /*
- * A net is alive if at least one gateway NI on the network is alive.
+ * The peer_net of a gateway is alive if at least one of the peer_ni's on
+ * that peer_net is alive.
  */
 static bool
 lnet_is_gateway_net_alive(struct lnet_peer_net *lpn)
@@ -210,6 +209,9 @@ bool lnet_is_gateway_alive(struct lnet_peer *gw)
 {
        struct lnet_peer_net *lpn;
 
+       if (!gw->lp_alive)
+               return false;
+
        list_for_each_entry(lpn, &gw->lp_peer_nets, lpn_peer_nets) {
                if (!lnet_is_gateway_net_alive(lpn))
                        return false;
@@ -230,7 +232,10 @@ bool lnet_is_route_alive(struct lnet_route *route)
        struct lnet_peer *gw = route->lr_gateway;
        struct lnet_peer_net *llpn;
        struct lnet_peer_net *rlpn;
-       bool route_alive;
+
+       /* If the gateway is down then all routes are considered down */
+       if (!gw->lp_alive)
+               return false;
 
        /*
         * if discovery is disabled then rely on the cached aliveness
@@ -239,40 +244,39 @@ bool lnet_is_route_alive(struct lnet_route *route)
         * aliveness information can only be obtained when discovery is
         * enabled.
         */
-       if (lnet_peer_discovery_disabled)
+       if (lnet_is_discovery_disabled(gw))
                return route->lr_alive;
 
        /*
-        * check the gateway's interfaces on the route rnet to make sure
-        * that the gateway is viable.
+        * check the gateway's interfaces on the local network
         */
        llpn = lnet_peer_get_net_locked(gw, route->lr_lnet);
        if (!llpn)
                return false;
 
-       route_alive = lnet_is_gateway_net_alive(llpn);
+       if (!lnet_is_gateway_net_alive(llpn))
+               return false;
 
        if (avoid_asym_router_failure) {
+               /* Check the gateway's interfaces on the remote network */
                rlpn = lnet_peer_get_net_locked(gw, route->lr_net);
                if (!rlpn)
                        return false;
-               route_alive = route_alive &&
-                             lnet_is_gateway_net_alive(rlpn);
+               if (!lnet_is_gateway_net_alive(rlpn))
+                       return false;
        }
 
-       if (!route_alive)
-               return route_alive;
-
        spin_lock(&gw->lp_lock);
        if (!(gw->lp_state & LNET_PEER_ROUTER_ENABLED)) {
+               spin_unlock(&gw->lp_lock);
                if (gw->lp_rtr_refcount > 0)
                        CERROR("peer %s is being used as a gateway but routing feature is not turned on\n",
                               libcfs_nid2str(gw->lp_primary_nid));
-               route_alive = false;
+               return false;
        }
        spin_unlock(&gw->lp_lock);
 
-       return route_alive;
+       return true;
 }
 
 void
@@ -332,18 +336,23 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
 
        spin_lock(&lp->lp_lock);
        lp_state = lp->lp_state;
-       spin_unlock(&lp->lp_lock);
 
        /* only handle replies if discovery is disabled. */
-       if (!lnet_peer_discovery_disabled)
+       if (!lnet_is_discovery_disabled_locked(lp)) {
+               spin_unlock(&lp->lp_lock);
                return;
+       }
+
+       spin_unlock(&lp->lp_lock);
 
-       if (lp_state & LNET_PEER_PING_FAILED) {
-               CDEBUG(D_NET,
-                      "Ping failed with %d. Set routes down for gw %s\n",
-                      lp->lp_ping_error, libcfs_nid2str(lp->lp_primary_nid));
-               /* If the ping failed then mark the routes served by this
-                * peer down
+       if (lp_state & LNET_PEER_PING_FAILED ||
+           pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED) {
+               CDEBUG(D_NET, "Set routes down for gw %s because %s %d\n",
+                      libcfs_nid2str(lp->lp_primary_nid),
+                      lp_state & LNET_PEER_PING_FAILED ? "ping failed" :
+                      "route feature is disabled", lp->lp_ping_error);
+               /* If the ping failed or the peer has routing disabled then
+                * mark the routes served by this peer down
                 */
                list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
                        lnet_set_route_aliveness(route, false);
@@ -373,13 +382,6 @@ lnet_router_discovery_ping_reply(struct lnet_peer *lp)
                            route->lr_gateway->lp_primary_nid)
                                continue;
 
-                       /* gateway has the routing feature disabled */
-                       if (pbuf->pb_info.pi_features &
-                             LNET_PING_FEAT_RTE_DISABLED) {
-                               lnet_set_route_aliveness(route, false);
-                               continue;
-                       }
-
                        llpn = lnet_peer_get_net_locked(lp, route->lr_lnet);
                        if (!llpn) {
                                lnet_set_route_aliveness(route, false);
@@ -429,23 +431,25 @@ lnet_router_discovery_complete(struct lnet_peer *lp)
 
        spin_lock(&lp->lp_lock);
        lp->lp_state &= ~LNET_PEER_RTR_DISCOVERY;
+       lp->lp_state |= LNET_PEER_RTR_DISCOVERED;
+       lp->lp_alive = lp->lp_dc_error == 0;
        spin_unlock(&lp->lp_lock);
 
        /*
         * Router discovery successful? All peer information would've been
         * updated already. No need to do any more processing
         */
-       if (!lp->lp_dc_error)
+       if (lp->lp_alive)
                return;
+
        /*
-        * discovery failed? then we need to set the status of each lpni
-        * to DOWN. It will be updated the next time we discover the
-        * router. For router peer NIs not on local networks, we never send
-        * messages directly to them, so their health will always remain
-        * at maximum. We can only tell if they are up or down from the
-        * status returned in the PING response. If we fail to get that
-        * status in our scheduled router discovery, then we'll assume
-        * it's down until we're told otherwise.
+        * We do not send messages directly to the remote interfaces
+        * of an LNet router. As such, we rely on the PING response
+        * to determine the up/down status of these interfaces. If
+        * a PING response is not received, or some other problem with
+        * discovery occurs that prevents us from getting this status,
+        * we assume all interfaces are down until we're able to
+        * determine otherwise.
         */
        CDEBUG(D_NET, "%s: Router discovery failed %d\n",
               libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error);
@@ -522,7 +526,6 @@ static void lnet_shuffle_seed(void)
                add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
 
        seeded = 1;
-       return;
 }
 
 /* NB expects LNET_LOCK held */
@@ -706,7 +709,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                LIBCFS_FREE(rnet, sizeof(*rnet));
 
        /* kick start the monitor thread to handle the added route */
-       wake_up(&the_lnet.ln_mt_waitq);
+       complete(&the_lnet.ln_mt_wait_complete);
 
        return rc;
 }
@@ -744,19 +747,16 @@ lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
 int
 lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 {
-       struct list_head rnet_zombies;
+       LIST_HEAD(rnet_zombies);
        struct lnet_remotenet *rnet;
        struct lnet_remotenet *tmp;
        struct list_head *rn_list;
        struct lnet_peer_ni *lpni;
        struct lnet_route *route;
-       struct list_head zombies;
-       struct lnet_peer *lp;
+       LIST_HEAD(zombies);
+       struct lnet_peer *lp = NULL;
        int i = 0;
 
-       INIT_LIST_HEAD(&rnet_zombies);
-       INIT_LIST_HEAD(&zombies);
-
        CDEBUG(D_NET, "Del route: net %s : gw %s\n",
               libcfs_net2str(net), libcfs_nid2str(gw_nid));
 
@@ -927,7 +927,7 @@ lnet_wait_known_routerstate(void)
 
                        spin_lock(&rtr->lp_lock);
 
-                       if ((rtr->lp_state & LNET_PEER_DISCOVERED) == 0) {
+                       if ((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) {
                                all_known = 0;
                                spin_unlock(&rtr->lp_lock);
                                break;
@@ -1195,13 +1195,11 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 {
        int npages = rbp->rbp_npages;
        struct lnet_rtrbuf *rb;
-       struct list_head tmp;
+       LIST_HEAD(tmp);
 
        if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */
                return;
 
-       INIT_LIST_HEAD(&tmp);
-
        lnet_net_lock(cpt);
        list_splice_init(&rbp->rbp_msgs, &tmp);
        lnet_drop_routed_msgs_locked(&tmp, cpt);
@@ -1222,7 +1220,7 @@ lnet_rtrpool_free_bufs(struct lnet_rtrbufpool *rbp, int cpt)
 static int
 lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
 {
-       struct list_head rb_list;
+       LIST_HEAD(rb_list);
        struct lnet_rtrbuf *rb;
        int             num_rb;
        int             num_buffers = 0;
@@ -1250,8 +1248,6 @@ lnet_rtrpool_adjust_bufs(struct lnet_rtrbufpool *rbp, int nbufs, int cpt)
        rbp->rbp_req_nbuffers = nbufs;
        lnet_net_unlock(cpt);
 
-       INIT_LIST_HEAD(&rb_list);
-
        /* allocate the buffers on a local list first.  If all buffers are
         * allocated successfully then join this list to the rbp buffer
         * list.  If not then free all allocated buffers. */
@@ -1458,7 +1454,7 @@ lnet_rtrpools_alloc(int im_a_router)
        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_routing = 1;
        lnet_net_unlock(LNET_LOCK_EX);
-       wake_up(&the_lnet.ln_mt_waitq);
+       complete(&the_lnet.ln_mt_wait_complete);
        return 0;
 
  failed:
@@ -1667,6 +1663,7 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
        lnet_peer_ni_decref_locked(lpni);
        if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) {
                lp = lpni->lpni_peer_net->lpn_peer;
+               lp->lp_alive = alive;
                list_for_each_entry(route, &lp->lp_routes, lr_gwlist)
                        lnet_set_route_aliveness(route, alive);
        }