Whamcloud - gitweb
LU-2934 lnet: Add LNet Router Priority parameter
[fs/lustre-release.git] / lnet / lnet / router.c
index a755db6..46adbeb 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  *
  *   This file is part of Portals
  *   http://sourceforge.net/projects/sandiaportals/
@@ -82,23 +82,23 @@ lnet_peer_buffer_credits(lnet_ni_t *ni)
 
 static int check_routers_before_use = 0;
 CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444,
-                "Assume routers are down and ping them before use");
+               "Assume routers are down and ping them before use");
 
-static int avoid_asym_router_failure = 0;
-CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0444,
-                "Avoid asymmetrical failures: reserved, use at your own risk");
+static int avoid_asym_router_failure = 1;
+CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644,
+               "Avoid asymmetrical router failures (0 to disable)");
 
-static int dead_router_check_interval = 0;
-CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0444,
-                "Seconds between dead router health checks (<= 0 to disable)");
+static int dead_router_check_interval = 60;
+CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0644,
+               "Seconds between dead router health checks (<= 0 to disable)");
 
-static int live_router_check_interval = 0;
-CFS_MODULE_PARM(live_router_check_interval, "i", int, 0444,
-                "Seconds between live router health checks (<= 0 to disable)");
+static int live_router_check_interval = 60;
+CFS_MODULE_PARM(live_router_check_interval, "i", int, 0644,
+               "Seconds between live router health checks (<= 0 to disable)");
 
 static int router_ping_timeout = 50;
-CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444,
-                "Seconds to wait for the reply to a router health query");
+CFS_MODULE_PARM(router_ping_timeout, "i", int, 0644,
+               "Seconds to wait for the reply to a router health query");
 
 int
 lnet_peers_start_down(void)
@@ -227,18 +227,20 @@ lnet_rtr_decref_locked(lnet_peer_t *lp)
 lnet_remotenet_t *
 lnet_find_net_locked (__u32 net)
 {
-        lnet_remotenet_t *rnet;
-        cfs_list_t       *tmp;
+       lnet_remotenet_t        *rnet;
+       cfs_list_t              *tmp;
+       cfs_list_t              *rn_list;
 
-        LASSERT (!the_lnet.ln_shutdown);
+       LASSERT(!the_lnet.ln_shutdown);
 
-        cfs_list_for_each (tmp, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list);
+       rn_list = lnet_net2rnethash(net);
+       cfs_list_for_each(tmp, rn_list) {
+               rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list);
 
-                if (rnet->lrn_net == net)
-                        return rnet;
-        }
-        return NULL;
+               if (rnet->lrn_net == net)
+                       return rnet;
+       }
+       return NULL;
 }
 
 static void lnet_shuffle_seed(void)
@@ -299,7 +301,8 @@ lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route)
 }
 
 int
-lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
+lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway,
+              unsigned int priority)
 {
         cfs_list_t          *e;
         lnet_remotenet_t    *rnet;
@@ -309,8 +312,8 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
         int                  add_route;
         int                  rc;
 
-        CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n",
-               libcfs_net2str(net), hops, libcfs_nid2str(gateway));
+       CDEBUG(D_NET, "Add route: net %s hops %u priority %u gw %s\n",
+              libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));
 
         if (gateway == LNET_NID_ANY ||
             LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
@@ -340,6 +343,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
         rnet->lrn_net = net;
         route->lr_hops = hops;
        route->lr_net = net;
+       route->lr_priority = priority;
 
        lnet_net_lock(LNET_LOCK_EX);
 
@@ -365,7 +369,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
         rnet2 = lnet_find_net_locked(net);
         if (rnet2 == NULL) {
                 /* new network */
-                cfs_list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets);
+               cfs_list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
                 rnet2 = rnet;
         }
 
@@ -419,39 +423,47 @@ lnet_check_routes(void)
        cfs_list_t              *e1;
        cfs_list_t              *e2;
        int                     cpt;
+       cfs_list_t              *rn_list;
+       int                     i;
 
        cpt = lnet_net_lock_current();
 
-       cfs_list_for_each(e1, &the_lnet.ln_remote_nets) {
-               rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+               rn_list = &the_lnet.ln_remote_nets_hash[i];
+               cfs_list_for_each(e1, rn_list) {
+                       rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
 
-               route2 = NULL;
-               cfs_list_for_each(e2, &rnet->lrn_routes) {
-                       lnet_nid_t      nid1;
-                       lnet_nid_t      nid2;
-                       int             net;
+                       route2 = NULL;
+                       cfs_list_for_each(e2, &rnet->lrn_routes) {
+                               lnet_nid_t      nid1;
+                               lnet_nid_t      nid2;
+                               int             net;
 
-                       route = cfs_list_entry(e2, lnet_route_t, lr_list);
+                               route = cfs_list_entry(e2, lnet_route_t,
+                                                      lr_list);
 
-                       if (route2 == NULL) {
-                               route2 = route;
-                               continue;
-                       }
+                               if (route2 == NULL) {
+                                       route2 = route;
+                                       continue;
+                               }
 
-                       if (route->lr_gateway->lp_ni ==
-                           route2->lr_gateway->lp_ni)
-                               continue;
+                               if (route->lr_gateway->lp_ni ==
+                                   route2->lr_gateway->lp_ni)
+                                       continue;
 
-                       nid1 = route->lr_gateway->lp_nid;
-                       nid2 = route2->lr_gateway->lp_nid;
-                       net = rnet->lrn_net;
+                               nid1 = route->lr_gateway->lp_nid;
+                               nid2 = route2->lr_gateway->lp_nid;
+                               net = rnet->lrn_net;
 
-                       lnet_net_unlock(cpt);
+                               lnet_net_unlock(cpt);
 
-                       CERROR("Routes to %s via %s and %s not supported\n",
-                              libcfs_net2str(net), libcfs_nid2str(nid1),
-                              libcfs_nid2str(nid2));
-                       return -EINVAL;
+                               CERROR("Routes to %s via %s and %s not "
+                                      "supported\n",
+                                      libcfs_net2str(net),
+                                      libcfs_nid2str(nid1),
+                                      libcfs_nid2str(nid2));
+                               return -EINVAL;
+                       }
                }
        }
 
@@ -463,30 +475,36 @@ int
 lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 {
        struct lnet_peer        *gateway;
-        lnet_remotenet_t    *rnet;
-        lnet_route_t        *route;
-        cfs_list_t          *e1;
-        cfs_list_t          *e2;
-        int                  rc = -ENOENT;
+       lnet_remotenet_t        *rnet;
+       lnet_route_t            *route;
+       cfs_list_t              *e1;
+       cfs_list_t              *e2;
+       int                     rc = -ENOENT;
+       cfs_list_t              *rn_list;
+       int                     idx = 0;
 
-        CDEBUG(D_NET, "Del route: net %s : gw %s\n",
-               libcfs_net2str(net), libcfs_nid2str(gw_nid));
+       CDEBUG(D_NET, "Del route: net %s : gw %s\n",
+              libcfs_net2str(net), libcfs_nid2str(gw_nid));
 
-        /* NB Caller may specify either all routes via the given gateway
-         * or a specific route entry actual NIDs) */
+       /* NB Caller may specify either all routes via the given gateway
+        * or a specific route entry (actual NIDs) */
 
- again:
        lnet_net_lock(LNET_LOCK_EX);
+       if (net == LNET_NIDNET(LNET_NID_ANY))
+               rn_list = &the_lnet.ln_remote_nets_hash[0];
+       else
+               rn_list = lnet_net2rnethash(net);
 
-        cfs_list_for_each (e1, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+ again:
+       cfs_list_for_each(e1, rn_list) {
+               rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
 
-                if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
-                      net == rnet->lrn_net))
-                        continue;
+               if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
+                       net == rnet->lrn_net))
+                       continue;
 
-                cfs_list_for_each (e2, &rnet->lrn_routes) {
-                        route = cfs_list_entry(e2, lnet_route_t, lr_list);
+               cfs_list_for_each(e2, &rnet->lrn_routes) {
+                       route = cfs_list_entry(e2, lnet_route_t, lr_list);
 
                        gateway = route->lr_gateway;
                        if (!(gw_nid == LNET_NID_ANY ||
@@ -495,29 +513,36 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 
                        cfs_list_del(&route->lr_list);
                        cfs_list_del(&route->lr_gwlist);
-                        the_lnet.ln_remote_nets_version++;
+                       the_lnet.ln_remote_nets_version++;
 
-                        if (cfs_list_empty(&rnet->lrn_routes))
-                                cfs_list_del(&rnet->lrn_list);
-                        else
-                                rnet = NULL;
+                       if (cfs_list_empty(&rnet->lrn_routes))
+                               cfs_list_del(&rnet->lrn_list);
+                       else
+                               rnet = NULL;
 
                        lnet_rtr_decref_locked(gateway);
                        lnet_peer_decref_locked(gateway);
 
                        lnet_net_unlock(LNET_LOCK_EX);
 
-                        LIBCFS_FREE(route, sizeof (*route));
+                       LIBCFS_FREE(route, sizeof(*route));
 
-                        if (rnet != NULL)
-                                LIBCFS_FREE(rnet, sizeof(*rnet));
+                       if (rnet != NULL)
+                               LIBCFS_FREE(rnet, sizeof(*rnet));
 
-                        rc = 0;
-                        goto again;
-                }
-        }
+                       rc = 0;
+                       lnet_net_lock(LNET_LOCK_EX);
+                       goto again;
+               }
+       }
 
+       if (net == LNET_NIDNET(LNET_NID_ANY) &&
+           ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
+               rn_list = &the_lnet.ln_remote_nets_hash[idx];
+               goto again;
+       }
        lnet_net_unlock(LNET_LOCK_EX);
+
        return rc;
 }
 
@@ -529,29 +554,36 @@ lnet_destroy_routes (void)
 
 int
 lnet_get_route(int idx, __u32 *net, __u32 *hops,
-              lnet_nid_t *gateway, __u32 *alive)
+              lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
 {
        cfs_list_t              *e1;
        cfs_list_t              *e2;
        lnet_remotenet_t        *rnet;
        lnet_route_t            *route;
        int                     cpt;
+       int                     i;
+       cfs_list_t              *rn_list;
 
        cpt = lnet_net_lock_current();
 
-        cfs_list_for_each (e1, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
-
-                cfs_list_for_each (e2, &rnet->lrn_routes) {
-                        route = cfs_list_entry(e2, lnet_route_t, lr_list);
-
-                        if (idx-- == 0) {
-                                *net     = rnet->lrn_net;
-                                *hops    = route->lr_hops;
-                                *gateway = route->lr_gateway->lp_nid;
-                                *alive   = route->lr_gateway->lp_alive;
-                               lnet_net_unlock(cpt);
-                               return 0;
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+               rn_list = &the_lnet.ln_remote_nets_hash[i];
+               cfs_list_for_each(e1, rn_list) {
+                       rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+
+                       cfs_list_for_each(e2, &rnet->lrn_routes) {
+                               route = cfs_list_entry(e2, lnet_route_t,
+                                                      lr_list);
+
+                               if (idx-- == 0) {
+                                       *net      = rnet->lrn_net;
+                                       *hops     = route->lr_hops;
+                                       *priority = route->lr_priority;
+                                       *gateway  = route->lr_gateway->lp_nid;
+                                       *alive    = route->lr_gateway->lp_alive;
+                                       lnet_net_unlock(cpt);
+                                       return 0;
+                               }
                        }
                }
        }
@@ -1049,7 +1081,7 @@ lnet_router_checker_start(void)
                 return 0;
 
 #ifdef __KERNEL__
-        cfs_sema_init(&the_lnet.ln_rc_signal, 0);
+       sema_init(&the_lnet.ln_rc_signal, 0);
         /* EQ size doesn't matter; the callback is guaranteed to get every
          * event */
        eqsz = 0;
@@ -1066,16 +1098,17 @@ lnet_router_checker_start(void)
 
         the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
 #ifdef __KERNEL__
-        rc = cfs_create_thread(lnet_router_checker, NULL, 0);
-        if (rc < 0) {
-                CERROR("Can't start router checker thread: %d\n", rc);
-                /* block until event callback signals exit */
-                cfs_down(&the_lnet.ln_rc_signal);
-                rc = LNetEQFree(the_lnet.ln_rc_eqh);
-                LASSERT (rc == 0);
-                the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-                return -ENOMEM;
-        }
+       rc = PTR_ERR(kthread_run(lnet_router_checker,
+                                NULL, "router_checker"));
+       if (IS_ERR_VALUE(rc)) {
+               CERROR("Can't start router checker thread: %d\n", rc);
+               /* block until event callback signals exit */
+               down(&the_lnet.ln_rc_signal);
+               rc = LNetEQFree(the_lnet.ln_rc_eqh);
+               LASSERT(rc == 0);
+               the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
+               return -ENOMEM;
+       }
 #endif
 
         if (check_routers_before_use) {
@@ -1101,7 +1134,7 @@ lnet_router_checker_stop (void)
 
 #ifdef __KERNEL__
        /* block until event callback signals exit */
-       cfs_down(&the_lnet.ln_rc_signal);
+       down(&the_lnet.ln_rc_signal);
 #else
        lnet_router_checker();
 #endif
@@ -1201,7 +1234,6 @@ lnet_router_checker(void *arg)
         lnet_peer_t       *rtr;
         cfs_list_t        *entry;
 
-        cfs_daemonize("router_checker");
         cfs_block_allsigs();
 
         LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
@@ -1256,7 +1288,7 @@ rescan:
        lnet_prune_rc_data(1); /* wait for UNLINK */
 
        the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
-       cfs_up(&the_lnet.ln_rc_signal);
+       up(&the_lnet.ln_rc_signal);
        /* The unlink event callback will signal final completion */
        return 0;
 }
@@ -1267,7 +1299,7 @@ lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
         int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
 
         while (--npages >= 0)
-                cfs_free_page(rb->rb_kiov[npages].kiov_page);
+               __free_page(rb->rb_kiov[npages].kiov_page);
 
         LIBCFS_FREE(rb, sz);
 }
@@ -1289,16 +1321,16 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
 
        for (i = 0; i < npages; i++) {
                page = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
-                                         CFS_ALLOC_ZERO | CFS_ALLOC_STD);
+                                         __GFP_ZERO | GFP_IOFS);
                 if (page == NULL) {
                         while (--i >= 0)
-                                cfs_free_page(rb->rb_kiov[i].kiov_page);
+                               __free_page(rb->rb_kiov[i].kiov_page);
 
                         LIBCFS_FREE(rb, sz);
                         return NULL;
                 }
 
-                rb->rb_kiov[i].kiov_len = CFS_PAGE_SIZE;
+               rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE;
                 rb->rb_kiov[i].kiov_offset = 0;
                 rb->rb_kiov[i].kiov_page = page;
         }
@@ -1460,7 +1492,7 @@ int
 lnet_rtrpools_alloc(int im_a_router)
 {
        lnet_rtrbufpool_t *rtrp;
-       int     large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+       int     large_pages = (LNET_MTU + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        int     small_pages = 1;
        int     nrb_tiny;
        int     nrb_small;