Whamcloud - gitweb
LU-2466 lnet: Use Hash Table for Remote Route List
authorDoug Oucharek <doug.s.oucharek@intel.com>
Sat, 12 Jan 2013 01:09:15 +0000 (17:09 -0800)
committerOleg Drokin <green@whamcloud.com>
Thu, 31 Jan 2013 21:09:47 +0000 (16:09 -0500)
This change updates the remote route list (the_lnet.ln_remote_nets)
to be a hash table (the_lnet.ln_remote_nets_hash) to speed up
access when dealing with a large number of routes (over 1000).

Signed-off-by: Doug Oucharek <doug.s.oucharek@intel.com>
Change-Id: I2dac8075ff38d29bd38905b5b1b002603c5a56b6
Reviewed-on: http://review.whamcloud.com/5023
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Tested-by: Hudson
Reviewed-by: John Hammond <john.hammond@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c
lnet/lnet/router.c
lnet/lnet/router_proc.c

index d94c266..bc03b4d 100644 (file)
@@ -689,6 +689,14 @@ lnet_nid2peerhash(lnet_nid_t nid)
        return cfs_hash_long(nid, LNET_PEER_HASH_BITS);
 }
 
+static inline cfs_list_t *
+lnet_net2rnethash(__u32 net)   /* returns the hash chain head for @net */
+{
+       return &the_lnet.ln_remote_nets_hash[(LNET_NETNUM(net) +
+               LNET_NETTYP(net)) &     /* hash key: net number + net type */
+               ((1U << the_lnet.ln_remote_nets_hbits) - 1)]; /* keep low hbits bits */
+}
+
 extern lnd_t the_lolnd;
 
 #ifndef __KERNEL__
index 378a3a4..4d4ca7c 100644 (file)
@@ -532,8 +532,12 @@ typedef struct {
        unsigned int            lr_hops;        /* how far I am */
 } lnet_route_t;
 
+#define LNET_REMOTE_NETS_HASH_DEFAULT  (1U << 7)
+#define LNET_REMOTE_NETS_HASH_MAX      (1U << 16)
+#define LNET_REMOTE_NETS_HASH_SIZE     (1 << the_lnet.ln_remote_nets_hbits)
+
 typedef struct {
-        cfs_list_t              lrn_list;       /* chain on ln_remote_nets */
+        cfs_list_t              lrn_list;       /* chain on ln_remote_nets_hash */
         cfs_list_t              lrn_routes;     /* routes to me */
         __u32                   lrn_net;        /* my net number */
 } lnet_remotenet_t;
@@ -732,6 +736,8 @@ typedef struct
        pthread_mutex_t                 ln_eq_wait_lock;
 # endif
 #endif
+       unsigned int                    ln_remote_nets_hbits;
+
        /* protect NI, peer table, credits, routers, rtrbuf... */
        struct cfs_percpt_lock          *ln_net_lock;
        /* percpt message containers for active/finalizing/freed message */
@@ -751,7 +757,7 @@ typedef struct
        lnet_ni_t                       *ln_eq_waitni;
 
        /* remote networks with routes to them */
-       cfs_list_t                      ln_remote_nets;
+       cfs_list_t                      *ln_remote_nets_hash;
        /* validity stamp */
        __u64                           ln_remote_nets_version;
        /* list of all known routers */
index 005522d..8721253 100644 (file)
@@ -36,6 +36,9 @@
 
 #define DEBUG_SUBSYSTEM S_LNET
 #include <lnet/lib-lnet.h>
+#ifdef __KERNEL__
+#include <linux/log2.h>
+#endif
 
 #ifdef __KERNEL__
 #define D_LNI D_CONSOLE
@@ -60,6 +63,10 @@ static char *routes = "";
 CFS_MODULE_PARM(routes, "s", charp, 0444,
                 "routes to non-local networks");
 
+static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
+               "size of remote network hash table");
+
 char *
 lnet_get_routes(void)
 {
@@ -208,6 +215,43 @@ void lnet_fini_locks(void)
 #endif
 
 static int
+lnet_create_remote_nets_table(void)    /* 0 on success, -ENOMEM on failure */
+{
+       int             i;
+       cfs_list_t      *hash;
+
+       LASSERT(the_lnet.ln_remote_nets_hash == NULL); /* must not exist yet */
+       LASSERT(the_lnet.ln_remote_nets_hbits > 0); /* bit count must be set first */
+       LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+       if (hash == NULL) {
+               CERROR("Failed to create remote nets hash table\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
+               CFS_INIT_LIST_HEAD(&hash[i]);   /* every bucket starts empty */
+       the_lnet.ln_remote_nets_hash = hash;    /* stored only after all heads are set */
+       return 0;
+}
+
+static void
+lnet_destroy_remote_nets_table(void)   /* frees the table; no-op if absent */
+{
+       int             i;
+       cfs_list_t      *hash;  /* never assigned; only used in sizeof(*hash) */
+
+       if (the_lnet.ln_remote_nets_hash == NULL)
+               return;
+
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) /* every bucket must be empty */
+               LASSERT(cfs_list_empty(&the_lnet.ln_remote_nets_hash[i]));
+
+       LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
+                   LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
+       the_lnet.ln_remote_nets_hash = NULL;    /* satisfies the create-time LASSERT */
+}
+
+static int
 lnet_create_locks(void)
 {
        lnet_init_locks();
@@ -724,9 +768,12 @@ lnet_prepare(lnet_pid_t requested_pid)
        CFS_INIT_LIST_HEAD(&the_lnet.ln_nis);
        CFS_INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
        CFS_INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
-       CFS_INIT_LIST_HEAD(&the_lnet.ln_remote_nets);
        CFS_INIT_LIST_HEAD(&the_lnet.ln_routers);
 
+       rc = lnet_create_remote_nets_table();
+       if (rc != 0)
+               goto failed;
+
        the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
 
        the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
@@ -816,6 +863,7 @@ lnet_unprepare (void)
                cfs_percpt_free(the_lnet.ln_counters);
                the_lnet.ln_counters = NULL;
        }
+       lnet_destroy_remote_nets_table();
 
        return 0;
 }
@@ -1022,7 +1070,6 @@ lnet_shutdown_lndnis (void)
        LASSERT(!the_lnet.ln_shutdown);
        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(cfs_list_empty(&the_lnet.ln_nis_zombie));
-       LASSERT(cfs_list_empty(&the_lnet.ln_remote_nets));
 
        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_shutdown = 1;       /* flag shutdown */
@@ -1348,10 +1395,22 @@ LNetInit(void)
        CFS_INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
 
 #ifdef __KERNEL__
+       /* The number of hash bits is order_base_2(rnet_htable_size) minus 1,
+        * with a floor of 1 (better to underestimate than overestimate so we
+        * don't waste memory). */
+       if (rnet_htable_size <= 0)
+               rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
+       else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
+               rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
+       the_lnet.ln_remote_nets_hbits = max_t(int, 1,
+                                          order_base_2(rnet_htable_size) - 1);
+
         /* All LNDs apart from the LOLND are in separate modules.  They
          * register themselves when their module loads, and unregister
          * themselves when their module is unloaded. */
 #else
+       the_lnet.ln_remote_nets_hbits = 8;
+
         /* Register LNDs
          * NB the order here determines default 'networks=' order */
 # ifdef HAVE_LIBPTHREAD
index 2061107..be082c1 100644 (file)
@@ -2413,6 +2413,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
        int                     hops;
        int                     cpt;
        __u32                   order = 2;
+       cfs_list_t              *rn_list;
 
         /* if !local_nid_dist_zero, I don't return a distance of 0 ever
          * (when lustre sees a distance of 0, it substitutes 0@lo), so I
@@ -2447,13 +2448,14 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
                         if (orderp != NULL)
                                 *orderp = order;
                        lnet_net_unlock(cpt);
-                        return 1;
-                }
+                       return 1;
+               }
 
-                order++;
-        }
+               order++;
+       }
 
-        cfs_list_for_each (e, &the_lnet.ln_remote_nets) {
+       rn_list = lnet_net2rnethash(dstnet);
+       cfs_list_for_each(e, rn_list) {
                 rnet = cfs_list_entry(e, lnet_remotenet_t, lrn_list);
 
                 if (rnet->lrn_net == dstnet) {
index da0373b..80c0a59 100644 (file)
@@ -227,18 +227,20 @@ lnet_rtr_decref_locked(lnet_peer_t *lp)
 lnet_remotenet_t *
 lnet_find_net_locked (__u32 net)
 {
-        lnet_remotenet_t *rnet;
-        cfs_list_t       *tmp;
+       lnet_remotenet_t        *rnet;
+       cfs_list_t              *tmp;
+       cfs_list_t              *rn_list;
 
-        LASSERT (!the_lnet.ln_shutdown);
+       LASSERT(!the_lnet.ln_shutdown);
 
-        cfs_list_for_each (tmp, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list);
+       rn_list = lnet_net2rnethash(net);
+       cfs_list_for_each(tmp, rn_list) {
+               rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list);
 
-                if (rnet->lrn_net == net)
-                        return rnet;
-        }
-        return NULL;
+               if (rnet->lrn_net == net)
+                       return rnet;
+       }
+       return NULL;
 }
 
 static void lnet_shuffle_seed(void)
@@ -365,7 +367,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
         rnet2 = lnet_find_net_locked(net);
         if (rnet2 == NULL) {
                 /* new network */
-                cfs_list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets);
+               cfs_list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
                 rnet2 = rnet;
         }
 
@@ -419,39 +421,47 @@ lnet_check_routes(void)
        cfs_list_t              *e1;
        cfs_list_t              *e2;
        int                     cpt;
+       cfs_list_t              *rn_list;
+       int                     i;
 
        cpt = lnet_net_lock_current();
 
-       cfs_list_for_each(e1, &the_lnet.ln_remote_nets) {
-               rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+               rn_list = &the_lnet.ln_remote_nets_hash[i];
+               cfs_list_for_each(e1, rn_list) {
+                       rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
 
-               route2 = NULL;
-               cfs_list_for_each(e2, &rnet->lrn_routes) {
-                       lnet_nid_t      nid1;
-                       lnet_nid_t      nid2;
-                       int             net;
+                       route2 = NULL;
+                       cfs_list_for_each(e2, &rnet->lrn_routes) {
+                               lnet_nid_t      nid1;
+                               lnet_nid_t      nid2;
+                               int             net;
 
-                       route = cfs_list_entry(e2, lnet_route_t, lr_list);
+                               route = cfs_list_entry(e2, lnet_route_t,
+                                                      lr_list);
 
-                       if (route2 == NULL) {
-                               route2 = route;
-                               continue;
-                       }
+                               if (route2 == NULL) {
+                                       route2 = route;
+                                       continue;
+                               }
 
-                       if (route->lr_gateway->lp_ni ==
-                           route2->lr_gateway->lp_ni)
-                               continue;
+                               if (route->lr_gateway->lp_ni ==
+                                   route2->lr_gateway->lp_ni)
+                                       continue;
 
-                       nid1 = route->lr_gateway->lp_nid;
-                       nid2 = route2->lr_gateway->lp_nid;
-                       net = rnet->lrn_net;
+                               nid1 = route->lr_gateway->lp_nid;
+                               nid2 = route2->lr_gateway->lp_nid;
+                               net = rnet->lrn_net;
 
-                       lnet_net_unlock(cpt);
+                               lnet_net_unlock(cpt);
 
-                       CERROR("Routes to %s via %s and %s not supported\n",
-                              libcfs_net2str(net), libcfs_nid2str(nid1),
-                              libcfs_nid2str(nid2));
-                       return -EINVAL;
+                               CERROR("Routes to %s via %s and %s not "
+                                      "supported\n",
+                                      libcfs_net2str(net),
+                                      libcfs_nid2str(nid1),
+                                      libcfs_nid2str(nid2));
+                               return -EINVAL;
+                       }
                }
        }
 
@@ -463,30 +473,36 @@ int
 lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 {
        struct lnet_peer        *gateway;
-        lnet_remotenet_t    *rnet;
-        lnet_route_t        *route;
-        cfs_list_t          *e1;
-        cfs_list_t          *e2;
-        int                  rc = -ENOENT;
+       lnet_remotenet_t        *rnet;
+       lnet_route_t            *route;
+       cfs_list_t              *e1;
+       cfs_list_t              *e2;
+       int                     rc = -ENOENT;
+       cfs_list_t              *rn_list;
+       int                     idx = 0;
 
-        CDEBUG(D_NET, "Del route: net %s : gw %s\n",
-               libcfs_net2str(net), libcfs_nid2str(gw_nid));
+       CDEBUG(D_NET, "Del route: net %s : gw %s\n",
+              libcfs_net2str(net), libcfs_nid2str(gw_nid));
 
-        /* NB Caller may specify either all routes via the given gateway
-         * or a specific route entry actual NIDs) */
+       /* NB Caller may specify either all routes via the given gateway
+        * or a specific route entry (actual NIDs) */
 
- again:
        lnet_net_lock(LNET_LOCK_EX);
+       if (net == LNET_NIDNET(LNET_NID_ANY))
+               rn_list = &the_lnet.ln_remote_nets_hash[0];
+       else
+               rn_list = lnet_net2rnethash(net);
 
-        cfs_list_for_each (e1, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+ again:
+       cfs_list_for_each(e1, rn_list) {
+               rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
 
-                if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
-                      net == rnet->lrn_net))
-                        continue;
+               if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
+                       net == rnet->lrn_net))
+                       continue;
 
-                cfs_list_for_each (e2, &rnet->lrn_routes) {
-                        route = cfs_list_entry(e2, lnet_route_t, lr_list);
+               cfs_list_for_each(e2, &rnet->lrn_routes) {
+                       route = cfs_list_entry(e2, lnet_route_t, lr_list);
 
                        gateway = route->lr_gateway;
                        if (!(gw_nid == LNET_NID_ANY ||
@@ -495,29 +511,36 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid)
 
                        cfs_list_del(&route->lr_list);
                        cfs_list_del(&route->lr_gwlist);
-                        the_lnet.ln_remote_nets_version++;
+                       the_lnet.ln_remote_nets_version++;
 
-                        if (cfs_list_empty(&rnet->lrn_routes))
-                                cfs_list_del(&rnet->lrn_list);
-                        else
-                                rnet = NULL;
+                       if (cfs_list_empty(&rnet->lrn_routes))
+                               cfs_list_del(&rnet->lrn_list);
+                       else
+                               rnet = NULL;
 
                        lnet_rtr_decref_locked(gateway);
                        lnet_peer_decref_locked(gateway);
 
                        lnet_net_unlock(LNET_LOCK_EX);
 
-                        LIBCFS_FREE(route, sizeof (*route));
+                       LIBCFS_FREE(route, sizeof(*route));
 
-                        if (rnet != NULL)
-                                LIBCFS_FREE(rnet, sizeof(*rnet));
+                       if (rnet != NULL)
+                               LIBCFS_FREE(rnet, sizeof(*rnet));
 
-                        rc = 0;
-                        goto again;
-                }
-        }
+                       rc = 0;
+                       lnet_net_lock(LNET_LOCK_EX);
+                       goto again;
+               }
+       }
 
+       if (net == LNET_NIDNET(LNET_NID_ANY) &&
+           ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
+               rn_list = &the_lnet.ln_remote_nets_hash[idx];
+               goto again;
+       }
        lnet_net_unlock(LNET_LOCK_EX);
+
        return rc;
 }
 
@@ -536,22 +559,28 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops,
        lnet_remotenet_t        *rnet;
        lnet_route_t            *route;
        int                     cpt;
+       int                     i;
+       cfs_list_t              *rn_list;
 
        cpt = lnet_net_lock_current();
 
-        cfs_list_for_each (e1, &the_lnet.ln_remote_nets) {
-                rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
-
-                cfs_list_for_each (e2, &rnet->lrn_routes) {
-                        route = cfs_list_entry(e2, lnet_route_t, lr_list);
-
-                        if (idx-- == 0) {
-                                *net     = rnet->lrn_net;
-                                *hops    = route->lr_hops;
-                                *gateway = route->lr_gateway->lp_nid;
-                                *alive   = route->lr_gateway->lp_alive;
-                               lnet_net_unlock(cpt);
-                               return 0;
+       for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+               rn_list = &the_lnet.ln_remote_nets_hash[i];
+               cfs_list_for_each(e1, rn_list) {
+                       rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list);
+
+                       cfs_list_for_each(e2, &rnet->lrn_routes) {
+                               route = cfs_list_entry(e2, lnet_route_t,
+                                                      lr_list);
+
+                               if (idx-- == 0) {
+                                       *net     = rnet->lrn_net;
+                                       *hops    = route->lr_hops;
+                                       *gateway = route->lr_gateway->lp_nid;
+                                       *alive   = route->lr_gateway->lp_alive;
+                                       lnet_net_unlock(cpt);
+                                       return 0;
+                               }
                        }
                }
        }
index 8b88987..7b16298 100644 (file)
@@ -199,37 +199,45 @@ int LL_PROC_PROTO(proc_lnet_routes)
                lnet_route_t            *route = NULL;
                lnet_remotenet_t        *rnet  = NULL;
                int                     skip  = off - 1;
+               cfs_list_t              *rn_list;
+               int                     i;
 
                lnet_net_lock(0);
 
                if (ver != LNET_PROC_VERSION(the_lnet.ln_remote_nets_version)) {
                        lnet_net_unlock(0);
-                        LIBCFS_FREE(tmpstr, tmpsiz);
-                        return -ESTALE;
-                }
+                       LIBCFS_FREE(tmpstr, tmpsiz);
+                       return -ESTALE;
+               }
 
-                n = the_lnet.ln_remote_nets.next;
+               for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE && route == NULL;
+                    i++) {
+                       rn_list = &the_lnet.ln_remote_nets_hash[i];
 
-                while (n != &the_lnet.ln_remote_nets && route == NULL) {
-                        rnet = cfs_list_entry(n, lnet_remotenet_t, lrn_list);
+                       n = rn_list->next;
 
-                        r = rnet->lrn_routes.next;
+                       while (n != rn_list && route == NULL) {
+                               rnet = cfs_list_entry(n, lnet_remotenet_t,
+                                                     lrn_list);
 
-                        while (r != &rnet->lrn_routes) {
-                                lnet_route_t *re =
-                                        cfs_list_entry(r, lnet_route_t,
-                                                       lr_list);
-                                if (skip == 0) {
-                                        route = re;
-                                        break;
-                                }
+                               r = rnet->lrn_routes.next;
 
-                                skip--;
-                                r = r->next;
-                        }
+                               while (r != &rnet->lrn_routes) {
+                                       lnet_route_t *re =
+                                               cfs_list_entry(r, lnet_route_t,
+                                                              lr_list);
+                                       if (skip == 0) {
+                                               route = re;
+                                               break;
+                                       }
 
-                        n = n->next;
-                }
+                                       skip--;
+                                       r = r->next;
+                               }
+
+                               n = n->next;
+                       }
+               }
 
                 if (route != NULL) {
                         __u32        net   = rnet->lrn_net;