Whamcloud - gitweb
i=liang,b=15332,b=21103:
author isaac <isaac>
Tue, 1 Dec 2009 15:10:57 +0000 (15:10 +0000)
committer isaac <isaac>
Tue, 1 Dec 2009 15:10:57 +0000 (15:10 +0000)
- LNet router shuffler.

lnet/ChangeLog
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-md.c
lnet/lnet/router.c
lnet/lnet/router_proc.c

index ae041fa..0bc3834 100644 (file)
@@ -19,6 +19,10 @@ Details    :
 
 Severity   : enhancement
 Bugzilla   : 15332
+Description: LNet router shuffler.
+
+Severity   : enhancement
+Bugzilla   : 15332
 Description: LNet fine grain routing support.
 
 Severity   : normal
index 5332664..69b4829 100644 (file)
@@ -674,9 +674,6 @@ void lnet_get_tunables(void);
 int lnet_peers_start_down(void);
 int lnet_peer_buffer_credits(lnet_ni_t *ni);
 
-extern int router_ping_timeout;
-extern int dead_router_check_interval;
-extern int live_router_check_interval;
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
 void lnet_swap_pinginfo(lnet_ping_info_t *info);
index 6e64ad2..1d5eb7a 100644 (file)
@@ -222,7 +222,7 @@ lnet_md_validate(lnet_md_t *umd)
 
         if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
             umd->length > LNET_MAX_IOV) {
-                CERROR("Invalid option: too many fragments %d, %d max\n",
+                CERROR("Invalid option: too many fragments %u, %d max\n",
                        umd->length, LNET_MAX_IOV);
                 return -EINVAL;
         }
index a8f816c..9a231e4 100644 (file)
@@ -81,15 +81,15 @@ static int avoid_asym_router_failure = 0;
 CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0444,
                 "Avoid asymmetrical failures: reserved, use at your own risk");
 
-int dead_router_check_interval = 0;
+static int dead_router_check_interval = 0;
 CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0444,
                 "Seconds between dead router health checks (<= 0 to disable)");
 
-int live_router_check_interval = 0;
+static int live_router_check_interval = 0;
 CFS_MODULE_PARM(live_router_check_interval, "i", int, 0444,
                 "Seconds between live router health checks (<= 0 to disable)");
 
-int router_ping_timeout = 50;
+static int router_ping_timeout = 50;
 CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444,
                 "Seconds to wait for the reply to a router health query");
 
@@ -235,6 +235,34 @@ lnet_find_net_locked (__u32 net)
         return NULL;
 }
 
+/* Add route to rnet at a shuffled list slot. NB expects LNET_LOCK held */
+void
+lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route)
+{
+        unsigned int      len = 0;
+        unsigned int      offset = 0;
+        struct list_head *e;
+        extern __u64 lnet_create_interface_cookie(void);
+
+        list_for_each (e, &rnet->lrn_routes) { /* count existing routes */
+                len++;
+        }
+
+        /* FIXME use Lustre random function when it's moved to libcfs.
+         * See bug 18751; the interface cookie stands in for it here */
+        /* len+1 slots for a new entry; the +1 also prevents modulo by 0 */
+        offset = ((unsigned int) lnet_create_interface_cookie()) % (len + 1);
+        list_for_each (e, &rnet->lrn_routes) { /* step offset entries in */
+                if (offset == 0)
+                        break;
+                offset--;
+        }
+        list_add(&route->lr_list, e); /* link in where the walk stopped */
+
+        the_lnet.ln_remote_nets_version++; /* bump after the list change */
+        lnet_rtr_addref_locked(route->lr_gateway);
+}
+
 int
 lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
 {
@@ -321,11 +349,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
                 ni = route->lr_gateway->lp_ni;
                 lnet_ni_addref_locked(ni);
 
-                list_add_tail(&route->lr_list, &rnet2->lrn_routes);
-                the_lnet.ln_remote_nets_version++;
-
-                lnet_rtr_addref_locked(route->lr_gateway);
-
+                lnet_add_route_to_rnet(rnet2, route);
                 LNET_UNLOCK();
 
                 /* XXX Assume alive */
index fce91c2..76d82b3 100644 (file)
@@ -168,9 +168,9 @@ int LL_PROC_PROTO(proc_lnet_routes)
                                 if (skip == 0) {
                                         route = re;
                                         break;
-                                } else
-                                        skip--;
+                                }
 
+                                skip--;
                                 r = r->next;
                         }
 
@@ -264,10 +264,10 @@ int LL_PROC_PROTO(proc_lnet_routers)
 
                         if (skip == 0) {
                                 peer = lp;
-                                        break;
-                                } else
-                                        skip--;
+                                break;
+                        }
 
+                        skip--;
                         r = r->next;
                 }
 
@@ -280,8 +280,10 @@ int LL_PROC_PROTO(proc_lnet_routers)
                         int alive_cnt = peer->lp_alive_count;
                         int alive     = peer->lp_alive;
                         int pingsent  = !peer->lp_ping_notsent;
-                        int last_ping = cfs_duration_sec(now - peer->lp_ping_timestamp);
-                        int down_ni   = lnet_router_down_ni(peer, LNET_NIDNET(LNET_NID_ANY));
+                        int last_ping = cfs_duration_sec(cfs_time_sub(now,
+                                                     peer->lp_ping_timestamp));
+                        int down_ni   = lnet_router_down_ni(peer,
+                                                    LNET_NIDNET(LNET_NID_ANY));
 
                         if (deadline == 0)
                                 s += snprintf(s, tmpstr + tmpsiz - s,
@@ -296,7 +298,7 @@ int LL_PROC_PROTO(proc_lnet_routers)
                                               nrefs, nrtrrefs, alive_cnt,
                                               alive ? "up" : "down", last_ping,
                                               pingsent,
-                                              cfs_duration_sec(deadline - now),
+                                              cfs_duration_sec(cfs_time_sub(deadline, now)),
                                               down_ni, libcfs_nid2str(nid));
                         LASSERT (tmpstr + tmpsiz - s > 0);
                 }
@@ -411,13 +413,14 @@ int LL_PROC_PROTO(proc_lnet_peers)
                                             &the_lnet.ln_peer_hash[idx]) {
                                                 num = 1;
                                                 idx++;
-                                        } else
+                                        } else {
                                                 num++;
+                                        }
 
                                         break;
-                                } else
-                                        skip--;
+                                }
 
+                                skip--;
                                 p = lp->lp_hashlist.next;
                         }
 
@@ -572,14 +575,15 @@ int LL_PROC_PROTO(proc_lnet_nis)
                         if (skip == 0) {
                                 ni = a_ni;
                                 break;
-                        } else
-                                skip--;
+                        }
 
+                        skip--;
                         n = n->next;
                 }
 
                 if (ni != NULL) {
                         cfs_time_t now = cfs_time_current();
+                        int        last_alive = -1;
                         int        maxtxcr = ni->ni_maxtxcredits;
                         int        txcr = ni->ni_txcredits;
                         int        mintxcr = ni->ni_mintxcredits;
@@ -587,11 +591,11 @@ int LL_PROC_PROTO(proc_lnet_nis)
                         int        npeerrtrcr = ni->ni_peerrtrcredits;
                         lnet_nid_t nid = ni->ni_nid;
                         int        nref = ni->ni_refcount;
-                        int        last_alive;
                         char      *stat;
 
-                        last_alive = (the_lnet.ln_routing) ?
-                                 cfs_duration_sec(now - ni->ni_last_alive) : -1;
+                        if (the_lnet.ln_routing)
+                                last_alive = cfs_duration_sec(cfs_time_sub(now,
+                                                            ni->ni_last_alive));
                         if (ni->ni_lnd->lnd_type == LOLND)  /* @lo forever alive */
                                 last_alive = 0;