- LNet fine grain routing support.
Description:
Details :
+Severity : enhancement
+Bugzilla : 15332
+Description: LNet fine-grained routing support.
+
Severity : normal
Bugzilla : 20171
Description: router checker stops working when system wall clock goes backward
typedef struct {
struct list_head lr_list; /* chain on net */
lnet_peer_t *lr_gateway; /* router node */
+ unsigned int lr_hops; /* hop count via this gateway; fewer is preferred */
} lnet_route_t;
typedef struct {
struct list_head lrn_list; /* chain on ln_remote_nets */
struct list_head lrn_routes; /* routes to me */
__u32 lrn_net; /* my net number */
- unsigned int lrn_hops; /* how far I am */
} lnet_remotenet_t;
typedef struct {
}
int
-lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2)
+lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2)
{
+ lnet_peer_t *p1 = r1->lr_gateway;
+ lnet_peer_t *p2 = r2->lr_gateway;
+
+ if (r1->lr_hops < r2->lr_hops)
+ return 1;
+
+ if (r1->lr_hops > r2->lr_hops)
+ return -1;
+
if (p1->lp_txqnob < p2->lp_txqnob)
return 1;
lnet_ni_decref_locked(local_ni);
lnet_ni_decref_locked(src_ni);
LNET_UNLOCK();
- CERROR("no route to %s via from %s\n",
+ CERROR("No route to %s via from %s\n",
libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid));
return -EINVAL;
}
if (lp2->lp_alive &&
lnet_router_down_ni(lp2, rnet->lrn_net) <= 0 &&
(src_ni == NULL || lp2->lp_ni == src_ni) &&
- (lp == NULL || lnet_compare_routers(lp2, lp) > 0)) {
+ (lp == NULL ||
+ lnet_compare_routes(route, best_route) > 0)) {
best_route = route;
lp = lp2;
}
if (src_ni != NULL)
lnet_ni_decref_locked(src_ni);
LNET_UNLOCK();
- CERROR("No route to %s (all routers down)\n",
- libcfs_id2str(msg->msg_target));
+
+ CERROR("No route to %s via %s (all routers down)\n",
+ libcfs_id2str(msg->msg_target),
+ libcfs_nid2str(src_nid));
return -EHOSTUNREACH;
}
{
struct list_head *e;
lnet_ni_t *ni;
- lnet_route_t *route;
lnet_remotenet_t *rnet;
__u32 dstnet = LNET_NIDNET(dstnid);
int hops;
rnet = list_entry(e, lnet_remotenet_t, lrn_list);
if (rnet->lrn_net == dstnet) {
+ lnet_route_t *route;
+ lnet_route_t *shortest = NULL;
+
LASSERT (!list_empty(&rnet->lrn_routes));
- route = list_entry(rnet->lrn_routes.next,
- lnet_route_t, lr_list);
- hops = rnet->lrn_hops;
+
+ list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
+ if (shortest == NULL ||
+ route->lr_hops < shortest->lr_hops)
+ shortest = route;
+ }
+
+ LASSERT (shortest != NULL);
+ hops = shortest->lr_hops;
if (srcnidp != NULL)
- *srcnidp = route->lr_gateway->lp_ni->ni_nid;
+ *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
if (orderp != NULL)
*orderp = order;
LNET_UNLOCK();
lp->lp_rtrcredits =
lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
- LASSERT (!the_lnet.ln_shutdown);
/* can't add peers after shutdown starts */
+ LASSERT (!the_lnet.ln_shutdown);
list_add_tail(&lp->lp_hashlist, lnet_nid2peerhash(nid));
the_lnet.ln_npeers++;
int
lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway)
{
- struct list_head zombies;
struct list_head *e;
lnet_remotenet_t *rnet;
lnet_remotenet_t *rnet2;
lnet_route_t *route;
- lnet_route_t *route2;
lnet_ni_t *ni;
int add_route;
int rc;
CFS_INIT_LIST_HEAD(&rnet->lrn_routes);
rnet->lrn_net = net;
- rnet->lrn_hops = hops;
+ route->lr_hops = hops;
LNET_LOCK();
}
LASSERT (!the_lnet.ln_shutdown);
- CFS_INIT_LIST_HEAD(&zombies);
rnet2 = lnet_find_net_locked(net);
if (rnet2 == NULL) {
rnet2 = rnet;
}
- if (hops > rnet2->lrn_hops) {
- /* New route is longer; ignore it */
- add_route = 0;
- } else if (hops < rnet2->lrn_hops) {
- /* new route supercedes all currently known routes to this
- * net */
- list_add(&zombies, &rnet2->lrn_routes);
- list_del_init(&rnet2->lrn_routes);
- add_route = 1;
- } else {
- add_route = 1;
- /* New route has the same hopcount as existing routes; search
- * for a duplicate route (it's a NOOP if it is) */
- list_for_each (e, &rnet2->lrn_routes) {
- route2 = list_entry(e, lnet_route_t, lr_list);
-
- if (route2->lr_gateway == route->lr_gateway) {
- add_route = 0;
- break;
- }
+ /* Search for a duplicate route (adding a duplicate is a no-op) */
+ add_route = 1;
+ list_for_each (e, &rnet2->lrn_routes) {
+ lnet_route_t *route2 = list_entry(e, lnet_route_t, lr_list);
- /* our loopups must be true */
- LASSERT (route2->lr_gateway->lp_nid != gateway);
+ if (route2->lr_gateway == route->lr_gateway) {
+ add_route = 0;
+ break;
}
+
+ /* peer lookups must be consistent: a different peer cannot have this NID */
+ LASSERT (route2->lr_gateway->lp_nid != gateway);
}
if (add_route) {
ni = route->lr_gateway->lp_ni;
lnet_ni_addref_locked(ni);
- LASSERT (rc == 0);
list_add_tail(&route->lr_list, &rnet2->lrn_routes);
the_lnet.ln_remote_nets_version++;
if (rnet != rnet2)
LIBCFS_FREE(rnet, sizeof(*rnet));
- while (!list_empty(&zombies)) {
- route = list_entry(zombies.next, lnet_route_t, lr_list);
- list_del(&route->lr_list);
-
- LNET_LOCK();
- lnet_rtr_decref_locked(route->lr_gateway);
- lnet_peer_decref_locked(route->lr_gateway);
- LNET_UNLOCK();
- LIBCFS_FREE(route, sizeof(*route));
- }
-
- return rc;
+ return 0;
}
int
if (idx-- == 0) {
*net = rnet->lrn_net;
- *hops = rnet->lrn_hops;
+ *hops = route->lr_hops;
*gateway = route->lr_gateway->lp_nid;
*alive = route->lr_gateway->lp_alive;
LNET_UNLOCK();
if (route != NULL) {
__u32 net = rnet->lrn_net;
- unsigned int hops = rnet->lrn_hops;
+ unsigned int hops = route->lr_hops;
lnet_nid_t nid = route->lr_gateway->lp_nid;
int alive = route->lr_gateway->lp_alive;