X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Flnet%2Frouter.c;h=80c0a59b00da0b49e00f8b4eb4dfa6bec6d94577;hb=b1e22bd4df5bcea4f32dbf510ba784817c529ce8;hp=a380035e94198fda87210f9495f965fd0ee08f75;hpb=2f5ad4e8cf26406a7aa732efa4d813dbcced55d9;p=fs%2Flustre-release.git diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index a380035..80c0a59 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * - * Copyright (c) 2011, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. * * This file is part of Portals * http://sourceforge.net/projects/sandiaportals/ @@ -82,23 +82,23 @@ lnet_peer_buffer_credits(lnet_ni_t *ni) static int check_routers_before_use = 0; CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444, - "Assume routers are down and ping them before use"); + "Assume routers are down and ping them before use"); -static int avoid_asym_router_failure = 0; +static int avoid_asym_router_failure = 1; CFS_MODULE_PARM(avoid_asym_router_failure, "i", int, 0644, - "Avoid asymmetrical failures: reserved, use at your own risk"); + "Avoid asymmetrical router failures (0 to disable)"); -static int dead_router_check_interval = 0; +static int dead_router_check_interval = 60; CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0644, - "Seconds between dead router health checks (<= 0 to disable)"); + "Seconds between dead router health checks (<= 0 to disable)"); -static int live_router_check_interval = 0; +static int live_router_check_interval = 60; CFS_MODULE_PARM(live_router_check_interval, "i", int, 0644, - "Seconds between live router health checks (<= 0 to disable)"); + "Seconds between live router health checks (<= 0 to disable)"); static int router_ping_timeout = 50; CFS_MODULE_PARM(router_ping_timeout, "i", int, 0644, - "Seconds to wait for the reply to a router health query"); + "Seconds to wait for the reply to a router health query"); int lnet_peers_start_down(void) @@ -227,18 +227,20 @@ lnet_rtr_decref_locked(lnet_peer_t *lp) lnet_remotenet_t * lnet_find_net_locked (__u32 net) { - lnet_remotenet_t *rnet; - cfs_list_t *tmp; + lnet_remotenet_t *rnet; + cfs_list_t *tmp; + cfs_list_t *rn_list; - LASSERT (!the_lnet.ln_shutdown); + LASSERT(!the_lnet.ln_shutdown); - cfs_list_for_each (tmp, &the_lnet.ln_remote_nets) { - rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list); + rn_list = lnet_net2rnethash(net); + cfs_list_for_each(tmp, rn_list) { + rnet = cfs_list_entry(tmp, lnet_remotenet_t, lrn_list); - if (rnet->lrn_net == net) - return rnet; - } - return NULL; + if (rnet->lrn_net == net) + return rnet; + } + return NULL; } static void lnet_shuffle_seed(void) @@ -365,7 +367,7 @@ lnet_add_route (__u32 net, unsigned int hops, lnet_nid_t gateway) rnet2 = lnet_find_net_locked(net); if (rnet2 == NULL) { /* new network */ - cfs_list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets); + cfs_list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net)); rnet2 = rnet; } @@ -419,39 +421,47 @@ lnet_check_routes(void) cfs_list_t *e1; cfs_list_t *e2; int cpt; + cfs_list_t *rn_list; + int i; cpt = lnet_net_lock_current(); - cfs_list_for_each(e1, &the_lnet.ln_remote_nets) { - rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); + for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { + rn_list = &the_lnet.ln_remote_nets_hash[i]; + cfs_list_for_each(e1, rn_list) { + rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); - route2 = NULL; - cfs_list_for_each(e2, &rnet->lrn_routes) { - lnet_nid_t nid1; - lnet_nid_t nid2; - int net; + route2 = NULL; + cfs_list_for_each(e2, &rnet->lrn_routes) { + lnet_nid_t nid1; + lnet_nid_t nid2; + int net; - route = cfs_list_entry(e2, lnet_route_t, lr_list); + route = cfs_list_entry(e2, lnet_route_t, + lr_list); - if (route2 == NULL) { - route2 = route; - continue; - } + if (route2 == NULL) { + route2 = route; + continue; + } - if (route->lr_gateway->lp_ni == - route2->lr_gateway->lp_ni) - continue; + if (route->lr_gateway->lp_ni == + route2->lr_gateway->lp_ni) + continue; - nid1 = route->lr_gateway->lp_nid; - nid2 = route2->lr_gateway->lp_nid; - net = rnet->lrn_net; + nid1 = route->lr_gateway->lp_nid; + nid2 = route2->lr_gateway->lp_nid; + net = rnet->lrn_net; - lnet_net_unlock(cpt); + lnet_net_unlock(cpt); - CERROR("Routes to %s via %s and %s not supported\n", - libcfs_net2str(net), libcfs_nid2str(nid1), - libcfs_nid2str(nid2)); - return -EINVAL; + CERROR("Routes to %s via %s and %s not " + "supported\n", + libcfs_net2str(net), + libcfs_nid2str(nid1), + libcfs_nid2str(nid2)); + return -EINVAL; + } } } @@ -463,30 +473,36 @@ int lnet_del_route(__u32 net, lnet_nid_t gw_nid) { struct lnet_peer *gateway; - lnet_remotenet_t *rnet; - lnet_route_t *route; - cfs_list_t *e1; - cfs_list_t *e2; - int rc = -ENOENT; + lnet_remotenet_t *rnet; + lnet_route_t *route; + cfs_list_t *e1; + cfs_list_t *e2; + int rc = -ENOENT; + cfs_list_t *rn_list; + int idx = 0; - CDEBUG(D_NET, "Del route: net %s : gw %s\n", - libcfs_net2str(net), libcfs_nid2str(gw_nid)); + CDEBUG(D_NET, "Del route: net %s : gw %s\n", + libcfs_net2str(net), libcfs_nid2str(gw_nid)); - /* NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) */ + /* NB Caller may specify either all routes via the given gateway + * or a specific route entry actual NIDs) */ - again: lnet_net_lock(LNET_LOCK_EX); + if (net == LNET_NIDNET(LNET_NID_ANY)) + rn_list = &the_lnet.ln_remote_nets_hash[0]; + else + rn_list = lnet_net2rnethash(net); - cfs_list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); + again: + cfs_list_for_each(e1, rn_list) { + rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); - if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) - continue; + if (!(net == LNET_NIDNET(LNET_NID_ANY) || + net == rnet->lrn_net)) + continue; - cfs_list_for_each (e2, &rnet->lrn_routes) { - route = cfs_list_entry(e2, lnet_route_t, lr_list); + cfs_list_for_each(e2, &rnet->lrn_routes) { + route = cfs_list_entry(e2, lnet_route_t, lr_list); gateway = route->lr_gateway; if (!(gw_nid == LNET_NID_ANY || @@ -495,29 +511,36 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) cfs_list_del(&route->lr_list); cfs_list_del(&route->lr_gwlist); - the_lnet.ln_remote_nets_version++; + the_lnet.ln_remote_nets_version++; - if (cfs_list_empty(&rnet->lrn_routes)) - cfs_list_del(&rnet->lrn_list); - else - rnet = NULL; + if (cfs_list_empty(&rnet->lrn_routes)) + cfs_list_del(&rnet->lrn_list); + else + rnet = NULL; lnet_rtr_decref_locked(gateway); lnet_peer_decref_locked(gateway); lnet_net_unlock(LNET_LOCK_EX); - LIBCFS_FREE(route, sizeof (*route)); + LIBCFS_FREE(route, sizeof(*route)); - if (rnet != NULL) - LIBCFS_FREE(rnet, sizeof(*rnet)); + if (rnet != NULL) + LIBCFS_FREE(rnet, sizeof(*rnet)); - rc = 0; - goto again; - } - } + rc = 0; + lnet_net_lock(LNET_LOCK_EX); + goto again; + } + } + if (net == LNET_NIDNET(LNET_NID_ANY) && + ++idx < LNET_REMOTE_NETS_HASH_SIZE) { + rn_list = &the_lnet.ln_remote_nets_hash[idx]; + goto again; + } lnet_net_unlock(LNET_LOCK_EX); + return rc; } @@ -536,22 +559,28 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_remotenet_t *rnet; lnet_route_t *route; int cpt; + int i; + cfs_list_t *rn_list; cpt = lnet_net_lock_current(); - cfs_list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); - - cfs_list_for_each (e2, &rnet->lrn_routes) { - route = cfs_list_entry(e2, lnet_route_t, lr_list); - - if (idx-- == 0) { - *net = rnet->lrn_net; - *hops = route->lr_hops; - *gateway = route->lr_gateway->lp_nid; - *alive = route->lr_gateway->lp_alive; - lnet_net_unlock(cpt); - return 0; + for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) { + rn_list = &the_lnet.ln_remote_nets_hash[i]; + cfs_list_for_each(e1, rn_list) { + rnet = cfs_list_entry(e1, lnet_remotenet_t, lrn_list); + + cfs_list_for_each(e2, &rnet->lrn_routes) { + route = cfs_list_entry(e2, lnet_route_t, + lr_list); + + if (idx-- == 0) { + *net = rnet->lrn_net; + *hops = route->lr_hops; + *gateway = route->lr_gateway->lp_nid; + *alive = route->lr_gateway->lp_alive; + lnet_net_unlock(cpt); + return 0; + } } } } @@ -1049,7 +1078,7 @@ lnet_router_checker_start(void) return 0; #ifdef __KERNEL__ - cfs_sema_init(&the_lnet.ln_rc_signal, 0); + sema_init(&the_lnet.ln_rc_signal, 0); /* EQ size doesn't matter; the callback is guaranteed to get every * event */ eqsz = 0; @@ -1070,7 +1099,7 @@ lnet_router_checker_start(void) if (rc < 0) { CERROR("Can't start router checker thread: %d\n", rc); /* block until event callback signals exit */ - cfs_down(&the_lnet.ln_rc_signal); + down(&the_lnet.ln_rc_signal); rc = LNetEQFree(the_lnet.ln_rc_eqh); LASSERT (rc == 0); the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; @@ -1101,7 +1130,7 @@ lnet_router_checker_stop (void) #ifdef __KERNEL__ /* block until event callback signals exit */ - cfs_down(&the_lnet.ln_rc_signal); + down(&the_lnet.ln_rc_signal); #else lnet_router_checker(); #endif @@ -1256,7 +1285,7 @@ rescan: lnet_prune_rc_data(1); /* wait for UNLINK */ the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - cfs_up(&the_lnet.ln_rc_signal); + up(&the_lnet.ln_rc_signal); /* The unlink event callback will signal final completion */ return 0; }