X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Frouter.c;h=2cc502dc5cb1d1eae008ffd9f7d6252f792e0ee6;hp=0346cf02efcde65aaf1b95a0d269b00535820d19;hb=1a7720934dfb3105afd2f025c953bea2167d4e5d;hpb=cab57464e17b9d21f37f7234bb3d54451a7829fc diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 0346cf0..2cc502d 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -78,6 +78,14 @@ int avoid_asym_router_failure = 1; module_param(avoid_asym_router_failure, int, 0644); MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); +int dead_router_check_interval = INT_MIN; +module_param(dead_router_check_interval, int, 0444); +MODULE_PARM_DESC(dead_router_check_interval, "(DEPRECATED - Use alive_router_check_interval)"); + +int live_router_check_interval = INT_MIN; +module_param(live_router_check_interval, int, 0444); +MODULE_PARM_DESC(live_router_check_interval, "(DEPRECATED - Use alive_router_check_interval)"); + int alive_router_check_interval = 60; module_param(alive_router_check_interval, int, 0644); MODULE_PARM_DESC(alive_router_check_interval, "Seconds between live router health checks (<= 0 to disable)"); @@ -223,6 +231,16 @@ bool lnet_is_route_alive(struct lnet_route *route) bool route_alive; /* + * if discovery is disabled then rely on the cached aliveness + * information. This is handicapped information which we log when + * we receive the discovery ping response. The most uptodate + * aliveness information can only be obtained when discovery is + * enabled. + */ + if (lnet_is_discovery_disabled(gw)) + return route->lr_alive; + + /* * check the gateway's interfaces on the route rnet to make sure * that the gateway is viable. 
*/ @@ -283,13 +301,131 @@ lnet_consolidate_routes_locked(struct lnet_peer *orig_lp, } +static inline void +lnet_set_route_aliveness(struct lnet_route *route, bool alive) /* store @alive in route->lr_alive; the CERROR line is emitted only when the value actually changes */ +{ + /* Log when there's a state change */ + if (route->lr_alive != alive) { + CERROR("route to %s through %s has gone from %s to %s\n", + libcfs_net2str(route->lr_net), + libcfs_nid2str(route->lr_gateway->lp_primary_nid), + (route->lr_alive) ? "up" : "down", + alive ? "up" : "down"); + route->lr_alive = alive; + } +} + +void +lnet_router_discovery_ping_reply(struct lnet_peer *lp) /* update lr_alive on the routes through gateway @lp from the ping buffer in lp->lp_data; returns without touching any route unless discovery is disabled for @lp */ +{ + struct lnet_ping_buffer *pbuf = lp->lp_data; + struct lnet_remotenet *rnet; + struct lnet_peer_net *llpn; + struct lnet_route *route; + bool net_up = false; + unsigned lp_state; + __u32 net, net2; + int i, j; + + + spin_lock(&lp->lp_lock); + lp_state = lp->lp_state; /* snapshot taken under lp_lock; the flag test further down runs after the lock is dropped */ + + /* only handle replies if discovery is disabled. */ + if (!lnet_is_discovery_disabled_locked(lp)) { + spin_unlock(&lp->lp_lock); + return; + } + + spin_unlock(&lp->lp_lock); + + if (lp_state & LNET_PEER_PING_FAILED || + pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED) { + CDEBUG(D_NET, "Set routes down for gw %s because %s %d\n", + libcfs_nid2str(lp->lp_primary_nid), + lp_state & LNET_PEER_PING_FAILED ? "ping failed" : + "route feature is disabled", lp->lp_ping_error); + /* If the ping failed or the peer has routing disabled then + * mark the routes served by this peer down + */ + list_for_each_entry(route, &lp->lp_routes, lr_gwlist) + lnet_set_route_aliveness(route, false); + return; + } + + CDEBUG(D_NET, "Discovery is disabled.
Processing reply for gw: %s\n", + libcfs_nid2str(lp->lp_primary_nid)); + + /* + * examine the ping response: + * For each NID in the ping response, extract the net + * if the net exists on our remote net list then + * iterate over the routes on the rnet and if: + * The route's local net is healthy and + * The remote net status is UP, then mark the route up + * otherwise mark the route down + */ + for (i = 1; i < pbuf->pb_info.pi_nnis; i++) { /* NOTE(review): entry 0 is never examined - presumably the loopback NID; confirm */ + net = LNET_NIDNET(pbuf->pb_info.pi_ni[i].ns_nid); + rnet = lnet_find_rnet_locked(net); /* NOTE(review): no net lock is taken in this function - presumably the caller holds it for the _locked helpers; confirm */ + if (!rnet) + continue; + list_for_each_entry(route, &rnet->lrn_routes, lr_list) { + /* check if this is the route's gateway */ + if (lp->lp_primary_nid != + route->lr_gateway->lp_primary_nid) + continue; + + llpn = lnet_peer_get_net_locked(lp, route->lr_lnet); + if (!llpn) { + lnet_set_route_aliveness(route, false); + continue; + } + + if (!lnet_is_gateway_net_alive(llpn)) { + lnet_set_route_aliveness(route, false); + continue; + } + + if (avoid_asym_router_failure && + pbuf->pb_info.pi_ni[i].ns_status != + LNET_NI_STATUS_UP) { + net_up = false; + + /* + * revisit all previous NIDs and check if + * any on the network we're examining is + * up. If at least one is up then we consider + * the route to be alive. + */ + for (j = 1; j < i; j++) { /* entries after i are not scanned here; the outer loop examines them on a later iteration */ + net2 = LNET_NIDNET(pbuf->pb_info.
+ pi_ni[j].ns_nid); + if (net2 == net && + pbuf->pb_info.pi_ni[j].ns_status == + LNET_NI_STATUS_UP) + net_up = true; + } + if (!net_up) { + lnet_set_route_aliveness(route, false); + continue; + } + } + + lnet_set_route_aliveness(route, true); /* reached only when none of the checks above hit a continue */ + } + } +} + void lnet_router_discovery_complete(struct lnet_peer *lp) { struct lnet_peer_ni *lpni = NULL; + struct lnet_route *route; spin_lock(&lp->lp_lock); lp->lp_state &= ~LNET_PEER_RTR_DISCOVERY; + lp->lp_state |= LNET_PEER_RTR_DISCOVERED; spin_unlock(&lp->lp_lock); /* @@ -312,6 +448,9 @@ lnet_router_discovery_complete(struct lnet_peer *lp) libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error); while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) lpni->lpni_ns_status = LNET_NI_STATUS_DOWN; + + list_for_each_entry(route, &lp->lp_routes, lr_gwlist) + lnet_set_route_aliveness(route, false); } static void @@ -380,15 +519,15 @@ static void lnet_shuffle_seed(void) add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid)); seeded = 1; - return; } /* NB expects LNET_LOCK held */ static void lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) { - unsigned int len = 0; + struct lnet_peer_net *lpn; unsigned int offset = 0; + unsigned int len = 0; struct list_head *e; lnet_shuffle_seed(); @@ -401,7 +540,7 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) * different nodes are using the same list of routers, they end up * preferring different routers.
*/ - offset = cfs_rand() % (len + 1); + offset = prandom_u32_max(len + 1); list_for_each(e, &rnet->lrn_routes) { if (offset == 0) break; @@ -412,7 +551,10 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) * force a router check on the gateway to make sure the route is * alive */ - route->lr_gateway->lp_rtrcheck_timestamp = 0; + list_for_each_entry(lpn, &route->lr_gateway->lp_peer_nets, + lpn_peer_nets) { + lpn->lpn_rtrcheck_timestamp = 0; + } the_lnet.ln_remote_nets_version++; @@ -425,7 +567,7 @@ lnet_add_route_to_rnet(struct lnet_remotenet *rnet, struct lnet_route *route) int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, - unsigned int priority) + __u32 priority, __u32 sensitivity) { struct list_head *route_entry; struct lnet_remotenet *rnet; @@ -451,6 +593,13 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, if (lnet_islocalnet(net)) return -EEXIST; + if (!lnet_islocalnet(LNET_NIDNET(gateway))) { + CERROR("Cannot add route with gateway %s. There is no local interface configured on LNet %s\n", + libcfs_nid2str(gateway), + libcfs_net2str(LNET_NIDNET(gateway))); + return -EHOSTUNREACH; + } + /* Assume net, route, all new */ LIBCFS_ALLOC(route, sizeof(*route)); LIBCFS_ALLOC(rnet, sizeof(*rnet)); @@ -528,8 +677,13 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, * to move the routes from the peer that's being deleted to the * consolidated peer lp_routes list */ - if (add_route) + if (add_route) { + gw->lp_health_sensitivity = sensitivity; lnet_add_route_to_rnet(rnet2, route); + if (lnet_peer_discovery_disabled) + CWARN("Consider turning discovery on to enable full " + "Multi-Rail routing functionality\n"); + } /* * get rid of the reference on the lpni. 
@@ -548,7 +702,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, LIBCFS_FREE(rnet, sizeof(*rnet)); /* kick start the monitor thread to handle the added route */ - wake_up(&the_lnet.ln_mt_waitq); + complete(&the_lnet.ln_mt_wait_complete); return rc; } @@ -593,7 +747,7 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) struct lnet_peer_ni *lpni; struct lnet_route *route; struct list_head zombies; - struct lnet_peer *lp; + struct lnet_peer *lp = NULL; int i = 0; INIT_LIST_HEAD(&rnet_zombies); @@ -640,6 +794,18 @@ lnet_del_route(__u32 net, lnet_nid_t gw_nid) } delete_zombies: + /* + * check if there are any routes remaining on the gateway + * If there are no more routes make sure to set the peer's + * lp_disc_net_id to 0 (invalid), in case we add more routes in + * the future on that gateway, then we start our discovery process + * from scratch + */ + if (lpni) { + if (list_empty(&lp->lp_routes)) + lp->lp_disc_net_id = 0; + } + lnet_net_unlock(LNET_LOCK_EX); while (!list_empty(&zombies)) { @@ -698,15 +864,15 @@ int lnet_get_rtr_pool_cfg(int cpt, struct lnet_ioctl_pool_cfg *pool_cfg) int lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive, __u32 *priority) + lnet_nid_t *gateway, __u32 *alive, __u32 *priority, __u32 *sensitivity) { - struct list_head *e1; - struct list_head *e2; struct lnet_remotenet *rnet; - struct lnet_route *route; - int cpt; - int i; struct list_head *rn_list; + struct lnet_route *route; + struct list_head *e1; + struct list_head *e2; + int cpt; + int i; cpt = lnet_net_lock_current(); @@ -724,6 +890,8 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, *gateway = route->lr_nid; *hops = route->lr_hops; *priority = route->lr_priority; + *sensitivity = route->lr_gateway-> + lp_health_sensitivity; *alive = lnet_is_route_alive(route); lnet_net_unlock(cpt); return 0; @@ -755,7 +923,7 @@ lnet_wait_known_routerstate(void) spin_lock(&rtr->lp_lock); - if ((rtr->lp_state & LNET_PEER_DISCOVERED) == 0) { + if 
((rtr->lp_state & LNET_PEER_RTR_DISCOVERED) == 0) { all_known = 0; spin_unlock(&rtr->lp_lock); break; @@ -773,10 +941,30 @@ lnet_wait_known_routerstate(void) } } -static void +static inline bool +lnet_net_set_status_locked(struct lnet_net *net, __u32 status) /* write @status into ns_status of every NI on @net that has an ni_status block and currently holds a different value; returns true when at least one NI was changed */ +{ + struct lnet_ni *ni; + bool update = false; + + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + lnet_ni_lock(ni); /* ns_status is compared and written under the per-NI lock */ + if (ni->ni_status && + ni->ni_status->ns_status != status) { + ni->ni_status->ns_status = status; + update = true; + } + lnet_ni_unlock(ni); + } + + return update; +} + +static bool lnet_update_ni_status_locked(void) { - struct lnet_ni *ni = NULL; + struct lnet_net *net; + bool push = false; time64_t now; time64_t timeout; @@ -785,31 +973,30 @@ lnet_update_ni_status_locked(void) timeout = router_ping_timeout + alive_router_check_interval; now = ktime_get_real_seconds(); - while ((ni = lnet_get_next_ni_locked(NULL, ni))) { - if (ni->ni_net->net_lnd->lnd_type == LOLND) + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + if (net->net_lnd->lnd_type == LOLND) continue; - if (now < ni->ni_last_alive + timeout) + if (now < net->net_last_alive + timeout) continue; - lnet_ni_lock(ni); + spin_lock(&net->net_lock); /* re-check with lock */ - if (now < ni->ni_last_alive + timeout) { - lnet_ni_unlock(ni); + if (now < net->net_last_alive + timeout) { + spin_unlock(&net->net_lock); continue; } + spin_unlock(&net->net_lock); /* net_lock covers only the timestamp re-check; it is released before the NI statuses are changed */ - LASSERT(ni->ni_status != NULL); - - if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) { - CDEBUG(D_NET, "NI(%s:%lld) status changed to down\n", - libcfs_nid2str(ni->ni_nid), timeout); - /* NB: so far, this is the only place to set - * NI status to "down" */ - ni->ni_status->ns_status = LNET_NI_STATUS_DOWN; - } - lnet_ni_unlock(ni); + /* + * if the net didn't receive any traffic for past the + * timeout on any of its constituent NIs, then mark all + * the NIs down.
+ */ + push |= lnet_net_set_status_locked(net, LNET_NI_STATUS_DOWN); /* accumulate: a later net with no status change must not clear a change recorded for an earlier net */ + } + + return push; } void lnet_wait_router_start(void) @@ -841,10 +1028,15 @@ lnet_router_checker_active(void) void lnet_check_routers(void) { + struct lnet_peer_net *first_lpn = NULL; + struct lnet_peer_net *lpn; struct lnet_peer_ni *lpni; struct list_head *entry; struct lnet_peer *rtr; + bool push = false; + bool found_lpn; __u64 version; + __u32 net_id; time64_t now; int cpt; int rc; @@ -865,8 +1057,33 @@ rescan: * interfaces could be down and in that case they would be * undergoing recovery separately from this discovery. */ - if (now - rtr->lp_rtrcheck_timestamp < - alive_router_check_interval) + /* find next peer net which is also local */ + first_lpn = NULL; /* the wrap-around marker and the search result belong to one gateway: reset both for every router so state from the previous router cannot leak in */ + found_lpn = false; + net_id = rtr->lp_disc_net_id; + do { + lpn = lnet_get_next_peer_net_locked(rtr, net_id); + if (!lpn) { + CERROR("gateway %s has no networks\n", + libcfs_nid2str(rtr->lp_primary_nid)); + break; + } + if (first_lpn == lpn) /* back at the first net examined: every peer net was visited without finding a local one */ + break; + if (!first_lpn) + first_lpn = lpn; + found_lpn = lnet_islocalnet_locked(lpn->lpn_net_id); + net_id = lpn->lpn_net_id; + } while (!found_lpn); + + if (!found_lpn || !lpn) { + CERROR("no local network found for gateway %s\n", + libcfs_nid2str(rtr->lp_primary_nid)); + continue; + } + + if (now - lpn->lpn_rtrcheck_timestamp < + alive_router_check_interval / lnet_current_net_count) /* NOTE(review): divides by lnet_current_net_count - confirm it cannot be 0 on this path */ continue; /* @@ -892,6 +1109,9 @@ rescan: } lnet_peer_ni_addref_locked(lpni); + /* specify the net to use */ + rtr->lp_disc_net_id = lpn->lpn_net_id; + /* discover the router */ CDEBUG(D_NET, "discover %s, cpt = %d\n", libcfs_nid2str(lpni->lpni_nid), cpt); @@ -901,7 +1121,7 @@ rescan: lnet_peer_ni_decref_locked(lpni); if (!rc) - rtr->lp_rtrcheck_timestamp = now; + lpn->lpn_rtrcheck_timestamp = now; else CERROR("Failed to discover router %s\n", libcfs_nid2str(rtr->lp_primary_nid)); @@ -914,9 +1134,13 @@ rescan: } if (the_lnet.ln_routing) - lnet_update_ni_status_locked(); + push = lnet_update_ni_status_locked(); lnet_net_unlock(cpt); + + /* if the status of
the ni changed update the peers */ + if (push) + lnet_push_update_to_peers(1); } void @@ -1232,7 +1454,7 @@ lnet_rtrpools_alloc(int im_a_router) lnet_net_lock(LNET_LOCK_EX); the_lnet.ln_routing = 1; lnet_net_unlock(LNET_LOCK_EX); - wake_up(&the_lnet.ln_mt_waitq); + complete(&the_lnet.ln_mt_wait_complete); return 0; failed: @@ -1324,6 +1546,10 @@ lnet_rtrpools_enable(void) ~LNET_PING_FEAT_RTE_DISABLED; lnet_net_unlock(LNET_LOCK_EX); + if (lnet_peer_discovery_disabled) + CWARN("Consider turning discovery on to enable full " + "Multi-Rail routing functionality\n"); + return rc; } @@ -1364,6 +1590,8 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset, time64_t when) { struct lnet_peer_ni *lpni = NULL; + struct lnet_route *route; + struct lnet_peer *lp; time64_t now = ktime_get_seconds(); int cpt; @@ -1433,6 +1661,11 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset, cpt = lpni->lpni_cpt; lnet_net_lock(cpt); lnet_peer_ni_decref_locked(lpni); + if (lpni && lpni->lpni_peer_net && lpni->lpni_peer_net->lpn_peer) { + lp = lpni->lpni_peer_net->lpn_peer; + list_for_each_entry(route, &lp->lp_routes, lr_gwlist) + lnet_set_route_aliveness(route, alive); + } lnet_net_unlock(cpt); return 0;