From 146580754295024ef433a877e54f1549cb7a88ba Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Mon, 22 Oct 2018 16:03:06 -0700 Subject: [PATCH] LU-11299 lnet: use discovery for routing Instead of re-inventing the wheel, routing now uses discovery. Every router interval the router is discovered. This will update the router information locally and will serve to let the router know that the peer is alive. Test-Parameters: forbuildonly Signed-off-by: Amir Shehata Change-Id: I211bf15af0b0a5d50f9e2a69a385419a1dd5096b Reviewed-on: https://review.whamcloud.com/33454 Reviewed-by: Olaf Weber Tested-by: Jenkins --- lnet/include/lnet/lib-lnet.h | 9 ++- lnet/include/lnet/lib-types.h | 4 ++ lnet/lnet/api-ni.c | 19 +++--- lnet/lnet/lib-move.c | 10 ++- lnet/lnet/peer.c | 40 +++++++++++- lnet/lnet/router.c | 141 ++++++++++++++++++++++++++++++++++++------ lnet/lnet/router_proc.c | 3 +- 7 files changed, 190 insertions(+), 36 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index faf176a..494325e 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -567,6 +567,7 @@ extern unsigned int lnet_recovery_interval; extern unsigned int lnet_peer_discovery_disabled; extern unsigned int lnet_drop_asym_route; extern unsigned int router_sensitivity_percentage; +extern int alive_router_check_interval; extern int portal_rotor; void lnet_mt_event_handler(struct lnet_event *event); @@ -851,13 +852,16 @@ int lnet_sock_connect(struct socket **sockp, int *fatal, int lnet_peers_start_down(void); int lnet_peer_buffer_credits(struct lnet_net *net); +void lnet_consolidate_routes_locked(struct lnet_peer *orig_lp, + struct lnet_peer *new_lp); +void lnet_router_discovery_complete(struct lnet_peer *lp); int lnet_monitor_thr_start(void); void lnet_monitor_thr_stop(void); bool lnet_router_checker_active(void); void lnet_check_routers(void); -void lnet_router_post_mt_start(void); +void lnet_wait_router_start(void); void lnet_swap_pinginfo(struct 
lnet_ping_buffer *pbuf); int lnet_ping_info_validate(struct lnet_ping_info *pinfo); @@ -904,6 +908,8 @@ struct lnet_peer_ni *lnet_get_next_peer_ni_locked(struct lnet_peer *peer, struct lnet_peer_ni *lnet_nid2peerni_locked(lnet_nid_t nid, lnet_nid_t pref, int cpt); struct lnet_peer_ni *lnet_nid2peerni_ex(lnet_nid_t nid, int cpt); +struct lnet_peer_ni *lnet_peer_get_ni_locked(struct lnet_peer *lp, + lnet_nid_t nid); struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid); struct lnet_peer *lnet_find_peer(lnet_nid_t nid); void lnet_peer_net_added(struct lnet_net *net); @@ -967,6 +973,7 @@ lnet_peer_ni_is_primary(struct lnet_peer_ni *lpni) } bool lnet_peer_is_uptodate(struct lnet_peer *lp); +bool lnet_peer_gw_discovery(struct lnet_peer *lp); static inline bool lnet_peer_needs_push(struct lnet_peer *lp) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index f6a8ab4..3cf7783 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -734,6 +734,9 @@ struct lnet_peer { #define LNET_PEER_FORCE_PING (1 << 13) /* Forced Ping */ #define LNET_PEER_FORCE_PUSH (1 << 14) /* Forced Push */ +/* gw undergoing alive discovery */ +#define LNET_PEER_RTR_DISCOVERY (1 << 16) + struct lnet_peer_net { /* chain on lp_peer_nets */ struct list_head lpn_peer_nets; @@ -792,6 +795,7 @@ struct lnet_route { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ struct lnet_peer *lr_gateway; /* router node */ + lnet_nid_t lr_nid; /* NID used to add route */ __u32 lr_net; /* remote network number */ __u32 lr_lnet; /* local network number */ int lr_seq; /* sequence for round-robin */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 53988ec..91a128a 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2618,29 +2618,32 @@ LNetNIInit(lnet_pid_t requested_pid) goto err_stop_ping; } - rc = lnet_monitor_thr_start(); - if (rc != 0) - goto err_stop_ping; - rc = lnet_push_target_init(); if (rc != 
0) - goto err_stop_monitor_thr; + goto err_stop_ping; rc = lnet_peer_discovery_start(); if (rc != 0) goto err_destroy_push_target; + rc = lnet_monitor_thr_start(); + if (rc != 0) + goto err_stop_discovery_thr; + lnet_fault_init(); lnet_router_debugfs_init(); mutex_unlock(&the_lnet.ln_api_mutex); + /* wait for all routers to start */ + lnet_wait_router_start(); + return 0; +err_stop_discovery_thr: + lnet_peer_discovery_stop(); err_destroy_push_target: lnet_push_target_fini(); -err_stop_monitor_thr: - lnet_monitor_thr_stop(); err_stop_ping: lnet_ping_target_fini(); err_acceptor_stop: @@ -2690,9 +2693,9 @@ LNetNIFini() lnet_fault_fini(); lnet_router_debugfs_fini(); + lnet_monitor_thr_stop(); lnet_peer_discovery_stop(); lnet_push_target_fini(); - lnet_monitor_thr_stop(); lnet_ping_target_fini(); /* Teardown fns that use my own API functions BEFORE here */ diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index f3f47ff..a079c86 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1941,6 +1941,13 @@ lnet_initiate_peer_discovery(struct lnet_peer_ni *lpni, lnet_peer_ni_addref_locked(lpni); + peer = lpni->lpni_peer_net->lpn_peer; + + if (lnet_peer_gw_discovery(peer)) { + lnet_peer_ni_decref_locked(lpni); + return 0; + } + rc = lnet_discover_peer_locked(lpni, cpt, false); if (rc) { lnet_peer_ni_decref_locked(lpni); @@ -3631,9 +3638,6 @@ int lnet_monitor_thr_start(void) goto clean_thread; } - /* post monitor thread start processing */ - lnet_router_post_mt_start(); - return 0; clean_thread: diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index e1284ae..18367d5 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -663,6 +663,24 @@ lnet_find_peer_ni_locked(lnet_nid_t nid) return lpni; } +struct lnet_peer_ni * +lnet_peer_get_ni_locked(struct lnet_peer *lp, lnet_nid_t nid) +{ + struct lnet_peer_net *lpn; + struct lnet_peer_ni *lpni; + + lpn = lnet_peer_get_net_locked(lp, LNET_NIDNET(nid)); + if (!lpn) + return NULL; + + list_for_each_entry(lpni, 
&lpn->lpn_peer_nis, lpni_peer_nis) { + if (lpni->lpni_nid == nid) + return lpni; + } + + return NULL; +} + struct lnet_peer * lnet_find_peer(lnet_nid_t nid) { @@ -1716,6 +1734,19 @@ out_mutex_unlock: * Peer Discovery */ +bool +lnet_peer_gw_discovery(struct lnet_peer *lp) +{ + bool rc = false; + + spin_lock(&lp->lp_lock); + if (lp->lp_state & LNET_PEER_RTR_DISCOVERY) + rc = true; + spin_unlock(&lp->lp_lock); + + return rc; +} + /* * Is a peer uptodate from the point of view of discovery? * @@ -1805,6 +1836,9 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp) spin_unlock(&lp->lp_lock); wake_up_all(&lp->lp_dc_waitq); + if (lp->lp_rtr_refcount > 0) + lnet_router_discovery_complete(lp); + lnet_net_unlock(LNET_LOCK_EX); /* iterate through all pending messages and send them again */ @@ -2693,8 +2727,10 @@ __must_hold(&lp->lp_lock) rc = lnet_peer_merge_data(lp, pbuf); } } else { - rc = lnet_peer_set_primary_data( - lpni->lpni_peer_net->lpn_peer, pbuf); + struct lnet_peer *new_lp; + new_lp = lpni->lpni_peer_net->lpn_peer; + rc = lnet_peer_set_primary_data(new_lp, pbuf); + lnet_consolidate_routes_locked(lp, new_lp); lnet_peer_ni_decref_locked(lpni); } } diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 832b2bf..9ac6de1 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -78,13 +78,9 @@ int avoid_asym_router_failure = 1; module_param(avoid_asym_router_failure, int, 0644); MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)"); -static int dead_router_check_interval = 60; -module_param(dead_router_check_interval, int, 0644); -MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)"); - -static int live_router_check_interval = 60; -module_param(live_router_check_interval, int, 0644); -MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)"); +int alive_router_check_interval = 60; 
+module_param(alive_router_check_interval, int, 0644); +MODULE_PARM_DESC(alive_router_check_interval, "Seconds between live router health checks (<= 0 to disable)"); static int router_ping_timeout = 50; module_param(router_ping_timeout, int, 0644); @@ -230,6 +226,65 @@ bool lnet_is_route_alive(struct lnet_route *route) return route_alive; } +void +lnet_consolidate_routes_locked(struct lnet_peer *orig_lp, + struct lnet_peer *new_lp) +{ + struct lnet_peer_ni *lpni; + struct lnet_route *route, *tmp; + + /* + * Although a route is correlated with a peer, a specific NID is + * used when it's added. That NID refers to a peer_ni within a + * peer. There could be other peer_nis on the same net, which can + * be used to send to that gateway. However when we consolidate + * gateways because of discovery, the nid used to add the route + * might've moved between gateway peers. In that case move the + * route to the new gateway as well, so as not to confuse the user + * who added it. Use the _safe iterator: list_move() unlinks route. + */ + list_for_each_entry_safe(route, tmp, &orig_lp->lp_routes, lr_gwlist) { + lpni = lnet_peer_get_ni_locked(orig_lp, route->lr_nid); + if (!lpni) { + lnet_net_lock(LNET_LOCK_EX); + list_move(&route->lr_gwlist, &new_lp->lp_routes); + lnet_net_unlock(LNET_LOCK_EX); + } + } + +} + +void +lnet_router_discovery_complete(struct lnet_peer *lp) +{ + struct lnet_peer_ni *lpni = NULL; + + spin_lock(&lp->lp_lock); + lp->lp_state &= ~LNET_PEER_RTR_DISCOVERY; + spin_unlock(&lp->lp_lock); + + /* + * Router discovery successful? All peer information would've been + * updated already. No need to do any more processing + */ + if (!lp->lp_dc_error) + return; + /* + * discovery failed? then we need to set the status of each lpni + * to DOWN. It will be updated the next time we discover the + * router. For router peer NIs not on local networks, we never send + * messages directly to them, so their health will always remain + * at maximum.
We can only tell if they are up or down from the + * status returned in the PING response. If we fail to get that + * status in our scheduled router discovery, then we'll assume + * it's down until we're told otherwise. + */ + CDEBUG(D_NET, "%s: Router discovery failed %d\n", + libcfs_nid2str(lp->lp_primary_nid), lp->lp_dc_error); + while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) + lpni->lpni_ns_status = LNET_NI_STATUS_DOWN; +} + static void lnet_rtr_addref_locked(struct lnet_peer *lp) { @@ -385,6 +440,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, /* store the local and remote net that the route represents */ route->lr_lnet = LNET_NIDNET(gateway); route->lr_net = net; + route->lr_nid = gateway; route->lr_priority = priority; route->lr_hops = hops; @@ -636,9 +692,9 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, if (idx-- == 0) { *net = rnet->lrn_net; + *gateway = route->lr_nid; *hops = route->lr_hops; *priority = route->lr_priority; - *gateway = route->lr_gateway->lp_primary_nid; *alive = lnet_is_route_alive(route); lnet_net_unlock(cpt); return 0; @@ -697,8 +753,7 @@ lnet_update_ni_status_locked(void) LASSERT(the_lnet.ln_routing); - timeout = router_ping_timeout + - MAX(live_router_check_interval, dead_router_check_interval); + timeout = router_ping_timeout + alive_router_check_interval; now = ktime_get_real_seconds(); while ((ni = lnet_get_next_ni_locked(NULL, ni))) { @@ -728,7 +783,7 @@ lnet_update_ni_status_locked(void) } } -void lnet_router_post_mt_start(void) +void lnet_wait_router_start(void) { if (check_routers_before_use) { /* Note that a helpful side-effect of pinging all known routers @@ -745,26 +800,25 @@ void lnet_router_post_mt_start(void) inline bool lnet_router_checker_active(void) { - if (the_lnet.ln_mt_state != LNET_MT_STATE_RUNNING) - return true; - /* Router Checker thread needs to run when routing is enabled in * order to call lnet_update_ni_status_locked() */ if (the_lnet.ln_routing) return true; 
return !list_empty(&the_lnet.ln_routers) && - (live_router_check_interval > 0 || - dead_router_check_interval > 0); + alive_router_check_interval > 0; } void lnet_check_routers(void) { - struct lnet_peer *rtr; + struct lnet_peer_ni *lpni; struct list_head *entry; - __u64 version; - int cpt; + struct lnet_peer *rtr; + __u64 version; + time64_t now; + int cpt; + int rc; cpt = lnet_net_lock_current(); rescan: @@ -774,7 +828,54 @@ rescan: rtr = list_entry(entry, struct lnet_peer, lp_rtr_list); - /* TODO use discovery to determine if router is alive */ + now = ktime_get_real_seconds(); + + /* + * only discover the router if we've passed + * alive_router_check_interval seconds. Some of the router + * interfaces could be down and in that case they would be + * undergoing recovery separately from this discovery. + */ + if (now - rtr->lp_rtrcheck_timestamp < + alive_router_check_interval) + continue; + + /* + * find the peer_ni associated with the primary NID first, so a + * failed lookup can't leave LNET_PEER_RTR_DISCOVERY set + */ + lpni = lnet_peer_get_ni_locked(rtr, rtr->lp_primary_nid); + if (!lpni) { + CDEBUG(D_NET, "Expected to find an lpni for %s, but non found\n", + libcfs_nid2str(rtr->lp_primary_nid)); + continue; + } + + /* don't issue another discovery if one is already in flight */ + spin_lock(&rtr->lp_lock); + if (rtr->lp_state & LNET_PEER_RTR_DISCOVERY) { + spin_unlock(&rtr->lp_lock); + continue; + } + /* make sure we actively discover the router */ + rtr->lp_state &= ~LNET_PEER_NIDS_UPTODATE; + rtr->lp_state |= LNET_PEER_RTR_DISCOVERY; + spin_unlock(&rtr->lp_lock); + lnet_peer_ni_addref_locked(lpni); + + /* discover the router */ + CDEBUG(D_NET, "discover %s, cpt = %d\n", + libcfs_nid2str(lpni->lpni_nid), cpt); + rc = lnet_discover_peer_locked(lpni, cpt, false); + + /* drop the ref taken by lnet_peer_ni_addref_locked() above */ + lnet_peer_ni_decref_locked(lpni); + + if (!rc) + rtr->lp_rtrcheck_timestamp = now; + else + CERROR("Failed to discover router %s\n", + libcfs_nid2str(rtr->lp_primary_nid)); /* NB
dropped lock */ if (version != the_lnet.ln_routers_version) { diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 5627e40..8d6b040 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -234,8 +234,7 @@ proc_lnet_routes(struct ctl_table *table, int write, void __user *buffer, libcfs_net2str(net), hops, priority, alive ? "up" : "down", - /* TODO: replace with actual nid */ - libcfs_nid2str(LNET_NID_ANY)); + libcfs_nid2str(route->lr_nid)); LASSERT(tmpstr + tmpsiz - s > 0); } -- 1.8.3.1