From da230373bd14306cb97fb48748ebce205f09d468 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Thu, 16 Feb 2023 10:34:03 -0800 Subject: [PATCH] LU-16563 lnet: use discovered ni status to set initial health If not routing, track local NI status in the ping buffer so that a locally recognized "down" state (for example, one caused by a downed network interface or link) is available to any discovering peer. If an NI's 'fatal' status changes, push the update to peers. On the active side of discovery, check the peer NI status; if the NI is down, decrement its health score and queue it for recovery. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Serguei Smirnov Change-Id: I513c7942099c0da9088fa6d4460f76386ea91d3b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50027 Reviewed-by: Chris Horn Reviewed-by: Cyril Bordage Reviewed-by: Frank Sehr Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lnet/include/lnet/lib-lnet.h | 3 ++- lnet/klnds/o2iblnd/o2iblnd.c | 49 ++++++++++++++++++++++++++++++++++---------- lnet/klnds/socklnd/socklnd.c | 38 ++++++++++++++++++++++++++-------- lnet/lnet/api-ni.c | 20 ++++++++++++++++++ lnet/lnet/peer.c | 14 +++++++++++++ 5 files changed, 103 insertions(+), 21 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 218f496..c8b8d4d 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -251,7 +251,7 @@ __must_hold(&ni->ni_lock) return LNET_NI_STATUS_UP; else if (atomic_read(&ni->ni_fatal_error_on)) return LNET_NI_STATUS_DOWN; - else if (ni->ni_status) + else if (the_lnet.ln_routing && ni->ni_status) return *ni->ni_status; else return LNET_NI_STATUS_UP; @@ -1295,4 +1295,5 @@ lnet_set_route_aliveness(struct lnet_route *route, bool alive) old ? "up" : "down", alive ? 
"up" : "down"); } +void lnet_update_ping_buffer(void); #endif diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index d943cd1..07b2d68 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2635,14 +2635,22 @@ static inline void kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val) { struct kib_net *net; + __u32 ni_state_before; + bool update_ping_buf = false; /* for health check */ list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) { if (val) CDEBUG(D_NETERROR, "Fatal device error for NI %s\n", libcfs_nidstr(&net->ibn_ni->ni_nid)); - atomic_set(&net->ibn_ni->ni_fatal_error_on, val); + ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, + val); + if (!update_ping_buf && (val != ni_state_before)) + update_ping_buf = true; } + + if (update_ping_buf) + lnet_update_ping_buffer(); } void @@ -3113,6 +3121,8 @@ kiblnd_handle_link_state_change(struct net_device *dev, bool link_down = !(operstate == IF_OPER_UP); struct in_device *in_dev; bool found_ip = false; + __u32 ni_state_before; + bool update_ping_buf = false; DECLARE_CONST_IN_IFADDR(ifa); event_kibdev = kiblnd_dev_search(dev->name); @@ -3122,7 +3132,6 @@ kiblnd_handle_link_state_change(struct net_device *dev, list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) { found_ip = false; - ni = net->ibn_ni; in_dev = __in_dev_get_rtnl(dev); @@ -3131,8 +3140,9 @@ kiblnd_handle_link_state_change(struct net_device *dev, dev->name); CDEBUG(D_NET, "%s: set link fatal state to 1\n", libcfs_nidstr(&net->ibn_ni->ni_nid)); - atomic_set(&ni->ni_fatal_error_on, 1); - continue; + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + 1); + goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (htonl(event_kibdev->ibd_ifip) == ifa->ifa_local) @@ -3145,22 +3155,31 @@ kiblnd_handle_link_state_change(struct net_device *dev, dev->name); CDEBUG(D_NET, "%s: set link fatal state to 1\n", libcfs_nidstr(&net->ibn_ni->ni_nid)); - 
atomic_set(&ni->ni_fatal_error_on, 1); - continue; + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + 1); + goto ni_done; } if (link_down) { CDEBUG(D_NET, "%s: set link fatal state to 1\n", libcfs_nidstr(&net->ibn_ni->ni_nid)); - atomic_set(&ni->ni_fatal_error_on, link_down); + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + link_down); } else { CDEBUG(D_NET, "%s: set link fatal state to %u\n", libcfs_nidstr(&net->ibn_ni->ni_nid), (kiblnd_get_link_status(dev) == 0)); - atomic_set(&ni->ni_fatal_error_on, - (kiblnd_get_link_status(dev) == 0)); + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + (kiblnd_get_link_status(dev) == 0)); } +ni_done: + if (!update_ping_buf && + (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) + update_ping_buf = true; } + + if (update_ping_buf) + lnet_update_ping_buffer(); out: return 0; } @@ -3172,6 +3191,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) struct kib_net *net; struct kib_net *cnxt; struct net_device *event_netdev = ifa->ifa_dev->dev; + __u32 ni_state_before; + bool update_ping_buf = false; event_kibdev = kiblnd_dev_search(event_netdev->name); @@ -3186,9 +3207,15 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) CDEBUG(D_NET, "%s: set link fatal state to %u\n", libcfs_nidstr(&net->ibn_ni->ni_nid), (event == NETDEV_DOWN)); - atomic_set(&net->ibn_ni->ni_fatal_error_on, - (event == NETDEV_DOWN)); + ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, + (event == NETDEV_DOWN)); + if (!update_ping_buf && + ((event == NETDEV_DOWN) != ni_state_before)) + update_ping_buf = true; } + + if (update_ping_buf) + lnet_update_ping_buffer(); out: return 0; } diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 215d3de..5917654 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1983,6 +1983,8 @@ ksocknal_handle_link_state_change(struct net_device *dev, bool found_ip = false; struct 
ksock_interface *ksi = NULL; struct sockaddr_in *sa; + __u32 ni_state_before; + bool update_ping_buf = false; DECLARE_CONST_IN_IFADDR(ifa); ifindex = dev->ifindex; @@ -2026,8 +2028,9 @@ ksocknal_handle_link_state_change(struct net_device *dev, CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", dev->name); CDEBUG(D_NET, "set link fatal state to 1\n"); - atomic_set(&ni->ni_fatal_error_on, 1); - continue; + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + 1); + goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { if (sa->sin_addr.s_addr == ifa->ifa_local) @@ -2039,20 +2042,29 @@ ksocknal_handle_link_state_change(struct net_device *dev, CDEBUG(D_NET, "Interface %s has no matching ip\n", dev->name); CDEBUG(D_NET, "set link fatal state to 1\n"); - atomic_set(&ni->ni_fatal_error_on, 1); - continue; + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + 1); + goto ni_done; } if (link_down) { CDEBUG(D_NET, "set link fatal state to 1\n"); - atomic_set(&ni->ni_fatal_error_on, link_down); + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + 1); } else { CDEBUG(D_NET, "set link fatal state to %u\n", (ksocknal_get_link_status(dev) == 0)); - atomic_set(&ni->ni_fatal_error_on, - (ksocknal_get_link_status(dev) == 0)); + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + (ksocknal_get_link_status(dev) == 0)); } +ni_done: + if (!update_ping_buf && + (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) + update_ping_buf = true; } + + if (update_ping_buf) + lnet_update_ping_buffer(); out: return 0; } @@ -2068,6 +2080,8 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) int ifindex; struct ksock_interface *ksi = NULL; struct sockaddr_in *sa; + __u32 ni_state_before; + bool update_ping_buf = false; if (!ksocknal_data.ksnd_nnets) goto out; @@ -2088,10 +2102,16 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) CDEBUG(D_NET, "set link fatal state to %u\n", (event == NETDEV_DOWN)); ni = net->ksnn_ni; - 
atomic_set(&ni->ni_fatal_error_on, - (event == NETDEV_DOWN)); + ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, + (event == NETDEV_DOWN)); + if (!update_ping_buf && + ((event == NETDEV_DOWN) != ni_state_before)) + update_ping_buf = true; } } + + if (update_ping_buf) + lnet_update_ping_buffer(); out: return 0; } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index c0f9293..da8e767 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3925,6 +3925,26 @@ out: return rc; } +void lnet_update_ping_buffer(void) +{ + struct lnet_ping_buffer *pbuf; + struct lnet_handle_md ping_mdh; + + if (the_lnet.ln_routing) + return; + + mutex_lock(&the_lnet.ln_api_mutex); + + if (!lnet_ping_target_setup(&pbuf, &ping_mdh, + LNET_PING_INFO_HDR_SIZE + + lnet_get_ni_bytes(), + false)) + lnet_ping_target_update(pbuf, ping_mdh); + + mutex_unlock(&the_lnet.ln_api_mutex); +} +EXPORT_SYMBOL(lnet_update_ping_buffer); + void lnet_incr_dlc_seq(void) { atomic_inc(&lnet_dlc_seq_no); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 168507e..5e617d8 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3095,6 +3095,15 @@ int ping_info_count_entries(struct lnet_ping_buffer *pbuf) return nnis; } +static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni) +{ + if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN) + lnet_handle_remote_failure_locked(lpni); + else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP && + !lpni->lpni_last_alive) + atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE); +} + /* * Build a peer from incoming data. 
* @@ -3134,6 +3143,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, int i; int j; int rc; + __u32 old_st; flags = LNET_PEER_DISCOVERED; if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) @@ -3212,7 +3222,10 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, */ lpni = lnet_peer_ni_find_locked(&curnis[i]); if (lpni) { + old_st = lpni->lpni_ns_status; lpni->lpni_ns_status = *stp; + if (old_st != lpni->lpni_ns_status) + handle_disc_lpni_health(lpni); lnet_peer_ni_decref_locked(lpni); } break; @@ -3243,6 +3256,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, lpni = lnet_peer_ni_find_locked(&addnis[i].ns_nid); if (lpni) { lpni->lpni_ns_status = addnis[i].ns_status; + handle_disc_lpni_health(lpni); lnet_peer_ni_decref_locked(lpni); } } -- 1.8.3.1