Whamcloud - gitweb
LU-16563 lnet: use discovered ni status to set initial health
author: Serguei Smirnov <ssmirnov@whamcloud.com>
Thu, 16 Feb 2023 18:34:03 +0000 (10:34 -0800)
committer: Andreas Dilger <adilger@whamcloud.com>
Tue, 25 Apr 2023 04:01:46 +0000 (04:01 +0000)
If not routing, track local NI status in the ping buffer
so that a locally recognized "down" state (for example,
one caused by a downed network interface or link) is
available to any discovering peer.

On the active side of discovery, check the peer NI status:
if the NI is down, decrement its health score and queue it
for recovery.

Lustre-change: https://review.whamcloud.com/50027/
Lustre-commit: da230373bd14306cb97fb48748ebce205f09d468

Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I513c7942099c0da9088fa6d4460f76386ea91d3b
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50040
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/lnet/api-ni.c
lnet/lnet/peer.c

diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h
index a653170..f2bf44d 100644 (file)
@@ -251,7 +251,7 @@ __must_hold(&ni->ni_lock)
                return LNET_NI_STATUS_UP;
        else if (atomic_read(&ni->ni_fatal_error_on))
                return LNET_NI_STATUS_DOWN;
-       else if (ni->ni_status)
+       else if (the_lnet.ln_routing && ni->ni_status)
                return ni->ni_status->ns_status;
        else
                return LNET_NI_STATUS_UP;
@@ -1086,4 +1086,5 @@ lnet_set_route_aliveness(struct lnet_route *route, bool alive)
                       old ? "up" : "down",
                       alive ? "up" : "down");
 }
+void lnet_update_ping_buffer(void);
 #endif
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index 93d8d35..9dfb0ad 100644 (file)
@@ -2553,14 +2553,22 @@ static inline void
 kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
 {
        struct kib_net  *net;
+       __u32 ni_state_before;
+       bool update_ping_buf = false;
 
        /* for health check */
        list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
                if (val)
                        CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
                                        libcfs_nid2str(net->ibn_ni->ni_nid));
-               atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
+               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+                                             val);
+               if (!update_ping_buf && (val != ni_state_before))
+                       update_ping_buf = true;
        }
+
+       if (update_ping_buf)
+               lnet_update_ping_buffer();
 }
 
 void
@@ -3039,6 +3047,8 @@ kiblnd_handle_link_state_change(struct net_device *dev,
        bool link_down = !(operstate == IF_OPER_UP);
        struct in_device *in_dev;
        bool found_ip = false;
+       __u32 ni_state_before;
+       bool update_ping_buf = false;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        event_kibdev = kiblnd_dev_search(dev->name);
@@ -3048,7 +3058,6 @@ kiblnd_handle_link_state_change(struct net_device *dev,
 
        list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) {
                found_ip = false;
-
                ni = net->ibn_ni;
 
                in_dev = __in_dev_get_rtnl(dev);
@@ -3057,8 +3066,9 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                               dev->name);
                        CDEBUG(D_NET, "%s: set link fatal state to 1\n",
                               libcfs_nid2str(net->ibn_ni->ni_nid));
-                       atomic_set(&ni->ni_fatal_error_on, 1);
-                       continue;
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     1);
+                       goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
                        if (htonl(event_kibdev->ibd_ifip) == ifa->ifa_local)
@@ -3071,22 +3081,31 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                               dev->name);
                        CDEBUG(D_NET, "%s: set link fatal state to 1\n",
                               libcfs_nid2str(net->ibn_ni->ni_nid));
-                       atomic_set(&ni->ni_fatal_error_on, 1);
-                       continue;
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     1);
+                       goto ni_done;
                }
 
                if (link_down) {
                        CDEBUG(D_NET, "%s: set link fatal state to 1\n",
                               libcfs_nid2str(net->ibn_ni->ni_nid));
-                       atomic_set(&ni->ni_fatal_error_on, link_down);
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     link_down);
                } else {
                        CDEBUG(D_NET, "%s: set link fatal state to %u\n",
                               libcfs_nid2str(net->ibn_ni->ni_nid),
                               (kiblnd_get_link_status(dev) == 0));
-                       atomic_set(&ni->ni_fatal_error_on,
-                                  (kiblnd_get_link_status(dev) == 0));
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     (kiblnd_get_link_status(dev) == 0));
                }
+ni_done:
+               if (!update_ping_buf &&
+                   (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+                       update_ping_buf = true;
        }
+
+       if (update_ping_buf)
+               lnet_update_ping_buffer();
 out:
        return 0;
 }
@@ -3098,6 +3117,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct kib_net *net;
        struct kib_net *cnxt;
        struct net_device *event_netdev = ifa->ifa_dev->dev;
+       __u32 ni_state_before;
+       bool update_ping_buf = false;
 
        event_kibdev = kiblnd_dev_search(event_netdev->name);
 
@@ -3112,9 +3133,15 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
                CDEBUG(D_NET, "%s: set link fatal state to %u\n",
                       libcfs_nid2str(net->ibn_ni->ni_nid),
                       (event == NETDEV_DOWN));
-               atomic_set(&net->ibn_ni->ni_fatal_error_on,
-                          (event == NETDEV_DOWN));
+               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+                                             (event == NETDEV_DOWN));
+               if (!update_ping_buf &&
+                   ((event == NETDEV_DOWN) != ni_state_before))
+                       update_ping_buf = true;
        }
+
+       if (update_ping_buf)
+               lnet_update_ping_buffer();
 out:
        return 0;
 }
diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c
index 2bca5b5..50098bb 100644 (file)
@@ -1937,6 +1937,8 @@ ksocknal_handle_link_state_change(struct net_device *dev,
        struct in_device *in_dev;
        bool found_ip = false;
        struct ksock_interface *ksi = NULL;
+       __u32 ni_state_before;
+       bool update_ping_buf = false;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        ifindex = dev->ifindex;
@@ -1979,8 +1981,9 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                        CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
                               dev->name);
                        CDEBUG(D_NET, "set link fatal state to 1\n");
-                       atomic_set(&ni->ni_fatal_error_on, 1);
-                       continue;
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     1);
+                       goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
                        if (ksi->ksni_ipaddr == ntohl(ifa->ifa_local))
@@ -1992,20 +1995,29 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                        CDEBUG(D_NET, "Interface %s has no matching ip\n",
                               dev->name);
                        CDEBUG(D_NET, "set link fatal state to 1\n");
-                       atomic_set(&ni->ni_fatal_error_on, 1);
-                       continue;
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     1);
+                       goto ni_done;
                }
 
                if (link_down) {
                        CDEBUG(D_NET, "set link fatal state to 1\n");
-                       atomic_set(&ni->ni_fatal_error_on, link_down);
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     1);
                } else {
                        CDEBUG(D_NET, "set link fatal state to %u\n",
                               (ksocknal_get_link_status(dev) == 0));
-                       atomic_set(&ni->ni_fatal_error_on,
-                                  (ksocknal_get_link_status(dev) == 0));
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     (ksocknal_get_link_status(dev) == 0));
                }
+ni_done:
+               if (!update_ping_buf &&
+                   (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+                       update_ping_buf = true;
        }
+
+       if (update_ping_buf)
+               lnet_update_ping_buffer();
 out:
        return 0;
 }
@@ -2020,6 +2032,8 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct net_device *event_netdev = ifa->ifa_dev->dev;
        int ifindex;
        struct ksock_interface *ksi = NULL;
+       __u32 ni_state_before;
+       bool update_ping_buf = false;
 
        if (!ksocknal_data.ksnd_nnets)
                goto out;
@@ -2039,10 +2053,16 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
                        CDEBUG(D_NET, "set link fatal state to %u\n",
                               (event == NETDEV_DOWN));
                        ni = net->ksnn_ni;
-                       atomic_set(&ni->ni_fatal_error_on,
-                                  (event == NETDEV_DOWN));
+                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+                                                     (event == NETDEV_DOWN));
+                       if (!update_ping_buf &&
+                           ((event == NETDEV_DOWN) != ni_state_before))
+                               update_ping_buf = true;
                }
        }
+
+       if (update_ping_buf)
+               lnet_update_ping_buffer();
 out:
        return 0;
 }
diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c
index 2ba6e5e..6431b43 100644 (file)
@@ -1818,7 +1818,14 @@ lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
                        ns->ns_nid = ni->ni_nid;
 
                        lnet_ni_lock(ni);
-                       ns->ns_status = lnet_ni_get_status_locked(ni);
+                       if (the_lnet.ln_routing) {
+                               ns->ns_status = lnet_ni_get_status_locked(ni);
+                       } else {
+                               if (atomic_read(&ni->ni_fatal_error_on))
+                                       ns->ns_status = LNET_NI_STATUS_DOWN;
+                               else
+                                       ns->ns_status = LNET_NI_STATUS_UP;
+                       }
                        ni->ni_status = ns;
                        lnet_ni_unlock(ni);
 
@@ -3521,6 +3528,25 @@ out:
        return rc;
 }
 
+void lnet_update_ping_buffer(void)
+{
+       struct lnet_ping_buffer *pbuf;
+       struct lnet_handle_md ping_mdh;
+
+       if (the_lnet.ln_routing)
+               return;
+
+       mutex_lock(&the_lnet.ln_api_mutex);
+
+       if (!lnet_ping_target_setup(&pbuf, &ping_mdh,
+                                   lnet_get_ni_count(),
+                                   false))
+               lnet_ping_target_update(pbuf, ping_mdh);
+
+       mutex_unlock(&the_lnet.ln_api_mutex);
+}
+EXPORT_SYMBOL(lnet_update_ping_buffer);
+
 void lnet_incr_dlc_seq(void)
 {
        atomic_inc(&lnet_dlc_seq_no);
diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index 0a49691..30f3aa2 100644 (file)
@@ -2887,6 +2887,15 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
+static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni)
+{
+       if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN)
+               lnet_handle_remote_failure_locked(lpni);
+       else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
+                !lpni->lpni_last_alive)
+               atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
+}
+
 /*
  * Build a peer from incoming data.
  *
@@ -2921,6 +2930,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
        int i;
        int j;
        int rc;
+       __u32 old_st;
 
        flags = LNET_PEER_DISCOVERED;
        if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
@@ -2984,7 +2994,10 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                                 */
                                lpni = lnet_find_peer_ni_locked(curnis[i]);
                                if (lpni) {
+                                       old_st = lpni->lpni_ns_status;
                                        lpni->lpni_ns_status = pbuf->pb_info.pi_ni[j].ns_status;
+                                       if (old_st != lpni->lpni_ns_status)
+                                               handle_disc_lpni_health(lpni);
                                        lnet_peer_ni_decref_locked(lpni);
                                }
                                break;
@@ -3015,6 +3028,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                lpni = lnet_find_peer_ni_locked(addnis[i].ns_nid);
                if (lpni) {
                        lpni->lpni_ns_status = addnis[i].ns_status;
+                       handle_disc_lpni_health(lpni);
                        lnet_peer_ni_decref_locked(lpni);
                }
        }