Whamcloud - gitweb
LU-16836 lnet: ensure dev notification on lnd startup 57/51057/6
author: Serguei Smirnov <ssmirnov@whamcloud.com>
Fri, 19 May 2023 02:12:19 +0000 (19:12 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 28 Jun 2023 21:48:46 +0000 (21:48 +0000)
Look up device and link state on lnd startup so that
the initial NI state may be set properly.

Reduce code duplication by adding lnet_set_link_fatal_state() and
lnet_get_link_status() functions which are shared across LNDs.
LND-specific versions of these are removed.

This fixes an issue where adding an LNet NI on an interface whose
cable is unplugged left the NI state initialized as "up".

Fixes: da230373bd ("LU-16563 lnet: use discovered ni status")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I16084092cc21a4e42dfef4624adfbf57eb4fdecb
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51057
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/lnet/config.c

index 0d0ddd9..e4224b0 100644 (file)
@@ -960,6 +960,8 @@ void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf);
 int lnet_ping_info_validate(struct lnet_ping_info *pinfo);
 struct lnet_ping_buffer *lnet_ping_buffer_alloc(int bytes, gfp_t gfp);
 void lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf);
+int lnet_get_link_status(struct net_device *dev);
+__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state);
 
 static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf)
 {
index 07b2d68..7c1c8a3 100644 (file)
@@ -2637,15 +2637,19 @@ kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
        struct kib_net  *net;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       struct lnet_ni *ni = NULL;
 
        /* for health check */
        list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
+               ni = net->ibn_ni;
                if (val)
                        CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
-                                       libcfs_nidstr(&net->ibn_ni->ni_nid));
-               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
-                                             val);
-               if (!update_ping_buf && (val != ni_state_before))
+                                       libcfs_nidstr(&ni->ni_nid));
+               ni_state_before = lnet_set_link_fatal_state(ni, val);
+
+               if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
+                   (val != ni_state_before))
                        update_ping_buf = true;
        }
 
@@ -2831,21 +2835,6 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
         return 0;
 }
 
-static int kiblnd_get_link_status(struct net_device *dev)
-{
-       int ret = -1;
-
-       LASSERT(dev);
-
-       if (!netif_running(dev))
-               ret = 0;
-       /* Some devices may not be providing link settings */
-       else if (dev->ethtool_ops->get_link)
-               ret = dev->ethtool_ops->get_link(dev);
-
-       return ret;
-}
-
 static int
 kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
 {
@@ -3053,7 +3042,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
                if (set_fatal) {
                        rcu_read_lock();
                        netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
-                       if (netdev && (kiblnd_get_link_status(netdev) == 1))
+                       if (netdev && (lnet_get_link_status(netdev) == 1))
                                kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
                        rcu_read_unlock();
                }
@@ -3123,6 +3112,7 @@ kiblnd_handle_link_state_change(struct net_device *dev,
        bool found_ip = false;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       int state;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        event_kibdev = kiblnd_dev_search(dev->name);
@@ -3138,10 +3128,7 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                if (!in_dev) {
                        CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
                               dev->name);
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nidstr(&net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
@@ -3153,27 +3140,20 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                if (!found_ip) {
                        CDEBUG(D_NET, "Interface %s has no matching ip\n",
                               dev->name);
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nidstr(&net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
 
                if (link_down) {
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nidstr(&net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     link_down);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                } else {
-                       CDEBUG(D_NET, "%s: set link fatal state to %u\n",
-                              libcfs_nidstr(&net->ibn_ni->ni_nid),
-                              (kiblnd_get_link_status(dev) == 0));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (kiblnd_get_link_status(dev) == 0));
+                       state = (lnet_get_link_status(dev) == 0);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   state);
                }
 ni_done:
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -3193,6 +3173,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct net_device *event_netdev = ifa->ifa_dev->dev;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       struct lnet_ni *ni = NULL;
+       bool link_down;
 
        event_kibdev = kiblnd_dev_search(event_netdev->name);
 
@@ -3204,12 +3186,11 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
 
        list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets,
                                 ibn_list) {
-               CDEBUG(D_NET, "%s: set link fatal state to %u\n",
-                      libcfs_nidstr(&net->ibn_ni->ni_nid),
-                      (event == NETDEV_DOWN));
-               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
-                                             (event == NETDEV_DOWN));
+               ni = net->ibn_ni;
+               link_down = (event == NETDEV_DOWN);
+               ni_state_before = lnet_set_link_fatal_state(ni, link_down);
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    ((event == NETDEV_DOWN) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -3580,6 +3561,7 @@ kiblnd_startup(struct lnet_ni *ni)
        int rc;
        int i;
        bool newdev;
+       struct net_device *netdev;
 
        LASSERT(ni->ni_net->net_lnd == &the_o2iblnd);
 
@@ -3691,6 +3673,16 @@ kiblnd_startup(struct lnet_ni *ni)
        /* for health check */
        if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN)
                kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+
+       rcu_read_lock();
+       netdev = dev_get_by_name_rcu(ni->ni_net_ns, net->ibn_dev->ibd_ifname);
+       if (((netdev->reg_state == NETREG_UNREGISTERING) ||
+            (netdev->operstate != IF_OPER_UP)) ||
+           (lnet_get_link_status(netdev) == 0)) {
+               kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+       }
+       rcu_read_unlock();
+
        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
        net->ibn_init = IBLND_INIT_ALL;
index 43b46b7..cb8e214 100644 (file)
 static const struct lnet_lnd the_ksocklnd;
 struct ksock_nal_data ksocknal_data;
 
+/*
+ * Find the network device in the NI's namespace that owns the IPv4/IPv6
+ * address \a addr and report whether that device looks usable.
+ *
+ * Loopback interfaces and interfaces without IFF_UP are skipped.
+ *
+ * \param addr        address to look up (AF_INET or AF_INET6)
+ * \param ni          NI whose ni_net_ns namespace is searched
+ * \param dev_status  out: -1 if the address family is not supported,
+ *                    0 if no device matched, or the matched device is
+ *                    unregistering, not operationally up, or reports
+ *                    link down, 1 otherwise
+ *
+ * \retval ifindex of the matching device, or -1 if none was found
+ */
+static int ksocknal_ip2index(struct sockaddr *addr, struct lnet_ni *ni,
+                            int *dev_status)
+{
+       struct net_device *dev;
+       struct net_device *match = NULL;
+       int ret = -1;
+       DECLARE_CONST_IN_IFADDR(ifa);
+
+       *dev_status = -1;
+
+       if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+               return ret;
+
+       rcu_read_lock();
+       /* the device list is walked under RCU, so use the RCU iterator */
+       for_each_netdev_rcu(ni->ni_net_ns, dev) {
+               int flags = dev_get_flags(dev);
+               struct in_device *in_dev;
+
+               if (flags & IFF_LOOPBACK) /* skip the loopback IF */
+                       continue;
+
+               if (!(flags & IFF_UP))
+                       continue;
+
+               switch (addr->sa_family) {
+               case AF_INET:
+                       in_dev = __in_dev_get_rcu(dev);
+                       if (!in_dev)
+                               continue;
+
+                       in_dev_for_each_ifa_rcu(ifa, in_dev) {
+                               if (ifa->ifa_local ==
+                                   ((struct sockaddr_in *)addr)->sin_addr.s_addr)
+                                       ret = dev->ifindex;
+                       }
+                       endfor_ifa(in_dev);
+                       break;
+#if IS_ENABLED(CONFIG_IPV6)
+               case AF_INET6: {
+                       struct inet6_dev *in6_dev;
+                       const struct inet6_ifaddr *ifa6;
+                       struct sockaddr_in6 *addr6 = (struct sockaddr_in6*)addr;
+
+                       in6_dev = __in6_dev_get(dev);
+                       if (!in6_dev)
+                               continue;
+
+                       list_for_each_entry_rcu(ifa6, &in6_dev->addr_list, if_list) {
+                               if (ipv6_addr_cmp(&ifa6->addr,
+                                                &addr6->sin6_addr) == 0)
+                                       ret = dev->ifindex;
+                       }
+                       break;
+                       }
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+               }
+               if (ret >= 0) {
+                       /* pin the device so that it can still be inspected
+                        * safely once the RCU read section has ended
+                        */
+                       dev_hold(dev);
+                       match = dev;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       if (!match) {
+               /* no interface carries this address */
+               *dev_status = 0;
+               return ret;
+       }
+
+       if (match->reg_state == NETREG_UNREGISTERING ||
+           match->operstate != IF_OPER_UP ||
+           lnet_get_link_status(match) == 0)
+               *dev_status = 0;
+       else
+               *dev_status = 1;
+
+       dev_put(match);
+
+       return ret;
+}
+
+
 static struct ksock_conn_cb *
 ksocknal_create_conn_cb(struct sockaddr *addr)
 {
@@ -1838,25 +1910,6 @@ ksocknal_free_buffers (void)
        }
 }
 
-static int ksocknal_get_link_status(struct net_device *dev)
-{
-       int ret = -1;
-
-       LASSERT(dev);
-
-       if (!netif_running(dev)) {
-               ret = 0;
-               CDEBUG(D_NET, "device not running\n");
-       }
-       /* Some devices may not be providing link settings */
-       else if (dev->ethtool_ops->get_link) {
-               ret = dev->ethtool_ops->get_link(dev);
-               CDEBUG(D_NET, "get_link returns %u\n", ret);
-       }
-
-       return ret;
-}
-
 static int
 ksocknal_handle_link_state_change(struct net_device *dev,
                                  unsigned char operstate)
@@ -1872,6 +1925,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
        struct sockaddr_in *sa;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       int state;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        ifindex = dev->ifindex;
@@ -1902,7 +1956,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                        continue;
 
                if (dev->reg_state == NETREG_UNREGISTERING) {
-                       /* Device is being unregitering, we need to clear the
+                       /* Device is being unregistered, we need to clear the
                         * index, it can change when device will be back */
                        ksi->ksni_index = -1;
                        goto out;
@@ -1914,9 +1968,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                if (!in_dev) {
                        CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
                               dev->name);
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
@@ -1928,24 +1980,20 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                if (!found_ip) {
                        CDEBUG(D_NET, "Interface %s has no matching ip\n",
                               dev->name);
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
 
                if (link_down) {
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                } else {
-                       CDEBUG(D_NET, "set link fatal state to %u\n",
-                              (ksocknal_get_link_status(dev) == 0));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (ksocknal_get_link_status(dev) == 0));
+                       state = (lnet_get_link_status(dev) == 0);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   state);
                }
 ni_done:
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -1960,7 +2008,7 @@ out:
 static int
 ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
 {
-       struct lnet_ni *ni;
+       struct lnet_ni *ni = NULL;
        struct ksock_net *net;
        struct ksock_net *cnxt;
        struct net_device *event_netdev = ifa->ifa_dev->dev;
@@ -1969,6 +2017,7 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct sockaddr_in *sa;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       bool link_down;
 
        if (!ksocknal_data.ksnd_nnets)
                goto out;
@@ -1986,12 +2035,13 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
                        continue;
 
                if (sa->sin_addr.s_addr == ifa->ifa_local) {
-                       CDEBUG(D_NET, "set link fatal state to %u\n",
-                              (event == NETDEV_DOWN));
                        ni = net->ksnn_ni;
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (event == NETDEV_DOWN));
+                       link_down = (event == NETDEV_DOWN);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   link_down);
+
                        if (!update_ping_buf &&
+                           (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                            ((event == NETDEV_DOWN) != ni_state_before))
                                update_ping_buf = true;
                }
@@ -2440,6 +2490,7 @@ ksocknal_startup(struct lnet_ni *ni)
        struct ksock_interface *ksi = NULL;
        struct lnet_inetdev *ifaces = NULL;
        int rc, if_idx;
+       int dev_status;
 
        LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
        if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
@@ -2504,6 +2555,12 @@ ksocknal_startup(struct lnet_ni *ni)
        if (rc != 0)
                goto out_net;
 
+       if ((ksocknal_ip2index((struct sockaddr *)&ksi->ksni_addr,
+                               ni,
+                               &dev_status) < 0) ||
+            (dev_status <= 0))
+               lnet_set_link_fatal_state(ni, 1);
+
        list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
        net->ksnn_ni = ni;
        ksocknal_data.ksnd_nnets++;
index 08609e3..cfa55c4 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/ctype.h>
 #include <linux/inetdevice.h>
 #include <linux/nsproxy.h>
+#include <linux/ethtool.h>
 #include <net/net_namespace.h>
 #include <lnet/lib-lnet.h>
 #include <net/addrconf.h>
@@ -1502,6 +1503,37 @@ lnet_match_networks(const char **networksp, const char *ip2nets,
        return count;
 }
 
+/*
+ * Atomically store \a link_state in ni->ni_fatal_error_on, logging the
+ * new value at D_NET.
+ *
+ * \retval the value ni_fatal_error_on held before the update
+ */
+__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state)
+{
+       __u32 prev_state;
+
+       CDEBUG(D_NET, "%s: set link fatal state to %u\n",
+              libcfs_nidstr(&ni->ni_nid), link_state);
+
+       prev_state = atomic_xchg(&ni->ni_fatal_error_on, link_state);
+
+       return prev_state;
+}
+EXPORT_SYMBOL(lnet_set_link_fatal_state);
+
+/*
+ * Report the link state of network device \a dev.
+ *
+ * \retval 0   the device is not running
+ * \retval -1  \a dev is NULL, or the device provides no ethtool
+ *             get_link() operation, so the state cannot be determined
+ * \retval otherwise the value returned by the device's get_link()
+ */
+int lnet_get_link_status(struct net_device *dev)
+{
+       int ret = -1;
+
+       if (!dev)
+               return -1;
+
+       if (!netif_running(dev)) {
+               ret = 0;
+               CDEBUG(D_NET, "device idx %d not running\n", dev->ifindex);
+       } else if (dev->ethtool_ops && dev->ethtool_ops->get_link) {
+               /* Some devices may not be providing link settings */
+               ret = dev->ethtool_ops->get_link(dev);
+               /* arguments are passed in the order the format expects:
+                * device index first, then the get_link() result
+                */
+               CDEBUG(D_NET, "device idx %d get_link %d\n",
+                      dev->ifindex, ret);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(lnet_get_link_status);
+
 int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns, bool v6)
 {
        struct lnet_inetdev *ifaces = NULL;