Whamcloud - gitweb
LU-16836 lnet: ensure dev notification on lnd startup
authorSerguei Smirnov <ssmirnov@whamcloud.com>
Fri, 19 May 2023 02:12:19 +0000 (19:12 -0700)
committerAndreas Dilger <adilger@whamcloud.com>
Sat, 1 Jul 2023 10:03:51 +0000 (10:03 +0000)
Look up device and link state on lnd startup so that
the initial NI state may be set properly.

Reduce code duplication by adding lnet_set_link_fatal_state() and
lnet_get_link_status() functions which are shared across LNDs.
LND-specific versions of these are removed.

This fixes the issue with adding LNet NI using an interface with
cable unplugged which results in the NI state initialized as "up".

Lustre-change: https://review.whamcloud.com/51057
Lustre-commit: 09c6e2b872287c847d15620788f6cf50b3a9f30b

Fixes: c4df48116d ("LU-16563 lnet: use discovered ni status")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I16084092cc21a4e42dfef4624adfbf57eb4fdecb
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51310
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/lnet/config.c

index 73ae810..40a9df0 100644 (file)
@@ -844,6 +844,8 @@ void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf);
 int lnet_ping_info_validate(struct lnet_ping_info *pinfo);
 struct lnet_ping_buffer *lnet_ping_buffer_alloc(int nnis, gfp_t gfp);
 void lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf);
+int lnet_get_link_status(struct net_device *dev);
+__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state);
 
 static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf)
 {
index 9dfb0ad..d2dc55f 100644 (file)
@@ -2555,15 +2555,19 @@ kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
        struct kib_net  *net;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       struct lnet_ni *ni = NULL;
 
        /* for health check */
        list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
+               ni = net->ibn_ni;
                if (val)
                        CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
-                                       libcfs_nid2str(net->ibn_ni->ni_nid));
-               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
-                                             val);
-               if (!update_ping_buf && (val != ni_state_before))
+                                       libcfs_nid2str(ni->ni_nid));
+               ni_state_before = lnet_set_link_fatal_state(ni, val);
+
+               if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
+                   (val != ni_state_before))
                        update_ping_buf = true;
        }
 
@@ -2749,21 +2753,6 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
         return 0;
 }
 
-static int kiblnd_get_link_status(struct net_device *dev)
-{
-       int ret = -1;
-
-       LASSERT(dev);
-
-       if (!netif_running(dev))
-               ret = 0;
-       /* Some devices may not be providing link settings */
-       else if (dev->ethtool_ops->get_link)
-               ret = dev->ethtool_ops->get_link(dev);
-
-       return ret;
-}
-
 static int
 kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
 {
@@ -2971,7 +2960,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 #ifdef HAVE_DEV_GET_BY_NAME_RCU
                        rcu_read_lock();
                        netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
-                       if (netdev && (kiblnd_get_link_status(netdev) == 1))
+                       if (netdev && (lnet_get_link_status(netdev) == 1))
                                kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
                        rcu_read_unlock();
 #else
@@ -3049,6 +3038,7 @@ kiblnd_handle_link_state_change(struct net_device *dev,
        bool found_ip = false;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       int state;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        event_kibdev = kiblnd_dev_search(dev->name);
@@ -3064,10 +3054,7 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                if (!in_dev) {
                        CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
                               dev->name);
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nid2str(net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
@@ -3079,27 +3066,20 @@ kiblnd_handle_link_state_change(struct net_device *dev,
                if (!found_ip) {
                        CDEBUG(D_NET, "Interface %s has no matching ip\n",
                               dev->name);
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nid2str(net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
 
                if (link_down) {
-                       CDEBUG(D_NET, "%s: set link fatal state to 1\n",
-                              libcfs_nid2str(net->ibn_ni->ni_nid));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     link_down);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                } else {
-                       CDEBUG(D_NET, "%s: set link fatal state to %u\n",
-                              libcfs_nid2str(net->ibn_ni->ni_nid),
-                              (kiblnd_get_link_status(dev) == 0));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (kiblnd_get_link_status(dev) == 0));
+                       state = (lnet_get_link_status(dev) == 0);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   state);
                }
 ni_done:
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -3119,6 +3099,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct net_device *event_netdev = ifa->ifa_dev->dev;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       struct lnet_ni *ni = NULL;
+       bool link_down;
 
        event_kibdev = kiblnd_dev_search(event_netdev->name);
 
@@ -3130,12 +3112,11 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
 
        list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets,
                                 ibn_list) {
-               CDEBUG(D_NET, "%s: set link fatal state to %u\n",
-                      libcfs_nid2str(net->ibn_ni->ni_nid),
-                      (event == NETDEV_DOWN));
-               ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
-                                             (event == NETDEV_DOWN));
+               ni = net->ibn_ni;
+               link_down = (event == NETDEV_DOWN);
+               ni_state_before = lnet_set_link_fatal_state(ni, link_down);
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    ((event == NETDEV_DOWN) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -3517,6 +3498,7 @@ kiblnd_startup(struct lnet_ni *ni)
        int rc;
        int i;
        bool newdev;
+       struct net_device *netdev;
 
        LASSERT(ni->ni_net->net_lnd == &the_o2iblnd);
 
@@ -3621,6 +3603,16 @@ kiblnd_startup(struct lnet_ni *ni)
        /* for health check */
        if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN)
                kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+
+       rcu_read_lock();
+       netdev = dev_get_by_name_rcu(ni->ni_net_ns, net->ibn_dev->ibd_ifname);
+       if (((netdev->reg_state == NETREG_UNREGISTERING) ||
+            (netdev->operstate != IF_OPER_UP)) ||
+           (lnet_get_link_status(netdev) == 0)) {
+               kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+       }
+       rcu_read_unlock();
+
        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
        net->ibn_init = IBLND_INIT_ALL;
index 76dbb72..b0bf7b1 100644 (file)
 static const struct lnet_lnd the_ksocklnd;
 struct ksock_nal_data ksocknal_data;
 
-static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni)
+static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni,
+                            int *dev_status)
 {
        struct net_device *dev;
        int ret = -1;
        DECLARE_CONST_IN_IFADDR(ifa);
 
+       *dev_status = -1;
+
        rcu_read_lock();
        for_each_netdev(ni->ni_net_ns, dev) {
                int flags = dev_get_flags(dev);
@@ -73,11 +76,21 @@ static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni)
                if (ret >= 0)
                        break;
        }
+
        rcu_read_unlock();
+       if (ret >= 0)
+               *dev_status = 1;
+
+       if ((ret == -1) ||
+           ((dev->reg_state == NETREG_UNREGISTERING) ||
+            (dev->operstate != IF_OPER_UP)) ||
+           (lnet_get_link_status(dev) == 0))
+               *dev_status = 0;
 
        return ret;
 }
 
+
 static struct ksock_conn_cb *
 ksocknal_create_conn_cb(__u32 ipaddr, int port)
 {
@@ -1797,25 +1810,6 @@ ksocknal_free_buffers (void)
        }
 }
 
-static int ksocknal_get_link_status(struct net_device *dev)
-{
-       int ret = -1;
-
-       LASSERT(dev);
-
-       if (!netif_running(dev)) {
-               ret = 0;
-               CDEBUG(D_NET, "device not running\n");
-       }
-       /* Some devices may not be providing link settings */
-       else if (dev->ethtool_ops->get_link) {
-               ret = dev->ethtool_ops->get_link(dev);
-               CDEBUG(D_NET, "get_link returns %u\n", ret);
-       }
-
-       return ret;
-}
-
 static int
 ksocknal_handle_link_state_change(struct net_device *dev,
                                  unsigned char operstate)
@@ -1830,6 +1824,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
        struct ksock_interface *ksi = NULL;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       int state;
        DECLARE_CONST_IN_IFADDR(ifa);
 
        ifindex = dev->ifindex;
@@ -1859,7 +1854,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                        continue;
 
                if (dev->reg_state == NETREG_UNREGISTERING) {
-                       /* Device is being unregitering, we need to clear the
+                       /* Device is being unregistered, we need to clear the
                         * index, it can change when device will be back */
                        ksi->ksni_index = -1;
                        goto out;
@@ -1871,9 +1866,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                if (!in_dev) {
                        CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
                               dev->name);
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
                in_dev_for_each_ifa_rtnl(ifa, in_dev) {
@@ -1885,24 +1878,20 @@ ksocknal_handle_link_state_change(struct net_device *dev,
                if (!found_ip) {
                        CDEBUG(D_NET, "Interface %s has no matching ip\n",
                               dev->name);
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                        goto ni_done;
                }
 
                if (link_down) {
-                       CDEBUG(D_NET, "set link fatal state to 1\n");
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     1);
+                       ni_state_before = lnet_set_link_fatal_state(ni, 1);
                } else {
-                       CDEBUG(D_NET, "set link fatal state to %u\n",
-                              (ksocknal_get_link_status(dev) == 0));
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (ksocknal_get_link_status(dev) == 0));
+                       state = (lnet_get_link_status(dev) == 0);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   state);
                }
 ni_done:
                if (!update_ping_buf &&
+                   (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                    (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
                        update_ping_buf = true;
        }
@@ -1917,7 +1906,7 @@ out:
 static int
 ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
 {
-       struct lnet_ni *ni;
+       struct lnet_ni *ni = NULL;
        struct ksock_net *net;
        struct ksock_net *cnxt;
        struct net_device *event_netdev = ifa->ifa_dev->dev;
@@ -1925,6 +1914,7 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
        struct ksock_interface *ksi = NULL;
        __u32 ni_state_before;
        bool update_ping_buf = false;
+       bool link_down;
 
        if (!ksocknal_data.ksnd_nnets)
                goto out;
@@ -1944,9 +1934,12 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event)
                        CDEBUG(D_NET, "set link fatal state to %u\n",
                               (event == NETDEV_DOWN));
                        ni = net->ksnn_ni;
-                       ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-                                                     (event == NETDEV_DOWN));
+                       link_down = (event == NETDEV_DOWN);
+                       ni_state_before = lnet_set_link_fatal_state(ni,
+                                                                   link_down);
+
                        if (!update_ping_buf &&
+                           (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
                            ((event == NETDEV_DOWN) != ni_state_before))
                                update_ping_buf = true;
                }
@@ -2405,6 +2398,7 @@ ksocknal_startup(struct lnet_ni *ni)
        struct lnet_inetdev *ifaces = NULL;
        int i = 0;
        int rc;
+       int dev_status;
 
         LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
@@ -2443,7 +2437,9 @@ ksocknal_startup(struct lnet_ni *ni)
        ni->ni_dev_cpt = ifaces[i].li_cpt;
 
        ksi->ksni_ipaddr = ifaces[i].li_ipaddr;
-       ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni);
+       ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni, &dev_status);
+       if (ksi->ksni_index < 0 || dev_status <= 0)
+               lnet_set_link_fatal_state(ni, 1);
        ksi->ksni_netmask = ifaces[i].li_netmask;
        strlcpy(ksi->ksni_name, ifaces[i].li_name, sizeof(ksi->ksni_name));
 
@@ -2454,6 +2450,7 @@ ksocknal_startup(struct lnet_ni *ni)
 
        LASSERT(ksi);
        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ksi->ksni_ipaddr);
+
        list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
        net->ksnn_ni = ni;
        ksocknal_data.ksnd_nnets++;
index 2295977..7791bc2 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/ctype.h>
 #include <linux/inetdevice.h>
 #include <linux/nsproxy.h>
+#include <linux/ethtool.h>
 #include <net/net_namespace.h>
 #include <lnet/lib-lnet.h>
 
@@ -1510,6 +1511,37 @@ lnet_match_networks(const char **networksp, const char *ip2nets,
        return count;
 }
 
+__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state)
+{
+       CDEBUG(D_NET, "%s: set link fatal state to %u\n",
+              libcfs_nid2str(ni->ni_nid), link_state);
+       return atomic_xchg(&ni->ni_fatal_error_on, link_state);
+}
+EXPORT_SYMBOL(lnet_set_link_fatal_state);
+
+int lnet_get_link_status(struct net_device *dev)
+{
+       int ret = -1;
+
+       if (!dev)
+               return -1;
+
+       if (!netif_running(dev)) {
+               ret = 0;
+               CDEBUG(D_NET, "device idx %d not running\n", dev->ifindex);
+       }
+       /* Some devices may not be providing link settings */
+       else if (dev->ethtool_ops->get_link) {
+               ret = dev->ethtool_ops->get_link(dev);
+               CDEBUG(D_NET, "device idx %d get_link %u\n",
+                      ret,
+                      dev->ifindex);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(lnet_get_link_status);
+
 int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns)
 {
        struct lnet_inetdev *ifaces = NULL;