From: Serguei Smirnov Date: Fri, 19 May 2023 02:12:19 +0000 (-0700) Subject: LU-16836 lnet: ensure dev notification on lnd startup X-Git-Tag: 2.15.57~110 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=09c6e2b872287c847d15620788f6cf50b3a9f30b;p=fs%2Flustre-release.git LU-16836 lnet: ensure dev notification on lnd startup Look up device and link state on lnd startup so that the initial NI state may be set properly. Reduce code duplication by adding lnet_set_link_fatal_state() and lnet_get_link_status() functions which are shared across LNDs. LND-specific versions of these are removed. This fixes the issue with adding LNet NI using an interface with cable unplugged which results in the NI state initialized as "up". Fixes: da230373bd ("LU-16563 lnet: use discovered ni status") Signed-off-by: Serguei Smirnov Change-Id: I16084092cc21a4e42dfef4624adfbf57eb4fdecb Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51057 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 0d0ddd9..e4224b0 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -960,6 +960,8 @@ void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf); int lnet_ping_info_validate(struct lnet_ping_info *pinfo); struct lnet_ping_buffer *lnet_ping_buffer_alloc(int bytes, gfp_t gfp); void lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf); +int lnet_get_link_status(struct net_device *dev); +__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state); static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf) { diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 07b2d68..7c1c8a3 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2637,15 +2637,19 @@ kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val) struct kib_net *net; __u32 
ni_state_before; bool update_ping_buf = false; + struct lnet_ni *ni = NULL; /* for health check */ list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) { + ni = net->ibn_ni; if (val) CDEBUG(D_NETERROR, "Fatal device error for NI %s\n", - libcfs_nidstr(&net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, - val); - if (!update_ping_buf && (val != ni_state_before)) + libcfs_nidstr(&ni->ni_nid)); + ni_state_before = lnet_set_link_fatal_state(ni, val); + + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && + (val != ni_state_before)) update_ping_buf = true; } @@ -2831,21 +2835,6 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) return 0; } -static int kiblnd_get_link_status(struct net_device *dev) -{ - int ret = -1; - - LASSERT(dev); - - if (!netif_running(dev)) - ret = 0; - /* Some devices may not be providing link settings */ - else if (dev->ethtool_ops->get_link) - ret = dev->ethtool_ops->get_link(dev); - - return ret; -} - static int kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns) { @@ -3053,7 +3042,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) if (set_fatal) { rcu_read_lock(); netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname); - if (netdev && (kiblnd_get_link_status(netdev) == 1)) + if (netdev && (lnet_get_link_status(netdev) == 1)) kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0); rcu_read_unlock(); } @@ -3123,6 +3112,7 @@ kiblnd_handle_link_state_change(struct net_device *dev, bool found_ip = false; __u32 ni_state_before; bool update_ping_buf = false; + int state; DECLARE_CONST_IN_IFADDR(ifa); event_kibdev = kiblnd_dev_search(dev->name); @@ -3138,10 +3128,7 @@ kiblnd_handle_link_state_change(struct net_device *dev, if (!in_dev) { CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", dev->name); - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nidstr(&net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + 
ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -3153,27 +3140,20 @@ kiblnd_handle_link_state_change(struct net_device *dev, if (!found_ip) { CDEBUG(D_NET, "Interface %s has no matching ip\n", dev->name); - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nidstr(&net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } if (link_down) { - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nidstr(&net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - link_down); + ni_state_before = lnet_set_link_fatal_state(ni, 1); } else { - CDEBUG(D_NET, "%s: set link fatal state to %u\n", - libcfs_nidstr(&net->ibn_ni->ni_nid), - (kiblnd_get_link_status(dev) == 0)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - (kiblnd_get_link_status(dev) == 0)); + state = (lnet_get_link_status(dev) == 0); + ni_state_before = lnet_set_link_fatal_state(ni, + state); } ni_done: if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) update_ping_buf = true; } @@ -3193,6 +3173,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) struct net_device *event_netdev = ifa->ifa_dev->dev; __u32 ni_state_before; bool update_ping_buf = false; + struct lnet_ni *ni = NULL; + bool link_down; event_kibdev = kiblnd_dev_search(event_netdev->name); @@ -3204,12 +3186,11 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) { - CDEBUG(D_NET, "%s: set link fatal state to %u\n", - libcfs_nidstr(&net->ibn_ni->ni_nid), - (event == NETDEV_DOWN)); - ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, - (event == NETDEV_DOWN)); + ni = net->ibn_ni; + link_down = (event == NETDEV_DOWN); + ni_state_before = 
lnet_set_link_fatal_state(ni, link_down);
 		if (!update_ping_buf &&
+		    (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
 		    ((event == NETDEV_DOWN) != ni_state_before))
 			update_ping_buf = true;
 	}
@@ -3580,6 +3561,7 @@ kiblnd_startup(struct lnet_ni *ni)
 	int rc;
 	int i;
 	bool newdev;
+	struct net_device *netdev;
 
 	LASSERT(ni->ni_net->net_lnd == &the_o2iblnd);
 
@@ -3691,6 +3673,16 @@ kiblnd_startup(struct lnet_ni *ni)
 	/* for health check */
 	if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN)
 		kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+
+	rcu_read_lock();
+	netdev = dev_get_by_name_rcu(ni->ni_net_ns, net->ibn_dev->ibd_ifname);
+	/* a vanished device is handled like one whose link is down */
+	if (!netdev || netdev->reg_state == NETREG_UNREGISTERING ||
+	    netdev->operstate != IF_OPER_UP ||
+	    lnet_get_link_status(netdev) == 0)
+		kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
+	rcu_read_unlock();
+
 	write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
 	net->ibn_init = IBLND_INIT_ALL;
diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c
index 43b46b7..cb8e214 100644
--- a/lnet/klnds/socklnd/socklnd.c
+++ b/lnet/klnds/socklnd/socklnd.c
@@ -46,6 +46,78 @@
 static const struct lnet_lnd the_ksocklnd;
 struct ksock_nal_data ksocknal_data;
 
+/* Return the ifindex of the up, non-loopback device owning @addr, or -1.
+ * @dev_status: 1 link usable, 0 not found/down, -1 unsupported family. */
+static int ksocknal_ip2index(struct sockaddr *addr, struct lnet_ni *ni,
+			     int *dev_status)
+{
+	struct net_device *dev;
+	int ret = -1;
+	DECLARE_CONST_IN_IFADDR(ifa);
+
+	*dev_status = -1;
+
+	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+		return ret;
+
+	rcu_read_lock();
+	for_each_netdev(ni->ni_net_ns, dev) {
+		int flags = dev_get_flags(dev);
+		struct in_device *in_dev;
+
+		if (flags & IFF_LOOPBACK) /* skip the loopback IF */
+			continue;
+
+		if (!(flags & IFF_UP))
+			continue;
+
+		switch (addr->sa_family) {
+		case AF_INET:
+			in_dev = __in_dev_get_rcu(dev);
+			if (!in_dev)
+				continue;
+
+			in_dev_for_each_ifa_rcu(ifa, in_dev) {
+				if (ifa->ifa_local ==
+				    ((struct sockaddr_in *)addr)->sin_addr.s_addr)
+					ret = dev->ifindex;
+			}
+			endfor_ifa(in_dev);
+			break;
+#if IS_ENABLED(CONFIG_IPV6)
+		case AF_INET6: {
+			struct inet6_dev *in6_dev;
+			const struct inet6_ifaddr *ifa6;
+			struct sockaddr_in6 *addr6 = (struct sockaddr_in6*)addr;
+
+			in6_dev = __in6_dev_get(dev);
+			if (!in6_dev)
+				continue;
+
+			list_for_each_entry_rcu(ifa6, &in6_dev->addr_list, if_list) {
+				if (ipv6_addr_cmp(&ifa6->addr,
+						  &addr6->sin6_addr) == 0)
+					ret = dev->ifindex;
+			}
+			break;
+			}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+		}
+		if (ret >= 0)
+			break;
+	}
+
+	/* check dev before leaving the RCU section: it may be freed after */
+	if (ret < 0 || dev->reg_state == NETREG_UNREGISTERING ||
+	    dev->operstate != IF_OPER_UP || lnet_get_link_status(dev) == 0)
+		*dev_status = 0;
+	else
+		*dev_status = 1;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static struct ksock_conn_cb *
 ksocknal_create_conn_cb(struct sockaddr *addr)
 {
@@ -1838,25 +1910,6 @@ ksocknal_free_buffers (void)
 	}
 }
 
-static int ksocknal_get_link_status(struct net_device *dev)
-{
-	int ret = -1;
-
-	LASSERT(dev);
-
-	if (!netif_running(dev)) {
-		ret = 0;
-		CDEBUG(D_NET, "device not running\n");
-	}
-	/* Some devices may not be providing link settings */
-	else if (dev->ethtool_ops->get_link) {
-		ret = dev->ethtool_ops->get_link(dev);
-		CDEBUG(D_NET, "get_link returns %u\n", ret);
-	}
-
-	return ret;
-}
-
 static int
 ksocknal_handle_link_state_change(struct net_device *dev,
 				  unsigned char operstate)
@@ -1872,6 +1925,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
 	struct sockaddr_in *sa;
 	__u32 ni_state_before;
 	bool update_ping_buf = false;
+	int state;
 	DECLARE_CONST_IN_IFADDR(ifa);
 
 	ifindex = dev->ifindex;
@@ -1902,7 +1956,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
 			continue;
 
 		if (dev->reg_state == NETREG_UNREGISTERING) {
-			/* Device is being unregitering, we need to clear the
+			/* Device is being unregistered, we need to clear the
 			 * index, it can change when device will be back */
 			ksi->ksni_index = -1;
 			goto out;
@@ -1914,9 +1968,7 @@ ksocknal_handle_link_state_change(struct net_device *dev,
 		if (!in_dev) {
 			CDEBUG(D_NET,
"Interface %s has no IPv4 status.\n", dev->name); - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1928,24 +1980,20 @@ ksocknal_handle_link_state_change(struct net_device *dev, if (!found_ip) { CDEBUG(D_NET, "Interface %s has no matching ip\n", dev->name); - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } if (link_down) { - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); } else { - CDEBUG(D_NET, "set link fatal state to %u\n", - (ksocknal_get_link_status(dev) == 0)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - (ksocknal_get_link_status(dev) == 0)); + state = (lnet_get_link_status(dev) == 0); + ni_state_before = lnet_set_link_fatal_state(ni, + state); } ni_done: if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) update_ping_buf = true; } @@ -1960,7 +2008,7 @@ out: static int ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) { - struct lnet_ni *ni; + struct lnet_ni *ni = NULL; struct ksock_net *net; struct ksock_net *cnxt; struct net_device *event_netdev = ifa->ifa_dev->dev; @@ -1969,6 +2017,7 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) struct sockaddr_in *sa; __u32 ni_state_before; bool update_ping_buf = false; + bool link_down; if (!ksocknal_data.ksnd_nnets) goto out; @@ -1986,12 +2035,13 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) continue; if (sa->sin_addr.s_addr == ifa->ifa_local) { - CDEBUG(D_NET, "set link fatal state to %u\n", - (event == NETDEV_DOWN)); ni = 
net->ksnn_ni;
-			ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
-						      (event == NETDEV_DOWN));
+			link_down = (event == NETDEV_DOWN);
+			ni_state_before = lnet_set_link_fatal_state(ni,
+								    link_down);
+
 			if (!update_ping_buf &&
+			    (ni->ni_state == LNET_NI_STATE_ACTIVE) &&
 			    ((event == NETDEV_DOWN) != ni_state_before))
 				update_ping_buf = true;
 		}
@@ -2440,6 +2490,7 @@ ksocknal_startup(struct lnet_ni *ni)
 	struct ksock_interface *ksi = NULL;
 	struct lnet_inetdev *ifaces = NULL;
 	int rc, if_idx;
+	int dev_status;
 
 	LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
 	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
@@ -2504,6 +2555,12 @@ ksocknal_startup(struct lnet_ni *ni)
 	if (rc != 0)
 		goto out_net;
 
+	if ((ksocknal_ip2index((struct sockaddr *)&ksi->ksni_addr,
+				ni,
+				&dev_status) < 0) ||
+	     (dev_status <= 0))
+		lnet_set_link_fatal_state(ni, 1);
+
 	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
 	net->ksnn_ni = ni;
 	ksocknal_data.ksnd_nnets++;
diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c
index 08609e3..cfa55c4 100644
--- a/lnet/lnet/config.c
+++ b/lnet/lnet/config.c
@@ -34,6 +34,7 @@
 #include
 #include
 #include
+#include <linux/ethtool.h>
 #include
 #include
 #include
@@ -1502,6 +1503,37 @@ lnet_match_networks(const char **networksp, const char *ip2nets,
 	return count;
 }
 
+__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state)
+{
+	CDEBUG(D_NET, "%s: set link fatal state to %u\n",
+	       libcfs_nidstr(&ni->ni_nid), link_state);
+	return atomic_xchg(&ni->ni_fatal_error_on, link_state);
+}
+EXPORT_SYMBOL(lnet_set_link_fatal_state);
+
+/* 0: not running; otherwise ethtool get_link(); -1: no dev or no get_link */
+int lnet_get_link_status(struct net_device *dev)
+{
+	int ret = -1;
+
+	if (!dev)
+		return -1;
+
+	if (!netif_running(dev)) {
+		ret = 0;
+		CDEBUG(D_NET, "device idx %d not running\n", dev->ifindex);
+	}
+	/* Some devices may not be providing link settings */
+	else if (dev->ethtool_ops->get_link) {
+		ret = dev->ethtool_ops->get_link(dev);
+		CDEBUG(D_NET, "device idx %d get_link %d\n",
+		       dev->ifindex, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(lnet_get_link_status); + int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns, bool v6) { struct lnet_inetdev *ifaces = NULL;