From 11bbe287e85838b105d88e36129164c99360daca Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Thu, 18 May 2023 19:12:19 -0700 Subject: [PATCH] LU-16836 lnet: ensure dev notification on lnd startup Look up device and link state on lnd startup so that the initial NI state may be set properly. Reduce code duplication by adding lnet_set_link_fatal_state() and lnet_get_link_status() functions which are shared across LNDs. LND-specific versions of these are removed. This fixes the issue with adding LNet NI using an interface with cable unplugged which results in the NI state initialized as "up". Lustre-change: https://review.whamcloud.com/51057 Lustre-commit: 09c6e2b872287c847d15620788f6cf50b3a9f30b Fixes: c4df48116d ("LU-16563 lnet: use discovered ni status") Signed-off-by: Serguei Smirnov Change-Id: I16084092cc21a4e42dfef4624adfbf57eb4fdecb Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51310 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Cyril Bordage Reviewed-by: Frank Sehr Reviewed-by: Andreas Dilger --- lnet/include/lnet/lib-lnet.h | 2 ++ lnet/klnds/o2iblnd/o2iblnd.c | 76 ++++++++++++++++++++------------------------ lnet/klnds/socklnd/socklnd.c | 73 ++++++++++++++++++++---------------------- lnet/lnet/config.c | 32 +++++++++++++++++++ 4 files changed, 103 insertions(+), 80 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 73ae810..40a9df0 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -844,6 +844,8 @@ void lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf); int lnet_ping_info_validate(struct lnet_ping_info *pinfo); struct lnet_ping_buffer *lnet_ping_buffer_alloc(int nnis, gfp_t gfp); void lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf); +int lnet_get_link_status(struct net_device *dev); +__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state); static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf) { diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 9dfb0ad..d2dc55f 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2555,15 +2555,19 @@ kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val) struct kib_net *net; __u32 ni_state_before; bool update_ping_buf = false; + struct lnet_ni *ni = NULL; /* for health check */ list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) { + ni = net->ibn_ni; if (val) CDEBUG(D_NETERROR, "Fatal device error for NI %s\n", - libcfs_nid2str(net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, - val); - if (!update_ping_buf && (val != ni_state_before)) + libcfs_nid2str(ni->ni_nid)); + ni_state_before = lnet_set_link_fatal_state(ni, val); + + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && + (val != ni_state_before)) update_ping_buf = true; } @@ -2749,21 +2753,6 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) return 0; } -static int kiblnd_get_link_status(struct net_device *dev) -{ - int ret = -1; - - LASSERT(dev); - - if (!netif_running(dev)) - ret = 0; - /* Some devices may not be providing link settings */ - else if (dev->ethtool_ops->get_link) - ret = dev->ethtool_ops->get_link(dev); - - return ret; -} - static int kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns) { @@ -2971,7 +2960,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) #ifdef HAVE_DEV_GET_BY_NAME_RCU rcu_read_lock(); netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname); - if (netdev && (kiblnd_get_link_status(netdev) == 1)) + if (netdev && (lnet_get_link_status(netdev) == 1)) kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0); rcu_read_unlock(); #else @@ -3049,6 +3038,7 @@ kiblnd_handle_link_state_change(struct net_device *dev, bool found_ip = false; __u32 ni_state_before; bool update_ping_buf = false; + int state; DECLARE_CONST_IN_IFADDR(ifa); event_kibdev = kiblnd_dev_search(dev->name); @@ -3064,10 +3054,7 @@ kiblnd_handle_link_state_change(struct net_device *dev, if (!in_dev) { CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", dev->name); - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nid2str(net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -3079,27 +3066,20 @@ kiblnd_handle_link_state_change(struct net_device *dev, if (!found_ip) { CDEBUG(D_NET, "Interface %s has no matching ip\n", dev->name); - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nid2str(net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } if (link_down) { - CDEBUG(D_NET, "%s: set link fatal state to 1\n", - libcfs_nid2str(net->ibn_ni->ni_nid)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - link_down); + ni_state_before = lnet_set_link_fatal_state(ni, 1); } else { - CDEBUG(D_NET, "%s: set link fatal state to %u\n", - libcfs_nid2str(net->ibn_ni->ni_nid), - (kiblnd_get_link_status(dev) == 0)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - (kiblnd_get_link_status(dev) == 0)); + state = (lnet_get_link_status(dev) == 0); + ni_state_before = lnet_set_link_fatal_state(ni, + state); } ni_done: if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) update_ping_buf = true; } @@ -3119,6 +3099,8 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) struct net_device *event_netdev = ifa->ifa_dev->dev; __u32 ni_state_before; bool update_ping_buf = false; + struct lnet_ni *ni = NULL; + bool link_down; event_kibdev = kiblnd_dev_search(event_netdev->name); @@ -3130,12 +3112,11 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) { - CDEBUG(D_NET, "%s: set link fatal state to %u\n", - libcfs_nid2str(net->ibn_ni->ni_nid), - (event == NETDEV_DOWN)); - ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on, - (event == NETDEV_DOWN)); + ni = net->ibn_ni; + link_down = (event == NETDEV_DOWN); + ni_state_before = lnet_set_link_fatal_state(ni, link_down); if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && ((event == NETDEV_DOWN) != ni_state_before)) update_ping_buf = true; } @@ -3517,6 +3498,7 @@ kiblnd_startup(struct lnet_ni *ni) int rc; int i; bool newdev; + struct net_device *netdev; LASSERT(ni->ni_net->net_lnd == &the_o2iblnd); @@ -3621,6 +3603,16 @@ kiblnd_startup(struct lnet_ni *ni) /* for health check */ if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN) kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1); + + rcu_read_lock(); + netdev = dev_get_by_name_rcu(ni->ni_net_ns, net->ibn_dev->ibd_ifname); + if (((netdev->reg_state == NETREG_UNREGISTERING) || + (netdev->operstate != IF_OPER_UP)) || + (lnet_get_link_status(netdev) == 0)) { + kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1); + } + rcu_read_unlock(); + write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); net->ibn_init = IBLND_INIT_ALL; diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 76dbb72..b0bf7b1 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -44,12 +44,15 @@ static const struct lnet_lnd the_ksocklnd; struct ksock_nal_data ksocknal_data; -static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni) +static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni, + int *dev_status) { struct net_device *dev; int ret = -1; DECLARE_CONST_IN_IFADDR(ifa); + *dev_status = -1; + rcu_read_lock(); for_each_netdev(ni->ni_net_ns, dev) { int flags = dev_get_flags(dev); @@ -73,11 +76,21 @@ static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni) if (ret >= 0) break; } + rcu_read_unlock(); + if (ret >= 0) + *dev_status = 1; + + if ((ret == -1) || + ((dev->reg_state == NETREG_UNREGISTERING) || + (dev->operstate != IF_OPER_UP)) || + (lnet_get_link_status(dev) == 0)) + *dev_status = 0; return ret; } + static struct ksock_conn_cb * ksocknal_create_conn_cb(__u32 ipaddr, int port) { @@ -1797,25 +1810,6 @@ ksocknal_free_buffers (void) } } -static int ksocknal_get_link_status(struct net_device *dev) -{ - int ret = -1; - - LASSERT(dev); - - if (!netif_running(dev)) { - ret = 0; - CDEBUG(D_NET, "device not running\n"); - } - /* Some devices may not be providing link settings */ - else if (dev->ethtool_ops->get_link) { - ret = dev->ethtool_ops->get_link(dev); - CDEBUG(D_NET, "get_link returns %u\n", ret); - } - - return ret; -} - static int ksocknal_handle_link_state_change(struct net_device *dev, unsigned char operstate) @@ -1830,6 +1824,7 @@ ksocknal_handle_link_state_change(struct net_device *dev, struct ksock_interface *ksi = NULL; __u32 ni_state_before; bool update_ping_buf = false; + int state; DECLARE_CONST_IN_IFADDR(ifa); ifindex = dev->ifindex; @@ -1859,7 +1854,7 @@ ksocknal_handle_link_state_change(struct net_device *dev, continue; if (dev->reg_state == NETREG_UNREGISTERING) { - /* Device is being unregitering, we need to clear the + /* Device is being unregistered, we need to clear the * index, it can change when device will be back */ ksi->ksni_index = -1; goto out; @@ -1871,9 +1866,7 @@ ksocknal_handle_link_state_change(struct net_device *dev, if (!in_dev) { CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", dev->name); - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } in_dev_for_each_ifa_rtnl(ifa, in_dev) { @@ -1885,24 +1878,20 @@ ksocknal_handle_link_state_change(struct net_device *dev, if (!found_ip) { CDEBUG(D_NET, "Interface %s has no matching ip\n", dev->name); - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } if (link_down) { - CDEBUG(D_NET, "set link fatal state to 1\n"); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - 1); + ni_state_before = lnet_set_link_fatal_state(ni, 1); } else { - CDEBUG(D_NET, "set link fatal state to %u\n", - (ksocknal_get_link_status(dev) == 0)); - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - (ksocknal_get_link_status(dev) == 0)); + state = (lnet_get_link_status(dev) == 0); + ni_state_before = lnet_set_link_fatal_state(ni, + state); } ni_done: if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) update_ping_buf = true; } @@ -1917,7 +1906,7 @@ out: static int ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) { - struct lnet_ni *ni; + struct lnet_ni *ni = NULL; struct ksock_net *net; struct ksock_net *cnxt; struct net_device *event_netdev = ifa->ifa_dev->dev; @@ -1925,6 +1914,7 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) struct ksock_interface *ksi = NULL; __u32 ni_state_before; bool update_ping_buf = false; + bool link_down; if (!ksocknal_data.ksnd_nnets) goto out; @@ -1944,9 +1934,12 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) CDEBUG(D_NET, "set link fatal state to %u\n", (event == NETDEV_DOWN)); ni = net->ksnn_ni; - ni_state_before = atomic_xchg(&ni->ni_fatal_error_on, - (event == NETDEV_DOWN)); + link_down = (event == NETDEV_DOWN); + ni_state_before = lnet_set_link_fatal_state(ni, + link_down); + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && ((event == NETDEV_DOWN) != ni_state_before)) update_ping_buf = true; } @@ -2405,6 +2398,7 @@ ksocknal_startup(struct lnet_ni *ni) struct lnet_inetdev *ifaces = NULL; int i = 0; int rc; + int dev_status; LASSERT (ni->ni_net->net_lnd == &the_ksocklnd); if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { @@ -2443,7 +2437,9 @@ ksocknal_startup(struct lnet_ni *ni) ni->ni_dev_cpt = ifaces[i].li_cpt; ksi->ksni_ipaddr = ifaces[i].li_ipaddr; - ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni); + ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni, &dev_status); + if (ksi->ksni_index < 0 || dev_status <= 0) + lnet_set_link_fatal_state(ni, 1); ksi->ksni_netmask = ifaces[i].li_netmask; strlcpy(ksi->ksni_name, ifaces[i].li_name, sizeof(ksi->ksni_name)); @@ -2454,6 +2450,7 @@ ksocknal_startup(struct lnet_ni *ni) LASSERT(ksi); ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ksi->ksni_ipaddr); + list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); net->ksnn_ni = ni; ksocknal_data.ksnd_nnets++; diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 2295977..7791bc2 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1510,6 +1511,37 @@ lnet_match_networks(const char **networksp, const char *ip2nets, return count; } +__u32 lnet_set_link_fatal_state(struct lnet_ni *ni, unsigned int link_state) +{ + CDEBUG(D_NET, "%s: set link fatal state to %u\n", + libcfs_nid2str(ni->ni_nid), link_state); + return atomic_xchg(&ni->ni_fatal_error_on, link_state); +} +EXPORT_SYMBOL(lnet_set_link_fatal_state); + +int lnet_get_link_status(struct net_device *dev) +{ + int ret = -1; + + if (!dev) + return -1; + + if (!netif_running(dev)) { + ret = 0; + CDEBUG(D_NET, "device idx %d not running\n", dev->ifindex); + } + /* Some devices may not be providing link settings */ + else if (dev->ethtool_ops->get_link) { + ret = dev->ethtool_ops->get_link(dev); + CDEBUG(D_NET, "device idx %d get_link %u\n", + ret, + dev->ifindex); + } + + return ret; +} +EXPORT_SYMBOL(lnet_get_link_status); + int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns) { struct lnet_inetdev *ifaces = NULL; -- 1.8.3.1