From 03a0bee0bcc54adc962da5936af27312aafabded Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Mon, 23 Aug 2021 12:58:51 -0700 Subject: [PATCH] LU-14954 socklnd: fix link state detection Due to matching only the device index, link detection implemented in LU-14742 has issues with confusing the link events for the virtual interfaces with the link events for the interface that LNet was actually configured to use. Fix this by improving the identification of the event source: use both device name and device index. Also, to make sure the link fatal state is cleared only when the device is bound to the IP address used at NI creation, subscribe to inetaddr events in addition to the netdev events. Lustre-change: https://review.whamcloud.com/44732 Lustre-commit: TBD (from d4dbbf3cfd692ed548c82e2dda9fdcadae052a62) Test-Parameters: trivial Fixes: b842fb6fd5 ("LU-14742: detect link state to set fatal error") Signed-off-by: Serguei Smirnov Change-Id: Ib1996c66a8ae2596970d66e3d920702190851e3f Reviewed-on: https://review.whamcloud.com/44787 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lnet/klnds/socklnd/socklnd.c | 129 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 119 insertions(+), 10 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index fec7d92..3aa8849 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1790,11 +1790,15 @@ static int ksocknal_get_link_status(struct net_device *dev) LASSERT(dev); - if (!netif_running(dev)) + if (!netif_running(dev)) { ret = 0; + CDEBUG(D_NET, "device not running\n"); + } /* Some devices may not be providing link settings */ - else if (dev->ethtool_ops->get_link) + else if (dev->ethtool_ops->get_link) { ret = dev->ethtool_ops->get_link(dev); + CDEBUG(D_NET, "get_link returns %u\n", ret); + } return ret; } @@ -1803,11 +1807,15 @@ static int ksocknal_handle_link_state_change(struct net_device *dev, unsigned char operstate) { - struct lnet_ni *ni; + struct lnet_ni *ni = NULL; struct ksock_net *net; struct ksock_net *cnxt; int ifindex; unsigned char link_down = !(operstate == IF_OPER_UP); + struct in_device *in_dev; + bool found_ip = false; + struct ksock_interface *ksi = NULL; + DECLARE_CONST_IN_IFADDR(ifa); ifindex = dev->ifindex; @@ -1816,20 +1824,89 @@ ksocknal_handle_link_state_change(struct net_device *dev, list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, ksnn_list) { - if (net->ksnn_interface.ksni_index != ifindex) + + ksi = &net->ksnn_interface; + found_ip = false; + + if (ksi->ksni_index != ifindex || + strcmp(ksi->ksni_name, dev->name)) continue; + ni = net->ksnn_ni; - if (link_down) + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) { + CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", + dev->name); + CDEBUG(D_NET, "set link fatal state to 1\n"); + atomic_set(&ni->ni_fatal_error_on, 1); + continue; + } + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (ksi->ksni_ipaddr == ntohl(ifa->ifa_local)) + found_ip = true; + } + endfor_ifa(in_dev); + + if (!found_ip) { + CDEBUG(D_NET, "Interface %s has no matching ip\n", + dev->name); + CDEBUG(D_NET, "set link fatal state to 1\n"); + atomic_set(&ni->ni_fatal_error_on, 1); + continue; + } + + if (link_down) { + CDEBUG(D_NET, "set link fatal state to 1\n"); atomic_set(&ni->ni_fatal_error_on, link_down); - else + } else { + CDEBUG(D_NET, "set link fatal state to %u\n", + (ksocknal_get_link_status(dev) == 0)); atomic_set(&ni->ni_fatal_error_on, (ksocknal_get_link_status(dev) == 0)); + } } out: return 0; } +static int +ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) +{ + struct lnet_ni *ni; + struct ksock_net *net; + struct ksock_net *cnxt; + struct net_device *event_netdev = ifa->ifa_dev->dev; + int ifindex; + struct ksock_interface *ksi = NULL; + + if (!ksocknal_data.ksnd_nnets) + goto out; + + ifindex = event_netdev->ifindex; + + list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, + ksnn_list) { + + ksi = &net->ksnn_interface; + + if (ksi->ksni_index != ifindex || + strcmp(ksi->ksni_name, event_netdev->name)) + continue; + + if (ksi->ksni_ipaddr == ntohl(ifa->ifa_local)) { + CDEBUG(D_NET, "set link fatal state to %u\n", + (event == NETDEV_DOWN)); + ni = net->ksnn_ni; + atomic_set(&ni->ni_fatal_error_on, + (event == NETDEV_DOWN)); + } + } +out: + return 0; +} + /************************************ * Net device notifier event handler ************************************/ @@ -1841,6 +1918,9 @@ static int ksocknal_device_event(struct notifier_block *unused, operstate = dev->operstate; + CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n", + event, dev->name, dev->ifindex, operstate); + switch (event) { case NETDEV_UP: case NETDEV_DOWN: @@ -1852,10 +1932,36 @@ static int ksocknal_device_event(struct notifier_block *unused, return NOTIFY_OK; } -static struct notifier_block ksocknal_notifier_block = { +/************************************ + * Inetaddr notifier event handler + ************************************/ +static int ksocknal_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + + CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n", + event, &ifa->ifa_address, &ifa->ifa_mask); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + ksocknal_handle_inetaddr_change(ifa, event); + break; + + } + return NOTIFY_OK; +} + +static struct notifier_block ksocknal_dev_notifier_block = { .notifier_call = ksocknal_device_event, }; +static struct notifier_block ksocknal_inetaddr_notifier_block = { + .notifier_call = ksocknal_inetaddr_event, +}; + static void ksocknal_base_shutdown(void) { @@ -1867,8 +1973,10 @@ ksocknal_base_shutdown(void) libcfs_kmem_read()); LASSERT (ksocknal_data.ksnd_nnets == 0); - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) - unregister_netdevice_notifier(&ksocknal_notifier_block); + if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) { + unregister_netdevice_notifier(&ksocknal_dev_notifier_block); + unregister_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); + } switch (ksocknal_data.ksnd_init) { default: @@ -2036,7 +2144,8 @@ ksocknal_base_startup(void) goto failed; } - register_netdevice_notifier(&ksocknal_notifier_block); + register_netdevice_notifier(&ksocknal_dev_notifier_block); + register_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; -- 1.8.3.1