From e5903f8b531935297809880e962d785be4fe57ff Mon Sep 17 00:00:00 2001 From: James Simmons Date: Tue, 23 Apr 2024 09:30:04 -0400 Subject: [PATCH] LU-17460 lnet: support IPv6 for link state The LNet layer monitors the state of the underlying TCP connection. Currently it only supports network interfaces set up with IPv4 addresses. Update to handle IPv6 setups. Test-Parameters: trivial testlist=sanity-lnet Change-Id: I249e9591d5f637112f6bd862cd0f928a555af229 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53761 Reviewed-by: Serguei Smirnov Reviewed-by: Frank Sehr Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- libcfs/include/libcfs/linux/linux-net.h | 21 +++++ lnet/autoconf/lustre-lnet.m4 | 25 ++++++ lnet/klnds/o2iblnd/o2iblnd.c | 113 ++++++++++++++++++------ lnet/klnds/socklnd/socklnd.c | 149 +++++++++++++++++++++++--------- lnet/klnds/socklnd/socklnd_modparams.c | 44 ++++++++-- 5 files changed, 277 insertions(+), 75 deletions(-) diff --git a/libcfs/include/libcfs/linux/linux-net.h b/libcfs/include/libcfs/linux/linux-net.h index e4e2263..6785b7f 100644 --- a/libcfs/include/libcfs/linux/linux-net.h +++ b/libcfs/include/libcfs/linux/linux-net.h @@ -23,9 +23,30 @@ #ifndef __LIBCFS_LINUX_NET_H__ #define __LIBCFS_LINUX_NET_H__ +#include #include #include +#ifndef HAVE_NETDEV_CMD_TO_NAME +static inline const char *netdev_cmd_to_name(unsigned long cmd) +{ +#define N(val) \ + case NETDEV_##val: \ + return "NETDEV_" __stringify(val); + switch (cmd) { + N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER) + N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE) + N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE) + N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER) + N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO) + N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO) + N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN) + }; +#undef N + return 
"UNKNOWN_NETDEV_EVENT"; +} +#endif + /* NL_SET_ERR_MSG macros is already defined in kernels * 3.10.0-1160 and above. For older kernels (3.10.0-957) * where this is not defined we put the message to the diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index e0271e1..d722d2f 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -1123,6 +1123,27 @@ AC_DEFUN([LN_HAVE_ORACLE_OFED_EXTENSIONS], [ ]) # LN_HAVE_ORACLE_OFED_EXTENSIONS # +# LN_SRC_HAVE_NETDEV_CMD_TO_NAME +# +# 4.16-rc6 commit ede2762d93ff16e0974f7446516b46b1022db213 +# created netdev_cmd_to_name() to map NETDEV events to char names +# +AC_DEFUN([LN_SRC_HAVE_NETDEV_CMD_TO_NAME], [ + LB2_LINUX_TEST_SRC([netdev_cmd_to_name], [ + #include + ],[ + netdev_cmd_to_name(NETDEV_UP); + ],[-Werror]) +]) +AC_DEFUN([LN_HAVE_NETDEV_CMD_TO_NAME], [ + LB2_MSG_LINUX_TEST_RESULT([if 'netdev_cmd_to_name' exist], + [netdev_cmd_to_name], [ + AC_DEFINE(HAVE_NETDEV_CMD_TO_NAME, 1, + ['netdev_cmd_to_name' is present]) + ]) +]) # LN_SRC_HAVE_NETDEV_CMD_TO_NAME + +# # LN_CONFIG_SOCK_GETNAME # # 4.17 commit 9b2c45d479d0fb8647c9e83359df69162b5fbe5f getname() @@ -1200,6 +1221,8 @@ AC_DEFUN([LN_PROG_LINUX_SRC], [ # 4.14 LN_SRC_HAVE_HYPERVISOR_IS_TYPE LN_SRC_HAVE_ORACLE_OFED_EXTENSIONS + # 4.16 + LN_SRC_HAVE_NETDEV_CMD_TO_NAME # 4.17 LN_SRC_CONFIG_SOCK_GETNAME # 5.3 and 4.18.0-193.el8 @@ -1217,6 +1240,8 @@ AC_DEFUN([LN_PROG_LINUX_RESULTS], [ # 4.14 LN_HAVE_HYPERVISOR_IS_TYPE LN_HAVE_ORACLE_OFED_EXTENSIONS + # 4.16 + LN_HAVE_NETDEV_CMD_TO_NAME # 4.17 LN_CONFIG_SOCK_GETNAME # 5.3 and 4.18.0-193.el8 diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index da98b1e..9f9c941 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -10,10 +10,13 @@ * * Author: Eric Barton */ - -#include #include #include +#include +#include +#include + +#include #include "o2iblnd.h" @@ -3215,15 +3218,12 @@ kiblnd_handle_link_state_change(struct net_device *dev, struct 
kib_net *net; struct kib_net *cnxt; bool link_down = !(operstate == IF_OPER_UP); - struct in_device *in_dev; bool found_ip = false; - __u32 ni_state_before; + u32 ni_state_before; bool update_ping_buf = false; int state; - DECLARE_CONST_IN_IFADDR(ifa); event_kibdev = kiblnd_dev_search(dev->name); - if (!event_kibdev) goto out; @@ -3231,18 +3231,44 @@ kiblnd_handle_link_state_change(struct net_device *dev, found_ip = false; ni = net->ibn_ni; - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) { - CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", - dev->name); - ni_state_before = lnet_set_link_fatal_state(ni, 1); - goto ni_done; - } - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ifa->ifa_local == ni->ni_nid.nid_addr[0]) - found_ip = true; + if (nid_is_nid4(&ni->ni_nid)) { + struct in_device *in_dev = __in_dev_get_rtnl(dev); + DECLARE_CONST_IN_IFADDR(ifa); + + if (!in_dev) { + CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", + dev->name); + ni_state_before = lnet_set_link_fatal_state(ni, 1); + goto ni_done; + } + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (ifa->ifa_local == ni->ni_nid.nid_addr[0]) + found_ip = true; + } + endfor_ifa(in_dev); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct inet6_dev *in6_dev = __in6_dev_get(dev); + const struct inet6_ifaddr *ifa6; + struct in6_addr sin6_addr; + + if (!in6_dev) { + CDEBUG(D_NET, "Interface %s has no IPv6 status.\n", + dev->name); + ni_state_before = lnet_set_link_fatal_state(ni, 1); + goto ni_done; + } + + memcpy(&sin6_addr, &ni->ni_nid.nid_addr, sizeof(sin6_addr)); + rcu_read_lock(); + list_for_each_entry_rcu(ifa6, &in6_dev->addr_list, + if_list) { + if (!ipv6_addr_cmp(&ifa6->addr, &sin6_addr)) + found_ip = true; + } + rcu_read_unlock(); +#endif } - endfor_ifa(in_dev); if (!found_ip) { CDEBUG(D_NET, "Interface %s has no matching ip\n", @@ -3273,18 +3299,17 @@ out: } static int -kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) +kiblnd_handle_inetaddr_change(struct net_device *dev, 
unsigned long event, int family) { struct kib_dev *event_kibdev; struct kib_net *net; struct kib_net *cnxt; - struct net_device *event_netdev = ifa->ifa_dev->dev; - __u32 ni_state_before; + u32 ni_state_before; bool update_ping_buf = false; struct lnet_ni *ni = NULL; bool link_down; - event_kibdev = kiblnd_dev_search(event_netdev->name); + event_kibdev = kiblnd_dev_search(dev->name); if (!event_kibdev) goto out; @@ -3292,7 +3317,7 @@ kiblnd_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) ibn_list) { ni = net->ibn_ni; - if (!(nid_is_nid4(&ni->ni_nid))) + if (nid_is_nid4(&ni->ni_nid) ^ (family == AF_INET)) continue; link_down = (event == NETDEV_DOWN); @@ -3322,8 +3347,8 @@ static int kiblnd_device_event(struct notifier_block *unused, operstate = dev->operstate; - CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n", - event, dev->name, dev->ifindex, operstate); + CDEBUG(D_NET, "devevent: status=%s, iface=%s ifindex %d state %u\n", + netdev_cmd_to_name(event), dev->name, dev->ifindex, operstate); switch (event) { case NETDEV_UP: @@ -3344,14 +3369,15 @@ static int kiblnd_inetaddr_event(struct notifier_block *unused, { struct in_ifaddr *ifa = ptr; - CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n", - event, &ifa->ifa_address, &ifa->ifa_mask); + CDEBUG(D_NET, "addrevent: status %s ip addr %pI4, netmask %pI4.\n", + netdev_cmd_to_name(event), &ifa->ifa_address, &ifa->ifa_mask); switch (event) { case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: - kiblnd_handle_inetaddr_change(ifa, event); + kiblnd_handle_inetaddr_change(ifa->ifa_dev->dev, event, + AF_INET); break; } @@ -3366,6 +3392,32 @@ static struct notifier_block kiblnd_inetaddr_notifier_block = { .notifier_call = kiblnd_inetaddr_event, }; +#if IS_ENABLED(CONFIG_IPV6) +static int kiblnd_inet6addr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa6 = ptr; + + CDEBUG(D_NET, "addrevent: status %s ip addr %pISc\n", + 
netdev_cmd_to_name(event), &ifa6->addr); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + kiblnd_handle_inetaddr_change(ifa6->idev->dev, event, + AF_INET6); + break; + + } + return NOTIFY_OK; +} + +static struct notifier_block kiblnd_inet6addr_notifier_block = { + .notifier_call = kiblnd_inet6addr_event, +}; +#endif + static void kiblnd_base_shutdown(void) { @@ -3381,6 +3433,9 @@ kiblnd_base_shutdown(void) if (kiblnd_data.kib_init == IBLND_INIT_ALL) { unregister_netdevice_notifier(&kiblnd_dev_notifier_block); unregister_inetaddr_notifier(&kiblnd_inetaddr_notifier_block); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&kiblnd_inet6addr_notifier_block); +#endif } switch (kiblnd_data.kib_init) { @@ -3581,7 +3636,9 @@ kiblnd_base_startup(struct net *ns) register_netdevice_notifier(&kiblnd_dev_notifier_block); register_inetaddr_notifier(&kiblnd_inetaddr_notifier_block); - +#if IS_ENABLED(CONFIG_IPV6) + register_inet6addr_notifier(&kiblnd_inet6addr_notifier_block); +#endif /* flag everything initialised */ kiblnd_data.kib_init = IBLND_INIT_ALL; /*****************************************************/ diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 164f534..0c0ba02 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1980,14 +1980,12 @@ ksocknal_handle_link_state_change(struct net_device *dev, struct ksock_net *cnxt; int ifindex; unsigned char link_down; - struct in_device *in_dev; bool found_ip = false; struct ksock_interface *ksi = NULL; - struct sockaddr_in *sa; - __u32 ni_state_before; + struct sockaddr *sa = NULL; + u32 ni_state_before; bool update_ping_buf = false; int state; - DECLARE_CONST_IN_IFADDR(ifa); link_down = !((operstate == IF_OPER_UP) || (operstate == IF_OPER_UNKNOWN)); ifindex = dev->ifindex; @@ -1997,9 +1995,7 @@ ksocknal_handle_link_state_change(struct net_device *dev, list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, ksnn_list) { - 
ksi = &net->ksnn_interface; - sa = (void *)&ksi->ksni_addr; found_ip = false; if (strcmp(ksi->ksni_name, dev->name)) @@ -2026,22 +2022,65 @@ ksocknal_handle_link_state_change(struct net_device *dev, ni = net->ksnn_ni; - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) { - CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", - dev->name); - ni_state_before = lnet_set_link_fatal_state(ni, 1); - goto ni_done; + sa = (void *)&ksi->ksni_addr; + switch (sa->sa_family) { + case AF_INET: { + struct in_device *in_dev = __in_dev_get_rtnl(dev); + DECLARE_CONST_IN_IFADDR(ifa); + + if (in_dev) { + struct sockaddr_in *sa4; + + sa4 = (struct sockaddr_in *)sa; + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (sa4->sin_addr.s_addr == + ifa->ifa_local) + found_ip = true; + } + endfor_ifa(in_dev); + } else { + sa = NULL; + } + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6:{ + struct inet6_dev *in6_dev = __in6_dev_get(dev); + + if (in6_dev) { + const struct inet6_ifaddr *ifa6; + struct sockaddr_in6 *sa6; + + sa6 = (struct sockaddr_in6 *)sa; + list_for_each_entry_rcu(ifa6, + &in6_dev->addr_list, + if_list) { + if (!ipv6_addr_cmp(&ifa6->addr, + &sa6->sin6_addr)) { + found_ip = true; + } + } + } else { + sa = NULL; + } + break; } - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (sa->sin_addr.s_addr == ifa->ifa_local) - found_ip = true; +#endif + default: + sa = NULL; + break; } - endfor_ifa(in_dev); - if (!found_ip) { - CDEBUG(D_NET, "Interface %s has no matching ip\n", - dev->name); + if (!sa || !found_ip) { + if (!sa) { + CDEBUG(D_NET, + "Interface %s has no IP status.\n", + dev->name); + } else { + CDEBUG(D_NET, + "Interface %s has no matching IP\n", + dev->name); + } ni_state_before = lnet_set_link_fatal_state(ni, 1); goto ni_done; } @@ -2068,16 +2107,15 @@ out: static int -ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) +ksocknal_handle_inetaddr_change(struct net_device *event_netdev, unsigned long event) { struct lnet_ni *ni = NULL; struct ksock_net 
*net; struct ksock_net *cnxt; - struct net_device *event_netdev = ifa->ifa_dev->dev; int ifindex; struct ksock_interface *ksi = NULL; - struct sockaddr_in *sa; - __u32 ni_state_before; + struct sockaddr *sa; + u32 ni_state_before; bool update_ping_buf = false; bool link_down; @@ -2088,7 +2126,6 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, ksnn_list) { - ksi = &net->ksnn_interface; sa = (void *)&ksi->ksni_addr; @@ -2096,17 +2133,18 @@ ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) strcmp(ksi->ksni_name, event_netdev->name)) continue; - if (sa->sin_addr.s_addr == ifa->ifa_local) { - ni = net->ksnn_ni; - link_down = (event == NETDEV_DOWN); - ni_state_before = lnet_set_link_fatal_state(ni, - link_down); + ni = net->ksnn_ni; + if (nid_is_nid4(&ni->ni_nid) ^ (sa->sa_family == AF_INET)) + continue; - if (!update_ping_buf && - (ni->ni_state == LNET_NI_STATE_ACTIVE) && - ((event == NETDEV_DOWN) != ni_state_before)) - update_ping_buf = true; - } + link_down = (event == NETDEV_DOWN); + ni_state_before = lnet_set_link_fatal_state(ni, + link_down); + + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && + ((event == NETDEV_DOWN) != ni_state_before)) + update_ping_buf = true; } if (update_ping_buf) @@ -2126,8 +2164,8 @@ static int ksocknal_device_event(struct notifier_block *unused, operstate = dev->operstate; - CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n", - event, dev->name, dev->ifindex, operstate); + CDEBUG(D_NET, "devevent: status=%s, iface=%s ifindex %d state %u\n", + netdev_cmd_to_name(event), dev->name, dev->ifindex, operstate); switch (event) { case NETDEV_UP: @@ -2150,14 +2188,15 @@ static int ksocknal_inetaddr_event(struct notifier_block *unused, { struct in_ifaddr *ifa = ptr; - CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n", - event, &ifa->ifa_address, &ifa->ifa_mask); + CDEBUG(D_NET, 
"addrevent: status %s device %s, ip addr %pI4, netmask %pI4.\n", + netdev_cmd_to_name(event), ifa->ifa_dev->dev->name, + &ifa->ifa_address, &ifa->ifa_mask); switch (event) { case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: - ksocknal_handle_inetaddr_change(ifa, event); + ksocknal_handle_inetaddr_change(ifa->ifa_dev->dev, event); break; } @@ -2172,6 +2211,31 @@ static struct notifier_block ksocknal_inetaddr_notifier_block = { .notifier_call = ksocknal_inetaddr_event, }; +#if IS_ENABLED(CONFIG_IPV6) +static int ksocknal_inet6addr_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa6 = ptr; + + CDEBUG(D_NET, "addr6event: status %s, device %s, ip addr %pISc\n", + netdev_cmd_to_name(event), ifa6->idev->dev->name, &ifa6->addr); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + ksocknal_handle_inetaddr_change(ifa6->idev->dev, event); + break; + + } + return NOTIFY_OK; +} + +static struct notifier_block ksocknal_inet6addr_notifier_block = { + .notifier_call = ksocknal_inet6addr_event, +}; +#endif + static void ksocknal_base_shutdown(void) { @@ -2186,6 +2250,9 @@ ksocknal_base_shutdown(void) if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) { unregister_netdevice_notifier(&ksocknal_dev_notifier_block); unregister_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&ksocknal_inet6addr_notifier_block); +#endif } switch (ksocknal_data.ksnd_init) { @@ -2354,7 +2421,9 @@ ksocknal_base_startup(void) register_netdevice_notifier(&ksocknal_dev_notifier_block); register_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); - +#if IS_ENABLED(CONFIG_IPV6) + register_inet6addr_notifier(&ksocknal_inet6addr_notifier_block); +#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 1b72ffa..2ff3957 100644 --- 
a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -19,6 +19,7 @@ #ifdef HAVE_ETHTOOL_LINK_SETTINGS #include #include +#include #endif #define CURRENT_LND_VERSION 1 @@ -238,14 +239,43 @@ static int ksocklnd_ni_get_eth_intf_speed(struct lnet_ni *ni) continue; in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - continue; - - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (strcmp(ifa->ifa_label, ni->ni_interface) == 0) - intf_idx = dev->ifindex; + if (in_dev) { + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (strcmp(ifa->ifa_label, ni->ni_interface) == 0) + intf_idx = dev->ifindex; + } + endfor_ifa(in_dev); + } else { +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev = __in6_dev_get(dev); + + if (in6_dev) { + const struct inet6_ifaddr *ifa6; + + list_for_each_entry_rcu(ifa6, + &in6_dev->addr_list, + if_list) { + if (ifa6->flags & IFA_F_TEMPORARY) + continue; + + /* As different IPv6 addresses don't + * have unique labels, it is safest + * just to use the first and ignore + * the rest. + */ + if (strcmp(dev->name, + ni->ni_interface) == 0) { + intf_idx = dev->ifindex; + break; + } + } + } else { +#endif + continue; +#if IS_ENABLED(CONFIG_IPV6) + } +#endif } - endfor_ifa(in_dev); if (intf_idx >= 0) break; -- 1.8.3.1