From 25e1da07d70c7748be14a8a096aa0da9c3aa9644 Mon Sep 17 00:00:00 2001 From: Josh Samuelson Date: Sat, 26 Oct 2024 15:06:37 -0500 Subject: [PATCH] LU-18383 ksocklnd: Avoid TCP socket orphans in racy LNet LU-18137 3367ef3eb2 introduced code to handle upstream kernel changes with regards to releasing kernel sockets. Under heavy/racy LNet hello transactions between servers and clients, client TCP sockets can transition to a orphan state that is never reaped. This commit moves LU-18137's fix closer to the kernel socket allocation to avoid this racy connection setup condition. This commit also provides a backport of the kernel's socket accounting function sock_inuse_add() for compatibility when vendor kernels lack the static function definition in their kernel headers. The lnet_compat.h file has been introduced for a place to put such compatibility code. Fixes: 3367ef3eb2 ("LU-18137 ksocklnd: Fix TCP socket cleanup") Signed-off-by: Josh Samuelson Change-Id: I11fc0b53e051871da26c079e9aeb50976b20bbd3 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56737 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Timothy Day Reviewed-by: Chris Horn Reviewed-by: Mark Roper Reviewed-by: Oleg Drokin --- lnet/autoconf/lustre-lnet.m4 | 23 +++++++++++++++++++++++ lnet/include/lnet/Makefile.am | 1 + lnet/include/lnet/lnet_compat.h | 29 +++++++++++++++++++++++++++++ lnet/klnds/socklnd/socklnd.h | 1 - lnet/klnds/socklnd/socklnd_lib.c | 12 ++---------- lnet/lnet/lib-socket.c | 8 ++++++++ 6 files changed, 63 insertions(+), 11 deletions(-) create mode 100644 lnet/include/lnet/lnet_compat.h diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index cb4e936..161dee7 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -1027,6 +1027,27 @@ AC_DEFUN([LN_CONFIG_SOCK_CREATE_KERN], [ ]) # LN_CONFIG_SOCK_CREATE_KERN # +# LN_CONFIG_SOCK_INUSE_ADD +# +# Linux v5.17-rc1-gd477eb900484 added helper function sock_inuse_add() +# for socket statistics. +# +AC_DEFUN([LN_SRC_CONFIG_SOCK_INUSE_ADD], [ + LB2_LINUX_TEST_SRC([sock_inuse_add], [ + #include + ],[ + sock_inuse_add((struct net*)0, 0); + ],[-Werror]) +]) +AC_DEFUN([LN_CONFIG_SOCK_INUSE_ADD], [ + LB2_MSG_LINUX_TEST_RESULT([if 'sock_inuse_add()' is available], + [sock_inuse_add], [ + AC_DEFINE(HAVE_SOCK_INUSE_ADD, 1, + [sock_inuse_add() is available]) + ]) +]) # LN_CONFIG_SOCK_INUSE_ADD + +# # LN_CONFIG_SOCK_NOT_OWNED_BY_ME # # Linux upstream v6.11-rc3-g151c9c724d05d5b0d changes TCP socket orphan @@ -1227,6 +1248,7 @@ AC_DEFUN([LN_PROG_LINUX_SRC], [ LN_SRC_CONFIG_SK_DATA_READY # 4.x LN_SRC_CONFIG_SOCK_CREATE_KERN + LN_SRC_CONFIG_SOCK_INUSE_ADD LN_SRC_CONFIG_SOCK_NOT_OWNED_BY_ME # 4.6 LN_SRC_ETHTOOL_LINK_SETTINGS @@ -1247,6 +1269,7 @@ AC_DEFUN([LN_PROG_LINUX_RESULTS], [ LN_CONFIG_SK_DATA_READY # 4.x LN_CONFIG_SOCK_CREATE_KERN + LN_CONFIG_SOCK_INUSE_ADD LN_CONFIG_SOCK_NOT_OWNED_BY_ME # 4.6 LN_ETHTOOL_LINK_SETTINGS diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am index 6baeae1..b9508da 100644 --- a/lnet/include/lnet/Makefile.am +++ b/lnet/include/lnet/Makefile.am @@ -10,6 +10,7 @@ EXTRA_DIST = \ lib-lnet.h \ lib-types.h \ udsp.h \ + lnet_compat.h \ lnet_crypto.h \ lnet_rdma.h \ lnet_gds.h \ diff --git a/lnet/include/lnet/lnet_compat.h b/lnet/include/lnet/lnet_compat.h new file mode 100644 index 0000000..d0d25c7 --- /dev/null +++ b/lnet/include/lnet/lnet_compat.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* This file is part of Lustre, http://www.lustre.org/ */ + +#ifndef __LNET_LNET_COMPAT_H__ +#define __LNET_LNET_COMPAT_H__ + +/* kernel v5.17-rc1: commit d477eb9004845cb2dc92ad5eed79a437738a868a + * added static function sock_inuse_add() to the kernel headers, backport + * a copy for vendor kernels that may not provide it; such kernels will + * be lacking the all member added to the prot_inuse structure in the + * next commit, 4199bae10c49e24bc2c5d8c06a68820d56640000. + */ +#ifdef HAVE_SOCK_NOT_OWNED_BY_ME +#ifndef HAVE_SOCK_INUSE_ADD +#ifdef CONFIG_PROC_FS +static inline void sock_inuse_add(const struct net *net, int val) +{ + this_cpu_add(*net->core.sock_inuse, val); +} +#else +static inline void sock_inuse_add(const struct net *net, int val) +{ +} +#endif +#endif +#endif + +#endif diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 6a68e40..5fd6b39 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/lnet/klnds/socklnd/socklnd_lib.c b/lnet/klnds/socklnd/socklnd_lib.c index 4954905..d2ff979 100644 --- a/lnet/klnds/socklnd/socklnd_lib.c +++ b/lnet/klnds/socklnd/socklnd_lib.c @@ -429,18 +429,10 @@ ksocknal_lib_setup_sock(struct socket *sock, struct lnet_ni *ni) int keep_intvl; int keep_count; int do_keepalive; - struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sock->sk); struct lnet_ioctl_config_socklnd_tunables *lndtun; -#ifdef HAVE_SOCK_NOT_OWNED_BY_ME - struct net *net = sock_net(sk); - - /* Set sk_net_refcnt and namespace for orphan cleanup LU-18137 */ - sk->sk_net_refcnt = 1; - get_net(net); -#endif - sk->sk_allocation = GFP_NOFS; + sock->sk->sk_allocation = GFP_NOFS; /* Ensure this socket aborts active sends immediately when closed. */ sock_reset_flag(sock->sk, SOCK_LINGER); diff --git a/lnet/lnet/lib-socket.c b/lnet/lnet/lib-socket.c index d44d8dd..d9fa38b 100644 --- a/lnet/lnet/lib-socket.c +++ b/lnet/lnet/lib-socket.c @@ -19,6 +19,7 @@ #include /* For sys_open & sys_close */ #include +#include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) @@ -190,6 +192,12 @@ retry: } sock->sk->sk_reuseport = 1; +#ifdef HAVE_SOCK_NOT_OWNED_BY_ME + /* Set sk_net_refcnt and namespace for orphan cleanup LU-18137 */ + sock->sk->sk_net_refcnt = 1; + get_net(ns); + sock_inuse_add(ns, 1); +#endif if (interface >= 0 || local_port != 0) { struct sockaddr_storage locaddr = {}; -- 1.8.3.1