From 7e410763d85c1cc14f59ea3d9af072d8d4ec3843 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Mon, 10 May 2021 23:27:46 -0700 Subject: [PATCH] LU-13783 libcfs: support removal of kernel_setsockopt() Linux 5.8 removes kernel_setsockopt() and kernel_getsockopt(), and provides some helper functions for some accesses that are not trivial. This patch adds those helpers to libcfs when they are not available, and changes (nearly) all calls to kernel_[gs]etsockopt() to either use direct access or a helper call. ->keepalive() is not available before v4.11-rc1~94^2~43^2~14 and there is no helper function, so for SO_KEEPALIVE we need to have #ifdef code in the C file. TCP_BACKOFF* settings are not converted as they are not available in any upstream kernel, so no conversion is possible. Also include some minor style fixes and change lnet_sock_setbuf() and lnet_sock_getbuf() to be 'void' functions. Lustre-change: https://review.whamcloud.com/39259 Lustre-commit: 99d9638d6c074b48f1c21c5c94d6dfe347eed3ee Test-Parameters: trivial Signed-off-by: Mr NeilBrown Change-Id: I539cf8d20555ddb3565fa75130fdd3acf709c545 Reviewed-by: Aurelien Degremont Reviewed-by: Chris Horn Reviewed-by: James Simmons Reviewed-on: https://review.whamcloud.com/43644 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Li Xi --- libcfs/autoconf/lustre-libcfs.m4 | 21 +++ libcfs/include/libcfs/linux/Makefile.am | 2 +- libcfs/include/libcfs/linux/linux-net.h | 72 +++++++++ lnet/include/lnet/lib-lnet.h | 4 +- lnet/klnds/socklnd/socklnd.h | 2 + lnet/klnds/socklnd/socklnd_lib.c | 249 ++++++++++++++------------------ lnet/lnet/lib-socket.c | 42 ++---- 7 files changed, 215 insertions(+), 177 deletions(-) create mode 100644 libcfs/include/libcfs/linux/linux-net.h diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 index b980b13..531885c 100644 --- a/libcfs/autoconf/lustre-libcfs.m4 +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -1356,6 +1356,26 @@ mmap_write_lock, [ ]) 
]) # LIBCFS_HAVE_MMAP_LOCK +# +# LIBCFS_KERNEL_SETSOCKOPT +# +# kernel v5.8-rc1~165^2~59^2 +# net: remove kernel_setsockopt +AC_DEFUN([LIBCFS_KERNEL_SETSOCKOPT], [ +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" +LB_CHECK_COMPILE([if kernel_setsockopt still in use], +kernel_setsockopt_exists, [ + #include +],[ + kernel_setsockopt(NULL, 0, 0, NULL, 0); +],[ + AC_DEFINE(HAVE_KERNEL_SETSOCKOPT, 1, + [kernel_setsockopt still in use]) +]) +EXTRA_KCFLAGS="$tmp_flags" +]) # LIBCFS_KERNEL_SETSOCKOPT + AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [ LIBCFS_SRC_HAVE_PROC_OPS @@ -1475,6 +1495,7 @@ LIBCFS_CACHE_DETAIL_WRITERS LIBCFS_HAVE_NR_UNSTABLE_NFS # 5.8 LIBCFS_HAVE_MMAP_LOCK +LIBCFS_KERNEL_SETSOCKOPT ]) # LIBCFS_PROG_LINUX # diff --git a/libcfs/include/libcfs/linux/Makefile.am b/libcfs/include/libcfs/linux/Makefile.am index 389cd48..7e9b29b 100644 --- a/libcfs/include/libcfs/linux/Makefile.am +++ b/libcfs/include/libcfs/linux/Makefile.am @@ -1,3 +1,3 @@ EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \ - linux-list.h linux-hash.h linux-uuid.h linux-wait.h \ + linux-list.h linux-hash.h linux-uuid.h linux-wait.h linux-net.h \ refcount.h processor.h xarray.h diff --git a/libcfs/include/libcfs/linux/linux-net.h b/libcfs/include/libcfs/linux/linux-net.h new file mode 100644 index 0000000..41484bd --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-net.h @@ -0,0 +1,72 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ + +#ifndef __LIBCFS_LINUX_NET_H__ +#define __LIBCFS_LINUX_NET_H__ + +#ifdef HAVE_KERNEL_SETSOCKOPT + +#include + +static inline void tcp_sock_set_quickack(struct sock *sk, int opt) +{ + struct socket *sock = sk->sk_socket; + + kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, + (char *)&opt, sizeof(opt)); +} + +static inline void tcp_sock_set_nodelay(struct sock *sk) +{ + int opt = 1; + struct socket *sock = sk->sk_socket; + + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); +} + +static inline int tcp_sock_set_keepidle(struct sock *sk, int opt) +{ + struct socket *sock = sk->sk_socket; + + return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&opt, sizeof(opt)); +} + +static inline int tcp_sock_set_keepintvl(struct sock *sk, int opt) +{ + struct socket *sock = sk->sk_socket; + + return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&opt, sizeof(opt)); +} + +static inline int tcp_sock_set_keepcnt(struct sock *sk, int opt) +{ + struct socket *sock = sk->sk_socket; + + return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, + (char *)&opt, sizeof(opt)); +} +#endif /* HAVE_KERNEL_SETSOCKOPT */ + +#endif /* __LIBCFS_LINUX_NET_H__ */ diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index b94ad12..ea54d5f 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -787,8 +787,8 @@ struct lnet_inetdev { }; int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns); -int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize); -int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize); +void 
lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize); +void lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize); int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port); int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout); int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index bae3b52..a4f7682 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -56,6 +56,8 @@ #include #include +#include + #ifndef NETIF_F_CSUM_MASK # define NETIF_F_CSUM_MASK NETIF_F_ALL_CSUM #endif diff --git a/lnet/klnds/socklnd/socklnd_lib.c b/lnet/klnds/socklnd/socklnd_lib.c index 375128c..3465944 100644 --- a/lnet/klnds/socklnd/socklnd_lib.c +++ b/lnet/klnds/socklnd/socklnd_lib.c @@ -177,16 +177,14 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx, void ksocknal_lib_eager_ack(struct ksock_conn *conn) { - int opt = 1; struct socket *sock = conn->ksnc_sock; - /* Remind the socket to ACK eagerly. If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer_ni. */ + /* Remind the socket to ACK eagerly. If I don't, the socket might + * think I'm about to send something it could piggy-back the ACK on, + * introducing delay in completing zero-copy sends in my peer_ni. 
+ */ - kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char *)&opt, sizeof(opt)); + tcp_sock_set_quickack(sock->sk, 1); } int @@ -408,162 +406,131 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx) int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle) { - struct socket *sock = conn->ksnc_sock; - int len; - int rc; + struct socket *sock = conn->ksnc_sock; + + if (ksocknal_connsock_addref(conn) < 0) { + LASSERT(conn->ksnc_closing); + *txmem = 0; + *rxmem = 0; + *nagle = 0; + return -ESHUTDOWN; + } - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } + lnet_sock_getbuf(sock, txmem, rxmem); - rc = lnet_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { - len = sizeof(*nagle); - rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - } + *nagle = !(tcp_sk(sock->sk)->nonagle & TCP_NAGLE_OFF); - ksocknal_connsock_decref(conn); + ksocknal_connsock_decref(conn); - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - return (rc); + return 0; } int ksocknal_lib_setup_sock (struct socket *sock) { - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* Ensure this socket aborts active sends immediately when we close - * it. 
*/ - - linger.l_onoff = 0; - linger.l_linger = 0; + int rc; + int keep_idle; + int keep_intvl; + int keep_count; + int do_keepalive; + struct tcp_sock *tp = tcp_sk(sock->sk); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&linger, sizeof(linger)); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } + sock->sk->sk_allocation = GFP_NOFS; - option = -1; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, - (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set SO_LINGER2: %d\n", rc); - return (rc); - } + /* Ensure this socket aborts active sends immediately when closed. */ + sock_reset_flag(sock->sk, SOCK_LINGER); - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; + tp->linger2 = -1; - rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } + if (!*ksocknal_tunables.ksnd_nagle) + tcp_sock_set_nodelay(sock->sk); - rc = lnet_sock_setbuf(sock, - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return (rc); - } + lnet_sock_setbuf(sock, + *ksocknal_tunables.ksnd_tx_buffer_size, + *ksocknal_tunables.ksnd_rx_buffer_size); /* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ #ifdef SOCKNAL_BACKOFF - if (*ksocknal_tunables.ksnd_backoff_init > 0) { - option = *ksocknal_tunables.ksnd_backoff_init; + if (*ksocknal_tunables.ksnd_backoff_init > 0) { + int option = *ksocknal_tunables.ksnd_backoff_init; #ifdef SOCKNAL_BACKOFF_MS - option *= 1000; + option *= 1000; #endif rc = kernel_setsockopt(sock, SOL_TCP, TCP_BACKOFF_INIT, (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set initial tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } + if (rc != 0) { + CERROR("Can't 
set initial tcp backoff %d: %d\n", + option, rc); + return rc; + } + } - if (*ksocknal_tunables.ksnd_backoff_max > 0) { - option = *ksocknal_tunables.ksnd_backoff_max; + if (*ksocknal_tunables.ksnd_backoff_max > 0) { + int option = *ksocknal_tunables.ksnd_backoff_max; #ifdef SOCKNAL_BACKOFF_MS - option *= 1000; + option *= 1000; #endif rc = kernel_setsockopt(sock, SOL_TCP, TCP_BACKOFF_MAX, (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set maximum tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } + if (rc != 0) { + CERROR("Can't set maximum tcp backoff %d: %d\n", + option, rc); + return rc; + } + } #endif - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; + /* snapshot tunables */ + keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; + keep_count = *ksocknal_tunables.ksnd_keepalive_count; + keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); + do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - option = (do_keepalive ? 1 : 0); - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (0); - - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof(keep_idle)); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); - return (rc); - } +#ifdef HAVE_KERNEL_SETSOCKOPT + /* open-coded version doesn't work in all kernels, and + * there is no helper function, so call kernel_setsockopt() + * directly. + */ + { + int option = (do_keepalive ? 
1 : 0); + kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&option, sizeof(option)); + } +#else + if (sock->sk->sk_prot->keepalive) + sock->sk->sk_prot->keepalive(sock->sk, do_keepalive); + if (do_keepalive) + sock_set_flag(sock->sk, SOCK_KEEPOPEN); + else + sock_reset_flag(sock->sk, SOCK_KEEPOPEN); +#endif /* HAVE_KERNEL_SETSOCKOPT */ + + if (!do_keepalive) + return (0); + + rc = tcp_sock_set_keepidle(sock->sk, keep_idle); + if (rc != 0) { + CERROR("Can't set TCP_KEEPIDLE: %d\n", rc); + return rc; + } - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof(keep_intvl)); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); - return (rc); - } + rc = tcp_sock_set_keepintvl(sock->sk, keep_intvl); + if (rc != 0) { + CERROR("Can't set TCP_KEEPINTVL: %d\n", rc); + return rc; + } - rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof(keep_count)); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); - return (rc); - } + rc = tcp_sock_set_keepcnt(sock->sk, keep_count); + if (rc != 0) { + CERROR("Can't set TCP_KEEPCNT: %d\n", rc); + return rc; + } - return (0); + return (0); } void @@ -572,30 +540,27 @@ ksocknal_lib_push_conn(struct ksock_conn *conn) struct sock *sk; struct tcp_sock *tp; int nonagle; - int val = 1; int rc; - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; + rc = ksocknal_connsock_addref(conn); + if (rc != 0) /* being shut down */ + return; sk = conn->ksnc_sock->sk; tp = tcp_sk(sk); - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); + lock_sock(sk); + nonagle = tp->nonagle; + tp->nonagle = TCP_NAGLE_OFF; + release_sock(sk); - rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof(val)); - LASSERT (rc == 0); + tcp_sock_set_nodelay(conn->ksnc_sock->sk); - lock_sock (sk); - tp->nonagle = nonagle; - release_sock (sk); + lock_sock(sk); + tp->nonagle = nonagle; + 
release_sock(sk); - ksocknal_connsock_decref(conn); + ksocknal_connsock_decref(conn); } void ksocknal_read_callback(struct ksock_conn *conn); diff --git a/lnet/lnet/lib-socket.c b/lnet/lnet/lib-socket.c index ee4127c..98fe5bf 100644 --- a/lnet/lnet/lib-socket.c +++ b/lnet/lnet/lib-socket.c @@ -42,6 +42,7 @@ #include #include +#include #include #include @@ -186,7 +187,6 @@ lnet_sock_create(int interface, struct sockaddr *remaddr, { struct socket *sock; int rc; - int option; #ifdef HAVE_SOCK_CREATE_KERN_USE_NET rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock); @@ -198,13 +198,7 @@ lnet_sock_create(int interface, struct sockaddr *remaddr, return ERR_PTR(rc); } - option = 1; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof(option)); - if (rc) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } + sock->sk->sk_reuseport = 1; if (interface >= 0 || local_port != 0) { struct sockaddr_in locaddr = {}; @@ -245,34 +239,21 @@ failed: return ERR_PTR(rc); } -int +void lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) { - int option; - int rc; + struct sock *sk = sock->sk; if (txbufsize != 0) { - option = txbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR("Can't set send buffer %d: %d\n", - option, rc); - return rc; - } + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + sk->sk_sndbuf = txbufsize; + sk->sk_write_space(sk); } if (rxbufsize != 0) { - option = rxbufsize; - rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof(option)); - if (rc != 0) { - CERROR("Can't set receive buffer %d: %d\n", - option, rc); - return rc; - } + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + sk->sk_rcvbuf = rxbufsize; } - return 0; } EXPORT_SYMBOL(lnet_sock_setbuf); @@ -307,16 +288,13 @@ lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) } EXPORT_SYMBOL(lnet_sock_getaddr); -int -lnet_sock_getbuf(struct socket 
*sock, int *txbufsize, int *rxbufsize) +void lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) { if (txbufsize != NULL) *txbufsize = sock->sk->sk_sndbuf; if (rxbufsize != NULL) *rxbufsize = sock->sk->sk_rcvbuf; - - return 0; } EXPORT_SYMBOL(lnet_sock_getbuf); -- 1.8.3.1