Whamcloud - gitweb
LU-13783 libcfs: support removal of kernel_setsockopt() 59/39259/7
authorMr NeilBrown <neilb@suse.de>
Fri, 3 Jul 2020 03:51:50 +0000 (13:51 +1000)
committerOleg Drokin <green@whamcloud.com>
Wed, 10 Mar 2021 08:02:38 +0000 (08:02 +0000)
Linux 5.8 removes kernel_setsockopt() and kernel_getsockopt(), and
provides some helper functions for some accesses that are
not trivial.

This patch adds those helpers to libcfs when they are not available,
and changes (nearly) all calls to kernel_[gs]etsockopt() to
use either direct access or a helper call.

->keepalive() is not available before v4.11-rc1~94^2~43^2~14
and there is no helper function, so for SO_KEEPALIVE we
need to have #ifdef code in the C file.

TCP_BACKOFF* settings are not converted as they are not available in
any upstream kernel, so no conversion is possible.

Also include some minor style fixes and change lnet_sock_setbuf() and
lnet_sock_getbuf() to be 'void' functions.

Test-Parameters: trivial
Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: I539cf8d20555ddb3565fa75130fdd3acf709c545
Reviewed-on: https://review.whamcloud.com/39259
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Aurelien Degremont <degremoa@amazon.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/linux/Makefile.am
libcfs/include/libcfs/linux/linux-net.h [new file with mode: 0644]
lnet/include/lnet/lib-lnet.h
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_lib.c
lnet/lnet/lib-socket.c

index 396f7a5..c787b14 100644 (file)
@@ -1330,6 +1330,26 @@ mmap_write_lock, [
 ])
 ]) # LIBCFS_HAVE_MMAP_LOCK
 
+#
+# LIBCFS_KERNEL_SETSOCKOPT
+#
+# kernel v5.8-rc1~165^2~59^2
+# net: remove kernel_setsockopt
+AC_DEFUN([LIBCFS_KERNEL_SETSOCKOPT], [
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_CHECK_COMPILE([if kernel_setsockopt still in use],
+kernel_setsockopt_exists, [
+       #include <linux/net.h>
+],[
+       kernel_setsockopt(NULL, 0, 0, NULL, 0);
+],[
+       AC_DEFINE(HAVE_KERNEL_SETSOCKOPT, 1,
+               [kernel_setsockopt still in use])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+]) # LIBCFS_KERNEL_SETSOCKOPT
+
 AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [] )
 AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [])
 
@@ -1440,6 +1460,7 @@ LIBCFS_CACHE_DETAIL_WRITERS
 LIBCFS_HAVE_NR_UNSTABLE_NFS
 # 5.8
 LIBCFS_HAVE_MMAP_LOCK
+LIBCFS_KERNEL_SETSOCKOPT
 ]) # LIBCFS_PROG_LINUX
 
 #
index 389cd48..7e9b29b 100644 (file)
@@ -1,3 +1,3 @@
 EXTRA_DIST = linux-misc.h linux-fs.h linux-mem.h linux-time.h linux-cpu.h \
-            linux-list.h linux-hash.h linux-uuid.h linux-wait.h \
+            linux-list.h linux-hash.h linux-uuid.h linux-wait.h linux-net.h \
             refcount.h processor.h xarray.h
diff --git a/libcfs/include/libcfs/linux/linux-net.h b/libcfs/include/libcfs/linux/linux-net.h
new file mode 100644 (file)
index 0000000..41484bd
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * GPL HEADER END
+ */
+
+#ifndef __LIBCFS_LINUX_NET_H__
+#define __LIBCFS_LINUX_NET_H__
+
+#ifdef HAVE_KERNEL_SETSOCKOPT
+
+#include <net/tcp.h>
+
+static inline void tcp_sock_set_quickack(struct sock *sk, int opt)
+{
+       struct socket *sock = sk->sk_socket;
+
+       kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
+                         (char *)&opt, sizeof(opt));
+}
+
+static inline void tcp_sock_set_nodelay(struct sock *sk)
+{
+       int opt = 1;
+       struct socket *sock = sk->sk_socket;
+
+       kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
+                         (char *)&opt, sizeof(opt));
+}
+
+static inline int tcp_sock_set_keepidle(struct sock *sk, int opt)
+{
+       struct socket *sock = sk->sk_socket;
+
+       return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
+                                (char *)&opt, sizeof(opt));
+}
+
+static inline int tcp_sock_set_keepintvl(struct sock *sk, int opt)
+{
+       struct socket *sock = sk->sk_socket;
+
+       return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
+                                (char *)&opt, sizeof(opt));
+}
+
+static inline int tcp_sock_set_keepcnt(struct sock *sk, int opt)
+{
+       struct socket *sock = sk->sk_socket;
+
+       return kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
+                                (char *)&opt, sizeof(opt));
+}
+#endif /* HAVE_KERNEL_SETSOCKOPT */
+
+#endif /* __LIBCFS_LINUX_NET_H__ */
index fc4db52..9bcf31c 100644 (file)
@@ -807,8 +807,8 @@ struct lnet_inetdev {
 };
 
 int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns);
-int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
-int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
+void lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
+void lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
 int lnet_sock_getaddr(struct socket *socket, bool remote,
                      struct sockaddr_storage *peer);
 int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
index e0580bf..caf95bf 100644 (file)
@@ -56,6 +56,8 @@
 #include <lnet/lib-lnet.h>
 #include <lnet/socklnd.h>
 
+#include <libcfs/linux/linux-net.h>
+
 #ifndef NETIF_F_CSUM_MASK
 # define NETIF_F_CSUM_MASK NETIF_F_ALL_CSUM
 #endif
index 0cced9c..023ca82 100644 (file)
@@ -176,16 +176,14 @@ ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx,
 void
 ksocknal_lib_eager_ack(struct ksock_conn *conn)
 {
-       int opt = 1;
        struct socket *sock = conn->ksnc_sock;
 
-        /* Remind the socket to ACK eagerly.  If I don't, the socket might
-         * think I'm about to send something it could piggy-back the ACK
-         * on, introducing delay in completing zero-copy sends in my
-         * peer_ni. */
+       /* Remind the socket to ACK eagerly.  If I don't, the socket might
+        * think I'm about to send something it could piggy-back the ACK on,
+        * introducing delay in completing zero-copy sends in my peer_ni.
+        */
 
-       kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
-                         (char *)&opt, sizeof(opt));
+       tcp_sock_set_quickack(sock->sk, 1);
 }
 
 int
@@ -407,162 +405,132 @@ ksocknal_lib_csum_tx(struct ksock_tx *tx)
 int
 ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle)
 {
-        struct socket *sock = conn->ksnc_sock;
-        int            len;
-        int            rc;
-
-        rc = ksocknal_connsock_addref(conn);
-        if (rc != 0) {
-                LASSERT (conn->ksnc_closing);
-                *txmem = *rxmem = *nagle = 0;
-                return (-ESHUTDOWN);
-        }
+       struct socket *sock = conn->ksnc_sock;
+       struct tcp_sock *tp = tcp_sk(sock->sk);
+
+       if (ksocknal_connsock_addref(conn) < 0) {
+               LASSERT(conn->ksnc_closing);
+               *txmem = 0;
+               *rxmem = 0;
+               *nagle = 0;
+               return -ESHUTDOWN;
+       }
 
-       rc = lnet_sock_getbuf(sock, txmem, rxmem);
-        if (rc == 0) {
-                len = sizeof(*nagle);
-               rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY,
-                                      (char *)nagle, &len);
-        }
+       lnet_sock_getbuf(sock, txmem, rxmem);
+
+       *nagle = !(tp->nonagle & TCP_NAGLE_OFF);
 
-        ksocknal_connsock_decref(conn);
+       ksocknal_connsock_decref(conn);
 
-        if (rc == 0)
-                *nagle = !*nagle;
-        else
-                *txmem = *rxmem = *nagle = 0;
 
-        return (rc);
+       return 0;
 }
 
 int
 ksocknal_lib_setup_sock (struct socket *sock)
 {
-        int             rc;
-        int             option;
-        int             keep_idle;
-        int             keep_intvl;
-        int             keep_count;
-        int             do_keepalive;
-        struct linger   linger;
-
-        sock->sk->sk_allocation = GFP_NOFS;
-
-        /* Ensure this socket aborts active sends immediately when we close
-         * it. */
-
-        linger.l_onoff = 0;
-        linger.l_linger = 0;
-
-       rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-                              (char *)&linger, sizeof(linger));
-        if (rc != 0) {
-                CERROR ("Can't set SO_LINGER: %d\n", rc);
-                return (rc);
-        }
+       int rc;
+       int keep_idle;
+       int keep_intvl;
+       int keep_count;
+       int do_keepalive;
+       struct tcp_sock *tp = tcp_sk(sock->sk);
 
-        option = -1;
-       rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2,
-                              (char *)&option, sizeof(option));
-        if (rc != 0) {
-                CERROR ("Can't set SO_LINGER2: %d\n", rc);
-                return (rc);
-        }
+       sock->sk->sk_allocation = GFP_NOFS;
 
-        if (!*ksocknal_tunables.ksnd_nagle) {
-                option = 1;
+       /* Ensure this socket aborts active sends immediately when closed. */
+       sock_reset_flag(sock->sk, SOCK_LINGER);
 
-               rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-                                      (char *)&option, sizeof(option));
-                if (rc != 0) {
-                        CERROR ("Can't disable nagle: %d\n", rc);
-                        return (rc);
-                }
-        }
+       tp->linger2 = -1;
 
-       rc = lnet_sock_setbuf(sock,
-                             *ksocknal_tunables.ksnd_tx_buffer_size,
-                             *ksocknal_tunables.ksnd_rx_buffer_size);
-        if (rc != 0) {
-                CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
-                        *ksocknal_tunables.ksnd_tx_buffer_size,
-                        *ksocknal_tunables.ksnd_rx_buffer_size, rc);
-                return (rc);
-        }
+       if (!*ksocknal_tunables.ksnd_nagle)
+               tcp_sock_set_nodelay(sock->sk);
+
+       lnet_sock_setbuf(sock,
+                        *ksocknal_tunables.ksnd_tx_buffer_size,
+                        *ksocknal_tunables.ksnd_rx_buffer_size);
 
 /* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */
 #ifdef SOCKNAL_BACKOFF
-        if (*ksocknal_tunables.ksnd_backoff_init > 0) {
-                option = *ksocknal_tunables.ksnd_backoff_init;
+       if (*ksocknal_tunables.ksnd_backoff_init > 0) {
+               int option = *ksocknal_tunables.ksnd_backoff_init;
 #ifdef SOCKNAL_BACKOFF_MS
-                option *= 1000;
+               option *= 1000;
 #endif
 
                rc = kernel_setsockopt(sock, SOL_TCP, TCP_BACKOFF_INIT,
                                       (char *)&option, sizeof(option));
-                if (rc != 0) {
-                        CERROR ("Can't set initial tcp backoff %d: %d\n",
-                                option, rc);
-                        return (rc);
-                }
-        }
+               if (rc != 0) {
+                       CERROR("Can't set initial tcp backoff %d: %d\n",
+                              option, rc);
+                       return rc;
+               }
+       }
 
-        if (*ksocknal_tunables.ksnd_backoff_max > 0) {
-                option = *ksocknal_tunables.ksnd_backoff_max;
+       if (*ksocknal_tunables.ksnd_backoff_max > 0) {
+               int option = *ksocknal_tunables.ksnd_backoff_max;
 #ifdef SOCKNAL_BACKOFF_MS
-                option *= 1000;
+               option *= 1000;
 #endif
 
                rc = kernel_setsockopt(sock, SOL_TCP, TCP_BACKOFF_MAX,
                                       (char *)&option, sizeof(option));
-                if (rc != 0) {
-                        CERROR ("Can't set maximum tcp backoff %d: %d\n",
-                                option, rc);
-                        return (rc);
-                }
-        }
+               if (rc != 0) {
+                       CERROR("Can't set maximum tcp backoff %d: %d\n",
+                              option, rc);
+                       return rc;
+               }
+       }
 #endif
 
-        /* snapshot tunables */
-        keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
-        keep_count = *ksocknal_tunables.ksnd_keepalive_count;
-        keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
-
-        do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+       /* snapshot tunables */
+       keep_idle  = *ksocknal_tunables.ksnd_keepalive_idle;
+       keep_count = *ksocknal_tunables.ksnd_keepalive_count;
+       keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
 
-        option = (do_keepalive ? 1 : 0);
-       rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-                              (char *)&option, sizeof(option));
-        if (rc != 0) {
-                CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
-                return (rc);
-        }
+       do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
 
-        if (!do_keepalive)
-                return (0);
-
-       rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
-                              (char *)&keep_idle, sizeof(keep_idle));
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
-                return (rc);
-        }
+#ifdef HAVE_KERNEL_SETSOCKOPT
+       /* open-coded version doesn't work in all kernels, and
+        * there is no helper function, so call kernel_setsockopt()
+        * directly.
+        */
+       {
+               int option = (do_keepalive ? 1 : 0);
+               kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+                                 (char *)&option, sizeof(option));
+       }
+#else
+       if (sock->sk->sk_prot->keepalive)
+               sock->sk->sk_prot->keepalive(sock->sk, do_keepalive);
+       if (do_keepalive)
+               sock_set_flag(sock->sk, SOCK_KEEPOPEN);
+       else
+               sock_reset_flag(sock->sk, SOCK_KEEPOPEN);
+#endif /* HAVE_KERNEL_SETSOCKOPT */
+
+       if (!do_keepalive)
+               return (0);
+
+       rc = tcp_sock_set_keepidle(sock->sk, keep_idle);
+       if (rc != 0) {
+               CERROR("Can't set TCP_KEEPIDLE: %d\n", rc);
+               return rc;
+       }
 
-       rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-                              (char *)&keep_intvl, sizeof(keep_intvl));
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
-                return (rc);
-        }
+       rc = tcp_sock_set_keepintvl(sock->sk, keep_intvl);
+       if (rc != 0) {
+               CERROR("Can't set TCP_KEEPINTVL: %d\n", rc);
+               return rc;
+       }
 
-       rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-                              (char *)&keep_count, sizeof(keep_count));
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
-                return (rc);
-        }
+       rc = tcp_sock_set_keepcnt(sock->sk, keep_count);
+       if (rc != 0) {
+               CERROR("Can't set TCP_KEEPCNT: %d\n", rc);
+               return rc;
+       }
 
-        return (0);
+       return (0);
 }
 
 void
@@ -571,30 +539,27 @@ ksocknal_lib_push_conn(struct ksock_conn *conn)
        struct sock *sk;
        struct tcp_sock *tp;
        int nonagle;
-       int val = 1;
        int rc;
 
-        rc = ksocknal_connsock_addref(conn);
-        if (rc != 0)                            /* being shut down */
-                return;
+       rc = ksocknal_connsock_addref(conn);
+       if (rc != 0)                            /* being shut down */
+               return;
 
        sk = conn->ksnc_sock->sk;
        tp = tcp_sk(sk);
 
-        lock_sock (sk);
-        nonagle = tp->nonagle;
-        tp->nonagle = 1;
-        release_sock (sk);
+       lock_sock(sk);
+       nonagle = tp->nonagle;
+       tp->nonagle = TCP_NAGLE_OFF;
+       release_sock(sk);
 
-       rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY,
-                              (char *)&val, sizeof(val));
-        LASSERT (rc == 0);
+       tcp_sock_set_nodelay(conn->ksnc_sock->sk);
 
-        lock_sock (sk);
-        tp->nonagle = nonagle;
-        release_sock (sk);
+       lock_sock(sk);
+       tp->nonagle = nonagle;
+       release_sock(sk);
 
-        ksocknal_connsock_decref(conn);
+       ksocknal_connsock_decref(conn);
 }
 
 void ksocknal_read_callback(struct ksock_conn *conn);
index 9fa349f..97eef28 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/inetdevice.h>
 
 #include <libcfs/linux/linux-time.h>
+#include <libcfs/linux/linux-net.h>
 #include <libcfs/libcfs.h>
 #include <lnet/lib-lnet.h>
 
@@ -188,7 +189,6 @@ lnet_sock_create(int interface, struct sockaddr *remaddr,
 {
        struct socket *sock;
        int rc;
-       int option;
        int family;
 
        family = AF_INET6;
@@ -204,13 +204,7 @@ lnet_sock_create(int interface, struct sockaddr *remaddr,
                return ERR_PTR(rc);
        }
 
-       option = 1;
-       rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-                              (char *)&option, sizeof(option));
-       if (rc) {
-               CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
-               goto failed;
-       }
+       sock->sk->sk_reuseport = 1;
 
        if (interface >= 0 || local_port != 0) {
                struct sockaddr_storage locaddr = {};
@@ -269,34 +263,21 @@ failed:
        return ERR_PTR(rc);
 }
 
-int
+void
 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
 {
-       int                 option;
-       int                 rc;
+       struct sock *sk = sock->sk;
 
        if (txbufsize != 0) {
-               option = txbufsize;
-               rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-                                      (char *)&option, sizeof(option));
-               if (rc != 0) {
-                       CERROR("Can't set send buffer %d: %d\n",
-                               option, rc);
-                       return rc;
-               }
+               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+               sk->sk_sndbuf = txbufsize;
+               sk->sk_write_space(sk);
        }
 
        if (rxbufsize != 0) {
-               option = rxbufsize;
-               rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
-                                      (char *)&option, sizeof(option));
-               if (rc != 0) {
-                       CERROR("Can't set receive buffer %d: %d\n",
-                               option, rc);
-                       return rc;
-               }
+               sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+               sk->sk_rcvbuf = rxbufsize;
        }
-       return 0;
 }
 EXPORT_SYMBOL(lnet_sock_setbuf);
 
@@ -337,16 +318,13 @@ lnet_sock_getaddr(struct socket *sock, bool remote,
 }
 EXPORT_SYMBOL(lnet_sock_getaddr);
 
-int
-lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
+void lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
 {
        if (txbufsize != NULL)
                *txbufsize = sock->sk->sk_sndbuf;
 
        if (rxbufsize != NULL)
                *rxbufsize = sock->sk->sk_rcvbuf;
-
-       return 0;
 }
 EXPORT_SYMBOL(lnet_sock_getbuf);