4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2015, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 #define DEBUG_SUBSYSTEM S_LNET
36 #include <linux/net.h>
37 #include <linux/file.h>
38 #include <linux/pagemap.h>
39 /* For sys_open & sys_close */
40 #include <linux/syscalls.h>
42 #include <linux/inetdevice.h>
44 #include <libcfs/libcfs.h>
45 #include <lnet/lib-lnet.h>
48 * kernel 5.1: commit 7f1bc6e95d7840d4305595b3e4025cddda88cee5
50 * SO_TIMESTAMP, SO_TIMESTAMPNS and SO_TIMESTAMPING options, the
51 * way they are currently defined, are not y2038 safe.
52 * Subsequent patches in the series add new y2038 safe versions
53 * of these options which provide 64 bit timestamps on all
54 * architectures uniformly.
55 * Hence, rename existing options with OLD tag suffixes.
57 * NOTE: When updating to timespec64 change these to '_NEW'.
/* Point the plain timeout option names at the renamed _OLD variants. */
61 #define SO_SNDTIMEO SO_SNDTIMEO_OLD
65 #define SO_RCVTIMEO SO_RCVTIMEO_OLD
/*
 * Send nob bytes from buffer on sock within an overall time budget.
 *
 * timeout is converted by cfs_time_seconds() into a jiffies budget. The
 * time spent in each kernel_sendmsg() call is deducted from it and the
 * remaining budget is installed as the socket send timeout (SO_SNDTIMEO).
 * A timeout of zero requests a non-blocking send (MSG_DONTWAIT).
 *
 * A send that reports zero bytes is treated as -ECONNABORTED.
 *
 * NOTE(review): this listing elides parts of the body (declarations,
 * control-flow lines and the other return paths), so they are not
 * described here.
 */
69 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
/* Total time budget for the whole write, in jiffies */
72 long jiffies_left = cfs_time_seconds(timeout);
77 /* Caller may pass a zero timeout if she thinks the socket buffer is
78 * empty enough to take the whole message immediately */
86 .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
90 /* Set send timeout to remaining time */
91 jiffies_to_timeval(jiffies_left, &tv);
92 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
93 (char *)&tv, sizeof(tv));
/* Log the timeout value that could not be applied together with rc */
95 CERROR("Can't set socket send timeout "
97 (long)tv.tv_sec, (int)tv.tv_usec, rc);
103 rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
/*
 * Charge the time spent in the send against the budget.
 * NOTE(review): then is presumably the jiffies value sampled just before
 * the send; its assignment is elided from this listing.
 */
104 jiffies_left -= jiffies - then;
/* Zero bytes sent is unexpected here; report an aborted connection */
113 CERROR("Unexpected zero rc\n");
114 return -ECONNABORTED;
/* Time budget used up */
117 if (jiffies_left <= 0)
/* Step past the bytes that were just sent */
120 buffer = ((char *)buffer) + rc;
125 EXPORT_SYMBOL(lnet_sock_write);
/*
 * Receive nob bytes into buffer from sock within an overall time budget.
 *
 * timeout is converted by cfs_time_seconds() into a jiffies budget that
 * must be positive (asserted). The time spent in each kernel_recvmsg()
 * call is deducted from it and the remaining budget is installed as the
 * socket receive timeout (SO_RCVTIMEO).
 *
 * NOTE(review): this listing elides parts of the body (declarations,
 * control-flow lines and the return paths), so they are not described
 * here.
 */
128 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
/* Total time budget for the whole read, in jiffies */
131 long jiffies_left = cfs_time_seconds(timeout);
/* Unlike the write path, a zero timeout is not accepted here */
136 LASSERT(jiffies_left > 0);
143 struct msghdr msg = {
147 /* Set receive timeout to remaining time */
148 jiffies_to_timeval(jiffies_left, &tv);
149 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
150 (char *)&tv, sizeof(tv));
/* Log the timeout value that could not be applied together with rc */
152 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
153 (long)tv.tv_sec, (int)tv.tv_usec, rc);
158 rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
/*
 * Charge the time spent in the receive against the budget.
 * NOTE(review): then is presumably the jiffies value sampled just before
 * the receive; its assignment is elided from this listing.
 */
159 jiffies_left -= jiffies - then;
/* Step past the bytes that were just received */
167 buffer = ((char *)buffer) + rc;
/* Time budget used up */
173 if (jiffies_left <= 0)
177 EXPORT_SYMBOL(lnet_sock_read);
/*
 * Pick a local IPv4 address on the interface with index interface in
 * namespace ns to use as the source when talking to dst_ipaddr.
 *
 * The chosen address is stored in *ret in host byte order. dst_ipaddr is
 * compared against host-order values, so it is expected in host byte
 * order as well.
 *
 * The device and its addresses are accessed through the _rcu accessors.
 * NOTE(review): presumably the caller must be inside an RCU read-side
 * critical section - confirm against callers.
 * NOTE(review): the return statements are elided from this listing.
 */
179 int choose_ipv4_src(__u32 *ret, int interface, __u32 dst_ipaddr, struct net *ns)
181 struct net_device *dev;
182 struct in_device *in_dev;
184 DECLARE_CONST_IN_IFADDR(ifa);
/* Look the device up by index; it has to exist and be up */
187 dev = dev_get_by_index_rcu(ns, interface);
189 if (!dev || !(dev->flags & IFF_UP))
191 in_dev = __in_dev_get_rcu(dev);
/*
 * Walk the addresses configured on the interface. The mask test below
 * is true for an address whose subnet (ifa_mask) contains dst_ipaddr.
 */
195 in_dev_for_each_ifa_rcu(ifa, in_dev) {
197 ((dst_ipaddr ^ ntohl(ifa->ifa_local))
198 & ntohl(ifa->ifa_mask)) == 0) {
199 /* This address at least as good as what we
202 *ret = ntohl(ifa->ifa_local);
211 EXPORT_SYMBOL(choose_ipv4_src);
/*
 * Create a kernel PF_INET/SOCK_STREAM socket, set the SO_REUSEADDR option
 * on it and optionally bind it to a local address and port.
 *
 * interface  - when >= 0, a local source address is picked with
 *              choose_ipv4_src() for the peer given in remaddr
 * remaddr    - peer address; read as a struct sockaddr_in when
 *              interface >= 0
 * local_port - local port in host byte order; the socket is only bound
 *              when interface >= 0 or local_port != 0
 * ns         - network namespace, passed to sock_create_kern() where that
 *              function accepts one
 *
 * NOTE(review): the return statements and cleanup paths are elided from
 * this listing.
 */
213 static struct socket *
214 lnet_sock_create(int interface, struct sockaddr *remaddr,
215 int local_port, struct net *ns)
/* sock_create_kern() only takes the namespace on some kernels */
221 #ifdef HAVE_SOCK_CREATE_KERN_USE_NET
222 rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock);
224 rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
227 CERROR("Can't create socket: %d\n", rc);
232 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
233 (char *)&option, sizeof(option));
235 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
/* Bind only when a specific interface or port was requested */
239 if (interface >= 0 || local_port != 0) {
240 struct sockaddr_in locaddr = {};
/* Start from the wildcard address; replaced below for an interface */
242 locaddr.sin_family = AF_INET;
243 locaddr.sin_addr.s_addr = INADDR_ANY;
244 if (interface >= 0) {
245 struct sockaddr_in *sin = (void *)remaddr;
/* choose_ipv4_src() works on host byte order addresses */
248 rc = choose_ipv4_src(&ip,
250 ntohl(sin->sin_addr.s_addr),
254 locaddr.sin_addr.s_addr = htonl(ip);
257 locaddr.sin_port = htons(local_port);
259 rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
/* A busy port is logged at debug level, other bind errors as errors */
261 if (rc == -EADDRINUSE) {
262 CDEBUG(D_NET, "Port %d already in use\n", local_port);
266 CERROR("Error trying to bind to port %d: %d\n",
/*
 * Set the send and/or receive buffer size options on sock.
 *
 * txbufsize - when non-zero, SO_SNDBUF is set
 * rxbufsize - when non-zero, SO_RCVBUF is set
 *
 * A size of zero leaves the corresponding option untouched. A failing
 * kernel_setsockopt() call is logged with CERROR.
 *
 * NOTE(review): the assignments to option and the return statements are
 * elided from this listing.
 */
279 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
/* Zero means: keep the current send buffer setting */
284 if (txbufsize != 0) {
286 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
287 (char *)&option, sizeof(option));
289 CERROR("Can't set send buffer %d: %d\n",
/* Zero means: keep the current receive buffer setting */
295 if (rxbufsize != 0) {
297 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
298 (char *)&option, sizeof(option));
300 CERROR("Can't set receive buffer %d: %d\n",
307 EXPORT_SYMBOL(lnet_sock_setbuf);
/*
 * Fetch the IPv4 address and port of one end of sock.
 *
 * remote - selects the peer end (lnet_kernel_getpeername) or the local
 *          end (lnet_kernel_getsockname)
 * ip     - receives the address in host byte order
 * port   - receives the port in host byte order
 *
 * A lookup failure is logged with CERROR.
 *
 * NOTE(review): the conditions guarding the writes to ip and port and the
 * return statements are elided from this listing.
 */
310 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
312 struct sockaddr_in sin;
/* len is only declared when HAVE_KERN_SOCK_GETNAME_2ARGS is not defined */
314 #ifndef HAVE_KERN_SOCK_GETNAME_2ARGS
315 int len = sizeof(sin);
319 rc = lnet_kernel_getpeername(sock,
320 (struct sockaddr *)&sin, &len);
322 rc = lnet_kernel_getsockname(sock,
323 (struct sockaddr *)&sin, &len);
325 CERROR("Error %d getting sock %s IP/port\n",
326 rc, remote ? "peer" : "local");
/* Convert from network to host byte order for the caller */
331 *ip = ntohl(sin.sin_addr.s_addr);
334 *port = ntohs(sin.sin_port);
338 EXPORT_SYMBOL(lnet_sock_getaddr);
/*
 * Report the current send and receive buffer sizes of sock, read from
 * sk_sndbuf and sk_rcvbuf of its struct sock.
 *
 * txbufsize and rxbufsize may each be NULL, in which case that value is
 * not reported.
 *
 * NOTE(review): the return statement is elided from this listing.
 */
341 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
343 if (txbufsize != NULL)
344 *txbufsize = sock->sk->sk_sndbuf;
346 if (rxbufsize != NULL)
347 *rxbufsize = sock->sk->sk_rcvbuf;
351 EXPORT_SYMBOL(lnet_sock_getbuf);
/*
 * Create a socket on local_port in namespace ns and put it into the
 * listening state with the given backlog.
 *
 * The socket comes from lnet_sock_create() with no interface (-1) and no
 * peer address (NULL). A port that is already in use and a failing
 * kernel_listen() call are both logged with CERROR.
 *
 * NOTE(review): the return statements and cleanup paths are elided from
 * this listing.
 */
354 lnet_sock_listen(int local_port, int backlog, struct net *ns)
/* No interface and no peer address: only the port matters here */
359 sock = lnet_sock_create(-1, NULL, local_port, ns);
362 if (rc == -EADDRINUSE)
363 CERROR("Can't create socket: port %d already in use\n",
368 rc = kernel_listen(sock, backlog);
372 CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
/*
 * Create a socket with lnet_sock_create() and connect it to the IPv4 peer
 * peer_ip/peer_port.
 *
 * peer_ip and peer_port are taken in host byte order and converted to
 * network byte order for the sockaddr. A connect failure is logged at
 * D_NET level for -EADDRNOTAVAIL and at D_NETERROR level otherwise.
 *
 * NOTE(review): the rest of the parameter list, the return statements and
 * the cleanup paths are elided from this listing.
 */
378 lnet_sock_connect(int interface, int local_port,
379 __u32 peer_ip, int peer_port,
383 struct sockaddr_in srvaddr;
/* Build the peer address in network byte order */
386 memset(&srvaddr, 0, sizeof(srvaddr));
387 srvaddr.sin_family = AF_INET;
388 srvaddr.sin_port = htons(peer_port);
389 srvaddr.sin_addr.s_addr = htonl(peer_ip);
/* The peer address is also handed to socket creation */
391 sock = lnet_sock_create(interface, (struct sockaddr *)&srvaddr,
396 rc = kernel_connect(sock, (struct sockaddr *)&srvaddr,
401 /* EADDRNOTAVAIL probably means we're already connected to the same
402 * peer/port on the same local port on a differently typed
403 * connection. Let our caller retry with a different local
406 CDEBUG_LIMIT(rc == -EADDRNOTAVAIL ? D_NET : D_NETERROR,
407 "Error %d connecting %d -> %pI4h/%d\n", rc,
408 local_port, &peer_ip, peer_port);