4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 #define DEBUG_SUBSYSTEM S_LNET
38 #ifdef HAVE_COMPAT_RDMA
39 #include <linux/compat-2.6.h>
43 #include <linux/net.h>
44 #include <linux/file.h>
45 #include <linux/pagemap.h>
46 /* For sys_open & sys_close */
47 #include <linux/syscalls.h>
50 #include <libcfs/libcfs.h>
51 #include <lnet/lib-lnet.h>
/*
 * Issue ioctl cmd with argument arg on the open file filp by calling the
 * unlocked_ioctl method found in the file operations table of filp.
 * Both cmd and arg are handed to that method unchanged.
 */
54 kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
/*
 * Record the address limit currently reported by get_fs().
 * NOTE(review): presumably saved so that it can be restored after the
 * ioctl; the code that changes and restores it is not visible in this
 * excerpt, confirm against the full file.
 */
56 mm_segment_t oldfs = get_fs();
/* Call the handler of the file itself; its result is stored in err. */
60 err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
/*
 * Run ioctl cmd with argument arg against a temporary PF_INET/SOCK_STREAM
 * socket created just for this call.
 *
 * How the socket is made usable for the ioctl is selected at build time:
 * sock_map_fd() when neither HAVE_SOCK_ALLOC_FILE nor
 * HAVE_SOCK_ALLOC_FILE_3ARGS is defined, otherwise sock_alloc_file() in its
 * three-argument or two-argument form.
 */
67 lnet_sock_ioctl(int cmd, unsigned long arg)
69 struct file *sock_filp;
/* Create the scratch socket; a failure is logged with the error code. */
74 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
76 CERROR("Can't create socket: %d\n", rc);
/* Variant without sock_alloc_file(): map the socket to a descriptor. */
80 #if !defined(HAVE_SOCK_ALLOC_FILE) && !defined(HAVE_SOCK_ALLOC_FILE_3ARGS)
81 fd = sock_map_fd(sock, 0);
/* Variants with sock_alloc_file(): obtain a struct file for the socket. */
89 # ifdef HAVE_SOCK_ALLOC_FILE_3ARGS
90 sock_filp = sock_alloc_file(sock, 0, NULL);
92 sock_filp = sock_alloc_file(sock, 0);
/* sock_alloc_file() reports failure as an error pointer; decode it into rc. */
95 if (IS_ERR(sock_filp)) {
96 rc = PTR_ERR(sock_filp);
/* Perform the requested ioctl on the file backing the socket. */
101 rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
/*
 * Query the interface called name for its flags, IPv4 address and IPv4
 * netmask using the SIOCGIFFLAGS, SIOCGIFADDR and SIOCGIFNETMASK ioctls.
 *
 * name  interface name; it must be shorter than IFNAMSIZ bytes
 * up    output, see note below
 * ip    output, see note below
 * mask  output, see note below
 *
 * NOTE(review): the stores into up, ip and mask and the return statements
 * are not visible in this excerpt; presumably up reflects IFF_UP and ip/mask
 * receive the values read into val. Confirm against the full file,
 * including the byte order of the stored values.
 */
111 lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
/* Reject names that leave no room for a terminating NUL in IFNAMSIZ bytes. */
118 nob = strnlen(name, IFNAMSIZ);
119 if (nob == IFNAMSIZ) {
120 CERROR("Interface name %s too long\n", name);
/* Build-time check that ifr_name can hold any name accepted above. */
124 CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
/* Step 1: interface flags. The name is length-checked again before copying. */
126 if (strlen(name) > sizeof(ifr.ifr_name)-1)
128 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
130 rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
132 CERROR("Can't get flags for interface %s\n", name);
/* An interface without IFF_UP is only reported at debug level. */
136 if ((ifr.ifr_flags & IFF_UP) == 0) {
137 CDEBUG(D_NET, "Interface %s down\n", name);
/* Step 2: IPv4 address. The request structure is filled in afresh. */
144 if (strlen(name) > sizeof(ifr.ifr_name)-1)
146 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
148 ifr.ifr_addr.sa_family = AF_INET;
149 rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
152 CERROR("Can't get IP address for interface %s\n", name);
/* Raw s_addr value exactly as returned by the ioctl. */
156 val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
/* Step 3: IPv4 netmask, using the same request pattern. */
159 if (strlen(name) > sizeof(ifr.ifr_name)-1)
161 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
163 ifr.ifr_addr.sa_family = AF_INET;
164 rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
166 CERROR("Can't get netmask for interface %s\n", name);
/* Raw s_addr value of the netmask exactly as returned by the ioctl. */
170 val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
175 EXPORT_SYMBOL(lnet_ipif_query);
/*
 * Release an array of interface names.
 *
 * names  array of n name pointers
 * n      number of slots in names
 *
 * Each name is freed as an IFNAMSIZ-byte allocation and the array itself as
 * n pointer-sized slots, so both must have been allocated with those sizes.
 */
178 lnet_ipif_free_enumeration(char **names, int n)
/* Stop at the first NULL slot: the array may be only partially filled. */
184 for (i = 0; i < n && names[i] != NULL; i++)
185 LIBCFS_FREE(names[i], IFNAMSIZ);
/* Finally release the pointer array itself. */
187 LIBCFS_FREE(names, n * sizeof(*names));
189 EXPORT_SYMBOL(lnet_ipif_free_enumeration);
/*
 * Enumerate network interfaces with the SIOCGIFCONF ioctl and build an array
 * of interface name strings.
 *
 * namesp  output for the allocated name array
 *
 * Every name is stored in its own IFNAMSIZ-byte allocation inside an array
 * of nfound pointers, which matches what lnet_ipif_free_enumeration()
 * releases.
 *
 * NOTE(review): the loop that retries with a larger buffer, the store into
 * namesp and the return statements are not visible in this excerpt; confirm
 * the exact return convention against the full file.
 */
192 lnet_ipif_enumerate(char ***namesp)
194 /* Allocate and fill in 'names', returning # interfaces/error */
205 nalloc = 16; /* first guess at max interfaces */
/* Never request more than one page of ifreq entries; clamp and warn. */
208 if (nalloc * sizeof(*ifr) > PAGE_CACHE_SIZE) {
210 nalloc = PAGE_CACHE_SIZE/sizeof(*ifr);
211 CWARN("Too many interfaces: only enumerating "
212 "first %d\n", nalloc);
/* Buffer the ioctl fills with one ifreq per interface. */
215 LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
217 CERROR("ENOMEM enumerating up to %d interfaces\n",
/* Describe the buffer to the kernel and ask for the interface list. */
223 ifc.ifc_buf = (char *)ifr;
224 ifc.ifc_len = nalloc * sizeof(*ifr);
226 rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
228 CERROR("Error %d enumerating interfaces\n", rc);
/* ifc_len now holds the number of bytes written; convert to entries. */
234 nfound = ifc.ifc_len/sizeof(*ifr);
235 LASSERT(nfound <= nalloc);
/*
 * Fewer entries than slots, or a buffer already clamped to its maximum.
 * NOTE(review): presumably this ends the search and the free below belongs
 * to the retry path; the surrounding control flow is not visible here.
 */
237 if (nfound < nalloc || toobig)
240 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
/* One pointer slot per interface found. */
247 LIBCFS_ALLOC(names, nfound * sizeof(*names));
253 for (i = 0; i < nfound; i++) {
/* Length of the name as returned by the kernel, capped at IFNAMSIZ. */
254 nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
255 if (nob == IFNAMSIZ) {
256 /* no space for terminating NUL */
257 CERROR("interface name %.*s too long (%d max)\n",
258 nob, ifr[i].ifr_name, IFNAMSIZ);
/* Fixed-size buffer per name, as expected by the free routine. */
263 LIBCFS_ALLOC(names[i], IFNAMSIZ);
264 if (names[i] == NULL) {
/* Copy only the nob name bytes measured above. */
269 memcpy(names[i], ifr[i].ifr_name, nob);
/*
 * NOTE(review): presumably the error path, releasing whatever part of the
 * name array has been filled in so far; confirm against the full file.
 */
278 lnet_ipif_free_enumeration(names, nfound);
/* The ifreq scratch buffer is released with the size it was allocated with. */
280 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
284 EXPORT_SYMBOL(lnet_ipif_enumerate);
/*
 * Send nob bytes from buffer on sock within a time budget.
 *
 * sock     socket to send on
 * buffer   data to send
 * nob      number of bytes to send
 * timeout  time budget, scaled by the number of jiffies in MSEC_PER_SEC
 *          milliseconds (that is, expressed in seconds); zero selects a
 *          non-blocking send
 *
 * NOTE(review): the loop header, the successful return and most error
 * returns are not visible in this excerpt; confirm the return convention
 * against the full file.
 */
287 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
/* Whole budget converted to jiffies. */
290 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
295 /* Caller may pass a zero timeout if she thinks the socket buffer is
296 * empty enough to take the whole message immediately */
/* With a zero timeout the send must not block. */
303 struct msghdr msg = {
304 .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
308 /* Set send timeout to remaining time */
309 tv = (struct timeval) {
310 .tv_sec = jiffies_left /
311 msecs_to_jiffies(MSEC_PER_SEC),
312 .tv_usec = ((jiffies_left %
313 msecs_to_jiffies(MSEC_PER_SEC)) *
315 msecs_to_jiffies(MSEC_PER_SEC)
318 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
319 (char *)&tv, sizeof(tv));
321 CERROR("Can't set socket send timeout "
323 (long)tv.tv_sec, (int)tv.tv_usec, rc);
/* Send up to nob bytes, then charge the elapsed jiffies to the budget. */
329 rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
330 jiffies_left -= jiffies - then;
/*
 * A zero result is logged as unexpected and reported as -ECONNABORTED.
 * NOTE(review): the guarding condition is not visible in this excerpt.
 */
339 CERROR("Unexpected zero rc\n");
340 return -ECONNABORTED;
/* Budget used up. NOTE(review): the action taken is not visible here. */
343 if (jiffies_left <= 0)
/* Step past the rc bytes just handled before the next attempt. */
346 buffer = ((char *)buffer) + rc;
351 EXPORT_SYMBOL(lnet_sock_write);
/*
 * Receive nob bytes from sock into buffer within a time budget.
 *
 * sock     socket to receive from
 * buffer   destination for the received data
 * nob      number of bytes to receive
 * timeout  time budget, scaled by the number of jiffies in MSEC_PER_SEC
 *          milliseconds (that is, expressed in seconds); it must yield a
 *          positive number of jiffies, which is asserted below
 *
 * NOTE(review): the loop header and the return statements are not visible
 * in this excerpt; confirm the return convention against the full file.
 */
354 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
/* Whole budget converted to jiffies. */
357 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
/* Unlike the write path, a zero or negative budget is a caller bug here. */
362 LASSERT(jiffies_left > 0);
369 struct msghdr msg = {
373 /* Set receive timeout to remaining time */
374 tv = (struct timeval) {
375 .tv_sec = jiffies_left / msecs_to_jiffies(MSEC_PER_SEC),
376 .tv_usec = ((jiffies_left %
377 msecs_to_jiffies(MSEC_PER_SEC)) *
379 msecs_to_jiffies(MSEC_PER_SEC)
381 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
382 (char *)&tv, sizeof(tv));
384 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
385 (long)tv.tv_sec, (int)tv.tv_usec, rc);
/* Receive up to nob bytes, then charge the elapsed jiffies to the budget. */
390 rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
391 jiffies_left -= jiffies - then;
/* Step past the rc bytes just received. */
399 buffer = ((char *)buffer) + rc;
/* Budget used up. NOTE(review): the action taken is not visible here. */
405 if (jiffies_left <= 0)
409 EXPORT_SYMBOL(lnet_sock_read);
/*
 * Create a PF_INET/SOCK_STREAM socket with SO_REUSEADDR set and, when a
 * local address or port is requested, bind it.
 *
 * sockp       output for the new socket
 * fatal       output flag describing whether a failure is fatal
 * local_ip    local IPv4 address in host byte order (converted with htonl
 *             below); zero selects INADDR_ANY
 * local_port  local port in host byte order (converted with htons below)
 *
 * NOTE(review): the stores into sockp and fatal, the cleanup on failure and
 * the return statements are not visible in this excerpt.
 */
412 lnet_sock_create(struct socket **sockp, int *fatal,
413 __u32 local_ip, int local_port)
415 struct sockaddr_in locaddr;
420 /* All errors are fatal except bind failure if the port is in use */
423 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
426 CERROR("Can't create socket: %d\n", rc);
/* Set SO_REUSEADDR from the value held in option. */
431 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
432 (char *)&option, sizeof(option));
434 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
/* Bind only when the caller asked for a specific local address or port. */
438 if (local_ip != 0 || local_port != 0) {
439 memset(&locaddr, 0, sizeof(locaddr));
440 locaddr.sin_family = AF_INET;
441 locaddr.sin_port = htons(local_port);
442 locaddr.sin_addr.s_addr = (local_ip == 0) ?
443 INADDR_ANY : htonl(local_ip);
445 rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
/* A busy port is expected now and then, so it is logged at debug level only. */
447 if (rc == -EADDRINUSE) {
448 CDEBUG(D_NET, "Port %d already in use\n", local_port);
/* Any other bind failure is logged as an error. */
453 CERROR("Error trying to bind to port %d: %d\n",
/*
 * Set the send and/or receive buffer size of sock through the SO_SNDBUF and
 * SO_RCVBUF socket options.
 *
 * sock       socket to configure
 * txbufsize  requested send buffer size; zero leaves it untouched
 * rxbufsize  requested receive buffer size; zero leaves it untouched
 *
 * NOTE(review): the assignments to option and the return statements are not
 * visible in this excerpt.
 */
466 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
/* Send side, skipped when no size was requested. */
471 if (txbufsize != 0) {
473 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
474 (char *)&option, sizeof(option));
476 CERROR("Can't set send buffer %d: %d\n",
/* Receive side, skipped when no size was requested. */
482 if (rxbufsize != 0) {
484 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
485 (char *)&option, sizeof(option));
487 CERROR("Can't set receive buffer %d: %d\n",
494 EXPORT_SYMBOL(lnet_sock_setbuf);
/*
 * Fetch the IPv4 address and port of one end of sock.
 *
 * sock    socket to query
 * remote  selects which end is reported; the error message below names it
 *         peer when true and local when false
 * ip      output, address converted to host byte order with ntohl
 * port    output, port converted to host byte order with ntohs
 *
 * NOTE(review): the branch choosing between the two kernel calls, any NULL
 * checks on ip and port and the return statements are not visible in this
 * excerpt.
 */
497 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
499 struct sockaddr_in sin;
500 int len = sizeof(sin);
/* Address of the peer end of the connection. */
504 rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
/* Address of the local end of the connection. */
506 rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
508 CERROR("Error %d getting sock %s IP/port\n",
509 rc, remote ? "peer" : "local");
/* Hand the results back in host byte order. */
514 *ip = ntohl(sin.sin_addr.s_addr);
517 *port = ntohs(sin.sin_port);
521 EXPORT_SYMBOL(lnet_sock_getaddr);
/*
 * Report the current send and receive buffer sizes of sock, read straight
 * from the sk_sndbuf and sk_rcvbuf fields of the underlying struct sock.
 *
 * sock       socket to inspect
 * txbufsize  output for the send buffer size; may be NULL to skip it
 * rxbufsize  output for the receive buffer size; may be NULL to skip it
 */
524 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
526 if (txbufsize != NULL)
527 *txbufsize = sock->sk->sk_sndbuf;
529 if (rxbufsize != NULL)
530 *rxbufsize = sock->sk->sk_rcvbuf;
534 EXPORT_SYMBOL(lnet_sock_getbuf);
/*
 * Create a socket bound to local_ip/local_port with lnet_sock_create() and
 * put it into the listening state.
 *
 * sockp       output for the listening socket
 * local_ip    local address, passed through to lnet_sock_create()
 * local_port  local port, passed through to lnet_sock_create()
 * backlog     backlog value handed to kernel_listen()
 *
 * NOTE(review): the conditions around the error messages and the return
 * statements are not visible in this excerpt.
 */
537 lnet_sock_listen(struct socket **sockp,
538 __u32 local_ip, int local_port, int backlog)
543 rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
/* Creation failure attributed to the port being busy. */
546 CERROR("Can't create socket: port %d already in use\n",
551 rc = kernel_listen(*sockp, backlog);
/* Listen failure: log it and drop the socket created above. */
555 CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
556 sock_release(*sockp);
/*
 * Local definition of sk_sleep(), compiled only when HAVE_SK_SLEEP is not
 * defined. It takes a struct sock and returns a wait queue head pointer.
 * NOTE(review): the body is not visible in this excerpt.
 */
560 #ifndef HAVE_SK_SLEEP
561 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
/*
 * Accept one connection on the listening socket sock.
 *
 * newsockp  output for the accepted socket
 * sock      listening socket
 *
 * A non-blocking accept is tried first. The task then queues itself on the
 * wait queue of the listening socket in TASK_INTERRUPTIBLE state and tries
 * the non-blocking accept once more.
 *
 * NOTE(review): the condition selecting the wait path, the call that
 * actually sleeps, the store into newsockp and the return statements are
 * not visible in this excerpt.
 */
568 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
571 struct socket *newsock;
574 /* XXX this should add a ref to sock->ops->owner, if
575 * TCP could be a module */
/*
 * NOTE(review): the family argument here is PF_PACKET although the
 * operations are copied from the listening socket just below; confirm that
 * this is intended.
 */
576 rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
578 CERROR("Can't allocate socket\n");
/* The new socket uses the same protocol operations as the listener. */
582 newsock->ops = sock->ops;
/* First attempt, without blocking. */
584 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
586 /* Nothing ready, so wait for activity */
587 init_waitqueue_entry(&wait, current);
588 add_wait_queue(sk_sleep(sock->sk), &wait);
589 set_current_state(TASK_INTERRUPTIBLE);
/* Leave the wait queue and retry, again without blocking. */
591 remove_wait_queue(sk_sleep(sock->sk), &wait);
592 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
/* NOTE(review): presumably the failure path, dropping the unused socket. */
602 sock_release(newsock);
/*
 * Create a socket with lnet_sock_create() and connect it to a peer.
 *
 * sockp       output for the connected socket
 * fatal       output flag; after a connect error it is set to false only
 *             for -EADDRNOTAVAIL and to true for every other error
 * local_ip    local address, passed through to lnet_sock_create()
 * local_port  local port, passed through to lnet_sock_create()
 * peer_ip     peer IPv4 address in host byte order (converted with htonl)
 * peer_port   peer port in host byte order (converted with htons)
 *
 * On a connect error the failure is logged at D_NETERROR level when fatal
 * and at D_NET level otherwise, and the socket is released.
 *
 * NOTE(review): the success test after kernel_connect() and the return
 * statements are not visible in this excerpt.
 */
607 lnet_sock_connect(struct socket **sockp, int *fatal,
608 __u32 local_ip, int local_port,
609 __u32 peer_ip, int peer_port)
611 struct sockaddr_in srvaddr;
614 rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
/* Peer address in network byte order, with unused fields zeroed. */
618 memset(&srvaddr, 0, sizeof(srvaddr));
619 srvaddr.sin_family = AF_INET;
620 srvaddr.sin_port = htons(peer_port);
621 srvaddr.sin_addr.s_addr = htonl(peer_ip);
623 rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
628 /* EADDRNOTAVAIL probably means we're already connected to the same
629 * peer/port on the same local port on a differently typed
630 * connection. Let our caller retry with a different local
632 *fatal = !(rc == -EADDRNOTAVAIL);
634 CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
635 "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
636 &local_ip, local_port, &peer_ip, peer_port);
638 sock_release(*sockp);