/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 only, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is included * in the LICENSE file that accompanied this code). * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * * Copyright (c) 2015, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. */ #define DEBUG_SUBSYSTEM S_LNET #include #include #include #include #include /* For sys_open & sys_close */ #include #include #include #include static int lnet_sock_ioctl(int cmd, unsigned long arg) { struct socket *sock; int rc; #ifdef HAVE_SOCK_CREATE_KERN_USE_NET rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); #else rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); #endif if (rc != 0) { CERROR("Can't create socket: %d\n", rc); return rc; } if (cmd == SIOCGIFFLAGS) { /* This cmd is used only to get IFF_UP flag */ struct ifreq *ifr = (struct ifreq *) arg; struct net_device *dev; dev = dev_get_by_name(sock_net(sock->sk), ifr->ifr_name); if (dev) { ifr->ifr_flags = dev->flags; dev_put(dev); rc = 0; } else { rc = -ENODEV; } } else { rc = kernel_sock_ioctl(sock, cmd, arg); } sock_release(sock); return rc; } int lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) { struct ifreq ifr; int nob; int rc; __u32 val; nob = strnlen(name, IFNAMSIZ); if (nob == IFNAMSIZ) { CERROR("Interface name %s too long\n", name); return -EINVAL; } CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ); if (strlen(name) > sizeof(ifr.ifr_name)-1) return -E2BIG; strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); if (rc != 0) { CERROR("Can't get flags for interface %s\n", name); return rc; } if ((ifr.ifr_flags & IFF_UP) == 0) { CDEBUG(D_NET, "Interface %s down\n", name); *up = 0; *ip = *mask = 0; return 0; } *up = 1; if (strlen(name) > sizeof(ifr.ifr_name)-1) return -E2BIG; strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_addr.sa_family = AF_INET; rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); if (rc != 0) { CERROR("Can't get IP address for interface %s\n", name); return rc; } val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; *ip = ntohl(val); if (strlen(name) > sizeof(ifr.ifr_name)-1) return -E2BIG; strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); ifr.ifr_addr.sa_family = AF_INET; rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); if (rc != 0) { CERROR("Can't get netmask for interface %s\n", name); return rc; } val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; *mask = ntohl(val); return 0; } EXPORT_SYMBOL(lnet_ipif_query); void lnet_ipif_free_enumeration(char **names, int n) { int i; LASSERT(n > 0); for (i = 0; i < n && names[i] != NULL; i++) LIBCFS_FREE(names[i], IFNAMSIZ); LIBCFS_FREE(names, n * sizeof(*names)); } EXPORT_SYMBOL(lnet_ipif_free_enumeration); int lnet_ipif_enumerate(char ***namesp) { /* Allocate and fill in 'names', returning # interfaces/error */ struct net_device *dev; struct socket *sock; char **names; int toobig; int nalloc; int nfound; int rc; int nob; int i; nalloc = 16; /* first guess at max interfaces */ toobig = 0; nfound = 0; #ifdef HAVE_SOCK_CREATE_KERN_USE_NET rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); #else rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); #endif if (rc) { CERROR("Can't create socket: %d\n", rc); return rc; } for_each_netdev(sock_net(sock->sk), dev) nfound++; if (nfound == 0) goto out_release_sock; LIBCFS_ALLOC(names, nfound * sizeof(*names)); if (names == NULL) { rc = -ENOMEM; goto out_release_sock; } i = 0; for_each_netdev(sock_net(sock->sk), dev) { nob = strnlen(dev->name, IFNAMSIZ); if (nob == IFNAMSIZ) { /* no space for terminating NULL */ CERROR("interface name %.*s too long (%d max)\n", nob, dev->name, IFNAMSIZ); rc = -ENAMETOOLONG; goto out_free_names; } LIBCFS_ALLOC(names[i], IFNAMSIZ); if (!names[i]) { rc = -ENOMEM; goto out_free_names; } memcpy(names[i], dev->name, nob); names[i][nob] = 0; i++; } *namesp = names; rc = i; out_free_names: if (rc < 0) lnet_ipif_free_enumeration(names, nfound); out_release_sock: sock_release(sock); return rc; } EXPORT_SYMBOL(lnet_ipif_enumerate); int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) { int rc; long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); unsigned long then; struct timeval tv; LASSERT(nob > 0); /* Caller may pass a zero timeout if she thinks the socket buffer is * empty enough to take the whole message immediately */ for (;;) { struct kvec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0 }; if (timeout != 0) { /* Set send timeout to remaining time */ tv = (struct timeval) { .tv_sec = jiffies_left / msecs_to_jiffies(MSEC_PER_SEC), .tv_usec = ((jiffies_left % msecs_to_jiffies(MSEC_PER_SEC)) * USEC_PER_SEC) / msecs_to_jiffies(MSEC_PER_SEC) }; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket send timeout " "%ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } } then = jiffies; rc = kernel_sendmsg(sock, &msg, &iov, 1, nob); jiffies_left -= jiffies - then; if (rc == nob) return 0; if (rc < 0) return rc; if (rc == 0) { CERROR("Unexpected zero rc\n"); return -ECONNABORTED; } if (jiffies_left <= 0) return -EAGAIN; buffer = ((char *)buffer) + rc; nob -= rc; } return 0; } EXPORT_SYMBOL(lnet_sock_write); int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) { int rc; long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); unsigned long then; struct timeval tv; LASSERT(nob > 0); LASSERT(jiffies_left > 0); for (;;) { struct kvec iov = { .iov_base = buffer, .iov_len = nob }; struct msghdr msg = { .msg_flags = 0 }; /* Set receive timeout to remaining time */ tv = (struct timeval) { .tv_sec = jiffies_left / msecs_to_jiffies(MSEC_PER_SEC), .tv_usec = ((jiffies_left % msecs_to_jiffies(MSEC_PER_SEC)) * USEC_PER_SEC) / msecs_to_jiffies(MSEC_PER_SEC) }; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); if (rc != 0) { CERROR("Can't set socket recv timeout %ld.%06d: %d\n", (long)tv.tv_sec, (int)tv.tv_usec, rc); return rc; } then = jiffies; rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0); jiffies_left -= jiffies - then; if (rc < 0) return rc; if (rc == 0) return -ECONNRESET; buffer = ((char *)buffer) + rc; nob -= rc; if (nob == 0) return 0; if (jiffies_left <= 0) return -ETIMEDOUT; } } EXPORT_SYMBOL(lnet_sock_read); static int lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip, int local_port) { struct sockaddr_in locaddr; struct socket *sock; int rc; int option; /* All errors are fatal except bind failure if the port is in use */ *fatal = 1; #ifdef HAVE_SOCK_CREATE_KERN_USE_NET rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); #else rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); #endif *sockp = sock; if (rc != 0) { CERROR("Can't create socket: %d\n", rc); return rc; } option = 1; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&option, sizeof(option)); if (rc != 0) { CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); goto failed; } if (local_ip != 0 || local_port != 0) { memset(&locaddr, 0, sizeof(locaddr)); locaddr.sin_family = AF_INET; locaddr.sin_port = htons(local_port); locaddr.sin_addr.s_addr = (local_ip == 0) ? INADDR_ANY : htonl(local_ip); rc = kernel_bind(sock, (struct sockaddr *)&locaddr, sizeof(locaddr)); if (rc == -EADDRINUSE) { CDEBUG(D_NET, "Port %d already in use\n", local_port); *fatal = 0; goto failed; } if (rc != 0) { CERROR("Error trying to bind to port %d: %d\n", local_port, rc); goto failed; } } return 0; failed: sock_release(sock); return rc; } int lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize) { int option; int rc; if (txbufsize != 0) { option = txbufsize; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, (char *)&option, sizeof(option)); if (rc != 0) { CERROR("Can't set send buffer %d: %d\n", option, rc); return rc; } } if (rxbufsize != 0) { option = rxbufsize; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, (char *)&option, sizeof(option)); if (rc != 0) { CERROR("Can't set receive buffer %d: %d\n", option, rc); return rc; } } return 0; } EXPORT_SYMBOL(lnet_sock_setbuf); int lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port) { struct sockaddr_in sin; int rc; #ifndef HAVE_KERN_SOCK_GETNAME_2ARGS int len = sizeof(sin); #endif if (remote) rc = lnet_kernel_getpeername(sock, (struct sockaddr *)&sin, &len); else rc = lnet_kernel_getsockname(sock, (struct sockaddr *)&sin, &len); if (rc < 0) { CERROR("Error %d getting sock %s IP/port\n", rc, remote ? "peer" : "local"); return rc; } if (ip != NULL) *ip = ntohl(sin.sin_addr.s_addr); if (port != NULL) *port = ntohs(sin.sin_port); return 0; } EXPORT_SYMBOL(lnet_sock_getaddr); int lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) { if (txbufsize != NULL) *txbufsize = sock->sk->sk_sndbuf; if (rxbufsize != NULL) *rxbufsize = sock->sk->sk_rcvbuf; return 0; } EXPORT_SYMBOL(lnet_sock_getbuf); int lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port, int backlog) { int fatal; int rc; rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); if (rc != 0) { if (!fatal) CERROR("Can't create socket: port %d already in use\n", local_port); return rc; } rc = kernel_listen(*sockp, backlog); if (rc == 0) return 0; CERROR("Can't set listen backlog %d: %d\n", backlog, rc); sock_release(*sockp); return rc; } #ifndef HAVE_SK_SLEEP static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } #endif int lnet_sock_accept(struct socket **newsockp, struct socket *sock) { wait_queue_entry_t wait; struct socket *newsock; int rc; /* XXX this should add a ref to sock->ops->owner, if * TCP could be a module */ rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); if (rc) { CERROR("Can't allocate socket\n"); return rc; } newsock->ops = sock->ops; #ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); #else rc = sock->ops->accept(sock, newsock, O_NONBLOCK); #endif if (rc == -EAGAIN) { /* Nothing ready, so wait for activity */ init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sock->sk), &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(sk_sleep(sock->sk), &wait); #ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); #else rc = sock->ops->accept(sock, newsock, O_NONBLOCK); #endif } if (rc != 0) goto failed; *newsockp = newsock; return 0; failed: sock_release(newsock); return rc; } int lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, int local_port, __u32 peer_ip, int peer_port) { struct sockaddr_in srvaddr; int rc; rc = lnet_sock_create(sockp, fatal, local_ip, local_port); if (rc != 0) return rc; memset(&srvaddr, 0, sizeof(srvaddr)); srvaddr.sin_family = AF_INET; srvaddr.sin_port = htons(peer_port); srvaddr.sin_addr.s_addr = htonl(peer_ip); rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, sizeof(srvaddr), 0); if (rc == 0) return 0; /* EADDRNOTAVAIL probably means we're already connected to the same * peer/port on the same local port on a differently typed * connection. Let our caller retry with a different local * port... */ *fatal = !(rc == -EADDRNOTAVAIL); CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc, &local_ip, local_port, &peer_ip, peer_port); sock_release(*sockp); return rc; }