/* For sys_open & sys_close */
#include <linux/syscalls.h>
#include <net/sock.h>
+#include <linux/inetdevice.h>
+#include <libcfs/linux/linux-time.h>
#include <libcfs/libcfs.h>
#include <lnet/lib-lnet.h>
-static int
-lnet_sock_ioctl(int cmd, unsigned long arg)
-{
- struct socket *sock;
- int rc;
-
-#ifdef HAVE_SOCK_CREATE_KERN_USE_NET
- rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
-#else
- rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
-#endif
- if (rc != 0) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- if (cmd == SIOCGIFFLAGS) {
- /* This cmd is used only to get IFF_UP flag */
- struct ifreq *ifr = (struct ifreq *) arg;
- struct net_device *dev;
-
- dev = dev_get_by_name(sock_net(sock->sk), ifr->ifr_name);
- if (dev) {
- ifr->ifr_flags = dev->flags;
- dev_put(dev);
- rc = 0;
- } else {
- rc = -ENODEV;
- }
- } else {
- rc = kernel_sock_ioctl(sock, cmd, arg);
- }
- sock_release(sock);
-
- return rc;
-}
-
-int
-lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
-{
- struct ifreq ifr;
- int nob;
- int rc;
- __u32 val;
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- return -EINVAL;
- }
-
- CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
-
- if (strlen(name) > sizeof(ifr.ifr_name)-1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
- if (rc != 0) {
- CERROR("Can't get flags for interface %s\n", name);
- return rc;
- }
-
- if ((ifr.ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_NET, "Interface %s down\n", name);
- *up = 0;
- *ip = *mask = 0;
- return 0;
- }
- *up = 1;
-
- if (strlen(name) > sizeof(ifr.ifr_name)-1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
-
- if (rc != 0) {
- CERROR("Can't get IP address for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- if (strlen(name) > sizeof(ifr.ifr_name)-1)
- return -E2BIG;
- strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
-
- ifr.ifr_addr.sa_family = AF_INET;
- rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
- if (rc != 0) {
- CERROR("Can't get netmask for interface %s\n", name);
- return rc;
- }
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
-
- return 0;
-}
-EXPORT_SYMBOL(lnet_ipif_query);
-
-void
-lnet_ipif_free_enumeration(char **names, int n)
-{
- int i;
-
- LASSERT(n > 0);
-
- for (i = 0; i < n && names[i] != NULL; i++)
- LIBCFS_FREE(names[i], IFNAMSIZ);
-
- LIBCFS_FREE(names, n * sizeof(*names));
-}
-EXPORT_SYMBOL(lnet_ipif_free_enumeration);
-
-int
-lnet_ipif_enumerate(char ***namesp)
-{
- /* Allocate and fill in 'names', returning # interfaces/error */
- struct net_device *dev;
- struct socket *sock;
- char **names;
- int toobig;
- int nalloc;
- int nfound;
- int rc;
- int nob;
- int i;
-
- nalloc = 16; /* first guess at max interfaces */
- toobig = 0;
- nfound = 0;
-
-#ifdef HAVE_SOCK_CREATE_KERN_USE_NET
- rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
-#else
- rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
-#endif
- if (rc) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- for_each_netdev(sock_net(sock->sk), dev)
- nfound++;
-
- if (nfound == 0)
- goto out_release_sock;
-
- LIBCFS_ALLOC(names, nfound * sizeof(*names));
- if (names == NULL) {
- rc = -ENOMEM;
- goto out_release_sock;
- }
-
- i = 0;
- for_each_netdev(sock_net(sock->sk), dev) {
- nob = strnlen(dev->name, IFNAMSIZ);
- CERROR("netdev %s\n", dev->name);
- if (nob == IFNAMSIZ) {
- /* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
- nob, dev->name, IFNAMSIZ);
- rc = -ENAMETOOLONG;
- goto out_free_names;
- }
-
- LIBCFS_ALLOC(names[i], IFNAMSIZ);
- if (!names[i]) {
- rc = -ENOMEM;
- goto out_free_names;
- }
-
- memcpy(names[i], dev->name, nob);
- names[i][nob] = 0;
- i++;
- }
-
- *namesp = names;
- rc = i;
-
-out_free_names:
- if (rc < 0)
- lnet_ipif_free_enumeration(names, nfound);
-out_release_sock:
- sock_release(sock);
- return rc;
-}
-EXPORT_SYMBOL(lnet_ipif_enumerate);
-
int
lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
+ int rc;
+ long jiffies_left = cfs_time_seconds(timeout);
+ unsigned long then;
LASSERT(nob > 0);
/* Caller may pass a zero timeout if she thinks the socket buffer is
};
if (timeout != 0) {
+ struct sock *sk = sock->sk;
+
/* Set send timeout to remaining time */
- tv = (struct timeval) {
- .tv_sec = jiffies_left /
- msecs_to_jiffies(MSEC_PER_SEC),
- .tv_usec = ((jiffies_left %
- msecs_to_jiffies(MSEC_PER_SEC)) *
- USEC_PER_SEC) /
- msecs_to_jiffies(MSEC_PER_SEC)
- };
-
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc != 0) {
- CERROR("Can't set socket send timeout "
- "%ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
+ lock_sock(sk);
+ sk->sk_sndtimeo = jiffies_left;
+ release_sock(sk);
}
then = jiffies;
int
lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
{
- int rc;
- long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
- unsigned long then;
- struct timeval tv;
+ int rc;
+ long jiffies_left = cfs_time_seconds(timeout);
+ unsigned long then;
LASSERT(nob > 0);
LASSERT(jiffies_left > 0);
struct msghdr msg = {
.msg_flags = 0
};
+ struct sock *sk = sock->sk;
/* Set receive timeout to remaining time */
- tv = (struct timeval) {
- .tv_sec = jiffies_left / msecs_to_jiffies(MSEC_PER_SEC),
- .tv_usec = ((jiffies_left %
- msecs_to_jiffies(MSEC_PER_SEC)) *
- USEC_PER_SEC) /
- msecs_to_jiffies(MSEC_PER_SEC)
- };
- rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- if (rc != 0) {
- CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
- (long)tv.tv_sec, (int)tv.tv_usec, rc);
- return rc;
- }
+ lock_sock(sk);
+ sk->sk_rcvtimeo = jiffies_left;
+ release_sock(sk);
then = jiffies;
rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
}
EXPORT_SYMBOL(lnet_sock_read);
-static int
-lnet_sock_create(struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port)
+/*
+ * Choose a local IPv4 source address on a given interface.
+ *
+ * @ret:        out; receives the chosen address in host byte order
+ *              (it is stored via ntohl(ifa->ifa_local)).  Only
+ *              written when 0 is returned.
+ * @interface:  index of the net_device, looked up in @ns with
+ *              dev_get_by_index_rcu().
+ * @dst_ipaddr: destination address; it is compared against
+ *              ntohl()-converted interface addresses, so it must be
+ *              in host byte order.
+ * @ns:         network namespace in which @interface is resolved.
+ *
+ * Selection rule: the first address on the interface is always taken
+ * as a fallback; any later address whose subnet contains @dst_ipaddr
+ * replaces it, so the last such address wins.
+ *
+ * The whole lookup runs inside rcu_read_lock()/rcu_read_unlock().
+ *
+ * Returns 0 on success, -EINVAL if the device does not exist, is not
+ * IFF_UP, or has no in_device, and -ENOENT if the address walk finds
+ * no address at all.
+ */
+int choose_ipv4_src(__u32 *ret, int interface, __u32 dst_ipaddr, struct net *ns)
+{
+ struct net_device *dev;
+ struct in_device *in_dev;
+ int err;
+ DECLARE_CONST_IN_IFADDR(ifa);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(ns, interface);
+ err = -EINVAL;
+ if (!dev || !(dev->flags & IFF_UP))
+ goto out;
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev)
+ goto out;
+ err = -ENOENT;
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
+ if (err ||
+ ((dst_ipaddr ^ ntohl(ifa->ifa_local))
+ & ntohl(ifa->ifa_mask)) == 0) {
+ /* This address is at least as good as what we
+ * already have: either nothing has been chosen
+ * yet (err is still set), or dst_ipaddr lies in
+ * this address's subnet.
+ */
+ *ret = ntohl(ifa->ifa_local);
+ err = 0;
+ }
+ }
+ endfor_ifa(in_dev);
+out:
+ rcu_read_unlock();
+ return err;
+}
+EXPORT_SYMBOL(choose_ipv4_src);
+
+/*
+ * Create a kernel PF_INET/SOCK_STREAM socket with SO_REUSEADDR set and,
+ * if requested, bind it.
+ *
+ * @interface:  interface index to bind to, or a negative value for
+ *              "no particular interface".  When >= 0, the local
+ *              address is picked by choose_ipv4_src() using the IPv4
+ *              address in @remaddr.
+ * @remaddr:    peer address, read as a struct sockaddr_in.  It is only
+ *              dereferenced when @interface >= 0, so callers passing a
+ *              negative @interface may pass NULL.
+ * @local_port: local port to bind to (host byte order), or 0.
+ * @ns:         network namespace; passed to sock_create_kern() only
+ *              when HAVE_SOCK_CREATE_KERN_USE_NET is defined, and to
+ *              choose_ipv4_src().
+ *
+ * No bind is done when @interface < 0 and @local_port == 0.  Otherwise
+ * the socket is bound to INADDR_ANY, or to the chosen interface
+ * address, on @local_port.
+ *
+ * Returns the new socket, or ERR_PTR(-errno) on failure.  The socket is
+ * released on every failure after creation.  A bind failure with
+ * -EADDRINUSE is only logged at D_NET; callers can recognise it with
+ * PTR_ERR().
+ */
+static struct socket *
+lnet_sock_create(int interface, struct sockaddr *remaddr,
+ int local_port, struct net *ns)
{
- struct sockaddr_in locaddr;
struct socket *sock;
int rc;
int option;
- /* All errors are fatal except bind failure if the port is in use */
- *fatal = 1;
-
#ifdef HAVE_SOCK_CREATE_KERN_USE_NET
- rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
+ rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock);
#else
rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
#endif
- *sockp = sock;
- if (rc != 0) {
+ if (rc) {
CERROR("Can't create socket: %d\n", rc);
- return rc;
+ return ERR_PTR(rc);
}
option = 1;
rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
(char *)&option, sizeof(option));
- if (rc != 0) {
+ if (rc) {
CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
goto failed;
}
- if (local_ip != 0 || local_port != 0) {
- memset(&locaddr, 0, sizeof(locaddr));
+ if (interface >= 0 || local_port != 0) {
+ struct sockaddr_in locaddr = {};
+
locaddr.sin_family = AF_INET;
+ locaddr.sin_addr.s_addr = INADDR_ANY;
+ if (interface >= 0) {
+ struct sockaddr_in *sin = (void *)remaddr;
+ __u32 ip;
+
+ rc = choose_ipv4_src(&ip,
+ interface,
+ ntohl(sin->sin_addr.s_addr),
+ ns);
+ if (rc)
+ goto failed;
+ locaddr.sin_addr.s_addr = htonl(ip);
+ }
+
locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = (local_ip == 0) ?
- INADDR_ANY : htonl(local_ip);
rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
sizeof(locaddr));
if (rc == -EADDRINUSE) {
CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
goto failed;
}
if (rc != 0) {
goto failed;
}
}
- return 0;
+ return sock;
failed:
sock_release(sock);
- return rc;
+ return ERR_PTR(rc);
}
int
lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
{
struct sockaddr_in sin;
- int len = sizeof(sin);
- int rc;
+ int rc;
+#ifndef HAVE_KERN_SOCK_GETNAME_2ARGS
+ int len = sizeof(sin);
+#endif
if (remote)
- rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
+ rc = lnet_kernel_getpeername(sock,
+ (struct sockaddr *)&sin, &len);
else
- rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
- if (rc != 0) {
+ rc = lnet_kernel_getsockname(sock,
+ (struct sockaddr *)&sin, &len);
+ if (rc < 0) {
CERROR("Error %d getting sock %s IP/port\n",
rc, remote ? "peer" : "local");
return rc;
}
EXPORT_SYMBOL(lnet_sock_getbuf);
-int
-lnet_sock_listen(struct socket **sockp,
- __u32 local_ip, int local_port, int backlog)
+/*
+ * Create a listening socket on @local_port.
+ *
+ * The socket comes from lnet_sock_create(-1, NULL, local_port, ns),
+ * i.e. without selecting an interface, and is then put into the
+ * listening state with kernel_listen() using @backlog.
+ *
+ * Returns the listening socket, or ERR_PTR(-errno) on failure.
+ * -EADDRINUSE from socket creation is reported with CERROR.  If
+ * kernel_listen() fails, the socket is released before returning.
+ */
+struct socket *
+lnet_sock_listen(int local_port, int backlog, struct net *ns)
{
- int fatal;
- int rc;
+ struct socket *sock;
+ int rc;
- rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
- if (rc != 0) {
- if (!fatal)
+ sock = lnet_sock_create(-1, NULL, local_port, ns);
+ if (IS_ERR(sock)) {
+ rc = PTR_ERR(sock);
+ if (rc == -EADDRINUSE)
CERROR("Can't create socket: port %d already in use\n",
local_port);
- return rc;
+ return ERR_PTR(rc);
}
- rc = kernel_listen(*sockp, backlog);
+ rc = kernel_listen(sock, backlog);
if (rc == 0)
- return 0;
+ return sock;
CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-#ifndef HAVE_SK_SLEEP
-static inline wait_queue_head_t *sk_sleep(struct sock *sk)
-{
- return sk->sk_sleep;
-}
-#endif
-
-int
-lnet_sock_accept(struct socket **newsockp, struct socket *sock)
-{
- wait_queue_entry_t wait;
- struct socket *newsock;
- int rc;
-
- /* XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module */
- rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
- if (rc) {
- CERROR("Can't allocate socket\n");
- return rc;
- }
-
- newsock->ops = sock->ops;
-
-#ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-#else
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
-#endif
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- init_waitqueue_entry(&wait, current);
- add_wait_queue(sk_sleep(sock->sk), &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- remove_wait_queue(sk_sleep(sock->sk), &wait);
-#ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
-#else
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
-#endif
- }
-
- if (rc != 0)
- goto failed;
-
- *newsockp = newsock;
- return 0;
-
-failed:
- sock_release(newsock);
- return rc;
+ sock_release(sock);
+ return ERR_PTR(rc);
}
-int
-lnet_sock_connect(struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
+/*
+ * Create a socket and connect it to @peeraddr.
+ *
+ * @interface:  interface index handed to lnet_sock_create() to select
+ *              the local address, or a negative value for none.
+ * @local_port: local port to bind to, or 0.
+ * @peeraddr:   peer address; passed to kernel_connect() with length
+ *              sizeof(struct sockaddr_in), so it is treated as an
+ *              IPv4 socket address.
+ * @ns:         network namespace handed to lnet_sock_create().
+ *
+ * Returns the connected socket, or ERR_PTR(-errno).  Errors from
+ * lnet_sock_create() are passed through unchanged.  On a connect
+ * failure the socket is released; -EADDRNOTAVAIL is logged at D_NET
+ * and every other error at D_NETERROR, and the caller can test
+ * PTR_ERR() for -EADDRNOTAVAIL to retry with another local port.
+ */
+struct socket *
+lnet_sock_connect(int interface, int local_port,
+ struct sockaddr *peeraddr,
+ struct net *ns)
{
- struct sockaddr_in srvaddr;
- int rc;
-
- rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
- if (rc != 0)
- return rc;
+ struct socket *sock;
+ int rc;
- memset(&srvaddr, 0, sizeof(srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons(peer_port);
- srvaddr.sin_addr.s_addr = htonl(peer_ip);
+ sock = lnet_sock_create(interface, peeraddr, local_port, ns);
+ if (IS_ERR(sock))
+ return sock;
- rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
- sizeof(srvaddr), 0);
+ rc = kernel_connect(sock, peeraddr, sizeof(struct sockaddr_in), 0);
if (rc == 0)
- return 0;
+ return sock;
/* EADDRNOTAVAIL probably means we're already connected to the same
* peer/port on the same local port on a differently typed
* connection. Let our caller retry with a different local
* port... */
- *fatal = !(rc == -EADDRNOTAVAIL);
- CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
- "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
- &local_ip, local_port, &peer_ip, peer_port);
+ CDEBUG_LIMIT(rc == -EADDRNOTAVAIL ? D_NET : D_NETERROR,
+ "Error %d connecting %d -> %pISp\n", rc,
+ local_port, peeraddr);
- sock_release(*sockp);
- return rc;
+ sock_release(sock);
+ return ERR_PTR(rc);
}