X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Flib-socket.c;h=b801b4727344894aee8bf59e05b8c708c7b03e16;hp=8afc4b50fa1be4dbff03183bf0ec9e76287784b3;hb=d720fbaadad9343a481863ae3b04ba580541328d;hpb=dbb81e826290b2db27e24a85869c9d0736726caa diff --git a/lnet/lnet/lib-socket.c b/lnet/lnet/lib-socket.c index 8afc4b5..b801b47 100644 --- a/lnet/lnet/lib-socket.c +++ b/lnet/lnet/lib-socket.c @@ -39,207 +39,37 @@ /* For sys_open & sys_close */ #include #include +#include #include #include -static int -lnet_sock_ioctl(int cmd, unsigned long arg) -{ - struct socket *sock; - int rc; - -#ifdef HAVE_SOCK_CREATE_KERN_USE_NET - rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); -#else - rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); +/* + * kernel 5.1: commit 7f1bc6e95d7840d4305595b3e4025cddda88cee5 + * Y2038 64-bit time. + * SO_TIMESTAMP, SO_TIMESTAMPNS and SO_TIMESTAMPING options, the + * way they are currently defined, are not y2038 safe. + * Subsequent patches in the series add new y2038 safe versions + * of these options which provide 64 bit timestamps on all + * architectures uniformly. + * Hence, rename existing options with OLD tag suffixes. + * + * NOTE: When updating to timespec64 change change these to '_NEW'. + * + */ +#ifndef SO_SNDTIMEO +#define SO_SNDTIMEO SO_SNDTIMEO_OLD #endif - if (rc != 0) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - if (cmd == SIOCGIFFLAGS) { - /* This cmd is used only to get IFF_UP flag */ - struct ifreq *ifr = (struct ifreq *) arg; - struct net_device *dev; - - dev = dev_get_by_name(sock_net(sock->sk), ifr->ifr_name); - if (dev) { - ifr->ifr_flags = dev->flags; - dev_put(dev); - rc = 0; - } else { - rc = -ENODEV; - } - } else { - rc = kernel_sock_ioctl(sock, cmd, arg); - } - sock_release(sock); - - return rc; -} - -int -lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - int nob; - int rc; - __u32 val; - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - return -EINVAL; - } - - CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ); - - if (strlen(name) > sizeof(ifr.ifr_name)-1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - return rc; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - return 0; - } - *up = 1; - - if (strlen(name) > sizeof(ifr.ifr_name)-1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get IP address for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - if (strlen(name) > sizeof(ifr.ifr_name)-1) - return -E2BIG; - strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)); - - ifr.ifr_addr.sa_family = AF_INET; - rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - return rc; - } - - val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; - *mask = ntohl(val); - - return 0; -} -EXPORT_SYMBOL(lnet_ipif_query); - -void -lnet_ipif_free_enumeration(char **names, int n) -{ - int i; - - LASSERT(n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} -EXPORT_SYMBOL(lnet_ipif_free_enumeration); - -int -lnet_ipif_enumerate(char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - struct net_device *dev; - struct socket *sock; - char **names; - int toobig; - int nalloc; - int nfound; - int rc; - int nob; - int i; - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - nfound = 0; - -#ifdef HAVE_SOCK_CREATE_KERN_USE_NET - rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); -#else - rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); +#ifndef SO_RCVTIMEO +#define SO_RCVTIMEO SO_RCVTIMEO_OLD #endif - if (rc) { - CERROR("Can't create socket: %d\n", rc); - return rc; - } - - for_each_netdev(sock_net(sock->sk), dev) - nfound++; - - if (nfound == 0) - goto out_release_sock; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out_release_sock; - } - - i = 0; - for_each_netdev(sock_net(sock->sk), dev) { - nob = strnlen(dev->name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, dev->name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out_free_names; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (!names[i]) { - rc = -ENOMEM; - goto out_free_names; - } - - memcpy(names[i], dev->name, nob); - names[i][nob] = 0; - i++; - } - - *namesp = names; - rc = i; - -out_free_names: - if (rc < 0) - lnet_ipif_free_enumeration(names, nfound); -out_release_sock: - sock_release(sock); - return rc; -} -EXPORT_SYMBOL(lnet_ipif_enumerate); int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) { int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); + long jiffies_left = cfs_time_seconds(timeout); unsigned long then; struct timeval tv; @@ -258,15 +88,7 @@ lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout) if (timeout != 0) { /* Set send timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = jiffies_left / - msecs_to_jiffies(MSEC_PER_SEC), - .tv_usec = ((jiffies_left % - msecs_to_jiffies(MSEC_PER_SEC)) * - USEC_PER_SEC) / - msecs_to_jiffies(MSEC_PER_SEC) - }; - + jiffies_to_timeval(jiffies_left, &tv); rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv, sizeof(tv)); if (rc != 0) { @@ -306,7 +128,7 @@ int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) { int rc; - long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC); + long jiffies_left = cfs_time_seconds(timeout); unsigned long then; struct timeval tv; @@ -323,13 +145,7 @@ lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) }; /* Set receive timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = jiffies_left / msecs_to_jiffies(MSEC_PER_SEC), - .tv_usec = ((jiffies_left % - msecs_to_jiffies(MSEC_PER_SEC)) * - USEC_PER_SEC) / - msecs_to_jiffies(MSEC_PER_SEC) - }; + jiffies_to_timeval(jiffies_left, &tv); rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); if (rc != 0) { @@ -360,49 +176,90 @@ lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout) } EXPORT_SYMBOL(lnet_sock_read); -static int -lnet_sock_create(struct socket **sockp, int *fatal, - __u32 local_ip, int local_port) +int choose_ipv4_src(__u32 *ret, int interface, __u32 dst_ipaddr, struct net *ns) +{ + struct net_device *dev; + struct in_device *in_dev; + int err; + DECLARE_CONST_IN_IFADDR(ifa); + + rcu_read_lock(); + dev = dev_get_by_index_rcu(ns, interface); + err = -EINVAL; + if (!dev || !(dev->flags & IFF_UP)) + goto out; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + goto out; + err = -ENOENT; + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (*ret == 0 || + ((dst_ipaddr ^ ntohl(ifa->ifa_local)) + & ntohl(ifa->ifa_mask)) == 0) { + /* This address at least as good as what we + * already have + */ + *ret = ntohl(ifa->ifa_local); + err = 0; + } + } + endfor_ifa(in_dev); +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(choose_ipv4_src); + +static struct socket * +lnet_sock_create(int interface, struct sockaddr *remaddr, + int local_port, struct net *ns) { - struct sockaddr_in locaddr; struct socket *sock; int rc; int option; - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - #ifdef HAVE_SOCK_CREATE_KERN_USE_NET - rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock); + rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock); #else rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock); #endif - *sockp = sock; - if (rc != 0) { + if (rc) { CERROR("Can't create socket: %d\n", rc); - return rc; + return ERR_PTR(rc); } option = 1; rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&option, sizeof(option)); - if (rc != 0) { + if (rc) { CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); goto failed; } - if (local_ip != 0 || local_port != 0) { - memset(&locaddr, 0, sizeof(locaddr)); + if (interface >= 0 || local_port != 0) { + struct sockaddr_in locaddr = {}; + locaddr.sin_family = AF_INET; + locaddr.sin_addr.s_addr = INADDR_ANY; + if (interface >= 0) { + struct sockaddr_in *sin = (void *)remaddr; + __u32 ip; + + rc = choose_ipv4_src(&ip, + interface, + ntohl(sin->sin_addr.s_addr), + ns); + if (rc) + goto failed; + locaddr.sin_addr.s_addr = htonl(ip); + } + locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = (local_ip == 0) ? - INADDR_ANY : htonl(local_ip); rc = kernel_bind(sock, (struct sockaddr *)&locaddr, sizeof(locaddr)); if (rc == -EADDRINUSE) { CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; goto failed; } if (rc != 0) { @@ -411,11 +268,11 @@ lnet_sock_create(struct socket **sockp, int *fatal, goto failed; } } - return 0; + return sock; failed: sock_release(sock); - return rc; + return ERR_PTR(rc); } int @@ -493,116 +350,63 @@ lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize) } EXPORT_SYMBOL(lnet_sock_getbuf); -int -lnet_sock_listen(struct socket **sockp, - __u32 local_ip, int local_port, int backlog) +struct socket * +lnet_sock_listen(int local_port, int backlog, struct net *ns) { - int fatal; - int rc; + struct socket *sock; + int rc; - rc = lnet_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) + sock = lnet_sock_create(-1, NULL, local_port, ns); + if (IS_ERR(sock)) { + rc = PTR_ERR(sock); + if (rc == -EADDRINUSE) CERROR("Can't create socket: port %d already in use\n", local_port); - return rc; + return ERR_PTR(rc); } - rc = kernel_listen(*sockp, backlog); + rc = kernel_listen(sock, backlog); if (rc == 0) - return 0; + return sock; CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - sock_release(*sockp); - return rc; -} - -#ifndef HAVE_SK_SLEEP -static inline wait_queue_head_t *sk_sleep(struct sock *sk) -{ - return sk->sk_sleep; -} -#endif - -int -lnet_sock_accept(struct socket **newsockp, struct socket *sock) -{ - wait_queue_entry_t wait; - struct socket *newsock; - int rc; - - /* XXX this should add a ref to sock->ops->owner, if - * TCP could be a module */ - rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); - if (rc) { - CERROR("Can't allocate socket\n"); - return rc; - } - - newsock->ops = sock->ops; - -#ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); -#else - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); -#endif - if (rc == -EAGAIN) { - /* Nothing ready, so wait for activity */ - init_waitqueue_entry(&wait, current); - add_wait_queue(sk_sleep(sock->sk), &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - remove_wait_queue(sk_sleep(sock->sk), &wait); -#ifdef HAVE_KERN_SOCK_ACCEPT_FLAG_ARG - rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); -#else - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); -#endif - } - - if (rc != 0) - goto failed; - - *newsockp = newsock; - return 0; - -failed: - sock_release(newsock); - return rc; + sock_release(sock); + return ERR_PTR(rc); } -int -lnet_sock_connect(struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) +struct socket * +lnet_sock_connect(int interface, int local_port, + __u32 peer_ip, int peer_port, + struct net *ns) { - struct sockaddr_in srvaddr; - int rc; - - rc = lnet_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) - return rc; + struct socket *sock; + struct sockaddr_in srvaddr; + int rc; memset(&srvaddr, 0, sizeof(srvaddr)); srvaddr.sin_family = AF_INET; srvaddr.sin_port = htons(peer_port); srvaddr.sin_addr.s_addr = htonl(peer_ip); - rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr, + sock = lnet_sock_create(interface, (struct sockaddr *)&srvaddr, + local_port, ns); + if (IS_ERR(sock)) + return sock; + + rc = kernel_connect(sock, (struct sockaddr *)&srvaddr, sizeof(srvaddr), 0); if (rc == 0) - return 0; + return sock; /* EADDRNOTAVAIL probably means we're already connected to the same * peer/port on the same local port on a differently typed * connection. Let our caller retry with a different local * port... */ - *fatal = !(rc == -EADDRNOTAVAIL); - CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET, - "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc, - &local_ip, local_port, &peer_ip, peer_port); + CDEBUG_LIMIT(rc == -EADDRNOTAVAIL ? D_NET : D_NETERROR, + "Error %d connecting %d -> %pI4h/%d\n", rc, + local_port, &peer_ip, peer_port); - sock_release(*sockp); - return rc; + sock_release(sock); + return ERR_PTR(rc); }