Whamcloud - gitweb
LU-10391 lnet: allow lnet_connect() to use IPv6 addresses.
[fs/lustre-release.git] / lnet / lnet / acceptor.c
index 4de013a..159a85e 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2016, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -35,6 +35,7 @@
 #include <linux/completion.h>
 #include <net/sock.h>
 #include <lnet/lib-lnet.h>
+#include <linux/sunrpc/addr.h>
 
 static int   accept_port    = 988;
 static int   accept_backlog = 127;
@@ -44,6 +45,14 @@ static struct {
        int                     pta_shutdown;
        struct socket           *pta_sock;
        struct completion       pta_signal;
+       struct net              *pta_ns;   /* netns given to lnet_sock_listen() */
+       wait_queue_head_t       pta_waitq; /* acceptor thread waits here */
+       atomic_t                pta_ready; /* non-zero: try kernel_accept() */
+#ifdef HAVE_SK_DATA_READY_ONE_ARG
+       void                    (*pta_odata)(struct sock *); /* saved sk_data_ready */
+#else
+       void                    (*pta_odata)(struct sock *, int); /* saved sk_data_ready */
+#endif
 } lnet_acceptor_state = {
        .pta_shutdown = 1
 };
@@ -63,9 +72,9 @@ lnet_accept_magic(__u32 magic, __u32 constant)
 
 EXPORT_SYMBOL(lnet_acceptor_port);
 
-static char *accept = "secure";
+static char *accept_type = "secure";
 
-module_param(accept, charp, 0444);
+module_param_named(accept, accept_type, charp, 0444);
 MODULE_PARM_DESC(accept, "Accept connections (secure|all|none)");
 module_param(accept_port, int, 0444);
 MODULE_PARM_DESC(accept_port, "Acceptor's port (same on all nodes)");
@@ -74,18 +83,6 @@ MODULE_PARM_DESC(accept_backlog, "Acceptor's listen backlog");
 module_param(accept_timeout, int, 0644);
 MODULE_PARM_DESC(accept_timeout, "Acceptor's timeout (seconds)");
 
-static char *accept_type = NULL;
-
-static int
-lnet_acceptor_get_tunables(void)
-{
-       /* Userland acceptor uses 'accept_type' instead of 'accept', due to
-        * conflict with 'accept(2)', but kernel acceptor still uses 'accept'
-        * for compatibility. Hence the trick. */
-       accept_type = accept;
-       return 0;
-}
-
 int
 lnet_acceptor_timeout(void)
 {
@@ -95,103 +92,86 @@ EXPORT_SYMBOL(lnet_acceptor_timeout);
 
 void
 lnet_connect_console_error (int rc, lnet_nid_t peer_nid,
-                          __u32 peer_ip, int peer_port)
+                           struct sockaddr *sa)
 {
        switch (rc) {
        /* "normal" errors */
        case -ECONNREFUSED:
-               CNETERR("Connection to %s at host %pI4h on port %d was "
-                       "refused: check that Lustre is running on that node.\n",
-                       libcfs_nid2str(peer_nid), &peer_ip, peer_port);
+               CNETERR("Connection to %s at host %pISp was refused: check that Lustre is running on that node.\n",
+                       libcfs_nid2str(peer_nid), sa);
                break;
        case -EHOSTUNREACH:
        case -ENETUNREACH:
-               CNETERR("Connection to %s at host %pI4h "
-                       "was unreachable: the network or that node may "
-                       "be down, or Lustre may be misconfigured.\n",
-                       libcfs_nid2str(peer_nid), &peer_ip);
+               CNETERR("Connection to %s at host %pIS was unreachable: the network or that node may be down, or Lustre may be misconfigured.\n",
+                       libcfs_nid2str(peer_nid), sa);
                break;
        case -ETIMEDOUT:
-               CNETERR("Connection to %s at host %pI4h on "
-                       "port %d took too long: that node may be hung "
-                       "or experiencing high load.\n",
-                       libcfs_nid2str(peer_nid), &peer_ip, peer_port);
+               CNETERR("Connection to %s at host %pISp took too long: that node may be hung or experiencing high load.\n",
+                       libcfs_nid2str(peer_nid), sa);
                break;
        case -ECONNRESET:
-               LCONSOLE_ERROR_MSG(0x11b, "Connection to %s at host %pI4h"
-                                  " on port %d was reset: "
-                                  "is it running a compatible version of "
-                                  "Lustre and is %s one of its NIDs?\n",
-                                  libcfs_nid2str(peer_nid), &peer_ip,
-                                  peer_port, libcfs_nid2str(peer_nid));
+               LCONSOLE_ERROR_MSG(0x11b,
+                                  "Connection to %s at host %pISp was reset: is it running a compatible version of Lustre and is %s one of its NIDs?\n",
+                                  libcfs_nid2str(peer_nid), sa,
+                                  libcfs_nid2str(peer_nid));
                break;
        case -EPROTO:
-               LCONSOLE_ERROR_MSG(0x11c, "Protocol error connecting to %s at "
-                                  "host %pI4h on port %d: is it running "
-                                  "a compatible version of Lustre?\n",
-                                  libcfs_nid2str(peer_nid), &peer_ip,
-                                  peer_port);
+               LCONSOLE_ERROR_MSG(0x11c,
+                                  "Protocol error connecting to %s at host %pISp: is it running a compatible version of Lustre?\n",
+                                  libcfs_nid2str(peer_nid), sa);
                break;
        case -EADDRINUSE:
-               LCONSOLE_ERROR_MSG(0x11d, "No privileged ports available to "
-                                  "connect to %s at host %pI4h on port "
-                                  "%d\n", libcfs_nid2str(peer_nid),
-                                  &peer_ip, peer_port);
+               LCONSOLE_ERROR_MSG(0x11d,
+                                  "No privileged ports available to connect to %s at host %pISp\n",
+                                  libcfs_nid2str(peer_nid), sa);
                break;
        default:
-               LCONSOLE_ERROR_MSG(0x11e, "Unexpected error %d connecting to %s"
-                                  " at host %pI4h on port %d\n", rc,
-                                  libcfs_nid2str(peer_nid),
-                                  &peer_ip, peer_port);
+               LCONSOLE_ERROR_MSG(0x11e,
+                                  "Unexpected error %d connecting to %s at host %pISp\n",
+                                  rc, libcfs_nid2str(peer_nid), sa);
                break;
        }
 }
 EXPORT_SYMBOL(lnet_connect_console_error);
 
-int
-lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-           __u32 local_ip, __u32 peer_ip, int peer_port)
+struct socket *
+lnet_connect(lnet_nid_t peer_nid, int interface, struct sockaddr *peeraddr,
+            struct net *ns)
 {
-       lnet_acceptor_connreq_t cr;
+       struct lnet_acceptor_connreq cr;
        struct socket           *sock;
        int                     rc;
        int                     port;
-       int                     fatal;
 
-       CLASSERT(sizeof(cr) <= 16);             /* not too big to be on the stack */
+       BUILD_BUG_ON(sizeof(cr) > 16); /* not too big to be on the stack */
+
+       LASSERT(peeraddr->sa_family == AF_INET ||
+               peeraddr->sa_family == AF_INET6);
 
        for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT;
             port >= LNET_ACCEPTOR_MIN_RESERVED_PORT;
             --port) {
                /* Iterate through reserved ports. */
-
-               rc = lnet_sock_connect(&sock, &fatal,
-                                        local_ip, port,
-                                        peer_ip, peer_port);
-               if (rc != 0) {
-                       if (fatal)
-                               goto failed;
-                       continue;
+               sock = lnet_sock_connect(interface, port, peeraddr, ns);
+               if (IS_ERR(sock)) {
+                       rc = PTR_ERR(sock);
+                       if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL)
+                               continue;
+                       goto failed;
                }
 
-               CLASSERT(LNET_PROTO_ACCEPTOR_VERSION == 1);
+               BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
 
                cr.acr_magic   = LNET_PROTO_ACCEPTOR_MAGIC;
                cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION;
                cr.acr_nid     = peer_nid;
 
-               if (the_lnet.ln_testprotocompat != 0) {
+               if (the_lnet.ln_testprotocompat) {
                        /* single-shot proto check */
-                       lnet_net_lock(LNET_LOCK_EX);
-                       if ((the_lnet.ln_testprotocompat & 4) != 0) {
+                       if (test_and_clear_bit(2, &the_lnet.ln_testprotocompat))
                                cr.acr_version++;
-                               the_lnet.ln_testprotocompat &= ~4;
-                       }
-                       if ((the_lnet.ln_testprotocompat & 8) != 0) {
+                       if (test_and_clear_bit(3, &the_lnet.ln_testprotocompat))
                                cr.acr_magic = LNET_PROTO_MAGIC;
-                               the_lnet.ln_testprotocompat &= ~8;
-                       }
-                       lnet_net_unlock(LNET_LOCK_EX);
                }
 
                rc = lnet_sock_write(sock, &cr, sizeof(cr),
@@ -199,8 +179,7 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
                if (rc != 0)
                        goto failed_sock;
 
-               *sockp = sock;
-               return 0;
+               return sock;
        }
 
        rc = -EADDRINUSE;
@@ -209,25 +188,24 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
 failed_sock:
        sock_release(sock);
 failed:
-       lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port);
-       return rc;
+       lnet_connect_console_error(rc, peer_nid, peeraddr);
+       return ERR_PTR(rc);
 }
 EXPORT_SYMBOL(lnet_connect);
 
 static int
 lnet_accept(struct socket *sock, __u32 magic)
 {
-       lnet_acceptor_connreq_t cr;
-       __u32                   peer_ip;
-       int                     peer_port;
+       struct lnet_acceptor_connreq cr;
+       struct sockaddr_storage peer;
        int                     rc;
        int                     flip;
-       lnet_ni_t              *ni;
+       struct lnet_ni *ni;
        char                   *str;
 
-       LASSERT(sizeof(cr) <= 16);              /* not too big for the stack */
+       BUILD_BUG_ON(sizeof(cr) > 16);          /* not too big for the stack */
 
-       rc = lnet_sock_getaddr(sock, true, &peer_ip, &peer_port);
+       rc = lnet_sock_getaddr(sock, true, &peer);
        LASSERT(rc == 0);                       /* we succeeded before */
 
        if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) {
@@ -245,20 +223,19 @@ lnet_accept(struct socket *sock, __u32 magic)
                                               accept_timeout);
 
                        if (rc != 0)
-                               CERROR("Error sending magic+version in response"
-                                      "to LNET magic from %pI4h: %d\n",
-                                      &peer_ip, rc);
+                               CERROR("Error sending magic+version in response to LNET magic from %pIS: %d\n",
+                                      &peer, rc);
                        return -EPROTO;
                }
 
-               if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC))
+               if (lnet_accept_magic(magic, LNET_PROTO_TCP_MAGIC))
                        str = "'old' socknal/tcpnal";
                else
                        str = "unrecognised";
 
-               LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pI4h"
+               LCONSOLE_ERROR_MSG(0x11f, "Refusing connection from %pIS"
                                   " magic %08x: %s acceptor protocol\n",
-                                  &peer_ip, magic, str);
+                                  &peer, magic, str);
                return -EPROTO;
        }
 
@@ -268,8 +245,8 @@ lnet_accept(struct socket *sock, __u32 magic)
                              sizeof(cr.acr_version),
                              accept_timeout);
        if (rc != 0) {
-               CERROR("Error %d reading connection request version from "
-                      "%pI4h\n", rc, &peer_ip);
+               CERROR("Error %d reading connection request version from %pIS\n",
+                      rc, &peer);
                return -EIO;
        }
 
@@ -291,71 +268,87 @@ lnet_accept(struct socket *sock, __u32 magic)
                                       accept_timeout);
 
                if (rc != 0)
-                       CERROR("Error sending magic+version in response"
-                              "to version %d from %pI4h: %d\n",
-                              peer_version, &peer_ip, rc);
+                       CERROR("Error sending magic+version in response to version %d from %pIS: %d\n",
+                              peer_version, &peer, rc);
                return -EPROTO;
        }
 
        rc = lnet_sock_read(sock, &cr.acr_nid,
                              sizeof(cr) -
-                             offsetof(lnet_acceptor_connreq_t, acr_nid),
+                             offsetof(struct lnet_acceptor_connreq, acr_nid),
                              accept_timeout);
        if (rc != 0) {
-               CERROR("Error %d reading connection request from "
-                      "%pI4h\n", rc, &peer_ip);
+               CERROR("Error %d reading connection request from %pIS\n",
+                      rc, &peer);
                return -EIO;
        }
 
        if (flip)
                __swab64s(&cr.acr_nid);
 
-       ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
-       if (ni == NULL ||               /* no matching net */
+       ni = lnet_nid2ni_addref(cr.acr_nid);
+       if (ni == NULL ||               /* no matching net */
            ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
                if (ni != NULL)
                        lnet_ni_decref(ni);
-               LCONSOLE_ERROR_MSG(0x120, "Refusing connection from %pI4h "
-                                  "for %s: No matching NI\n",
-                                  &peer_ip, libcfs_nid2str(cr.acr_nid));
+               LCONSOLE_ERROR_MSG(0x120,
+                                  "Refusing connection from %pIS for %s: No matching NI\n",
+                                  &peer, libcfs_nid2str(cr.acr_nid));
                return -EPERM;
        }
 
-       if (ni->ni_lnd->lnd_accept == NULL) {
+       if (ni->ni_net->net_lnd->lnd_accept == NULL) {
                /* This catches a request for the loopback LND */
                lnet_ni_decref(ni);
-               LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h "
-                                 "for %s: NI doesn not accept IP connections\n",
-                                 &peer_ip, libcfs_nid2str(cr.acr_nid));
+               LCONSOLE_ERROR_MSG(0x121,
+                                  "Refusing connection from %pIS for %s: NI does not accept IP connections\n",
+                                  &peer, libcfs_nid2str(cr.acr_nid));
                return -EPERM;
        }
 
-       CDEBUG(D_NET, "Accept %s from %pI4h\n",
-              libcfs_nid2str(cr.acr_nid), &peer_ip);
+       CDEBUG(D_NET, "Accept %s from %pIS\n",
+              libcfs_nid2str(cr.acr_nid), &peer);
 
-       rc = ni->ni_lnd->lnd_accept(ni, sock);
+       rc = ni->ni_net->net_lnd->lnd_accept(ni, sock);
 
        lnet_ni_decref(ni);
        return rc;
 }
 
+#ifdef HAVE_SK_DATA_READY_ONE_ARG
+static void lnet_acceptor_ready(struct sock *sk)
+#else
+static void lnet_acceptor_ready(struct sock *sk, int len)
+#endif
+{
+       /* Pairs with wmb() in lnet_acceptor(): pta_odata must be set first */
+       rmb();
+#ifdef HAVE_SK_DATA_READY_ONE_ARG
+       lnet_acceptor_state.pta_odata(sk);
+#else
+       lnet_acceptor_state.pta_odata(sk, 0);
+#endif
+
+       atomic_set(&lnet_acceptor_state.pta_ready, 1);
+       wake_up(&lnet_acceptor_state.pta_waitq);
+}
+
 static int
 lnet_acceptor(void *arg)
 {
        struct socket  *newsock;
        int            rc;
        __u32          magic;
-       __u32          peer_ip;
-       int            peer_port;
+       struct sockaddr_storage peer;
        int            secure = (int)((uintptr_t)arg);
 
        LASSERT(lnet_acceptor_state.pta_sock == NULL);
 
-       cfs_block_allsigs();
-
-       rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock,
-                               0, accept_port, accept_backlog);
-       if (rc != 0) {
+       lnet_acceptor_state.pta_sock =
+               lnet_sock_listen(accept_port, accept_backlog,
+                                lnet_acceptor_state.pta_ns);
+       if (IS_ERR(lnet_acceptor_state.pta_sock)) {
+               rc = PTR_ERR(lnet_acceptor_state.pta_sock);
                if (rc == -EADDRINUSE)
                        LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
                                           " %d: port already in use\n",
@@ -367,7 +360,18 @@ lnet_acceptor(void *arg)
 
                lnet_acceptor_state.pta_sock = NULL;
        } else {
+               rc = 0;
                LCONSOLE(0, "Accept %s, port %d\n", accept_type, accept_port);
+               init_waitqueue_head(&lnet_acceptor_state.pta_waitq);
+               lnet_acceptor_state.pta_odata =
+                       lnet_acceptor_state.pta_sock->sk->sk_data_ready;
+               /* ensure pta_odata gets set before there is any chance of
+                * lnet_acceptor_ready() trying to read it.
+                */
+               wmb();
+               lnet_acceptor_state.pta_sock->sk->sk_data_ready =
+                       lnet_acceptor_ready;
+               atomic_set(&lnet_acceptor_state.pta_ready, 1);
        }
 
        /* set init status and unblock parent */
@@ -379,39 +383,45 @@ lnet_acceptor(void *arg)
 
        while (!lnet_acceptor_state.pta_shutdown) {
 
-               rc = lnet_sock_accept(&newsock, lnet_acceptor_state.pta_sock);
+               wait_event_idle(lnet_acceptor_state.pta_waitq,
+                               lnet_acceptor_state.pta_shutdown ||
+                               atomic_read(&lnet_acceptor_state.pta_ready));
+               if (!atomic_read(&lnet_acceptor_state.pta_ready))
+                       continue;
+               atomic_set(&lnet_acceptor_state.pta_ready, 0);
+               rc = kernel_accept(lnet_acceptor_state.pta_sock, &newsock,
+                                  SOCK_NONBLOCK);
                if (rc != 0) {
                        if (rc != -EAGAIN) {
                                CWARN("Accept error %d: pausing...\n", rc);
-                               set_current_state(TASK_UNINTERRUPTIBLE);
-                               schedule_timeout(cfs_time_seconds(1));
+                               schedule_timeout_uninterruptible(
+                                       cfs_time_seconds(1));
                        }
                        continue;
                }
 
-               /* maybe we're waken up with lnet_sock_abort_accept() */
-               if (lnet_acceptor_state.pta_shutdown) {
-                       sock_release(newsock);
-                       break;
-               }
+               /* make sure we call kernel_accept() again, until it fails */
+               atomic_set(&lnet_acceptor_state.pta_ready, 1);
 
-               rc = lnet_sock_getaddr(newsock, true, &peer_ip, &peer_port);
+               rc = lnet_sock_getaddr(newsock, true, &peer);
                if (rc != 0) {
                        CERROR("Can't determine new connection's address\n");
                        goto failed;
                }
 
-               if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
-                       CERROR("Refusing connection from %pI4h: "
-                              "insecure port %d\n", &peer_ip, peer_port);
+               if (secure &&
+                   rpc_get_port((struct sockaddr *)&peer) >
+                   LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+                       CERROR("Refusing connection from %pISp: insecure port.\n",
+                              &peer);
                        goto failed;
                }
 
                rc = lnet_sock_read(newsock, &magic, sizeof(magic),
                                      accept_timeout);
                if (rc != 0) {
-                       CERROR("Error %d reading connection request from "
-                              "%pI4h\n", rc, &peer_ip);
+                       CERROR("Error %d reading connection request from %pIS\n",
+                              rc, &peer);
                        goto failed;
                }
 
@@ -425,6 +435,8 @@ failed:
                sock_release(newsock);
        }
 
+       lnet_acceptor_state.pta_sock->sk->sk_data_ready =
+               lnet_acceptor_state.pta_odata;
        sock_release(lnet_acceptor_state.pta_sock);
        lnet_acceptor_state.pta_sock = NULL;
 
@@ -467,24 +479,22 @@ lnet_acceptor_start(void)
 
        LASSERT(lnet_acceptor_state.pta_sock == NULL);
 
-       rc = lnet_acceptor_get_tunables();
-       if (rc != 0)
-               return rc;
-
        init_completion(&lnet_acceptor_state.pta_signal);
        rc = accept2secure(accept_type, &secure);
        if (rc <= 0)
                return rc;
 
-       if (lnet_count_acceptor_nis() == 0)  /* not required */
+       if (lnet_count_acceptor_nets() == 0)  /* not required */
                return 0;
-
+       if (current->nsproxy && current->nsproxy->net_ns)
+               lnet_acceptor_state.pta_ns = current->nsproxy->net_ns;
+       else
+               lnet_acceptor_state.pta_ns = &init_net;
        task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
                           "acceptor_%03ld", secure);
        if (IS_ERR(task)) {
                rc2 = PTR_ERR(task);
                CERROR("Can't start acceptor thread: %ld\n", rc2);
-
                return -ESRCH;
        }
 
@@ -505,17 +515,15 @@ lnet_acceptor_start(void)
 void
 lnet_acceptor_stop(void)
 {
-       struct sock *sk;
-
        if (lnet_acceptor_state.pta_shutdown) /* not running */
                return;
 
-       lnet_acceptor_state.pta_shutdown = 1;
-
-       sk = lnet_acceptor_state.pta_sock->sk;
+       /* If still required, return immediately */
+       if (the_lnet.ln_refcount && lnet_count_acceptor_nets() > 0)
+               return;
 
-       /* awake any sleepers using safe method */
-       sk->sk_state_change(sk);
+       lnet_acceptor_state.pta_shutdown = 1;
+       wake_up(&lnet_acceptor_state.pta_waitq);
 
        /* block until acceptor signals exit */
        wait_for_completion(&lnet_acceptor_state.pta_signal);