Whamcloud - gitweb
LU-12236 lnet: support non-default network namespace 68/34768/14
author Aurelien Degremont <degremoa@amazon.com>
Thu, 25 Apr 2019 13:15:56 +0000 (13:15 +0000)
committer Oleg Drokin <green@whamcloud.com>
Thu, 15 Aug 2019 07:53:00 +0000 (07:53 +0000)
Replace hard-coded references to the default root network namespace
(&init_net) in LNET code (LNET, socklnd and o2iblnd).

When a network interface is created, Lustre records the current
network namespace. This patch improves the LNET code to use
this recorded namespace most of the time instead of the root
network namespace. When using lctl, lnetctl or insmod, we
use the network namespace of the current process.
When starting the listening acceptor, we use the namespace of the
process that triggers the start.

An additional patch is needed for RPCSEC GSS support.

Signed-off-by: Aurelien Degremont <degremoa@amazon.com>
Change-Id: I56877ddcd7a27883662c86f245b196153211e7b2
Reviewed-on: https://review.whamcloud.com/34768
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Shaun Tancheff <stancheff@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd_cb.c
lnet/lnet/acceptor.c
lnet/lnet/config.c
lnet/lnet/lib-socket.c

index ce84b1a..a4d0dfa 100644 (file)
@@ -834,7 +834,7 @@ void lnet_register_lnd(struct lnet_lnd *lnd);
 void lnet_unregister_lnd(struct lnet_lnd *lnd);
 
 int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-                __u32 local_ip, __u32 peer_ip, int peer_port);
+                __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns);
 void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
                                 __u32 peer_ip, int port);
 int lnet_count_acceptor_nets(void);
@@ -851,18 +851,19 @@ struct lnet_inetdev {
        char    li_name[IFNAMSIZ];
 };
 
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list);
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns);
 int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
 int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
 int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
 int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
 int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
 
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
+int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog,
+                    struct net *ns);
 int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
 int lnet_sock_connect(struct socket **sockp, int *fatal,
                        __u32 local_ip, int local_port,
-                       __u32 peer_ip, int peer_port);
+                       __u32 peer_ip, int peer_port, struct net *ns);
 
 int lnet_peers_start_down(void);
 int lnet_peer_buffer_credits(struct lnet_net *net);
index eaf59fc..53902ca 100644 (file)
@@ -2665,7 +2665,7 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 }
 
 static int
-kiblnd_dev_need_failover(struct kib_dev *dev)
+kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
 {
         struct rdma_cm_id  *cmid;
         struct sockaddr_in  srcaddr;
@@ -2687,8 +2687,8 @@ kiblnd_dev_need_failover(struct kib_dev *dev)
          *
          * a. rdma_bind_addr(), it will conflict with listener cmid
          * b. rdma_resolve_addr() to zero addr */
-        cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(ns, kiblnd_dummy_callback, dev,
+                                    RDMA_PS_TCP, IB_QPT_RC);
         if (IS_ERR(cmid)) {
                 rc = PTR_ERR(cmid);
                 CERROR("Failed to create cmid for failover: %d\n", rc);
@@ -2717,7 +2717,7 @@ kiblnd_dev_need_failover(struct kib_dev *dev)
 }
 
 int
-kiblnd_dev_failover(struct kib_dev *dev)
+kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 {
        struct list_head    zombie_tpo = LIST_HEAD_INIT(zombie_tpo);
        struct list_head    zombie_ppo = LIST_HEAD_INIT(zombie_ppo);
@@ -2736,7 +2736,7 @@ kiblnd_dev_failover(struct kib_dev *dev)
                  dev->ibd_can_failover ||
                  dev->ibd_hdev == NULL);
 
-        rc = kiblnd_dev_need_failover(dev);
+       rc = kiblnd_dev_need_failover(dev, ns);
         if (rc <= 0)
                 goto out;
 
@@ -2757,8 +2757,8 @@ kiblnd_dev_failover(struct kib_dev *dev)
                 rdma_destroy_id(cmid);
         }
 
-        cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(ns, kiblnd_cm_callback, dev, RDMA_PS_TCP,
+                                    IB_QPT_RC);
         if (IS_ERR(cmid)) {
                 rc = PTR_ERR(cmid);
                 CERROR("Failed to create cmid for failover: %d\n", rc);
@@ -3024,7 +3024,7 @@ out:
 }
 
 static int
-kiblnd_base_startup(void)
+kiblnd_base_startup(struct net *ns)
 {
        struct kib_sched_info   *sched;
        int                     rc;
@@ -3097,7 +3097,7 @@ kiblnd_base_startup(void)
         }
 
        if (*kiblnd_tunables.kib_dev_failover != 0)
-               rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+               rc = kiblnd_thread_start(kiblnd_failover_thread, ns,
                                         "kiblnd_failover");
 
         if (rc != 0) {
@@ -3196,7 +3196,7 @@ kiblnd_startup(struct lnet_ni *ni)
         LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
 
         if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
-                rc = kiblnd_base_startup();
+               rc = kiblnd_base_startup(ni->ni_net_ns);
                 if (rc != 0)
                         return rc;
         }
@@ -3232,7 +3232,7 @@ kiblnd_startup(struct lnet_ni *ni)
                 goto failed;
         }
 
-       rc = lnet_inet_enumerate(&ifaces);
+       rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
        if (rc < 0)
                goto failed;
 
@@ -3263,7 +3263,7 @@ kiblnd_startup(struct lnet_ni *ni)
        INIT_LIST_HEAD(&ibdev->ibd_fail_list);
 
        /* initialize the device */
-       rc = kiblnd_dev_failover(ibdev);
+       rc = kiblnd_dev_failover(ibdev, ni->ni_net_ns);
        if (rc) {
                CERROR("ko2iblnd: Can't initialize device: rc = %d\n", rc);
                goto failed;
index b0ca501..7a20963 100644 (file)
@@ -120,15 +120,16 @@ extern struct kib_tunables  kiblnd_tunables;
                                        t->lnd_peercredits_hiw)
 
 #ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-                                                               cb, dev, \
-                                                               ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(ns, cb, \
+                                                                   dev, ps, \
+                                                                   qpt)
 #else
 # ifdef HAVE_RDMA_CREATE_ID_4ARG
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-                                                                ps, qpt)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+                                                                    ps, qpt)
 # else
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+                                                                    ps)
 # endif
 #endif
 
@@ -1180,7 +1181,7 @@ int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
                         struct rdma_cm_event *event);
 int  kiblnd_translate_mtu(int value);
 
-int  kiblnd_dev_failover(struct kib_dev *dev);
+int  kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
 int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
                       lnet_nid_t nid);
 void kiblnd_destroy_peer(struct kib_peer_ni *peer);
index 49b0709..e6c5e16 100644 (file)
@@ -1412,8 +1412,9 @@ kiblnd_connect_peer(struct kib_peer_ni *peer_ni)
         LASSERT (net != NULL);
         LASSERT (peer_ni->ibp_connecting > 0);
 
-        cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer_ni, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(peer_ni->ibp_ni->ni_net_ns,
+                                    kiblnd_cm_callback, peer_ni,
+                                    RDMA_PS_TCP, IB_QPT_RC);
 
         if (IS_ERR(cmid)) {
                 CERROR("Can't create CMID for %s: %ld\n",
@@ -3901,6 +3902,7 @@ kiblnd_failover_thread(void *arg)
 {
        rwlock_t        *glock = &kiblnd_data.kib_global_lock;
        struct kib_dev *dev;
+       struct net *ns = arg;
        wait_queue_entry_t wait;
        unsigned long    flags;
        int              rc;
@@ -3929,7 +3931,7 @@ kiblnd_failover_thread(void *arg)
                         dev->ibd_failover = 1;
                        write_unlock_irqrestore(glock, flags);
 
-                       rc = kiblnd_dev_failover(dev);
+                       rc = kiblnd_dev_failover(dev, ns);
 
                        write_lock_irqsave(glock, flags);
 
index e064ae9..6c38119 100644 (file)
@@ -2728,7 +2728,7 @@ ksocknal_startup(struct lnet_ni *ni)
                net_tunables->lct_peer_rtr_credits =
                        *ksocknal_tunables.ksnd_peerrtrcredits;
 
-       rc = lnet_inet_enumerate(&ifaces);
+       rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
        if (rc < 0)
                goto fail_1;
 
index 962f6c6..1da3fe5 100644 (file)
@@ -1994,11 +1994,12 @@ ksocknal_connect(struct ksock_route *route)
                         goto failed;
                 }
 
-                rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
-                                  route->ksnr_myipaddr,
-                                  route->ksnr_ipaddr, route->ksnr_port);
-                if (rc != 0)
-                        goto failed;
+               rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
+                                 route->ksnr_myipaddr,
+                                 route->ksnr_ipaddr, route->ksnr_port,
+                                 peer_ni->ksnp_ni->ni_net_ns);
+               if (rc != 0)
+                       goto failed;
 
                 rc = ksocknal_create_conn(peer_ni->ksnp_ni, route, sock, type);
                 if (rc < 0) {
index 9eea92c..7dff9d5 100644 (file)
@@ -44,6 +44,7 @@ static struct {
        int                     pta_shutdown;
        struct socket           *pta_sock;
        struct completion       pta_signal;
+       struct net              *pta_ns;
 } lnet_acceptor_state = {
        .pta_shutdown = 1
 };
@@ -150,7 +151,7 @@ EXPORT_SYMBOL(lnet_connect_console_error);
 
 int
 lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-           __u32 local_ip, __u32 peer_ip, int peer_port)
+           __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns)
 {
        struct lnet_acceptor_connreq cr;
        struct socket           *sock;
@@ -167,7 +168,7 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
 
                rc = lnet_sock_connect(&sock, &fatal,
                                         local_ip, port,
-                                        peer_ip, peer_port);
+                                        peer_ip, peer_port, ns);
                if (rc != 0) {
                        if (fatal)
                                goto failed;
@@ -354,7 +355,8 @@ lnet_acceptor(void *arg)
        cfs_block_allsigs();
 
        rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock,
-                               0, accept_port, accept_backlog);
+                             0, accept_port, accept_backlog,
+                             lnet_acceptor_state.pta_ns);
        if (rc != 0) {
                if (rc == -EADDRINUSE)
                        LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
@@ -479,6 +481,7 @@ lnet_acceptor_start(void)
        if (lnet_count_acceptor_nets() == 0)  /* not required */
                return 0;
 
+       lnet_acceptor_state.pta_ns = current->nsproxy->net_ns;
        task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
                           "acceptor_%03ld", secure);
        if (IS_ERR(task)) {
index 2394f39..b63148c 100644 (file)
@@ -1605,7 +1605,7 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
        return count;
 }
 
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list)
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns)
 {
        struct lnet_inetdev *ifaces = NULL;
        struct net_device *dev;
@@ -1613,7 +1613,7 @@ int lnet_inet_enumerate(struct lnet_inetdev **dev_list)
        int nip = 0;
 
        rtnl_lock();
-       for_each_netdev(&init_net, dev) {
+       for_each_netdev(ns, dev) {
                int flags = dev_get_flags(dev);
                struct in_device *in_dev;
                int node_id;
@@ -1686,7 +1686,7 @@ lnet_parse_ip2nets (char **networksp, char *ip2nets)
        int        rc;
        int i;
 
-       nip = lnet_inet_enumerate(&ifaces);
+       nip = lnet_inet_enumerate(&ifaces, current->nsproxy->net_ns);
        if (nip < 0) {
                if (nip != -ENOENT) {
                        LCONSOLE_ERROR_MSG(0x117,
index 0c9a640..4580beb 100644 (file)
@@ -177,7 +177,7 @@ EXPORT_SYMBOL(lnet_sock_read);
 
 static int
 lnet_sock_create(struct socket **sockp, int *fatal,
-                __u32 local_ip, int local_port)
+                __u32 local_ip, int local_port, struct net *ns)
 {
        struct sockaddr_in  locaddr;
        struct socket      *sock;
@@ -188,7 +188,7 @@ lnet_sock_create(struct socket **sockp, int *fatal,
        *fatal = 1;
 
 #ifdef HAVE_SOCK_CREATE_KERN_USE_NET
-       rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
+       rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock);
 #else
        rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
 #endif
@@ -310,12 +310,12 @@ EXPORT_SYMBOL(lnet_sock_getbuf);
 
 int
 lnet_sock_listen(struct socket **sockp,
-                  __u32 local_ip, int local_port, int backlog)
+                  __u32 local_ip, int local_port, int backlog, struct net *ns)
 {
        int      fatal;
        int      rc;
 
-       rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
+       rc = lnet_sock_create(sockp, &fatal, local_ip, local_port, ns);
        if (rc != 0) {
                if (!fatal)
                        CERROR("Can't create socket: port %d already in use\n",
@@ -389,12 +389,13 @@ failed:
 int
 lnet_sock_connect(struct socket **sockp, int *fatal,
                  __u32 local_ip, int local_port,
-                 __u32 peer_ip, int peer_port)
+                 __u32 peer_ip, int peer_port,
+                 struct net *ns)
 {
        struct sockaddr_in  srvaddr;
        int                 rc;
 
-       rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
+       rc = lnet_sock_create(sockp, fatal, local_ip, local_port, ns);
        if (rc != 0)
                return rc;