Whamcloud - gitweb
LU-12236 lnet: support non-default network namespace 69/36769/3
author Aurelien Degremont <degremoa@amazon.com>
Thu, 25 Apr 2019 13:15:56 +0000 (13:15 +0000)
committer Oleg Drokin <green@whamcloud.com>
Thu, 12 Dec 2019 23:05:32 +0000 (23:05 +0000)
Replace hard-coded references to the default root network namespace
(&init_net) in LNET code (LNET, socklnd and o2iblnd).

When a network interface is created, Lustre records the current
network namespace. This patch changes the LNET code to use
this recorded namespace most of the time instead of the root
network namespace. When using lctl, lnetctl or insmod, we
use the network namespace of the current process.
When starting the listening acceptor, we use the namespace of the
process that triggers this start.

An additional patch is needed for RPCSEC GSS support.

Lustre-change: https://review.whamcloud.com/34768
Lustre-commit: 93b08edfb1c6ae8aec7e1009d3aca450416358d7

Signed-off-by: Aurelien Degremont <degremoa@amazon.com>
Change-Id: I56877ddcd7a27883662c86f245b196153211e7b2
Reviewed-on: https://review.whamcloud.com/36769
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd_cb.c
lnet/lnet/acceptor.c
lnet/lnet/config.c
lnet/lnet/lib-socket.c

index 492ba4a..61815a6 100644 (file)
@@ -831,7 +831,7 @@ void lnet_register_lnd(struct lnet_lnd *lnd);
 void lnet_unregister_lnd(struct lnet_lnd *lnd);
 
 int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-                __u32 local_ip, __u32 peer_ip, int peer_port);
+                __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns);
 void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
                                 __u32 peer_ip, int port);
 int lnet_count_acceptor_nets(void);
@@ -848,18 +848,19 @@ struct lnet_inetdev {
        char    li_name[IFNAMSIZ];
 };
 
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list);
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns);
 int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
 int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
 int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
 int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
 int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
 
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
+int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog,
+                    struct net *ns);
 int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
 int lnet_sock_connect(struct socket **sockp, int *fatal,
                        __u32 local_ip, int local_port,
-                       __u32 peer_ip, int peer_port);
+                       __u32 peer_ip, int peer_port, struct net *ns);
 
 int lnet_peers_start_down(void);
 int lnet_peer_buffer_credits(struct lnet_net *net);
index cdfcf80..64f6eef 100644 (file)
@@ -2672,7 +2672,7 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
 }
 
 static int
-kiblnd_dev_need_failover(struct kib_dev *dev)
+kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
 {
         struct rdma_cm_id  *cmid;
         struct sockaddr_in  srcaddr;
@@ -2694,8 +2694,8 @@ kiblnd_dev_need_failover(struct kib_dev *dev)
          *
          * a. rdma_bind_addr(), it will conflict with listener cmid
          * b. rdma_resolve_addr() to zero addr */
-        cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(ns, kiblnd_dummy_callback, dev,
+                                    RDMA_PS_TCP, IB_QPT_RC);
         if (IS_ERR(cmid)) {
                 rc = PTR_ERR(cmid);
                 CERROR("Failed to create cmid for failover: %d\n", rc);
@@ -2724,7 +2724,7 @@ kiblnd_dev_need_failover(struct kib_dev *dev)
 }
 
 int
-kiblnd_dev_failover(struct kib_dev *dev)
+kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 {
        struct list_head    zombie_tpo = LIST_HEAD_INIT(zombie_tpo);
        struct list_head    zombie_ppo = LIST_HEAD_INIT(zombie_ppo);
@@ -2743,7 +2743,7 @@ kiblnd_dev_failover(struct kib_dev *dev)
                  dev->ibd_can_failover ||
                  dev->ibd_hdev == NULL);
 
-        rc = kiblnd_dev_need_failover(dev);
+       rc = kiblnd_dev_need_failover(dev, ns);
         if (rc <= 0)
                 goto out;
 
@@ -2764,8 +2764,8 @@ kiblnd_dev_failover(struct kib_dev *dev)
                 rdma_destroy_id(cmid);
         }
 
-        cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(ns, kiblnd_cm_callback, dev, RDMA_PS_TCP,
+                                    IB_QPT_RC);
         if (IS_ERR(cmid)) {
                 rc = PTR_ERR(cmid);
                 CERROR("Failed to create cmid for failover: %d\n", rc);
@@ -3031,7 +3031,7 @@ out:
 }
 
 static int
-kiblnd_base_startup(void)
+kiblnd_base_startup(struct net *ns)
 {
        struct kib_sched_info   *sched;
        int                     rc;
@@ -3104,7 +3104,7 @@ kiblnd_base_startup(void)
         }
 
        if (*kiblnd_tunables.kib_dev_failover != 0)
-               rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+               rc = kiblnd_thread_start(kiblnd_failover_thread, ns,
                                         "kiblnd_failover");
 
         if (rc != 0) {
@@ -3238,7 +3238,7 @@ kiblnd_startup(struct lnet_ni *ni)
        LASSERT(ni->ni_net->net_lnd == &the_o2iblnd);
 
        if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
-               rc = kiblnd_base_startup();
+               rc = kiblnd_base_startup(ni->ni_net_ns);
                if (rc != 0)
                        return rc;
        }
@@ -3278,7 +3278,7 @@ kiblnd_startup(struct lnet_ni *ni)
                goto failed;
        }
 
-       rc = lnet_inet_enumerate(&ifaces);
+       rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
        if (rc < 0)
                goto failed;
 
@@ -3313,7 +3313,7 @@ kiblnd_startup(struct lnet_ni *ni)
                INIT_LIST_HEAD(&ibdev->ibd_fail_list);
 
                /* initialize the device */
-               rc = kiblnd_dev_failover(ibdev);
+               rc = kiblnd_dev_failover(ibdev, ni->ni_net_ns);
                if (rc) {
                        CERROR("ko2iblnd: Can't initialize device: rc = %d\n", rc);
                        goto failed;
index 59a2be2..eb3a4d0 100644 (file)
@@ -120,15 +120,16 @@ extern struct kib_tunables  kiblnd_tunables;
                        min(t->lnd_peercredits_hiw, (__u32)conn->ibc_queue_depth - 1))
 
 #ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-                                                               cb, dev, \
-                                                               ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(ns, cb, \
+                                                                   dev, ps, \
+                                                                   qpt)
 #else
 # ifdef HAVE_RDMA_CREATE_ID_4ARG
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-                                                                ps, qpt)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+                                                                    ps, qpt)
 # else
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+                                                                    ps)
 # endif
 #endif
 
@@ -1173,7 +1174,7 @@ int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
                         struct rdma_cm_event *event);
 int  kiblnd_translate_mtu(int value);
 
-int  kiblnd_dev_failover(struct kib_dev *dev);
+int  kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
 int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
                       lnet_nid_t nid);
 void kiblnd_destroy_peer(struct kib_peer_ni *peer);
index a0edce8..8ea3583 100644 (file)
@@ -1412,8 +1412,9 @@ kiblnd_connect_peer(struct kib_peer_ni *peer_ni)
         LASSERT (net != NULL);
         LASSERT (peer_ni->ibp_connecting > 0);
 
-        cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer_ni, RDMA_PS_TCP,
-                                     IB_QPT_RC);
+       cmid = kiblnd_rdma_create_id(peer_ni->ibp_ni->ni_net_ns,
+                                    kiblnd_cm_callback, peer_ni,
+                                    RDMA_PS_TCP, IB_QPT_RC);
 
         if (IS_ERR(cmid)) {
                 CERROR("Can't create CMID for %s: %ld\n",
@@ -3901,6 +3902,7 @@ kiblnd_failover_thread(void *arg)
 {
        rwlock_t        *glock = &kiblnd_data.kib_global_lock;
        struct kib_dev *dev;
+       struct net *ns = arg;
        wait_queue_entry_t wait;
        unsigned long    flags;
        int              rc;
@@ -3929,7 +3931,7 @@ kiblnd_failover_thread(void *arg)
                         dev->ibd_failover = 1;
                        write_unlock_irqrestore(glock, flags);
 
-                       rc = kiblnd_dev_failover(dev);
+                       rc = kiblnd_dev_failover(dev, ns);
 
                        write_lock_irqsave(glock, flags);
 
index 36d4359..b713747 100644 (file)
@@ -2732,7 +2732,7 @@ ksocknal_startup(struct lnet_ni *ni)
                net_tunables->lct_peer_rtr_credits =
                        *ksocknal_tunables.ksnd_peerrtrcredits;
 
-       rc = lnet_inet_enumerate(&ifaces);
+       rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
        if (rc < 0)
                goto fail_1;
 
index 962f6c6..1da3fe5 100644 (file)
@@ -1994,11 +1994,12 @@ ksocknal_connect(struct ksock_route *route)
                         goto failed;
                 }
 
-                rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
-                                  route->ksnr_myipaddr,
-                                  route->ksnr_ipaddr, route->ksnr_port);
-                if (rc != 0)
-                        goto failed;
+               rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
+                                 route->ksnr_myipaddr,
+                                 route->ksnr_ipaddr, route->ksnr_port,
+                                 peer_ni->ksnp_ni->ni_net_ns);
+               if (rc != 0)
+                       goto failed;
 
                 rc = ksocknal_create_conn(peer_ni->ksnp_ni, route, sock, type);
                 if (rc < 0) {
index 9eea92c..7dff9d5 100644 (file)
@@ -44,6 +44,7 @@ static struct {
        int                     pta_shutdown;
        struct socket           *pta_sock;
        struct completion       pta_signal;
+       struct net              *pta_ns;
 } lnet_acceptor_state = {
        .pta_shutdown = 1
 };
@@ -150,7 +151,7 @@ EXPORT_SYMBOL(lnet_connect_console_error);
 
 int
 lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
-           __u32 local_ip, __u32 peer_ip, int peer_port)
+           __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns)
 {
        struct lnet_acceptor_connreq cr;
        struct socket           *sock;
@@ -167,7 +168,7 @@ lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
 
                rc = lnet_sock_connect(&sock, &fatal,
                                         local_ip, port,
-                                        peer_ip, peer_port);
+                                        peer_ip, peer_port, ns);
                if (rc != 0) {
                        if (fatal)
                                goto failed;
@@ -354,7 +355,8 @@ lnet_acceptor(void *arg)
        cfs_block_allsigs();
 
        rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock,
-                               0, accept_port, accept_backlog);
+                             0, accept_port, accept_backlog,
+                             lnet_acceptor_state.pta_ns);
        if (rc != 0) {
                if (rc == -EADDRINUSE)
                        LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
@@ -479,6 +481,7 @@ lnet_acceptor_start(void)
        if (lnet_count_acceptor_nets() == 0)  /* not required */
                return 0;
 
+       lnet_acceptor_state.pta_ns = current->nsproxy->net_ns;
        task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
                           "acceptor_%03ld", secure);
        if (IS_ERR(task)) {
index 2dfbe76..445d46b 100644 (file)
@@ -1605,7 +1605,7 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
        return count;
 }
 
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list)
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns)
 {
        struct lnet_inetdev *ifaces = NULL;
        struct net_device *dev;
@@ -1613,7 +1613,7 @@ int lnet_inet_enumerate(struct lnet_inetdev **dev_list)
        int nip = 0;
 
        rtnl_lock();
-       for_each_netdev(&init_net, dev) {
+       for_each_netdev(ns, dev) {
                int flags = dev_get_flags(dev);
                struct in_device *in_dev;
                int node_id;
@@ -1686,7 +1686,7 @@ lnet_parse_ip2nets (char **networksp, char *ip2nets)
        int        rc;
        int i;
 
-       nip = lnet_inet_enumerate(&ifaces);
+       nip = lnet_inet_enumerate(&ifaces, current->nsproxy->net_ns);
        if (nip < 0) {
                if (nip != -ENOENT) {
                        LCONSOLE_ERROR_MSG(0x117,
index 85bb413..fe532c3 100644 (file)
@@ -170,7 +170,7 @@ EXPORT_SYMBOL(lnet_sock_read);
 
 static int
 lnet_sock_create(struct socket **sockp, int *fatal,
-                __u32 local_ip, int local_port)
+                __u32 local_ip, int local_port, struct net *ns)
 {
        struct sockaddr_in  locaddr;
        struct socket      *sock;
@@ -181,7 +181,7 @@ lnet_sock_create(struct socket **sockp, int *fatal,
        *fatal = 1;
 
 #ifdef HAVE_SOCK_CREATE_KERN_USE_NET
-       rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
+       rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock);
 #else
        rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
 #endif
@@ -303,12 +303,12 @@ EXPORT_SYMBOL(lnet_sock_getbuf);
 
 int
 lnet_sock_listen(struct socket **sockp,
-                  __u32 local_ip, int local_port, int backlog)
+                  __u32 local_ip, int local_port, int backlog, struct net *ns)
 {
        int      fatal;
        int      rc;
 
-       rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
+       rc = lnet_sock_create(sockp, &fatal, local_ip, local_port, ns);
        if (rc != 0) {
                if (!fatal)
                        CERROR("Can't create socket: port %d already in use\n",
@@ -382,12 +382,13 @@ failed:
 int
 lnet_sock_connect(struct socket **sockp, int *fatal,
                  __u32 local_ip, int local_port,
-                 __u32 peer_ip, int peer_port)
+                 __u32 peer_ip, int peer_port,
+                 struct net *ns)
 {
        struct sockaddr_in  srvaddr;
        int                 rc;
 
-       rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
+       rc = lnet_sock_create(sockp, fatal, local_ip, local_port, ns);
        if (rc != 0)
                return rc;