summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 9b1e751)
Replace hard-coded references to the default root network namespace
(&init_net) in the LNET code (LNET, socklnd and o2iblnd).
When a network interface is created, Lustre records the current
network namespace. This patch improves the LNET code to use
this reference namespace most of the time instead of the root
network namespace. When using lctl, lnetctl or insmod, we
use the network namespace of the current process.
When starting the listening acceptor, we use the namespace of the
process that triggers this start.
An additional patch is needed for RPCSEC GSS support.
Signed-off-by: Aurelien Degremont <degremoa@amazon.com>
Change-Id: I56877ddcd7a27883662c86f245b196153211e7b2
Reviewed-on: https://review.whamcloud.com/34768
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Shaun Tancheff <stancheff@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
void lnet_unregister_lnd(struct lnet_lnd *lnd);
int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
void lnet_unregister_lnd(struct lnet_lnd *lnd);
int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port);
+ __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns);
void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
__u32 peer_ip, int port);
int lnet_count_acceptor_nets(void);
void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
__u32 peer_ip, int port);
int lnet_count_acceptor_nets(void);
char li_name[IFNAMSIZ];
};
char li_name[IFNAMSIZ];
};
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list);
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns);
int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
int lnet_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
int lnet_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
int lnet_sock_getaddr(struct socket *socket, bool remote, __u32 *ip, int *port);
int lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
int lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
-int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
+int lnet_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog,
+ struct net *ns);
int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
int lnet_sock_connect(struct socket **sockp, int *fatal,
__u32 local_ip, int local_port,
int lnet_sock_accept(struct socket **newsockp, struct socket *sock);
int lnet_sock_connect(struct socket **sockp, int *fatal,
__u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port);
+ __u32 peer_ip, int peer_port, struct net *ns);
int lnet_peers_start_down(void);
int lnet_peer_buffer_credits(struct lnet_net *net);
int lnet_peers_start_down(void);
int lnet_peer_buffer_credits(struct lnet_net *net);
-kiblnd_dev_need_failover(struct kib_dev *dev)
+kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
{
struct rdma_cm_id *cmid;
struct sockaddr_in srcaddr;
{
struct rdma_cm_id *cmid;
struct sockaddr_in srcaddr;
*
* a. rdma_bind_addr(), it will conflict with listener cmid
* b. rdma_resolve_addr() to zero addr */
*
* a. rdma_bind_addr(), it will conflict with listener cmid
* b. rdma_resolve_addr() to zero addr */
- cmid = kiblnd_rdma_create_id(kiblnd_dummy_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
+ cmid = kiblnd_rdma_create_id(ns, kiblnd_dummy_callback, dev,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cmid)) {
rc = PTR_ERR(cmid);
CERROR("Failed to create cmid for failover: %d\n", rc);
if (IS_ERR(cmid)) {
rc = PTR_ERR(cmid);
CERROR("Failed to create cmid for failover: %d\n", rc);
-kiblnd_dev_failover(struct kib_dev *dev)
+kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
{
struct list_head zombie_tpo = LIST_HEAD_INIT(zombie_tpo);
struct list_head zombie_ppo = LIST_HEAD_INIT(zombie_ppo);
{
struct list_head zombie_tpo = LIST_HEAD_INIT(zombie_tpo);
struct list_head zombie_ppo = LIST_HEAD_INIT(zombie_ppo);
dev->ibd_can_failover ||
dev->ibd_hdev == NULL);
dev->ibd_can_failover ||
dev->ibd_hdev == NULL);
- rc = kiblnd_dev_need_failover(dev);
+ rc = kiblnd_dev_need_failover(dev, ns);
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, dev, RDMA_PS_TCP,
- IB_QPT_RC);
+ cmid = kiblnd_rdma_create_id(ns, kiblnd_cm_callback, dev, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(cmid)) {
rc = PTR_ERR(cmid);
CERROR("Failed to create cmid for failover: %d\n", rc);
if (IS_ERR(cmid)) {
rc = PTR_ERR(cmid);
CERROR("Failed to create cmid for failover: %d\n", rc);
-kiblnd_base_startup(void)
+kiblnd_base_startup(struct net *ns)
{
struct kib_sched_info *sched;
int rc;
{
struct kib_sched_info *sched;
int rc;
}
if (*kiblnd_tunables.kib_dev_failover != 0)
}
if (*kiblnd_tunables.kib_dev_failover != 0)
- rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+ rc = kiblnd_thread_start(kiblnd_failover_thread, ns,
"kiblnd_failover");
if (rc != 0) {
"kiblnd_failover");
if (rc != 0) {
LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
- rc = kiblnd_base_startup();
+ rc = kiblnd_base_startup(ni->ni_net_ns);
if (rc != 0)
return rc;
}
if (rc != 0)
return rc;
}
- rc = lnet_inet_enumerate(&ifaces);
+ rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
INIT_LIST_HEAD(&ibdev->ibd_fail_list);
/* initialize the device */
INIT_LIST_HEAD(&ibdev->ibd_fail_list);
/* initialize the device */
- rc = kiblnd_dev_failover(ibdev);
+ rc = kiblnd_dev_failover(ibdev, ni->ni_net_ns);
if (rc) {
CERROR("ko2iblnd: Can't initialize device: rc = %d\n", rc);
goto failed;
if (rc) {
CERROR("ko2iblnd: Can't initialize device: rc = %d\n", rc);
goto failed;
t->lnd_peercredits_hiw)
#ifdef HAVE_RDMA_CREATE_ID_5ARG
t->lnd_peercredits_hiw)
#ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
- cb, dev, \
- ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(ns, cb, \
+ dev, ps, \
+ qpt)
#else
# ifdef HAVE_RDMA_CREATE_ID_4ARG
#else
# ifdef HAVE_RDMA_CREATE_ID_4ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
- ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+ ps, qpt)
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+ ps)
struct rdma_cm_event *event);
int kiblnd_translate_mtu(int value);
struct rdma_cm_event *event);
int kiblnd_translate_mtu(int value);
-int kiblnd_dev_failover(struct kib_dev *dev);
+int kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
lnet_nid_t nid);
void kiblnd_destroy_peer(struct kib_peer_ni *peer);
int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
lnet_nid_t nid);
void kiblnd_destroy_peer(struct kib_peer_ni *peer);
LASSERT (net != NULL);
LASSERT (peer_ni->ibp_connecting > 0);
LASSERT (net != NULL);
LASSERT (peer_ni->ibp_connecting > 0);
- cmid = kiblnd_rdma_create_id(kiblnd_cm_callback, peer_ni, RDMA_PS_TCP,
- IB_QPT_RC);
+ cmid = kiblnd_rdma_create_id(peer_ni->ibp_ni->ni_net_ns,
+ kiblnd_cm_callback, peer_ni,
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cmid)) {
CERROR("Can't create CMID for %s: %ld\n",
if (IS_ERR(cmid)) {
CERROR("Can't create CMID for %s: %ld\n",
{
rwlock_t *glock = &kiblnd_data.kib_global_lock;
struct kib_dev *dev;
{
rwlock_t *glock = &kiblnd_data.kib_global_lock;
struct kib_dev *dev;
wait_queue_entry_t wait;
unsigned long flags;
int rc;
wait_queue_entry_t wait;
unsigned long flags;
int rc;
dev->ibd_failover = 1;
write_unlock_irqrestore(glock, flags);
dev->ibd_failover = 1;
write_unlock_irqrestore(glock, flags);
- rc = kiblnd_dev_failover(dev);
+ rc = kiblnd_dev_failover(dev, ns);
write_lock_irqsave(glock, flags);
write_lock_irqsave(glock, flags);
net_tunables->lct_peer_rtr_credits =
*ksocknal_tunables.ksnd_peerrtrcredits;
net_tunables->lct_peer_rtr_credits =
*ksocknal_tunables.ksnd_peerrtrcredits;
- rc = lnet_inet_enumerate(&ifaces);
+ rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
- rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
- route->ksnr_myipaddr,
- route->ksnr_ipaddr, route->ksnr_port);
- if (rc != 0)
- goto failed;
+ rc = lnet_connect(&sock, peer_ni->ksnp_id.nid,
+ route->ksnr_myipaddr,
+ route->ksnr_ipaddr, route->ksnr_port,
+ peer_ni->ksnp_ni->ni_net_ns);
+ if (rc != 0)
+ goto failed;
rc = ksocknal_create_conn(peer_ni->ksnp_ni, route, sock, type);
if (rc < 0) {
rc = ksocknal_create_conn(peer_ni->ksnp_ni, route, sock, type);
if (rc < 0) {
int pta_shutdown;
struct socket *pta_sock;
struct completion pta_signal;
int pta_shutdown;
struct socket *pta_sock;
struct completion pta_signal;
} lnet_acceptor_state = {
.pta_shutdown = 1
};
} lnet_acceptor_state = {
.pta_shutdown = 1
};
int
lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
int
lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
- __u32 local_ip, __u32 peer_ip, int peer_port)
+ __u32 local_ip, __u32 peer_ip, int peer_port, struct net *ns)
{
struct lnet_acceptor_connreq cr;
struct socket *sock;
{
struct lnet_acceptor_connreq cr;
struct socket *sock;
rc = lnet_sock_connect(&sock, &fatal,
local_ip, port,
rc = lnet_sock_connect(&sock, &fatal,
local_ip, port,
+ peer_ip, peer_port, ns);
if (rc != 0) {
if (fatal)
goto failed;
if (rc != 0) {
if (fatal)
goto failed;
cfs_block_allsigs();
rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock,
cfs_block_allsigs();
rc = lnet_sock_listen(&lnet_acceptor_state.pta_sock,
- 0, accept_port, accept_backlog);
+ 0, accept_port, accept_backlog,
+ lnet_acceptor_state.pta_ns);
if (rc != 0) {
if (rc == -EADDRINUSE)
LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
if (rc != 0) {
if (rc == -EADDRINUSE)
LCONSOLE_ERROR_MSG(0x122, "Can't start acceptor on port"
if (lnet_count_acceptor_nets() == 0) /* not required */
return 0;
if (lnet_count_acceptor_nets() == 0) /* not required */
return 0;
+ lnet_acceptor_state.pta_ns = current->nsproxy->net_ns;
task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
"acceptor_%03ld", secure);
if (IS_ERR(task)) {
task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
"acceptor_%03ld", secure);
if (IS_ERR(task)) {
-int lnet_inet_enumerate(struct lnet_inetdev **dev_list)
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns)
{
struct lnet_inetdev *ifaces = NULL;
struct net_device *dev;
{
struct lnet_inetdev *ifaces = NULL;
struct net_device *dev;
int nip = 0;
rtnl_lock();
int nip = 0;
rtnl_lock();
- for_each_netdev(&init_net, dev) {
+ for_each_netdev(ns, dev) {
int flags = dev_get_flags(dev);
struct in_device *in_dev;
int node_id;
int flags = dev_get_flags(dev);
struct in_device *in_dev;
int node_id;
- nip = lnet_inet_enumerate(&ifaces);
+ nip = lnet_inet_enumerate(&ifaces, current->nsproxy->net_ns);
if (nip < 0) {
if (nip != -ENOENT) {
LCONSOLE_ERROR_MSG(0x117,
if (nip < 0) {
if (nip != -ENOENT) {
LCONSOLE_ERROR_MSG(0x117,
static int
lnet_sock_create(struct socket **sockp, int *fatal,
static int
lnet_sock_create(struct socket **sockp, int *fatal,
- __u32 local_ip, int local_port)
+ __u32 local_ip, int local_port, struct net *ns)
{
struct sockaddr_in locaddr;
struct socket *sock;
{
struct sockaddr_in locaddr;
struct socket *sock;
*fatal = 1;
#ifdef HAVE_SOCK_CREATE_KERN_USE_NET
*fatal = 1;
#ifdef HAVE_SOCK_CREATE_KERN_USE_NET
- rc = sock_create_kern(&init_net, PF_INET, SOCK_STREAM, 0, &sock);
+ rc = sock_create_kern(ns, PF_INET, SOCK_STREAM, 0, &sock);
#else
rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
#endif
#else
rc = sock_create_kern(PF_INET, SOCK_STREAM, 0, &sock);
#endif
int
lnet_sock_listen(struct socket **sockp,
int
lnet_sock_listen(struct socket **sockp,
- __u32 local_ip, int local_port, int backlog)
+ __u32 local_ip, int local_port, int backlog, struct net *ns)
- rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
+ rc = lnet_sock_create(sockp, &fatal, local_ip, local_port, ns);
if (rc != 0) {
if (!fatal)
CERROR("Can't create socket: port %d already in use\n",
if (rc != 0) {
if (!fatal)
CERROR("Can't create socket: port %d already in use\n",
int
lnet_sock_connect(struct socket **sockp, int *fatal,
__u32 local_ip, int local_port,
int
lnet_sock_connect(struct socket **sockp, int *fatal,
__u32 local_ip, int local_port,
- __u32 peer_ip, int peer_port)
+ __u32 peer_ip, int peer_port,
+ struct net *ns)
{
struct sockaddr_in srvaddr;
int rc;
{
struct sockaddr_in srvaddr;
int rc;
- rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
+ rc = lnet_sock_create(sockp, fatal, local_ip, local_port, ns);