From daee9d5d61fb14cf998a3444ed33152cfc27fab5 Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" Date: Thu, 10 Feb 2011 20:39:28 -0800 Subject: [PATCH] LU-165: Support privileged ports in the o2iblnd driver. It is highly recommended that you have a version of the IB verbs driver that provides the rdma_set_reuseaddr() function. Otherwise you may run out of privileged ports. In this version of the patch, one may separately decide whether privileged ports will be required for passively accepted connections (require_privileged_port) and whether privileged ports will be used for actively initiated connections (use_privileged_port). Original patch by Ira Weiny. Change-Id: Id3600094b08784be6e82b224cf510460f69d4dd6 Signed-off-by: Christopher J. Morrone Reviewed-on: http://review.whamcloud.com/366 Tested-by: Hudson Reviewed-by: Oleg Drokin --- lnet/autoconf/ofed.m4 | 19 ++++++++ lnet/klnds/o2iblnd/o2iblnd.h | 2 + lnet/klnds/o2iblnd/o2iblnd_cb.c | 87 ++++++++++++++++++++++++++++------ lnet/klnds/o2iblnd/o2iblnd_modparams.c | 11 +++++ 4 files changed, 105 insertions(+), 14 deletions(-) diff --git a/lnet/autoconf/ofed.m4 b/lnet/autoconf/ofed.m4 index aefc27d..826dcac 100644 --- a/lnet/autoconf/ofed.m4 +++ b/lnet/autoconf/ofed.m4 @@ -93,4 +93,23 @@ AC_DEFUN([LN_CONFIG_OFED_SPEC], ],[ AC_MSG_RESULT(no) ]) + + AC_MSG_CHECKING([if OFED has rdma_set_reuseaddr]) + LB_LINUX_TRY_COMPILE([ + #include + #include + #if !HAVE_GFP_T + typedef int gfp_t; + #endif + #include + ],[ + rdma_set_reuseaddr(NULL, 1); + return 0; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_OFED_RDMA_SET_REUSEADDR, 1, + [rdma_set_reuse defined]) + ],[ + AC_MSG_RESULT(no) + ]) ]) diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index db982898..4538b11 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -125,6 +125,8 @@ typedef struct #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */ #endif + int 
*kib_require_priv_port;/* accept only privileged ports */ + int *kib_use_priv_port; /* use privileged port for active connect */ } kib_tunables_t; extern kib_tunables_t kiblnd_tunables; diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 431aa90..8127bd2 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1207,6 +1207,48 @@ kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn) kiblnd_check_sends(conn); } +static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, + struct sockaddr_in *srcaddr, + struct sockaddr_in *dstaddr, + int timeout_ms) +{ + unsigned short port; + int rc; + +#ifdef HAVE_OFED_RDMA_SET_REUSEADDR + /* allow the port to be reused */ + rc = rdma_set_reuseaddr(cmid, 1); + if (rc != 0) { + CERROR("Unable to set reuse on cmid: %d\n", rc); + return rc; + } +#endif + + /* look for a free privileged port */ + for (port = PROT_SOCK-1; port > 0; port--) { + srcaddr->sin_port = htons(port); + rc = rdma_resolve_addr(cmid, + (struct sockaddr *)srcaddr, + (struct sockaddr *)dstaddr, + timeout_ms); + if (rc == 0) { + CDEBUG(D_NET, "bound to port %hu\n", port); + return 0; + } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) { + CDEBUG(D_NET, "bind to port %hu failed: %d\n", + port, rc); + } else { + return rc; + } + } + + CERROR("Failed to bind to a free privileged port\n"); +#ifndef HAVE_OFED_RDMA_SET_REUSEADDR + CERROR("You may need IB verbs that supports rdma_set_reuseaddr()\n"); +#endif + return rc; +} + void kiblnd_connect_peer (kib_peer_t *peer) { @@ -1240,22 +1282,30 @@ kiblnd_connect_peer (kib_peer_t *peer) kiblnd_peer_addref(peer); /* cmid's ref */ - rc = rdma_resolve_addr(cmid, - (struct sockaddr *)&srcaddr, - (struct sockaddr *)&dstaddr, - *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) { - LASSERT (cmid->device != NULL); - CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n", - libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname, - HIPQUAD(dev->ibd_ifip), cmid->device->name); - 
return; + if (*kiblnd_tunables.kib_use_priv_port) { + rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr, + *kiblnd_tunables.kib_timeout * 1000); + } else { + rc = rdma_resolve_addr(cmid, + (struct sockaddr *)&srcaddr, + (struct sockaddr *)&dstaddr, + *kiblnd_tunables.kib_timeout * 1000); + } + if (rc != 0) { + /* Can't initiate address resolution: */ + CERROR("Can't resolve addr for %s: %d\n", + libcfs_nid2str(peer->ibp_nid), rc); + goto failed2; } - /* Can't initiate address resolution: */ - CERROR("Can't resolve addr for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); + LASSERT (cmid->device != NULL); + CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n", + libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname, + HIPQUAD(dev->ibd_ifip), cmid->device->name); + return; + + failed2: kiblnd_peer_decref(peer); /* cmid's ref */ rdma_destroy_id(cmid); failed: @@ -2126,7 +2176,7 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob) int version = IBLND_MSG_VERSION; unsigned long flags; int rc; - + struct sockaddr_in *peer_addr; LASSERT (!cfs_in_interrupt()); /* cmid inherits 'context' from the corresponding listener id */ @@ -2138,6 +2188,15 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob) rej.ibr_why = IBLND_REJECT_FATAL; rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; + peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); + if (*kiblnd_tunables.kib_require_priv_port && + ntohs(peer_addr->sin_port) >= PROT_SOCK) { + __u32 ip = ntohl(peer_addr->sin_addr.s_addr); + CERROR("Peer's port (%u.%u.%u.%u:%hu) is not privileged\n", + HIPQUAD(ip), ntohs(peer_addr->sin_port)); + goto failed; + } + if (priv_nob < offsetof(kib_msg_t, ibm_type)) { CERROR("Short connection request\n"); goto failed; diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 3b95cd3..52ce830 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -129,6 
+129,15 @@ static int dev_failover = 0; CFS_MODULE_PARM(dev_failover, "i", int, 0444, "HCA failover for bonding (0 off, 1 on, other values reserved)"); + +static int require_privileged_port = 0; +CFS_MODULE_PARM(require_privileged_port, "i", int, 0644, + "require privileged port when accepting connection"); + +static int use_privileged_port = 1; +CFS_MODULE_PARM(use_privileged_port, "i", int, 0644, + "use privileged port when initiating connection"); + kib_tunables_t kiblnd_tunables = { .kib_dev_failover = &dev_failover, .kib_service = &service, @@ -151,6 +160,8 @@ kib_tunables_t kiblnd_tunables = { .kib_fmr_flush_trigger = &fmr_flush_trigger, .kib_fmr_cache = &fmr_cache, .kib_pmr_pool_size = &pmr_pool_size, + .kib_require_priv_port = &require_privileged_port, + .kib_use_priv_port = &use_privileged_port }; #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM -- 1.8.3.1