Whamcloud - gitweb
LU-165: Support privileged ports in the o2iblnd driver.
author: Christopher J. Morrone <morrone2@llnl.gov>
Fri, 11 Feb 2011 04:39:28 +0000 (20:39 -0800)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 7 Jun 2011 03:00:10 +0000 (20:00 -0700)
It is highly recommended that you have a version of the
IB verbs driver that provides the rdma_set_reuseaddr() function.
Otherwise you may run out of privileged ports.

In this version of the patch, one may separately decide whether
privileged ports will be required for passively accepted connections
(require_privileged_port) and whether privileged ports will be used
for actively initiated connections (use_privileged_port).

Original patch by Ira Weiny.

Change-Id: Id3600094b08784be6e82b224cf510460f69d4dd6
Signed-off-by: Christopher J. Morrone <morrone2@llnl.gov>
Reviewed-on: http://review.whamcloud.com/366
Tested-by: Hudson
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/autoconf/ofed.m4
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c

index aefc27d..826dcac 100644 (file)
@@ -93,4 +93,23 @@ AC_DEFUN([LN_CONFIG_OFED_SPEC],
        ],[
                AC_MSG_RESULT(no)
        ])
        ],[
                AC_MSG_RESULT(no)
        ])
+
+       AC_MSG_CHECKING([if OFED has rdma_set_reuseaddr])
+       LB_LINUX_TRY_COMPILE([
+               #include <linux/version.h>
+               #include <linux/pci.h>
+               #if !HAVE_GFP_T
+               typedef int gfp_t;
+               #endif
+               #include <rdma/rdma_cm.h>
+       ],[
+               rdma_set_reuseaddr(NULL, 1);
+               return 0;
+       ],[
+               AC_MSG_RESULT(yes)
+               AC_DEFINE(HAVE_OFED_RDMA_SET_REUSEADDR, 1,
+                         [rdma_set_reuse defined])
+       ],[
+               AC_MSG_RESULT(no)
+       ])
 ])
 ])
index db98289..4538b11 100644 (file)
@@ -125,6 +125,8 @@ typedef struct
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
         cfs_sysctl_table_header_t *kib_sysctl;  /* sysctl interface */
 #endif
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
         cfs_sysctl_table_header_t *kib_sysctl;  /* sysctl interface */
 #endif
+        int              *kib_require_priv_port;/* accept only privileged ports */
+        int              *kib_use_priv_port;    /* use privileged port for active connect */
 } kib_tunables_t;
 
 extern kib_tunables_t  kiblnd_tunables;
 } kib_tunables_t;
 
 extern kib_tunables_t  kiblnd_tunables;
index 431aa90..8127bd2 100644 (file)
@@ -1207,6 +1207,48 @@ kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
         kiblnd_check_sends(conn);
 }
 
         kiblnd_check_sends(conn);
 }
 
+static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
+                               struct sockaddr_in *srcaddr,
+                               struct sockaddr_in *dstaddr,
+                               int timeout_ms)
+{       /* Start address resolution from a privileged source port; returns 0 on success, else the rc of the failing call. */
+        unsigned short port;    /* candidate source port, in host byte order */
+        int rc;
+
+#ifdef HAVE_OFED_RDMA_SET_REUSEADDR
+        /* allow the port to be reused; a failure here aborts before any port is tried */
+        rc = rdma_set_reuseaddr(cmid, 1);
+        if (rc != 0) {
+                CERROR("Unable to set reuse on cmid: %d\n", rc);
+                return rc;
+        }
+#endif
+
+        /* look for a free privileged port, trying PROT_SOCK-1 first and counting down to 1 */
+        for (port = PROT_SOCK-1; port > 0; port--) {
+                srcaddr->sin_port = htons(port);        /* overwrites the caller's source port on every attempt */
+                rc = rdma_resolve_addr(cmid,
+                                       (struct sockaddr *)srcaddr,
+                                       (struct sockaddr *)dstaddr,
+                                       timeout_ms);
+                if (rc == 0) {
+                        CDEBUG(D_NET, "bound to port %hu\n", port);
+                        return 0;
+                } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) { /* this port is unavailable: fall through to the next lower one */
+                        CDEBUG(D_NET, "bind to port %hu failed: %d\n",
+                               port, rc);
+                } else {        /* any other error ends the search immediately */
+                        return rc;
+                }
+        }
+
+        CERROR("Failed to bind to a free privileged port\n");
+#ifndef HAVE_OFED_RDMA_SET_REUSEADDR
+        CERROR("You may need IB verbs that supports rdma_set_reuseaddr()\n");
+#endif
+        return rc;      /* rc from the last port attempted (-EADDRINUSE or -EADDRNOTAVAIL) */
+}
+
 void
 kiblnd_connect_peer (kib_peer_t *peer)
 {
 void
 kiblnd_connect_peer (kib_peer_t *peer)
 {
@@ -1240,22 +1282,30 @@ kiblnd_connect_peer (kib_peer_t *peer)
 
         kiblnd_peer_addref(peer);               /* cmid's ref */
 
 
         kiblnd_peer_addref(peer);               /* cmid's ref */
 
-        rc = rdma_resolve_addr(cmid,
-                               (struct sockaddr *)&srcaddr,
-                               (struct sockaddr *)&dstaddr,
-                               *kiblnd_tunables.kib_timeout * 1000);
-        if (rc == 0) {
-                LASSERT (cmid->device != NULL);
-                CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
-                       libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
-                       HIPQUAD(dev->ibd_ifip), cmid->device->name);
-                return;
+        if (*kiblnd_tunables.kib_use_priv_port) {
+                rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
+                                         *kiblnd_tunables.kib_timeout * 1000);
+        } else {
+                rc = rdma_resolve_addr(cmid,
+                                       (struct sockaddr *)&srcaddr,
+                                       (struct sockaddr *)&dstaddr,
+                                       *kiblnd_tunables.kib_timeout * 1000);
+        }
+        if (rc != 0) {
+                /* Can't initiate address resolution:  */
+                CERROR("Can't resolve addr for %s: %d\n",
+                       libcfs_nid2str(peer->ibp_nid), rc);
+                goto failed2;
         }
 
         }
 
-        /* Can't initiate address resolution:  */
-        CERROR("Can't resolve addr for %s: %d\n",
-               libcfs_nid2str(peer->ibp_nid), rc);
+        LASSERT (cmid->device != NULL);
+        CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
+               libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
+               HIPQUAD(dev->ibd_ifip), cmid->device->name);
 
 
+        return;
+
+ failed2:
         kiblnd_peer_decref(peer);               /* cmid's ref */
         rdma_destroy_id(cmid);
  failed:
         kiblnd_peer_decref(peer);               /* cmid's ref */
         rdma_destroy_id(cmid);
  failed:
@@ -2126,7 +2176,7 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
         int                    version = IBLND_MSG_VERSION;
         unsigned long          flags;
         int                    rc;
         int                    version = IBLND_MSG_VERSION;
         unsigned long          flags;
         int                    rc;
-
+        struct sockaddr_in    *peer_addr;
         LASSERT (!cfs_in_interrupt());
 
         /* cmid inherits 'context' from the corresponding listener id */
         LASSERT (!cfs_in_interrupt());
 
         /* cmid inherits 'context' from the corresponding listener id */
@@ -2138,6 +2188,15 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
         rej.ibr_why                  = IBLND_REJECT_FATAL;
         rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
         rej.ibr_why                  = IBLND_REJECT_FATAL;
         rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
+        peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
+        if (*kiblnd_tunables.kib_require_priv_port &&
+            ntohs(peer_addr->sin_port) >= PROT_SOCK) {
+                __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
+                CERROR("Peer's port (%u.%u.%u.%u:%hu) is not privileged\n",
+                       HIPQUAD(ip), ntohs(peer_addr->sin_port));
+                goto failed;
+        }
+
         if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
                 CERROR("Short connection request\n");
                 goto failed;
         if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
                 CERROR("Short connection request\n");
                 goto failed;
index 3b95cd3..52ce830 100644 (file)
@@ -129,6 +129,15 @@ static int dev_failover = 0;
 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
                "HCA failover for bonding (0 off, 1 on, other values reserved)");
 
 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
                "HCA failover for bonding (0 off, 1 on, other values reserved)");
 
+
+static int require_privileged_port = 0;
+CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
+                "require privileged port when accepting connection");
+
+static int use_privileged_port = 1;
+CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
+                "use privileged port when initiating connection");
+
 kib_tunables_t kiblnd_tunables = {
         .kib_dev_failover           = &dev_failover,
         .kib_service                = &service,
 kib_tunables_t kiblnd_tunables = {
         .kib_dev_failover           = &dev_failover,
         .kib_service                = &service,
@@ -151,6 +160,8 @@ kib_tunables_t kiblnd_tunables = {
         .kib_fmr_flush_trigger      = &fmr_flush_trigger,
         .kib_fmr_cache              = &fmr_cache,
         .kib_pmr_pool_size          = &pmr_pool_size,
         .kib_fmr_flush_trigger      = &fmr_flush_trigger,
         .kib_fmr_cache              = &fmr_cache,
         .kib_pmr_pool_size          = &pmr_pool_size,
+        .kib_require_priv_port      = &require_privileged_port,
+        .kib_use_priv_port          = &use_privileged_port
 };
 
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
 };
 
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM