Whamcloud - gitweb
* socklnd:
authoreeb <eeb>
Sun, 23 Oct 2005 14:08:10 +0000 (14:08 +0000)
committereeb <eeb>
Sun, 23 Oct 2005 14:08:10 +0000 (14:08 +0000)
        fixed connection race that can occur with multiple routers

changed 'typed_conns' module parameter to RO

*   nidstrings:
        allow 0xnnnn parsing of numerical NIDs

libcfs_num_addr2str -> libcfs_decnum_addr2str (LO, QSW, PTL)
                               libcfs_hexnum_addr2str (GM)

*   gmlnd:
change from decimal to hex representation of GM addresses (they're the
lowest 4 bytes of the NIC's MAC address).

*   router:
        compare routers first on # uncompleted bytes, then credits
        for better load balance.

*   llmount:
        fixed error message (it's a NID, not a host)

        fixed bug in checking mdx & profile string lengths

lnet/klnds/iiblnd/iiblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/klnds/socklnd/socklnd_lib-linux.c
lnet/klnds/socklnd/socklnd_modparams.c
lnet/libcfs/nidstrings.c
lnet/lnet/lib-move.c
lnet/lnet/router.c
lnet/utils/gmlndnid.c

index c6dfb94..42077dd 100644 (file)
@@ -1785,7 +1785,8 @@ kibnal_startup (lnet_ni_t *ni)
                 }
         }
         
-        /* Find IP address from <ipif base name><number> */
+        /* Find IP address from <ipif base name><number+1> 
+         * (Infinicon IPoIB starts at ib1:) */
         snprintf(ipif_name, sizeof(ipif_name), "%s%d",
                  *kibnal_tunables.kib_ipif_basename, kibnal_data.kib_hca_idx + 1);
         if (strlen(ipif_name) == sizeof(ipif_name - 1)) {
index c77096a..cb81c78 100644 (file)
@@ -335,6 +335,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
         }
 
         route->ksnr_connected |= (1<<type);
+        route->ksnr_connecting &= ~(1<<type);
         route->ksnr_conn_count++;
 
         /* Successful connection => further attempts can
@@ -351,7 +352,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
         ksock_route_t     *route2;
 
         LASSERT (route->ksnr_peer == NULL);
-        LASSERT (!route->ksnr_connecting);
+        LASSERT (route->ksnr_connecting == 0);
         LASSERT (route->ksnr_connected == 0);
 
         /* LASSERT(unique) */
@@ -964,7 +965,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         ksock_sched_t     *sched;
         unsigned int       irq;
         ksock_tx_t        *tx;
+        int                bits;
         int                rc;
+        char              *warn = NULL;
 
         LASSERT (route == NULL == (type == SOCKLND_CONN_NONE));
 
@@ -1024,8 +1027,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         }
 
         rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs);
-        if (rc < 0)
+        if (rc < 0) {
+                if (rc == -EALREADY) {
+                        CDEBUG(D_NET, "Lost connection race with %s\n", 
+                               libcfs_id2str(peerid));
+                        /* Not an actual failure: return +ve RC so active
+                         * connector can back off */
+                        rc = EALREADY;
+                }
                 goto failed_1;
+        }
+        
         nipaddrs = rc;
         LASSERT (peerid.nid != LNET_NID_ANY);
 
@@ -1037,6 +1049,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                 ksocknal_create_routes(peer, conn->ksnc_port,
                                        ipaddrs, nipaddrs);
                 rc = 0;
+                write_lock_irqsave (global_lock, flags);
         } else {
                 rc = ksocknal_create_peer(&peer, ni, peerid);
                 if (rc != 0)
@@ -1050,33 +1063,65 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                          * table (which takes my ref) */
                         list_add_tail(&peer->ksnp_list,
                                       ksocknal_nid2peerlist(peerid.nid));
-                } else  {
+                } else {
                         ksocknal_peer_decref(peer);
                         peer = peer2;
                 }
+
                 /* +1 ref for me */
                 ksocknal_peer_addref(peer);
 
+                /* Am I already connecting/connected to this guy?  Resolve in
+                 * favour of higher NID... */
+                rc = 0;
+                if (peerid.nid < ni->ni_nid) {
+                        bits = (1 << conn->ksnc_type);
+
+                        list_for_each(tmp, &peer->ksnp_routes) {
+                                route = list_entry(tmp, ksock_route_t, 
+                                                   ksnr_list);
+                        
+                                if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+                                        continue;
+                                
+                                if ((route->ksnr_connecting & bits) == 0)
+                                        continue;
+
+                                rc = EALREADY;  /* not a failure */
+                                warn = "connection race";
+                                break;
+                        }
+                }
+                
                 write_unlock_irqrestore(global_lock, flags);
 
-                nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
-                rc = ksocknal_send_hello (ni, conn, peerid.nid,
-                                          ipaddrs, nipaddrs);
-                if (rc < 0)
+                if (rc != 0) {
+                        /* set CONN_NONE makes returned HELLO acknowledge I
+                         * lost a connection race */
+                        conn->ksnc_type = SOCKLND_CONN_NONE;
+                        ksocknal_send_hello (ni, conn, peerid.nid,
+                                             ipaddrs, 0);
+                } else {
+                        nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
+                        rc = ksocknal_send_hello (ni, conn, peerid.nid,
+                                                  ipaddrs, nipaddrs);
+                }
+                
+                write_lock_irqsave(global_lock, flags);
+                if (rc != 0)
                         goto failed_2;
         }
 
-        write_lock_irqsave (global_lock, flags);
-
         if (peer->ksnp_closing ||
             (route != NULL && route->ksnr_deleted)) {
                 /* route/peer got closed under me */
                 rc = -ESTALE;
-                goto failed_3;
+                warn = "peer/route removed";
+                goto failed_2;
         }
 
-        /* Refuse to duplicate an existing connection (both sides might
-         * connect at once), unless this is a loopback connection */
+        /* Refuse to duplicate an existing connection, unless this is a
+         * loopback connection */
         if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
                 list_for_each(tmp, &peer->ksnp_conns) {
                         conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
@@ -1087,11 +1132,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
                             conn2->ksnc_incarnation != incarnation)
                                 continue;
 
-                        CWARN("Not creating duplicate connection to "
-                              "%u.%u.%u.%u type %d\n",
-                              HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type);
-                        rc = -EALREADY;
-                        goto failed_3;
+                        rc = 0;    /* more of a NOOP than a failure */
+                        warn = "duplicate";
+                        goto failed_2;
                 }
         }
 
@@ -1152,13 +1195,13 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         }
 
         rc = ksocknal_close_stale_conns_locked(peer, incarnation);
+        write_unlock_irqrestore (global_lock, flags);
+
         if (rc != 0)
                 CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n",
                         rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
                         HIPQUAD(conn->ksnc_ipaddr));
 
-        write_unlock_irqrestore (global_lock, flags);
-
         ksocknal_lib_bind_irq (irq);
 
         /* Call the callbacks right now to get things going. */
@@ -1176,14 +1219,22 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
         ksocknal_conn_decref(conn);
         return (0);
 
- failed_3:
+ failed_2:
         if (!peer->ksnp_closing &&
             list_empty (&peer->ksnp_conns) &&
             list_empty (&peer->ksnp_routes))
                 ksocknal_unlink_peer_locked(peer);
         write_unlock_irqrestore(global_lock, flags);
 
- failed_2:
+        if (warn != NULL) {
+                if (rc < 0)
+                        CERROR("Not creating conn %s type %d: %s\n",
+                               libcfs_id2str(peerid), conn->ksnc_type, warn);
+                else
+                        CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
+                              libcfs_id2str(peerid), conn->ksnc_type, warn);
+        }
+        
         ksocknal_peer_decref(peer);
 
  failed_1:
@@ -1191,9 +1242,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
 
  failed_0:
         libcfs_sock_release(sock);
-
-        LASSERT (rc != 0);
-        return (rc);
+        return rc;
 }
 
 void
index aaf89ce..738e6d6 100644 (file)
@@ -313,7 +313,7 @@ typedef struct ksock_route
         __u32               ksnr_myipaddr;      /* my IP */
         __u32               ksnr_ipaddr;        /* IP address to connect to */
         int                 ksnr_port;          /* port to connect to */
-        unsigned int        ksnr_connecting:1;  /* autoconnect in progress */
+        unsigned int        ksnr_connecting:4;  /* autoconnect in progress by type */
         unsigned int        ksnr_connected:4;   /* connections established by type */
         unsigned int        ksnr_deleted:1;     /* been removed from peer? */
         unsigned int        ksnr_share_count;   /* created explicitly? */
index 46b6c0a..13ef288 100644 (file)
@@ -481,11 +481,18 @@ void
 ksocknal_launch_connection_locked (ksock_route_t *route)
 {
         unsigned long     flags;
+        int               bits;
 
         /* called holding write lock on ksnd_global_lock */
-        LASSERT (!route->ksnr_connecting);
+        LASSERT (route->ksnr_connecting == 0);
+
+        bits = *ksocknal_tunables.ksnd_typed_conns ?
+               KSNR_TYPED_ROUTES : (1 << SOCKLND_CONN_ANY);
+        bits &= ~route->ksnr_connected;
+        
+        LASSERT (bits != 0);
         
-        route->ksnr_connecting = 1;             /* scheduling conn for connd */
+        route->ksnr_connecting = bits;          /* scheduling conn for connd */
         ksocknal_route_addref(route);           /* extra ref for connd */
         
         spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags);
@@ -623,21 +630,17 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
         
         list_for_each (tmp, &peer->ksnp_routes) {
                 route = list_entry (tmp, ksock_route_t, ksnr_list);
-                bits  = route->ksnr_connected;
+                bits  = route->ksnr_connected | route->ksnr_connecting;
 
                 if (*ksocknal_tunables.ksnd_typed_conns) {
-                        /* All typed connections established? */
+                        /* All typed connections (being) established? */
                         if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES)
                                 continue;
                 } else {
-                        /* Untyped connection established? */
+                        /* Untyped connection (being) established? */
                         if ((bits & (1 << SOCKLND_CONN_ANY)) != 0)
                                 continue;
                 }
-                
-                /* connection being established? */
-                if (route->ksnr_connecting)
-                        continue;
 
                 /* too soon to retry this guy? */
                 if (!(route->ksnr_retry_interval == 0 || /* first attempt */
@@ -660,7 +663,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
         list_for_each (tmp, &peer->ksnp_routes) {
                 route = list_entry (tmp, ksock_route_t, ksnr_list);
                 
-                if (route->ksnr_connecting)
+                if (route->ksnr_connecting != 0)
                         return (route);
         }
         
@@ -1348,12 +1351,10 @@ ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, lnet_nid_t peer_nid,
         int                  rc;
         lnet_nid_t           srcnid;
 
-        LASSERT (conn->ksnc_type != SOCKLND_CONN_NONE);
         LASSERT (0 <= nipaddrs && nipaddrs <= LNET_MAX_INTERFACES);
 
         /* No need for getconnsock/putconnsock */
         LASSERT (!conn->ksnc_closing);
-
         LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
         hmv->magic         = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
         hmv->version_major = cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR);
@@ -1437,6 +1438,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
         if (rc != 0) {
                 CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
                         rc, HIPQUAD(conn->ksnc_ipaddr));
+                LASSERT (rc < 0 && rc != -EALREADY);
                 return (rc);
         }
 
@@ -1454,6 +1456,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
                 if (rc != 0) {
                         CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
                                 rc, HIPQUAD(conn->ksnc_ipaddr));
+                        LASSERT (rc < 0 && rc != -EALREADY);
                         return (rc);
                 }
         }
@@ -1470,6 +1473,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
         if (rc != 0) {
                 CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
                         rc, HIPQUAD(conn->ksnc_ipaddr));
+                LASSERT (rc < 0 && rc != -EALREADY);
                 return (rc);
         }
         
@@ -1497,6 +1501,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
         if (rc != 0) {
                 CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
                         rc, HIPQUAD(conn->ksnc_ipaddr));
+                LASSERT (rc < 0 && rc != -EALREADY);
                 return (rc);
         }
 
@@ -1561,6 +1566,9 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
                                 HIPQUAD(conn->ksnc_ipaddr));
                         return (-EPROTO);
                 }
+        } else if (type == SOCKLND_CONN_NONE) {
+                /* lost a connection race */
+                return -EALREADY;
         } else if (ksocknal_invert_type(type) != conn->ksnc_type) {
                 CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
                         conn->ksnc_type, libcfs_id2str(*peerid), 
@@ -1613,8 +1621,12 @@ ksocknal_connect (ksock_route_t *route)
         unsigned long     flags;
         int               type;
         struct socket    *sock;
+        cfs_time_t        deadline;
         int               rc = 0;
 
+        deadline = cfs_time_add(cfs_time_current(), 
+                                cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
+
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
 
         for (;;) {
@@ -1636,32 +1648,43 @@ ksocknal_connect (ksock_route_t *route)
 
                 write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
 
+                if (cfs_time_aftereq(cfs_time_current(), deadline)) {
+                        lnet_connect_console_error(-ETIMEDOUT, peer->ksnp_id.nid,
+                                                   route->ksnr_ipaddr,
+                                                   route->ksnr_port);
+                        goto failed;
+                }
+                
                 rc = lnet_connect(&sock, peer->ksnp_id.nid,
-                                 route->ksnr_myipaddr, 
-                                 route->ksnr_ipaddr, route->ksnr_port);
+                                  route->ksnr_myipaddr, 
+                                  route->ksnr_ipaddr, route->ksnr_port);
                 if (rc != 0)
                         goto failed;
 
                 rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
-                if (rc != 0) {
+                if (rc < 0) {
                         lnet_connect_console_error(rc, peer->ksnp_id.nid,
-                                                  route->ksnr_ipaddr, 
-                                                  route->ksnr_port);
+                                                   route->ksnr_ipaddr, 
+                                                   route->ksnr_port);
                         goto failed;
                 }
+
+                if (rc != 0) {
+                        /* lost connection race; peer is connecting to me, so
+                         * give her some time... */
+                        cfs_pause(cfs_time_seconds(1));
+                }
                 
                 write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
         }
 
-        LASSERT (route->ksnr_connecting);
-        route->ksnr_connecting = 0;
+        LASSERT (route->ksnr_connecting == 0);
         write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
         return;
 
  failed:
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
 
-        LASSERT (route->ksnr_connecting);
         route->ksnr_connecting = 0;
 
         /* This is a retry rather than a new connection */
index 963aa9f..a3122f6 100644 (file)
@@ -42,7 +42,7 @@ ksocknal_lib_tunables_init ()
 #endif
         ksocknal_ctl_table[i++] = (ctl_table)
                {j++, "typed", ksocknal_tunables.ksnd_typed_conns, 
-                sizeof (int), 0644, NULL, &proc_dointvec};
+                sizeof (int), 0444, NULL, &proc_dointvec};
         ksocknal_ctl_table[i++] = (ctl_table)
                {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk, 
                 sizeof (int), 0644, NULL, &proc_dointvec};
index f413a42..ebac4c1 100644 (file)
@@ -49,7 +49,7 @@ CFS_MODULE_PARM(eager_ack, "i", int, 0644,
                 "send tcp ack packets eagerly");
 
 static int typed_conns = SOCKNAL_TYPED_CONNS;
-CFS_MODULE_PARM(typed_conns, "i", int, 0644,
+CFS_MODULE_PARM(typed_conns, "i", int, 0444,
                 "use different sockets for bulk");
 
 static int min_bulk = SOCKNAL_MIN_BULK;
index 955a70e..612d306 100644 (file)
@@ -87,7 +87,8 @@ libcfs_next_nidstring (void)
 static int  libcfs_lo_str2addr(char *str, int nob, __u32 *addr);
 static void libcfs_ip_addr2str(__u32 addr, char *str);
 static int  libcfs_ip_str2addr(char *str, int nob, __u32 *addr);
-static void libcfs_num_addr2str(__u32 addr, char *str);
+static void libcfs_decnum_addr2str(__u32 addr, char *str);
+static void libcfs_hexnum_addr2str(__u32 addr, char *str);
 static int  libcfs_num_str2addr(char *str, int nob, __u32 *addr);
 
 struct netstrfns {
@@ -102,7 +103,7 @@ static struct netstrfns  libcfs_netstrfns[] = {
         {.nf_type     = LOLND,
          .nf_name     = "lo",
          .nf_modname  = "klolnd",
-         .nf_addr2str = libcfs_num_addr2str,
+         .nf_addr2str = libcfs_decnum_addr2str,
          .nf_str2addr = libcfs_lo_str2addr},
         {.nf_type     = SOCKLND,
          .nf_name     = "tcp",
@@ -132,17 +133,17 @@ static struct netstrfns  libcfs_netstrfns[] = {
         {.nf_type     = QSWLND,
          .nf_name     = "elan",
          .nf_modname  = "kqswlnd",
-         .nf_addr2str = libcfs_num_addr2str,
+         .nf_addr2str = libcfs_decnum_addr2str,
          .nf_str2addr = libcfs_num_str2addr},
         {.nf_type     = GMLND,
          .nf_name     = "gm",
          .nf_modname  = "kgmlnd",
-         .nf_addr2str = libcfs_num_addr2str,
+         .nf_addr2str = libcfs_hexnum_addr2str,
          .nf_str2addr = libcfs_num_str2addr},
         {.nf_type     = PTLLND,
          .nf_name     = "ptl",
          .nf_modname  = "kptllnd",
-         .nf_addr2str = libcfs_num_addr2str,
+         .nf_addr2str = libcfs_decnum_addr2str,
          .nf_str2addr = libcfs_num_str2addr},
         /* placeholder for net0 alias.  It MUST BE THE LAST ENTRY */
         {.nf_type     = -1},
@@ -229,23 +230,35 @@ libcfs_ip_str2addr(char *str, int nob, __u32 *addr)
 }
 
 void
-libcfs_num_addr2str(__u32 addr, char *str)
+libcfs_decnum_addr2str(__u32 addr, char *str)
 {
         snprintf(str, LNET_NIDSTR_SIZE, "%u", addr);
 }
 
+void
+libcfs_hexnum_addr2str(__u32 addr, char *str)
+{
+        snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr);
+}
+
 int
 libcfs_num_str2addr(char *str, int nob, __u32 *addr)
 {
-        __u32   a;
-        int     n = nob;
+        int     n;
+
+        n = nob;
+        if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
+                return 1;
 
-        if (sscanf(str, "%u%n", &a, &n) < 1 ||
-            n != nob)
-                return 0;
+        n = nob;
+        if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
+                return 1;
 
-        *addr = a;
-        return 1;
+        n = nob;
+        if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
+                return 1;
+        
+        return 0;
 }
 
 struct netstrfns *
index 1bd9c4e..a418824 100644 (file)
@@ -736,25 +736,16 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
 int
 lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2)
 {
-        /* FIRST compare available send credits 
-         * (sends block immediately when peer credits are <= 0)
-         * THEN compare queue depth */
-        if (p1->lp_txcredits > 0) {
-                
-                if (p1->lp_txcredits > p2->lp_txcredits)
-                        return 1;
-                
-                if (p1->lp_txcredits < p2->lp_txcredits)
-                        return -1;
-
-        } else if (p2->lp_txcredits > 0) {
-                return -1;
-        }
-
+        if (p1->lp_txqnob < p2->lp_txqnob)
+                return 1;
+        
         if (p1->lp_txqnob > p2->lp_txqnob)
+                return -1;
+        
+        if (p1->lp_txcredits > p2->lp_txcredits)
                 return 1;
         
-        if (p1->lp_txqnob < p2->lp_txqnob)
+        if (p1->lp_txcredits < p2->lp_txcredits)
                 return -1;
         
         return 0;
index 01b9de2..0acb9e3 100644 (file)
@@ -602,6 +602,7 @@ lnet_alloc_rtrpools(int im_a_router)
         
         if (!strcmp(forwarding, "")) {
                 /* not set either way */
+                forwarding = im_a_router ? "enabled(implicit)" : "disabled(default)";
                 if (!im_a_router)
                         return 0;
         } else if (!strcmp(forwarding, "disabled")) {
index 8536792..f158745 100644 (file)
@@ -153,6 +153,6 @@ int main(int argc, char **argv)
         }
 
         nid = u_getgmnid(name, get_local_id);
-        printf("%u\n", nid);
+        printf("0x%x\n", nid);
         exit(0);
 }