From 8fa26e6b8d26075d38522f545c837d725ead6e23 Mon Sep 17 00:00:00 2001 From: eeb Date: Sun, 23 Oct 2005 14:08:10 +0000 Subject: [PATCH] * socklnd: fixed connection race that can occur with multiple routers changed 'typed_conns' module parameter to RO * nidstrings: allow 0xnnnn parsing of numerical NIDs libcfs_num_addr2str -> libcfs_decnum_addr2str (LO, QSW, PTL) libcfs_hexnum_addr2str (GM) * gmlnd: change from decimal to hex representation of GM addresses (they're the lowest 4 bytes of the NIC's MAC address). * router: compare routers first on # uncompleted bytes, then credits for better load balance. * llmount: fixed error message (it's a NID, not a host) fixed bug in checking mdx & profile string lengths --- lnet/klnds/iiblnd/iiblnd.c | 3 +- lnet/klnds/socklnd/socklnd.c | 97 +++++++++++++++++++++++++--------- lnet/klnds/socklnd/socklnd.h | 2 +- lnet/klnds/socklnd/socklnd_cb.c | 63 +++++++++++++++------- lnet/klnds/socklnd/socklnd_lib-linux.c | 2 +- lnet/klnds/socklnd/socklnd_modparams.c | 2 +- lnet/libcfs/nidstrings.c | 39 +++++++++----- lnet/lnet/lib-move.c | 23 +++----- lnet/lnet/router.c | 1 + lnet/utils/gmlndnid.c | 2 +- 10 files changed, 156 insertions(+), 78 deletions(-) diff --git a/lnet/klnds/iiblnd/iiblnd.c b/lnet/klnds/iiblnd/iiblnd.c index c6dfb94..42077dd 100644 --- a/lnet/klnds/iiblnd/iiblnd.c +++ b/lnet/klnds/iiblnd/iiblnd.c @@ -1785,7 +1785,8 @@ kibnal_startup (lnet_ni_t *ni) } } - /* Find IP address from */ + /* Find IP address from + * (Infinicon IPoIB starts at ib1:) */ snprintf(ipif_name, sizeof(ipif_name), "%s%d", *kibnal_tunables.kib_ipif_basename, kibnal_data.kib_hca_idx + 1); if (strlen(ipif_name) == sizeof(ipif_name - 1)) { diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index c77096a..cb81c78 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -335,6 +335,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) } route->ksnr_connected |= (1<ksnr_connecting &= ~(1<ksnr_conn_count++; /* Successful connection => further attempts can @@ -351,7 +352,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) ksock_route_t *route2; LASSERT (route->ksnr_peer == NULL); - LASSERT (!route->ksnr_connecting); + LASSERT (route->ksnr_connecting == 0); LASSERT (route->ksnr_connected == 0); /* LASSERT(unique) */ @@ -964,7 +965,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksock_sched_t *sched; unsigned int irq; ksock_tx_t *tx; + int bits; int rc; + char *warn = NULL; LASSERT (route == NULL == (type == SOCKLND_CONN_NONE)); @@ -1024,8 +1027,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs); - if (rc < 0) + if (rc < 0) { + if (rc == -EALREADY) { + CDEBUG(D_NET, "Lost connection race with %s\n", + libcfs_id2str(peerid)); + /* Not an actual failure: return +ve RC so active + * connector can back off */ + rc = EALREADY; + } goto failed_1; + } + nipaddrs = rc; LASSERT (peerid.nid != LNET_NID_ANY); @@ -1037,6 +1049,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_create_routes(peer, conn->ksnc_port, ipaddrs, nipaddrs); rc = 0; + write_lock_irqsave (global_lock, flags); } else { rc = ksocknal_create_peer(&peer, ni, peerid); if (rc != 0) @@ -1050,33 +1063,65 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, * table (which takes my ref) */ list_add_tail(&peer->ksnp_list, ksocknal_nid2peerlist(peerid.nid)); - } else { + } else { ksocknal_peer_decref(peer); peer = peer2; } + /* +1 ref for me */ ksocknal_peer_addref(peer); + /* Am I already connecting/connected to this guy? Resolve in + * favour of higher NID... */ + rc = 0; + if (peerid.nid < ni->ni_nid) { + bits = (1 << conn->ksnc_type); + + list_for_each(tmp, &peer->ksnp_routes) { + route = list_entry(tmp, ksock_route_t, + ksnr_list); + + if (route->ksnr_ipaddr != conn->ksnc_ipaddr) + continue; + + if ((route->ksnr_connecting & bits) == 0) + continue; + + rc = EALREADY; /* not a failure */ + warn = "connection race"; + break; + } + } + write_unlock_irqrestore(global_lock, flags); - nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs); - rc = ksocknal_send_hello (ni, conn, peerid.nid, - ipaddrs, nipaddrs); - if (rc < 0) + if (rc != 0) { + /* set CONN_NONE makes returned HELLO acknowledge I + * lost a connection race */ + conn->ksnc_type = SOCKLND_CONN_NONE; + ksocknal_send_hello (ni, conn, peerid.nid, + ipaddrs, 0); + } else { + nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs); + rc = ksocknal_send_hello (ni, conn, peerid.nid, + ipaddrs, nipaddrs); + } + + write_lock_irqsave(global_lock, flags); + if (rc != 0) goto failed_2; } - write_lock_irqsave (global_lock, flags); - if (peer->ksnp_closing || (route != NULL && route->ksnr_deleted)) { /* route/peer got closed under me */ rc = -ESTALE; - goto failed_3; + warn = "peer/route removed"; + goto failed_2; } - /* Refuse to duplicate an existing connection (both sides might - * connect at once), unless this is a loopback connection */ + /* Refuse to duplicate an existing connection, unless this is a + * loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { list_for_each(tmp, &peer->ksnp_conns) { conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); @@ -1087,11 +1132,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, conn2->ksnc_incarnation != incarnation) continue; - CWARN("Not creating duplicate connection to " - "%u.%u.%u.%u type %d\n", - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type); - rc = -EALREADY; - goto failed_3; + rc = 0; /* more of a NOOP than a failure */ + warn = "duplicate"; + goto failed_2; } } @@ -1152,13 +1195,13 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } rc = ksocknal_close_stale_conns_locked(peer, incarnation); + write_unlock_irqrestore (global_lock, flags); + if (rc != 0) CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n", rc, libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr)); - write_unlock_irqrestore (global_lock, flags); - ksocknal_lib_bind_irq (irq); /* Call the callbacks right now to get things going. */ @@ -1176,14 +1219,22 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_conn_decref(conn); return (0); - failed_3: + failed_2: if (!peer->ksnp_closing && list_empty (&peer->ksnp_conns) && list_empty (&peer->ksnp_routes)) ksocknal_unlink_peer_locked(peer); write_unlock_irqrestore(global_lock, flags); - failed_2: + if (warn != NULL) { + if (rc < 0) + CERROR("Not creating conn %s type %d: %s\n", + libcfs_id2str(peerid), conn->ksnc_type, warn); + else + CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", + libcfs_id2str(peerid), conn->ksnc_type, warn); + } + ksocknal_peer_decref(peer); failed_1: @@ -1191,9 +1242,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, failed_0: libcfs_sock_release(sock); - - LASSERT (rc != 0); - return (rc); + return rc; } void diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index aaf89ce..738e6d6 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -313,7 +313,7 @@ typedef struct ksock_route __u32 ksnr_myipaddr; /* my IP */ __u32 ksnr_ipaddr; /* IP address to connect to */ int ksnr_port; /* port to connect to */ - unsigned int ksnr_connecting:1; /* autoconnect in progress */ + unsigned int ksnr_connecting:4; /* autoconnect in progress by type */ unsigned int ksnr_connected:4; /* connections established by type */ unsigned int ksnr_deleted:1; /* been removed from peer? */ unsigned int ksnr_share_count; /* created explicitly? */ diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 46b6c0a..13ef288 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -481,11 +481,18 @@ void ksocknal_launch_connection_locked (ksock_route_t *route) { unsigned long flags; + int bits; /* called holding write lock on ksnd_global_lock */ - LASSERT (!route->ksnr_connecting); + LASSERT (route->ksnr_connecting == 0); + + bits = *ksocknal_tunables.ksnd_typed_conns ? + KSNR_TYPED_ROUTES : (1 << SOCKLND_CONN_ANY); + bits &= ~route->ksnr_connected; + + LASSERT (bits != 0); - route->ksnr_connecting = 1; /* scheduling conn for connd */ + route->ksnr_connecting = bits; /* scheduling conn for connd */ ksocknal_route_addref(route); /* extra ref for connd */ spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags); @@ -623,21 +630,17 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer) list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); - bits = route->ksnr_connected; + bits = route->ksnr_connected | route->ksnr_connecting; if (*ksocknal_tunables.ksnd_typed_conns) { - /* All typed connections established? */ + /* All typed connections (being) established? */ if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES) continue; } else { - /* Untyped connection established? */ + /* Untyped connection (being) established? */ if ((bits & (1 << SOCKLND_CONN_ANY)) != 0) continue; } - - /* connection being established? */ - if (route->ksnr_connecting) - continue; /* too soon to retry this guy? */ if (!(route->ksnr_retry_interval == 0 || /* first attempt */ @@ -660,7 +663,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer) list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); - if (route->ksnr_connecting) + if (route->ksnr_connecting != 0) return (route); } @@ -1348,12 +1351,10 @@ ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, lnet_nid_t peer_nid, int rc; lnet_nid_t srcnid; - LASSERT (conn->ksnc_type != SOCKLND_CONN_NONE); LASSERT (0 <= nipaddrs && nipaddrs <= LNET_MAX_INTERFACES); /* No need for getconnsock/putconnsock */ LASSERT (!conn->ksnc_closing); - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); hmv->version_major = cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR); @@ -1437,6 +1438,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); + LASSERT (rc < 0 && rc != -EALREADY); return (rc); } @@ -1454,6 +1456,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); + LASSERT (rc < 0 && rc != -EALREADY); return (rc); } } @@ -1470,6 +1473,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); + LASSERT (rc < 0 && rc != -EALREADY); return (rc); } @@ -1497,6 +1501,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); + LASSERT (rc < 0 && rc != -EALREADY); return (rc); } @@ -1561,6 +1566,9 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, HIPQUAD(conn->ksnc_ipaddr)); return (-EPROTO); } + } else if (type == SOCKLND_CONN_NONE) { + /* lost a connection race */ + return -EALREADY; } else if (ksocknal_invert_type(type) != conn->ksnc_type) { CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n", conn->ksnc_type, libcfs_id2str(*peerid), @@ -1613,8 +1621,12 @@ ksocknal_connect (ksock_route_t *route) unsigned long flags; int type; struct socket *sock; + cfs_time_t deadline; int rc = 0; + deadline = cfs_time_add(cfs_time_current(), + cfs_time_seconds(*ksocknal_tunables.ksnd_timeout)); + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); for (;;) { @@ -1636,32 +1648,43 @@ ksocknal_connect (ksock_route_t *route) write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); + if (cfs_time_aftereq(cfs_time_current(), deadline)) { + lnet_connect_console_error(-ETIMEDOUT, peer->ksnp_id.nid, + route->ksnr_ipaddr, + route->ksnr_port); + goto failed; + } + rc = lnet_connect(&sock, peer->ksnp_id.nid, - route->ksnr_myipaddr, - route->ksnr_ipaddr, route->ksnr_port); + route->ksnr_myipaddr, + route->ksnr_ipaddr, route->ksnr_port); if (rc != 0) goto failed; rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - if (rc != 0) { + if (rc < 0) { lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); + route->ksnr_ipaddr, + route->ksnr_port); goto failed; } + + if (rc != 0) { + /* lost connection race; peer is connecting to me, so + * give her some time... */ + cfs_pause(cfs_time_seconds(1)); + } write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); } - LASSERT (route->ksnr_connecting); - route->ksnr_connecting = 0; + LASSERT (route->ksnr_connecting == 0); write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); return; failed: write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - LASSERT (route->ksnr_connecting); route->ksnr_connecting = 0; /* This is a retry rather than a new connection */ diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c index 963aa9f..a3122f6 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -42,7 +42,7 @@ ksocknal_lib_tunables_init () #endif ksocknal_ctl_table[i++] = (ctl_table) {j++, "typed", ksocknal_tunables.ksnd_typed_conns, - sizeof (int), 0644, NULL, &proc_dointvec}; + sizeof (int), 0444, NULL, &proc_dointvec}; ksocknal_ctl_table[i++] = (ctl_table) {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}; diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index f413a42..ebac4c1 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -49,7 +49,7 @@ CFS_MODULE_PARM(eager_ack, "i", int, 0644, "send tcp ack packets eagerly"); static int typed_conns = SOCKNAL_TYPED_CONNS; -CFS_MODULE_PARM(typed_conns, "i", int, 0644, +CFS_MODULE_PARM(typed_conns, "i", int, 0444, "use different sockets for bulk"); static int min_bulk = SOCKNAL_MIN_BULK; diff --git a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c index 955a70e..612d306 100644 --- a/lnet/libcfs/nidstrings.c +++ b/lnet/libcfs/nidstrings.c @@ -87,7 +87,8 @@ libcfs_next_nidstring (void) static int libcfs_lo_str2addr(char *str, int nob, __u32 *addr); static void libcfs_ip_addr2str(__u32 addr, char *str); static int libcfs_ip_str2addr(char *str, int nob, __u32 *addr); -static void libcfs_num_addr2str(__u32 addr, char *str); +static void libcfs_decnum_addr2str(__u32 addr, char *str); +static void libcfs_hexnum_addr2str(__u32 addr, char *str); static int libcfs_num_str2addr(char *str, int nob, __u32 *addr); struct netstrfns { @@ -102,7 +103,7 @@ static struct netstrfns libcfs_netstrfns[] = { {.nf_type = LOLND, .nf_name = "lo", .nf_modname = "klolnd", - .nf_addr2str = libcfs_num_addr2str, + .nf_addr2str = libcfs_decnum_addr2str, .nf_str2addr = libcfs_lo_str2addr}, {.nf_type = SOCKLND, .nf_name = "tcp", @@ -132,17 +133,17 @@ static struct netstrfns libcfs_netstrfns[] = { {.nf_type = QSWLND, .nf_name = "elan", .nf_modname = "kqswlnd", - .nf_addr2str = libcfs_num_addr2str, + .nf_addr2str = libcfs_decnum_addr2str, .nf_str2addr = libcfs_num_str2addr}, {.nf_type = GMLND, .nf_name = "gm", .nf_modname = "kgmlnd", - .nf_addr2str = libcfs_num_addr2str, + .nf_addr2str = libcfs_hexnum_addr2str, .nf_str2addr = libcfs_num_str2addr}, {.nf_type = PTLLND, .nf_name = "ptl", .nf_modname = "kptllnd", - .nf_addr2str = libcfs_num_addr2str, + .nf_addr2str = libcfs_decnum_addr2str, .nf_str2addr = libcfs_num_str2addr}, /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */ {.nf_type = -1}, @@ -229,23 +230,35 @@ libcfs_ip_str2addr(char *str, int nob, __u32 *addr) } void -libcfs_num_addr2str(__u32 addr, char *str) +libcfs_decnum_addr2str(__u32 addr, char *str) { snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); } +void +libcfs_hexnum_addr2str(__u32 addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); +} + int libcfs_num_str2addr(char *str, int nob, __u32 *addr) { - __u32 a; - int n = nob; + int n; + + n = nob; + if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) + return 1; - if (sscanf(str, "%u%n", &a, &n) < 1 || - n != nob) - return 0; + n = nob; + if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) + return 1; - *addr = a; - return 1; + n = nob; + if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) + return 1; + + return 0; } struct netstrfns * diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 1bd9c4e..a418824 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -736,25 +736,16 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, int lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2) { - /* FIRST compare available send credits - * (sends block immediately when peer credits are <= 0) - * THEN compare queue depth */ - if (p1->lp_txcredits > 0) { - - if (p1->lp_txcredits > p2->lp_txcredits) - return 1; - - if (p1->lp_txcredits < p2->lp_txcredits) - return -1; - - } else if (p2->lp_txcredits > 0) { - return -1; - } - + if (p1->lp_txqnob < p2->lp_txqnob) + return 1; + if (p1->lp_txqnob > p2->lp_txqnob) + return -1; + + if (p1->lp_txcredits > p2->lp_txcredits) return 1; - if (p1->lp_txqnob < p2->lp_txqnob) + if (p1->lp_txcredits < p2->lp_txcredits) return -1; return 0; diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 01b9de2..0acb9e3 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -602,6 +602,7 @@ lnet_alloc_rtrpools(int im_a_router) if (!strcmp(forwarding, "")) { /* not set either way */ + forwarding = im_a_router ? "enabled(implicit)" : "disabled(default)"; if (!im_a_router) return 0; } else if (!strcmp(forwarding, "disabled")) { diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c index 8536792..f158745 100644 --- a/lnet/utils/gmlndnid.c +++ b/lnet/utils/gmlndnid.c @@ -153,6 +153,6 @@ int main(int argc, char **argv) } nid = u_getgmnid(name, get_local_id); - printf("%u\n", nid); + printf("0x%x\n", nid); exit(0); } -- 1.8.3.1