}
}
- /* Find IP address from <ipif base name><number> */
+ /* Find IP address from <ipif base name><number+1>
+ * (Infinicon IPoIB starts at ib1:) */
snprintf(ipif_name, sizeof(ipif_name), "%s%d",
*kibnal_tunables.kib_ipif_basename, kibnal_data.kib_hca_idx + 1);
if (strlen(ipif_name) == sizeof(ipif_name - 1)) {
}
route->ksnr_connected |= (1<<type);
+ route->ksnr_connecting &= ~(1<<type);
route->ksnr_conn_count++;
/* Successful connection => further attempts can
ksock_route_t *route2;
LASSERT (route->ksnr_peer == NULL);
- LASSERT (!route->ksnr_connecting);
+ LASSERT (route->ksnr_connecting == 0);
LASSERT (route->ksnr_connected == 0);
/* LASSERT(unique) */
ksock_sched_t *sched;
unsigned int irq;
ksock_tx_t *tx;
+ int bits;
int rc;
+ char *warn = NULL;
LASSERT (route == NULL == (type == SOCKLND_CONN_NONE));
}
rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs);
- if (rc < 0)
+ if (rc < 0) {
+ if (rc == -EALREADY) {
+ CDEBUG(D_NET, "Lost connection race with %s\n",
+ libcfs_id2str(peerid));
+ /* Not an actual failure: return +ve RC so active
+ * connector can back off */
+ rc = EALREADY;
+ }
goto failed_1;
+ }
+
nipaddrs = rc;
LASSERT (peerid.nid != LNET_NID_ANY);
ksocknal_create_routes(peer, conn->ksnc_port,
ipaddrs, nipaddrs);
rc = 0;
+ write_lock_irqsave (global_lock, flags);
} else {
rc = ksocknal_create_peer(&peer, ni, peerid);
if (rc != 0)
* table (which takes my ref) */
list_add_tail(&peer->ksnp_list,
ksocknal_nid2peerlist(peerid.nid));
- } else {
+ } else {
ksocknal_peer_decref(peer);
peer = peer2;
}
+
/* +1 ref for me */
ksocknal_peer_addref(peer);
+ /* Am I already connecting/connected to this guy? Resolve in
+ * favour of higher NID... */
+ rc = 0;
+ if (peerid.nid < ni->ni_nid) {
+ bits = (1 << conn->ksnc_type);
+
+ list_for_each(tmp, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t,
+ ksnr_list);
+
+ if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+ continue;
+
+ if ((route->ksnr_connecting & bits) == 0)
+ continue;
+
+ rc = EALREADY; /* not a failure */
+ warn = "connection race";
+ break;
+ }
+ }
+
write_unlock_irqrestore(global_lock, flags);
- nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
- rc = ksocknal_send_hello (ni, conn, peerid.nid,
- ipaddrs, nipaddrs);
- if (rc < 0)
+ if (rc != 0) {
+ /* Setting the type to CONN_NONE makes the HELLO I send back
+ * tell the peer that this connection lost the race */
+ conn->ksnc_type = SOCKLND_CONN_NONE;
+ ksocknal_send_hello (ni, conn, peerid.nid,
+ ipaddrs, 0);
+ } else {
+ nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
+ rc = ksocknal_send_hello (ni, conn, peerid.nid,
+ ipaddrs, nipaddrs);
+ }
+
+ write_lock_irqsave(global_lock, flags);
+ if (rc != 0)
goto failed_2;
}
- write_lock_irqsave (global_lock, flags);
-
if (peer->ksnp_closing ||
(route != NULL && route->ksnr_deleted)) {
/* route/peer got closed under me */
rc = -ESTALE;
- goto failed_3;
+ warn = "peer/route removed";
+ goto failed_2;
}
- /* Refuse to duplicate an existing connection (both sides might
- * connect at once), unless this is a loopback connection */
+ /* Refuse to duplicate an existing connection, unless this is a
+ * loopback connection */
if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
list_for_each(tmp, &peer->ksnp_conns) {
conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
conn2->ksnc_incarnation != incarnation)
continue;
- CWARN("Not creating duplicate connection to "
- "%u.%u.%u.%u type %d\n",
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type);
- rc = -EALREADY;
- goto failed_3;
+ rc = 0; /* more of a NOOP than a failure */
+ warn = "duplicate";
+ goto failed_2;
}
}
}
rc = ksocknal_close_stale_conns_locked(peer, incarnation);
+ write_unlock_irqrestore (global_lock, flags);
+
if (rc != 0)
CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n",
rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr));
- write_unlock_irqrestore (global_lock, flags);
-
ksocknal_lib_bind_irq (irq);
/* Call the callbacks right now to get things going. */
ksocknal_conn_decref(conn);
return (0);
- failed_3:
+ failed_2:
if (!peer->ksnp_closing &&
list_empty (&peer->ksnp_conns) &&
list_empty (&peer->ksnp_routes))
ksocknal_unlink_peer_locked(peer);
write_unlock_irqrestore(global_lock, flags);
- failed_2:
+ if (warn != NULL) {
+ if (rc < 0)
+ CERROR("Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ else
+ CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ }
+
ksocknal_peer_decref(peer);
failed_1:
failed_0:
libcfs_sock_release(sock);
-
- LASSERT (rc != 0);
- return (rc);
+ return rc;
}
void
__u32 ksnr_myipaddr; /* my IP */
__u32 ksnr_ipaddr; /* IP address to connect to */
int ksnr_port; /* port to connect to */
- unsigned int ksnr_connecting:1; /* autoconnect in progress */
+ unsigned int ksnr_connecting:4; /* autoconnect in progress by type */
unsigned int ksnr_connected:4; /* connections established by type */
unsigned int ksnr_deleted:1; /* been removed from peer? */
unsigned int ksnr_share_count; /* created explicitly? */
ksocknal_launch_connection_locked (ksock_route_t *route)
{
unsigned long flags;
+ int bits;
/* called holding write lock on ksnd_global_lock */
- LASSERT (!route->ksnr_connecting);
+ LASSERT (route->ksnr_connecting == 0);
+
+ bits = *ksocknal_tunables.ksnd_typed_conns ?
+ KSNR_TYPED_ROUTES : (1 << SOCKLND_CONN_ANY);
+ bits &= ~route->ksnr_connected;
+
+ LASSERT (bits != 0);
- route->ksnr_connecting = 1; /* scheduling conn for connd */
+ route->ksnr_connecting = bits; /* scheduling conn for connd */
ksocknal_route_addref(route); /* extra ref for connd */
spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags);
list_for_each (tmp, &peer->ksnp_routes) {
route = list_entry (tmp, ksock_route_t, ksnr_list);
- bits = route->ksnr_connected;
+ bits = route->ksnr_connected | route->ksnr_connecting;
if (*ksocknal_tunables.ksnd_typed_conns) {
- /* All typed connections established? */
+ /* All typed connections (being) established? */
if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES)
continue;
} else {
- /* Untyped connection established? */
+ /* Untyped connection (being) established? */
if ((bits & (1 << SOCKLND_CONN_ANY)) != 0)
continue;
}
-
- /* connection being established? */
- if (route->ksnr_connecting)
- continue;
/* too soon to retry this guy? */
if (!(route->ksnr_retry_interval == 0 || /* first attempt */
list_for_each (tmp, &peer->ksnp_routes) {
route = list_entry (tmp, ksock_route_t, ksnr_list);
- if (route->ksnr_connecting)
+ if (route->ksnr_connecting != 0)
return (route);
}
int rc;
lnet_nid_t srcnid;
- LASSERT (conn->ksnc_type != SOCKLND_CONN_NONE);
LASSERT (0 <= nipaddrs && nipaddrs <= LNET_MAX_INTERFACES);
/* No need for getconnsock/putconnsock */
LASSERT (!conn->ksnc_closing);
-
LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
hmv->version_major = cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR);
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
return (rc);
}
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
return (rc);
}
}
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
return (rc);
}
if (rc != 0) {
CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
return (rc);
}
HIPQUAD(conn->ksnc_ipaddr));
return (-EPROTO);
}
+ } else if (type == SOCKLND_CONN_NONE) {
+ /* lost a connection race */
+ return -EALREADY;
} else if (ksocknal_invert_type(type) != conn->ksnc_type) {
CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
conn->ksnc_type, libcfs_id2str(*peerid),
unsigned long flags;
int type;
struct socket *sock;
+ cfs_time_t deadline;
int rc = 0;
+ deadline = cfs_time_add(cfs_time_current(),
+ cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
+
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
for (;;) {
write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+ if (cfs_time_aftereq(cfs_time_current(), deadline)) {
+ lnet_connect_console_error(-ETIMEDOUT, peer->ksnp_id.nid,
+ route->ksnr_ipaddr,
+ route->ksnr_port);
+ goto failed;
+ }
+
rc = lnet_connect(&sock, peer->ksnp_id.nid,
- route->ksnr_myipaddr,
- route->ksnr_ipaddr, route->ksnr_port);
+ route->ksnr_myipaddr,
+ route->ksnr_ipaddr, route->ksnr_port);
if (rc != 0)
goto failed;
rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
- if (rc != 0) {
+ if (rc < 0) {
lnet_connect_console_error(rc, peer->ksnp_id.nid,
- route->ksnr_ipaddr,
- route->ksnr_port);
+ route->ksnr_ipaddr,
+ route->ksnr_port);
goto failed;
}
+
+ if (rc != 0) {
+ /* lost connection race; peer is connecting to me, so
+ * give her some time... */
+ cfs_pause(cfs_time_seconds(1));
+ }
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
}
- LASSERT (route->ksnr_connecting);
- route->ksnr_connecting = 0;
+ LASSERT (route->ksnr_connecting == 0);
write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
return;
failed:
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- LASSERT (route->ksnr_connecting);
route->ksnr_connecting = 0;
/* This is a retry rather than a new connection */
#endif
ksocknal_ctl_table[i++] = (ctl_table)
{j++, "typed", ksocknal_tunables.ksnd_typed_conns,
- sizeof (int), 0644, NULL, &proc_dointvec};
+ sizeof (int), 0444, NULL, &proc_dointvec};
ksocknal_ctl_table[i++] = (ctl_table)
{j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk,
sizeof (int), 0644, NULL, &proc_dointvec};
"send tcp ack packets eagerly");
static int typed_conns = SOCKNAL_TYPED_CONNS;
-CFS_MODULE_PARM(typed_conns, "i", int, 0644,
+CFS_MODULE_PARM(typed_conns, "i", int, 0444,
"use different sockets for bulk");
static int min_bulk = SOCKNAL_MIN_BULK;
static int libcfs_lo_str2addr(char *str, int nob, __u32 *addr);
static void libcfs_ip_addr2str(__u32 addr, char *str);
static int libcfs_ip_str2addr(char *str, int nob, __u32 *addr);
-static void libcfs_num_addr2str(__u32 addr, char *str);
+static void libcfs_decnum_addr2str(__u32 addr, char *str);
+static void libcfs_hexnum_addr2str(__u32 addr, char *str);
static int libcfs_num_str2addr(char *str, int nob, __u32 *addr);
struct netstrfns {
{.nf_type = LOLND,
.nf_name = "lo",
.nf_modname = "klolnd",
- .nf_addr2str = libcfs_num_addr2str,
+ .nf_addr2str = libcfs_decnum_addr2str,
.nf_str2addr = libcfs_lo_str2addr},
{.nf_type = SOCKLND,
.nf_name = "tcp",
{.nf_type = QSWLND,
.nf_name = "elan",
.nf_modname = "kqswlnd",
- .nf_addr2str = libcfs_num_addr2str,
+ .nf_addr2str = libcfs_decnum_addr2str,
.nf_str2addr = libcfs_num_str2addr},
{.nf_type = GMLND,
.nf_name = "gm",
.nf_modname = "kgmlnd",
- .nf_addr2str = libcfs_num_addr2str,
+ .nf_addr2str = libcfs_hexnum_addr2str,
.nf_str2addr = libcfs_num_str2addr},
{.nf_type = PTLLND,
.nf_name = "ptl",
.nf_modname = "kptllnd",
- .nf_addr2str = libcfs_num_addr2str,
+ .nf_addr2str = libcfs_decnum_addr2str,
.nf_str2addr = libcfs_num_str2addr},
/* placeholder for net0 alias. It MUST BE THE LAST ENTRY */
{.nf_type = -1},
}
void
-libcfs_num_addr2str(__u32 addr, char *str)
+libcfs_decnum_addr2str(__u32 addr, char *str) /* renamed from libcfs_num_addr2str: writes addr into str as unsigned decimal ("%u"), output bounded to LNET_NIDSTR_SIZE bytes; assumes str is at least that large - TODO confirm */
{
snprintf(str, LNET_NIDSTR_SIZE, "%u", addr);
}
+void
+libcfs_hexnum_addr2str(__u32 addr, char *str) /* writes addr into str as "0x"-prefixed lowercase hex */
+{
+ snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); /* output bounded to LNET_NIDSTR_SIZE bytes; assumes str is at least that large - TODO confirm */
+}
+
int
libcfs_num_str2addr(char *str, int nob, __u32 *addr)
{
- __u32 a;
- int n = nob;
+ int n; /* number of characters consumed, filled in by %n */
+ /* Try "0x<hex>", then "0X<hex>", then unsigned decimal; an attempt succeeds (return 1) only if it consumed exactly nob characters. */
+ n = nob;
+ if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob)
+ return 1;
- if (sscanf(str, "%u%n", &a, &n) < 1 ||
- n != nob)
- return 0;
+ n = nob; /* reset before the next attempt */
+ if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob)
+ return 1;
- *addr = a;
- return 1;
+ n = nob; /* reset before the next attempt */
+ if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob)
+ return 1;
+ /* NOTE(review): sscanf() stores into *addr before the n == nob check, so *addr may be modified even when 0 is returned (the removed code parsed into a temporary) - confirm callers ignore *addr on failure. */
+ return 0;
}
struct netstrfns *
int
lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2)
{
- /* FIRST compare available send credits
- * (sends block immediately when peer credits are <= 0)
- * THEN compare queue depth */
- if (p1->lp_txcredits > 0) {
-
- if (p1->lp_txcredits > p2->lp_txcredits)
- return 1;
-
- if (p1->lp_txcredits < p2->lp_txcredits)
- return -1;
-
- } else if (p2->lp_txcredits > 0) {
- return -1;
- }
-
+ if (p1->lp_txqnob < p2->lp_txqnob)
+ return 1;
+
if (p1->lp_txqnob > p2->lp_txqnob)
+ return -1;
+
+ if (p1->lp_txcredits > p2->lp_txcredits)
return 1;
- if (p1->lp_txqnob < p2->lp_txqnob)
+ if (p1->lp_txcredits < p2->lp_txcredits)
return -1;
return 0;
if (!strcmp(forwarding, "")) {
/* not set either way */
+ forwarding = im_a_router ? "enabled(implicit)" : "disabled(default)";
if (!im_a_router)
return 0;
} else if (!strcmp(forwarding, "disabled")) {
}
nid = u_getgmnid(name, get_local_id);
- printf("%u\n", nid);
+ printf("0x%x\n", nid);
exit(0);
}