ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
* bug fixes
+Severity : major
+Bugzilla : 12014
+Description: ASSERTION failures when upgrading to the patchless zero-copy
+ socklnd
+Details : This bug affects "rolling upgrades", causing an inconsistent
+ protocol version negotiation and subsequent assertion failure
+ during rolling upgrades after the first wave of upgrades.
+
Severity : minor
Bugzilla : 11223
Details : Change "dropped message" CERRORs to D_NETERROR so they are
route->ksnr_deleted = 0;
route->ksnr_conn_count = 0;
route->ksnr_share_count = 0;
- route->ksnr_proto = &ksocknal_protocol_v2x;
return (route);
}
peer->ksnp_closing = 0;
peer->ksnp_accepting = 0;
peer->ksnp_zc_next_cookie = 1;
+ peer->ksnp_proto = NULL;
CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
}
int
+ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
+{
+ ksock_route_t *route;
+ /* Report whether 'peer' has a route to 'ipaddr' with a connection
+ * attempt in progress: returns that route's ksnr_connecting value,
+ * or 0 when no route to 'ipaddr' exists.
+ * NOTE(review): walks peer->ksnp_routes without taking a lock here;
+ * presumably the caller holds the global lock - confirm. */
+ list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) {
+ /* the first route with a matching address decides the answer */
+ if (route->ksnr_ipaddr == ipaddr)
+ return route->ksnr_connecting;
+ }
+ return 0;
+}
+
+int
ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
cfs_socket_t *sock, int type)
{
active = (route != NULL);
LASSERT (active == (type != SOCKLND_CONN_NONE));
- LASSERT (route == NULL || route->ksnr_proto != NULL);
irq = ksocknal_lib_sock_irq (sock);
atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock);
-
conn->ksnc_rx_ready = 0;
conn->ksnc_rx_scheduled = 0;
* eagerly */
if (active) {
- LASSERT(ni == route->ksnr_peer->ksnp_ni);
+ peer = route->ksnr_peer;
+ LASSERT(ni == peer->ksnp_ni);
/* Active connection sends HELLO eagerly */
hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
- peerid = route->ksnr_peer->ksnp_id;
- conn->ksnc_proto = route->ksnr_proto;
+ peerid = peer->ksnp_id;
+
+ write_lock_bh(global_lock);
+ conn->ksnc_proto = peer->ksnp_proto;
+ write_unlock_bh(global_lock);
+
+ if (conn->ksnc_proto == NULL) {
+ conn->ksnc_proto = &ksocknal_protocol_v2x;
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol != 2)
+ conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
+ }
rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
if (rc != 0)
}
rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
- if (rc < 0) {
- if (rc == -EALREADY) {
- /* only active connection loses conn race */
- LASSERT (active);
-
- CDEBUG(D_NET, "Lost connection race with %s\n",
- libcfs_id2str(peerid));
- /* Not an actual failure: return +ve RC so active
- * connector can back off */
- rc = EALREADY;
- }
+ if (rc < 0)
goto failed_1;
- }
- if (active && route->ksnr_proto != conn->ksnc_proto) {
- /* Active connecting, and different protocol is returned */
- CDEBUG(D_NET, "Connecting by %d.x protocol is rejected,"
- " compatible version %d.x found.\n",
- route->ksnr_proto->pro_version,
- conn->ksnc_proto->pro_version);
- /* Not an actual failure: return +ve RC so active
- * connector can back off */
- rc = EPROTO;
-
- /* Retry with peer's protocol later */
- route->ksnr_proto = conn->ksnc_proto;
-
- goto failed_1;
- }
-
+ LASSERT (rc == 0 || active);
+ LASSERT (conn->ksnc_proto != NULL);
LASSERT (peerid.nid != LNET_NID_ANY);
if (active) {
- peer = route->ksnr_peer;
ksocknal_peer_addref(peer);
-
- /* additional routes after interface exchange? */
- ksocknal_create_routes(peer, conn->ksnc_port,
- hello->kshm_ips, hello->kshm_nips);
-
- /* setup the socket AFTER I've received hello (it disables
- * SO_LINGER). I might call back to the acceptor who may want
- * to send a protocol version response and then close the
- * socket; this ensures the socket only tears down after the
- * response has been sent. */
- rc = ksocknal_lib_setup_sock(sock);
-
write_lock_bh (global_lock);
-
- if (rc != 0)
- goto failed_2;
} else {
rc = ksocknal_create_peer(&peer, ni, peerid);
if (rc != 0)
/* Am I already connecting to this guy? Resolve in
* favour of higher NID... */
- rc = 0;
- if (peerid.nid < ni->ni_nid) {
- list_for_each(tmp, &peer->ksnp_routes) {
- route = list_entry(tmp, ksock_route_t,
- ksnr_list);
-
- if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
- continue;
-
- if (route->ksnr_connecting) {
- rc = EALREADY; /* not a failure */
- warn = "connection race";
- }
-
- break;
- }
- }
- route = NULL;
-
- write_unlock_bh (global_lock);
-
- if (rc != 0) {
- /* set CONN_NONE makes returned HELLO acknowledge I
- * lost a connection race */
- conn->ksnc_type = SOCKLND_CONN_NONE;
- hello->kshm_nips = 0;
- ksocknal_send_hello(ni, conn, peerid.nid, hello);
- } else {
- hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
- hello->kshm_nips);
- rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
-
- /* Setup the socket (it disables SO_LINGER). I don't
- * do it if I'm sending a negative response to ensure
- * the response isn't discarded when I close the socket
- * immediately after sending it. */
- if (rc == 0)
- rc = ksocknal_lib_setup_sock(sock);
- }
-
- write_lock_bh (global_lock);
- peer->ksnp_accepting--;
-
- if (rc != 0)
+ if (peerid.nid < ni->ni_nid &&
+ ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
+ rc = EALREADY;
+ warn = "connection race resolution";
goto failed_2;
+ }
}
if (peer->ksnp_closing ||
goto failed_2;
}
+ if (peer->ksnp_proto == NULL) {
+ /* Never connected before.
+ * NB recv_hello may have returned EPROTO to signal my peer
+ * wants a different protocol than the one I asked for.
+ */
+ LASSERT (list_empty(&peer->ksnp_conns));
+
+ peer->ksnp_proto = conn->ksnc_proto;
+ peer->ksnp_incarnation = incarnation;
+ }
+
+ if (peer->ksnp_proto != conn->ksnc_proto ||
+ peer->ksnp_incarnation != incarnation) {
+ /* Peer rebooted or I've got the wrong protocol version */
+ ksocknal_close_peer_conns_locked(peer, 0, 0);
+
+ peer->ksnp_proto = NULL;
+ rc = ESTALE;
+ warn = peer->ksnp_incarnation != incarnation ?
+ "peer rebooted" :
+ "wrong proto version";
+ goto failed_2;
+ }
+
+ switch (rc) {
+ default:
+ LBUG();
+ case 0:
+ break;
+ case EALREADY:
+ warn = "lost conn race";
+ goto failed_2;
+ case EPROTO:
+ warn = "retry with different protocol version";
+ goto failed_2;
+ }
+
/* Refuse to duplicate an existing connection, unless this is a
* loopback connection */
if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
- conn2->ksnc_type != conn->ksnc_type ||
- conn2->ksnc_incarnation != incarnation)
+ conn2->ksnc_type != conn->ksnc_type)
continue;
- rc = 0; /* more of a NOOP than a failure */
+ /* Reply on a passive connection attempt so the peer
+ * realises we're connected. */
+ LASSERT (rc == 0);
+ if (!active)
+ rc = EALREADY;
+
warn = "duplicate";
goto failed_2;
}
}
conn->ksnc_peer = peer; /* conn takes my ref on peer */
- conn->ksnc_incarnation = incarnation;
peer->ksnp_last_alive = cfs_time_current();
peer->ksnp_error = 0;
ksocknal_new_packet(conn, 0);
- /* NB my callbacks block while I hold ksnd_global_lock */
- ksocknal_lib_set_callback(sock, conn);
-
/* Take all the packets blocking for a connection.
* NB, it might be nicer to share these blocked packets among any
* other connections that are becoming established. */
ksocknal_queue_tx_locked (tx, conn);
}
- rc = ksocknal_close_stale_conns_locked(peer, incarnation);
write_unlock_bh (global_lock);
- if (rc != 0)
- CDEBUG(D_NET, "Closed %d stale conns to %s ip %d.%d.%d.%d\n",
- rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
- HIPQUAD(conn->ksnc_ipaddr));
+ /* We've now got a new connection. Any errors from here on are just
+ * like "normal" comms errors and we close the connection normally.
+ * NB (a) we still have to send the reply HELLO for passive
+ * connections,
+ * (b) normal I/O on the conn is blocked until I setup and call the
+ * socket callbacks.
+ */
ksocknal_lib_bind_irq (irq);
- /* Call the callbacks right now to get things going. */
- if (ksocknal_connsock_addref(conn) == 0) {
- ksocknal_read_callback(conn);
- ksocknal_write_callback(conn);
- ksocknal_connsock_decref(conn);
- }
-
- CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d"
+ CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
" incarnation:"LPD64" sched[%d]/%d\n",
- libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr),
- HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
+ libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
+ HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port, incarnation,
(int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
+ if (active) {
+ /* additional routes after interface exchange? */
+ ksocknal_create_routes(peer, conn->ksnc_port,
+ hello->kshm_ips, hello->kshm_nips);
+ } else {
+ hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
+ hello->kshm_nips);
+ rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
+ }
+
LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
kshm_ips[LNET_MAX_INTERFACES]));
+ /* setup the socket AFTER I've received hello (it disables
+ * SO_LINGER). I might call back to the acceptor who may want
+ * to send a protocol version response and then close the
+ * socket; this ensures the socket only tears down after the
+ * response has been sent. */
+ if (rc == 0)
+ rc = ksocknal_lib_setup_sock(sock);
+
+ write_lock_bh(global_lock);
+
+ /* NB my callbacks block while I hold ksnd_global_lock */
+ ksocknal_lib_set_callback(sock, conn);
+
+ if (!active)
+ peer->ksnp_accepting--;
+
+ write_unlock_bh(global_lock);
+
+ if (rc != 0) {
+ write_lock_bh(global_lock);
+ ksocknal_close_conn_locked(conn, rc);
+ write_unlock_bh(global_lock);
+ } else if (ksocknal_connsock_addref(conn) == 0) {
+ /* Allow I/O to proceed. */
+ ksocknal_read_callback(conn);
+ ksocknal_write_callback(conn);
+ ksocknal_connsock_decref(conn);
+ }
+
ksocknal_conn_decref(conn);
- return (0);
+ return rc;
failed_2:
if (!peer->ksnp_closing &&
libcfs_id2str(peerid), conn->ksnc_type, warn);
}
+ if (!active) {
+ if (rc > 0) {
+ /* Request retry by replying with CONN_NONE
+ * ksnc_proto has been set already */
+ conn->ksnc_type = SOCKLND_CONN_NONE;
+ hello->kshm_nips = 0;
+ ksocknal_send_hello(ni, conn, peerid.nid, hello);
+ }
+
+ write_lock_bh(global_lock);
+ peer->ksnp_accepting--;
+ write_unlock_bh(global_lock);
+ }
+
ksocknal_txlist_done(ni, &zombies, 1);
ksocknal_peer_decref(peer);
if (list_empty (&peer->ksnp_conns)) {
/* No more connections to this peer */
+ peer->ksnp_proto = NULL; /* renegotiate protocol version */
peer->ksnp_error = error; /* stash last conn close reason */
if (list_empty (&peer->ksnp_routes)) {
}
int
-ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
-{
- ksock_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
- conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
-
- if (conn->ksnc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d "
- "incarnation:"LPD64"("LPD64")\n",
- libcfs_id2str(peer->ksnp_id),
- conn->ksnc_ipaddr, conn->ksnc_port,
- conn->ksnc_incarnation, incarnation);
-
- count++;
- ksocknal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
{
ksock_peer_t *peer = conn->ksnc_peer;
#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
+#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */
+
/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
* no risk if we're not running on a CONFIG_HIGHMEM platform. */
#ifdef CONFIG_HIGHMEM
int *ksnd_backoff_init; /* initial TCP backoff */
int *ksnd_backoff_max; /* maximum TCP backoff */
#endif
+#if SOCKNAL_VERSION_DEBUG
+ int *ksnd_protocol; /* protocol version */
+#endif
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */
#endif
struct ksock_conn; /* forward ref */
struct ksock_peer; /* forward ref */
struct ksock_route; /* forward ref */
-struct ksock_protocol; /* forward ref */
+struct ksock_proto; /* forward ref */
typedef struct /* transmit packet */
{
int ksnc_closing:1; /* being shut down */
int ksnc_flip:1; /* flip or not, only for V2.x */
int ksnc_zc_capable:1; /* enable to ZC */
- __u64 ksnc_incarnation; /* peer's incarnation */
+ struct ksock_proto *ksnc_proto; /* protocol for the connection */
/* reader */
struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
atomic_t ksnc_tx_nob; /* # bytes queued */
int ksnc_tx_ready; /* write space */
int ksnc_tx_scheduled; /* being progressed */
-
- struct ksock_protocol *ksnc_proto; /* protocol table for the connection */
#if !SOCKNAL_SINGLE_FRAG_RX
struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV];
unsigned int ksnr_deleted:1; /* been removed from peer? */
unsigned int ksnr_share_count; /* created explicitly? */
int ksnr_conn_count; /* # conns established by this route */
- struct ksock_protocol *ksnr_proto ; /* protocol table for connecting */
} ksock_route_t;
typedef struct ksock_peer
int ksnp_accepting; /* # passive connections pending */
int ksnp_error; /* errno on closing last conn */
__u64 ksnp_zc_next_cookie;/* ZC completion cookie */
+ __u64 ksnp_incarnation; /* latest known peer incarnation */
+ struct ksock_proto *ksnp_proto; /* latest known peer protocol */
struct list_head ksnp_conns; /* all active connections */
struct list_head ksnp_routes; /* routes */
struct list_head ksnp_tx_queue; /* waiting packets */
extern ksock_nal_data_t ksocknal_data;
extern ksock_tunables_t ksocknal_tunables;
-typedef struct ksock_protocol
+typedef struct ksock_proto
{
int pro_version; /* version number of protocol */
int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */
int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */
void (*pro_pack)(ksock_tx_t *); /* message pack */
void (*pro_unpack)(ksock_msg_t *); /* message unpack */
-} ksock_protocol_t;
+} ksock_proto_t;
-extern ksock_protocol_t ksocknal_protocol_v1x;
-extern ksock_protocol_t ksocknal_protocol_v2x;
+extern ksock_proto_t ksocknal_protocol_v1x;
+extern ksock_proto_t ksocknal_protocol_v2x;
#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR
#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
extern void ksocknal_terminate_conn (ksock_conn_t *conn);
extern void ksocknal_destroy_conn (ksock_conn_t *conn);
-extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
+extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer,
+ __u32 ipaddr, int why);
extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
extern int ksocknal_scheduler (void *arg);
extern int ksocknal_connd (void *arg);
extern int ksocknal_reaper (void *arg);
-extern ksock_protocol_t * ksocknal_compat_protocol(ksock_hello_msg_t *);
extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
lnet_nid_t peer_nid, ksock_hello_msg_t *hello);
extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
SOCK_WMEM_QUEUED(c->ksnc_sock);
#endif
LASSERT (!c->ksnc_closing);
- LASSERT(c->ksnc_proto != NULL);
+ LASSERT (c->ksnc_proto != NULL);
if (fallback == NULL || nob < fnob) {
fallback = c;
EXIT;
}
-ksock_protocol_t *
-ksocknal_compat_protocol (ksock_hello_msg_t *hello)
+ksock_proto_t *
+ksocknal_parse_proto_version (ksock_hello_msg_t *hello)
{
if ((hello->kshm_magic == LNET_PROTO_MAGIC &&
hello->kshm_version == KSOCK_PROTO_V2) ||
(hello->kshm_magic == __swab32(LNET_PROTO_MAGIC) &&
- hello->kshm_version == __swab32(KSOCK_PROTO_V2)))
+ hello->kshm_version == __swab32(KSOCK_PROTO_V2))) {
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol != 2)
+ return NULL;
+#endif
return &ksocknal_protocol_v2x;
+ }
if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) {
lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello;
return; /* Do nothing */
}
-ksock_protocol_t ksocknal_protocol_v1x =
+ksock_proto_t ksocknal_protocol_v1x =
{
KSOCK_PROTO_V1,
ksocknal_send_hello_v1,
ksocknal_unpack_msg_v1
};
-ksock_protocol_t ksocknal_protocol_v2x =
+ksock_proto_t ksocknal_protocol_v2x =
{
KSOCK_PROTO_V2,
ksocknal_send_hello_v2,
ksock_hello_msg_t *hello, lnet_process_id_t *peerid,
__u64 *incarnation)
{
+ /* Return < 0 fatal error
+ * 0 success
+ * EALREADY lost connection race
+ * EPROTO protocol version mismatch
+ */
cfs_socket_t *sock = conn->ksnc_sock;
- int active;
+ int active = (conn->ksnc_proto != NULL);
int timeout;
- int match = 0;
+ int proto_match;
int rc;
- ksock_protocol_t *proto;
+ ksock_proto_t *proto;
lnet_process_id_t recv_id;
- active = (peerid->nid != LNET_NID_ANY);
+ /* socket type set on active connections - not set on passive */
+ LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
+
timeout = active ? *ksocknal_tunables.ksnd_timeout :
lnet_acceptor_timeout();
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
+ LASSERT (rc < 0);
return rc;
}
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
+ LASSERT (rc < 0);
return rc;
}
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
- LASSERT (rc < 0 && rc != -EALREADY);
+ LASSERT (rc < 0);
return rc;
}
- proto = ksocknal_compat_protocol(hello);
+ proto = ksocknal_parse_proto_version(hello);
if (proto == NULL) {
if (!active) {
/* unknown protocol from peer, tell peer my protocol */
conn->ksnc_proto = &ksocknal_protocol_v2x;
+#if SOCKNAL_VERSION_DEBUG
+ if (*ksocknal_tunables.ksnd_protocol != 2)
+ conn->ksnc_proto = &ksocknal_protocol_v1x;
+#endif
hello->kshm_nips = 0;
ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
}
return -EPROTO;
}
- if (conn->ksnc_proto == proto)
- match = 1;
-
+ proto_match = (conn->ksnc_proto == proto);
conn->ksnc_proto = proto;
/* receive the rest of hello message anyway */
if (rc != 0) {
CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0);
return rc;
}
+ *incarnation = hello->kshm_src_incarnation;
+
if (hello->kshm_src_nid == LNET_NID_ANY) {
CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY"
"from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
}
- if (!active) { /* don't know peer's nid yet */
+ if (!active) {
*peerid = recv_id;
- } else if (peerid->pid != recv_id.pid ||
- !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) {
+
+ /* peer determines type */
+ conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
+ if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+ CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
+ hello->kshm_ctype, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr));
+ return -EPROTO;
+ }
+
+ return 0;
+ }
+
+ if (peerid->pid != recv_id.pid ||
+ !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) {
LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host"
" %u.%u.%u.%u, but they claimed they were "
"%s; please check your Lustre "
return -EPROTO;
}
- if (conn->ksnc_type == SOCKLND_CONN_NONE) {
- /* I've accepted this connection; peer determines type */
- conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
- if (conn->ksnc_type == SOCKLND_CONN_NONE) {
- CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
- hello->kshm_ctype, libcfs_id2str(*peerid),
- HIPQUAD(conn->ksnc_ipaddr));
- return -EPROTO;
- }
- } else if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
- if (match) {
- /* lost a connection race */
- return -EALREADY;
- }
- /* unmatched protocol get SOCKLND_CONN_NONE anyway */
- } else if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
+ if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
+ /* Possible protocol mismatch or I lost the connection race */
+ return proto_match ? EALREADY : EPROTO;
+ }
+
+ if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
conn->ksnc_type, libcfs_id2str(*peerid),
HIPQUAD(conn->ksnc_ipaddr),
return -EPROTO;
}
- *incarnation = hello->kshm_src_incarnation;
-
return 0;
}
goto failed;
rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
-
if (rc < 0) {
lnet_connect_console_error(rc, peer->ksnp_id.nid,
route->ksnr_ipaddr,
goto failed;
}
- /* rc == EALREADY means I lost a connection race and my
- * peer is connecting to me.
- * rc == EPROTO means my peer is speaking an older
- * protocol version. */
- LASSERT (rc == 0 || rc == EALREADY || rc == EPROTO);
-
- retry_later = rc != 0;
+ /* A +ve RC means I have to retry because I lost the connection
+ * race or I have to renegotiate protocol version */
+ retry_later = (rc != 0);
if (retry_later)
CDEBUG(D_NET, "peer %s: conn race, retry later.\n",
libcfs_nid2str(peer->ksnp_id.nid));
int i = 0;
int j = 1;
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "timeout",
- .data = ksocknal_tunables.ksnd_timeout,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "credits",
- .data = ksocknal_tunables.ksnd_credits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "peer_credits",
- .data = ksocknal_tunables.ksnd_peercredits,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "nconnds",
- .data = ksocknal_tunables.ksnd_nconnds,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "min_reconnectms",
- .data = ksocknal_tunables.ksnd_min_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "max_reconnectms",
- .data = ksocknal_tunables.ksnd_max_reconnectms,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "eager_ack",
- .data = ksocknal_tunables.ksnd_eager_ack,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "zero_copy",
- .data = ksocknal_tunables.ksnd_zc_min_frag,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "typed",
- .data = ksocknal_tunables.ksnd_typed_conns,
- .maxlen = sizeof (int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "min_bulk",
- .data = ksocknal_tunables.ksnd_min_bulk,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "rx_buffer_size",
- .data = ksocknal_tunables.ksnd_rx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "tx_buffer_size",
- .data = ksocknal_tunables.ksnd_tx_buffer_size,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "nagle",
- .data = ksocknal_tunables.ksnd_nagle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "timeout",
+ .data = ksocknal_tunables.ksnd_timeout,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "credits",
+ .data = ksocknal_tunables.ksnd_credits,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "peer_credits",
+ .data = ksocknal_tunables.ksnd_peercredits,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "nconnds",
+ .data = ksocknal_tunables.ksnd_nconnds,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "min_reconnectms",
+ .data = ksocknal_tunables.ksnd_min_reconnectms,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "max_reconnectms",
+ .data = ksocknal_tunables.ksnd_max_reconnectms,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "eager_ack",
+ .data = ksocknal_tunables.ksnd_eager_ack,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "zero_copy",
+ .data = ksocknal_tunables.ksnd_zc_min_frag,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "typed",
+ .data = ksocknal_tunables.ksnd_typed_conns,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "min_bulk",
+ .data = ksocknal_tunables.ksnd_min_bulk,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "rx_buffer_size",
+ .data = ksocknal_tunables.ksnd_rx_buffer_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "tx_buffer_size",
+ .data = ksocknal_tunables.ksnd_tx_buffer_size,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "nagle",
+ .data = ksocknal_tunables.ksnd_nagle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
#if CPU_AFFINITY
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "irq_affinity",
- .data = ksocknal_tunables.ksnd_irq_affinity,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "irq_affinity",
+ .data = ksocknal_tunables.ksnd_irq_affinity,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
#endif
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "keepalive_idle",
- .data = ksocknal_tunables.ksnd_keepalive_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "keepalive_count",
- .data = ksocknal_tunables.ksnd_keepalive_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "keepalive_intvl",
- .data = ksocknal_tunables.ksnd_keepalive_intvl,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "keepalive_idle",
+ .data = ksocknal_tunables.ksnd_keepalive_idle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "keepalive_count",
+ .data = ksocknal_tunables.ksnd_keepalive_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "keepalive_intvl",
+ .data = ksocknal_tunables.ksnd_keepalive_intvl,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
#ifdef SOCKNAL_BACKOFF
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "backoff_init",
- .data = ksocknal_tunables.ksnd_backoff_init,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
- ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
- {
- .ctl_name = j++,
- .procname = "backoff_max",
- .data = ksocknal_tunables.ksnd_backoff_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "backoff_init",
+ .data = ksocknal_tunables.ksnd_backoff_init,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "backoff_max",
+ .data = ksocknal_tunables.ksnd_backoff_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+#endif
+#if SOCKNAL_VERSION_DEBUG
+ /* Debug-only entry exposing the "protocol" tunable (mode 0644).
+ * The size field is .maxlen, as in every other entry of this table. */
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "protocol",
+ .data = ksocknal_tunables.ksnd_protocol,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
#endif
-
LASSERT (j == i+1);
LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
if (mode) { /* transmission can continue ... */
+#error "This is out of date - we should be calling ksocknal_write_callback()"
conn->ksnc_tx_ready = 1;
if (tx) {
"seconds for maximum tcp backoff");
#endif
+#if SOCKNAL_VERSION_DEBUG
+static int protocol = 2;
+CFS_MODULE_PARM(protocol, "i", int, 0644,
+ "protocol version");
+#endif
+
ksock_tunables_t ksocknal_tunables = {
.ksnd_timeout = &sock_timeout,
.ksnd_credits = &credits,
.ksnd_backoff_init = &backoff_init,
.ksnd_backoff_max = &backoff_max,
#endif
+#if SOCKNAL_VERSION_DEBUG
+ .ksnd_protocol = &protocol,
+#endif
};