From d66fdcebffdfdc36962c93b33f6b511d5492058f Mon Sep 17 00:00:00 2001 From: eeb Date: Mon, 6 Aug 2007 11:46:18 +0000 Subject: [PATCH] * Landing 12014: ASSERTION failures when upgrading to the patchless zero-copy * fixed some initialiser struct formatting in socklnd_lib-linux.c --- lnet/ChangeLog | 8 + lnet/klnds/socklnd/socklnd.c | 291 ++++++++++++++-------------- lnet/klnds/socklnd/socklnd.h | 26 +-- lnet/klnds/socklnd/socklnd_cb.c | 105 ++++++----- lnet/klnds/socklnd/socklnd_lib-linux.c | 334 ++++++++++++++++----------------- lnet/klnds/socklnd/socklnd_lib-winnt.c | 1 + lnet/klnds/socklnd/socklnd_modparams.c | 9 + 7 files changed, 401 insertions(+), 373 deletions(-) diff --git a/lnet/ChangeLog b/lnet/ChangeLog index a28de69..5906642 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -27,6 +27,14 @@ ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x * bug fixes +Severity : major +Bugzilla : 12014 +Description: ASSERTION failures when upgrading to the patchless zero-copy + socklnd +Details : This bug affects "rolling upgrades", causing an inconsistent + protocol version negotiation and subsequent assertion failure + during rolling upgrades after the first wave of upgrades. 
+ Severity : minor Bugzilla : 11223 Details : Change "dropped message" CERRORs to D_NETERROR so they are diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 7967bda..07bbf95 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -76,7 +76,6 @@ ksocknal_create_route (__u32 ipaddr, int port) route->ksnr_deleted = 0; route->ksnr_conn_count = 0; route->ksnr_share_count = 0; - route->ksnr_proto = &ksocknal_protocol_v2x; return (route); } @@ -114,6 +113,7 @@ ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) peer->ksnp_closing = 0; peer->ksnp_accepting = 0; peer->ksnp_zc_next_cookie = 1; + peer->ksnp_proto = NULL; CFS_INIT_LIST_HEAD (&peer->ksnp_conns); CFS_INIT_LIST_HEAD (&peer->ksnp_routes); CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue); @@ -985,6 +985,19 @@ ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock) } int +ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr) +{ + ksock_route_t *route; + + list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) { + + if (route->ksnr_ipaddr == ipaddr) + return route->ksnr_connecting; + } + return 0; +} + +int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, cfs_socket_t *sock, int type) { @@ -1008,7 +1021,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, active = (route != NULL); LASSERT (active == (type != SOCKLND_CONN_NONE)); - LASSERT (route == NULL || route->ksnr_proto != NULL); irq = ksocknal_lib_sock_irq (sock); @@ -1028,7 +1040,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock); - conn->ksnc_rx_ready = 0; conn->ksnc_rx_scheduled = 0; @@ -1056,12 +1067,24 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, * eagerly */ if (active) { - LASSERT(ni == route->ksnr_peer->ksnp_ni); + peer = route->ksnr_peer; + LASSERT(ni == peer->ksnp_ni); /* Active connection sends HELLO eagerly */ 
hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = route->ksnr_peer->ksnp_id; - conn->ksnc_proto = route->ksnr_proto; + peerid = peer->ksnp_id; + + write_lock_bh(global_lock); + conn->ksnc_proto = peer->ksnp_proto; + write_unlock_bh(global_lock); + + if (conn->ksnc_proto == NULL) { + conn->ksnc_proto = &ksocknal_protocol_v2x; +#if SOCKNAL_VERSION_DEBUG + if (*ksocknal_tunables.ksnd_protocol != 2) + conn->ksnc_proto = &ksocknal_protocol_v1x; +#endif + } rc = ksocknal_send_hello (ni, conn, peerid.nid, hello); if (rc != 0) @@ -1075,57 +1098,16 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation); - if (rc < 0) { - if (rc == -EALREADY) { - /* only active connection loses conn race */ - LASSERT (active); - - CDEBUG(D_NET, "Lost connection race with %s\n", - libcfs_id2str(peerid)); - /* Not an actual failure: return +ve RC so active - * connector can back off */ - rc = EALREADY; - } + if (rc < 0) goto failed_1; - } - if (active && route->ksnr_proto != conn->ksnc_proto) { - /* Active connecting, and different protocol is returned */ - CDEBUG(D_NET, "Connecting by %d.x protocol is rejected," - " compatible version %d.x found.\n", - route->ksnr_proto->pro_version, - conn->ksnc_proto->pro_version); - /* Not an actual failure: return +ve RC so active - * connector can back off */ - rc = EPROTO; - - /* Retry with peer's protocol later */ - route->ksnr_proto = conn->ksnc_proto; - - goto failed_1; - } - + LASSERT (rc == 0 || active); + LASSERT (conn->ksnc_proto != NULL); LASSERT (peerid.nid != LNET_NID_ANY); if (active) { - peer = route->ksnr_peer; ksocknal_peer_addref(peer); - - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - - /* setup the socket AFTER I've received hello (it disables - * SO_LINGER). 
I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. */ - rc = ksocknal_lib_setup_sock(sock); - write_lock_bh (global_lock); - - if (rc != 0) - goto failed_2; } else { rc = ksocknal_create_peer(&peer, ni, peerid); if (rc != 0) @@ -1150,51 +1132,12 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, /* Am I already connecting to this guy? Resolve in * favour of higher NID... */ - rc = 0; - if (peerid.nid < ni->ni_nid) { - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, - ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - if (route->ksnr_connecting) { - rc = EALREADY; /* not a failure */ - warn = "connection race"; - } - - break; - } - } - route = NULL; - - write_unlock_bh (global_lock); - - if (rc != 0) { - /* set CONN_NONE makes returned HELLO acknowledge I - * lost a connection race */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - - /* Setup the socket (it disables SO_LINGER). I don't - * do it if I'm sending a negative response to ensure - * the response isn't discarded when I close the socket - * immediately after sending it. */ - if (rc == 0) - rc = ksocknal_lib_setup_sock(sock); - } - - write_lock_bh (global_lock); - peer->ksnp_accepting--; - - if (rc != 0) + if (peerid.nid < ni->ni_nid && + ksocknal_connecting(peer, conn->ksnc_ipaddr)) { + rc = EALREADY; + warn = "connection race resolution"; goto failed_2; + } } if (peer->ksnp_closing || @@ -1205,6 +1148,43 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, goto failed_2; } + if (peer->ksnp_proto == NULL) { + /* Never connected before. 
+ * NB recv_hello may have returned EPROTO to signal my peer + * wants a different protocol than the one I asked for. + */ + LASSERT (list_empty(&peer->ksnp_conns)); + + peer->ksnp_proto = conn->ksnc_proto; + peer->ksnp_incarnation = incarnation; + } + + if (peer->ksnp_proto != conn->ksnc_proto || + peer->ksnp_incarnation != incarnation) { + /* Peer rebooted or I've got the wrong protocol version */ + ksocknal_close_peer_conns_locked(peer, 0, 0); + + peer->ksnp_proto = NULL; + rc = ESTALE; + warn = peer->ksnp_incarnation != incarnation ? + "peer rebooted" : + "wrong proto version"; + goto failed_2; + } + + switch (rc) { + default: + LBUG(); + case 0: + break; + case EALREADY: + warn = "lost conn race"; + goto failed_2; + case EPROTO: + warn = "retry with different protocol version"; + goto failed_2; + } + /* Refuse to duplicate an existing connection, unless this is a * loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { @@ -1213,11 +1193,15 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type || - conn2->ksnc_incarnation != incarnation) + conn2->ksnc_type != conn->ksnc_type) continue; - rc = 0; /* more of a NOOP than a failure */ + /* Reply on a passive connection attempt so the peer + * realises we're connected. */ + LASSERT (rc == 0); + if (!active) + rc = EALREADY; + warn = "duplicate"; goto failed_2; } @@ -1249,7 +1233,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } conn->ksnc_peer = peer; /* conn takes my ref on peer */ - conn->ksnc_incarnation = incarnation; peer->ksnp_last_alive = cfs_time_current(); peer->ksnp_error = 0; @@ -1267,9 +1250,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_new_packet(conn, 0); - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - /* Take all the packets blocking for a connection. 
* NB, it might be nicer to share these blocked packets among any * other connections that are becoming established. */ @@ -1281,34 +1261,69 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_queue_tx_locked (tx, conn); } - rc = ksocknal_close_stale_conns_locked(peer, incarnation); write_unlock_bh (global_lock); - if (rc != 0) - CDEBUG(D_NET, "Closed %d stale conns to %s ip %d.%d.%d.%d\n", - rc, libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr)); + /* We've now got a new connection. Any errors from here on are just + * like "normal" comms errors and we close the connection normally. + * NB (a) we still have to send the reply HELLO for passive + * connections, + * (b) normal I/O on the conn is blocked until I setup and call the + * socket callbacks. + */ ksocknal_lib_bind_irq (irq); - /* Call the callbacks right now to get things going. */ - if (ksocknal_connsock_addref(conn) == 0) { - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } - - CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d" + CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d" " incarnation:"LPD64" sched[%d]/%d\n", - libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, + libcfs_id2str(peerid), conn->ksnc_proto->pro_version, + HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), + conn->ksnc_port, incarnation, (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); + if (active) { + /* additional routes after interface exchange? 
*/ + ksocknal_create_routes(peer, conn->ksnc_port, + hello->kshm_ips, hello->kshm_nips); + } else { + hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, + hello->kshm_nips); + rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + } + LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, kshm_ips[LNET_MAX_INTERFACES])); + /* setup the socket AFTER I've received hello (it disables + * SO_LINGER). I might call back to the acceptor who may want + * to send a protocol version response and then close the + * socket; this ensures the socket only tears down after the + * response has been sent. */ + if (rc == 0) + rc = ksocknal_lib_setup_sock(sock); + + write_lock_bh(global_lock); + + /* NB my callbacks block while I hold ksnd_global_lock */ + ksocknal_lib_set_callback(sock, conn); + + if (!active) + peer->ksnp_accepting--; + + write_unlock_bh(global_lock); + + if (rc != 0) { + write_lock_bh(global_lock); + ksocknal_close_conn_locked(conn, rc); + write_unlock_bh(global_lock); + } else if (ksocknal_connsock_addref(conn) == 0) { + /* Allow I/O to proceed. 
*/ + ksocknal_read_callback(conn); + ksocknal_write_callback(conn); + ksocknal_connsock_decref(conn); + } + ksocknal_conn_decref(conn); - return (0); + return rc; failed_2: if (!peer->ksnp_closing && @@ -1330,6 +1345,20 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, libcfs_id2str(peerid), conn->ksnc_type, warn); } + if (!active) { + if (rc > 0) { + /* Request retry by replying with CONN_NONE + * ksnc_proto has been set already */ + conn->ksnc_type = SOCKLND_CONN_NONE; + hello->kshm_nips = 0; + ksocknal_send_hello(ni, conn, peerid.nid, hello); + } + + write_lock_bh(global_lock); + peer->ksnp_accepting--; + write_unlock_bh(global_lock); + } + ksocknal_txlist_done(ni, &zombies, 1); ksocknal_peer_decref(peer); @@ -1394,6 +1423,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) if (list_empty (&peer->ksnp_conns)) { /* No more connections to this peer */ + peer->ksnp_proto = NULL; /* renegotiate protocol version */ peer->ksnp_error = error; /* stash last conn close reason */ if (list_empty (&peer->ksnp_routes)) { @@ -1621,33 +1651,6 @@ ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) } int -ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) -{ - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d " - "incarnation:"LPD64"("LPD64")\n", - libcfs_id2str(peer->ksnp_id), - conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_incarnation, incarnation); - - count++; - ksocknal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) { ksock_peer_t *peer = conn->ksnc_peer; diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index efc35d3..825a2af 100644 --- 
a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -54,6 +54,8 @@ #define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ #define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ +#define SOCKNAL_VERSION_DEBUG 0 /* enable protocol version debugging */ + /* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). * no risk if we're not running on a CONFIG_HIGHMEM platform. */ #ifdef CONFIG_HIGHMEM @@ -115,6 +117,9 @@ typedef struct int *ksnd_backoff_init; /* initial TCP backoff */ int *ksnd_backoff_max; /* maximum TCP backoff */ #endif +#if SOCKNAL_VERSION_DEBUG + int *ksnd_protocol; /* protocol version */ +#endif #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */ #endif @@ -186,7 +191,7 @@ typedef struct struct ksock_conn; /* forward ref */ struct ksock_peer; /* forward ref */ struct ksock_route; /* forward ref */ -struct ksock_protocol; /* forward ref */ +struct ksock_proto; /* forward ref */ typedef struct /* transmit packet */ { @@ -251,7 +256,7 @@ typedef struct ksock_conn int ksnc_closing:1; /* being shut down */ int ksnc_flip:1; /* flip or not, only for V2.x */ int ksnc_zc_capable:1; /* enable to ZC */ - __u64 ksnc_incarnation; /* peer's incarnation */ + struct ksock_proto *ksnc_proto; /* protocol for the connection */ /* reader */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ @@ -285,8 +290,6 @@ typedef struct ksock_conn atomic_t ksnc_tx_nob; /* # bytes queued */ int ksnc_tx_ready; /* write space */ int ksnc_tx_scheduled; /* being progressed */ - - struct ksock_protocol *ksnc_proto; /* protocol table for the connection */ #if !SOCKNAL_SINGLE_FRAG_RX struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV]; @@ -313,7 +316,6 @@ typedef struct ksock_route unsigned int ksnr_deleted:1; /* been removed from peer? */ unsigned int ksnr_share_count; /* created explicitly? 
*/ int ksnr_conn_count; /* # conns established by this route */ - struct ksock_protocol *ksnr_proto ; /* protocol table for connecting */ } ksock_route_t; typedef struct ksock_peer @@ -326,6 +328,8 @@ typedef struct ksock_peer int ksnp_accepting; /* # passive connections pending */ int ksnp_error; /* errno on closing last conn */ __u64 ksnp_zc_next_cookie;/* ZC completion cookie */ + __u64 ksnp_incarnation; /* latest known peer incarnation */ + struct ksock_proto *ksnp_proto; /* latest known peer protocol */ struct list_head ksnp_conns; /* all active connections */ struct list_head ksnp_routes; /* routes */ struct list_head ksnp_tx_queue; /* waiting packets */ @@ -347,17 +351,17 @@ typedef struct ksock_connreq extern ksock_nal_data_t ksocknal_data; extern ksock_tunables_t ksocknal_tunables; -typedef struct ksock_protocol +typedef struct ksock_proto { int pro_version; /* version number of protocol */ int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */ int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */ void (*pro_pack)(ksock_tx_t *); /* message pack */ void (*pro_unpack)(ksock_msg_t *); /* message unpack */ -} ksock_protocol_t; +} ksock_proto_t; -extern ksock_protocol_t ksocknal_protocol_v1x; -extern ksock_protocol_t ksocknal_protocol_v2x; +extern ksock_proto_t ksocknal_protocol_v1x; +extern ksock_proto_t ksocknal_protocol_v2x; #define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR #define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR @@ -497,7 +501,8 @@ extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); -extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); +extern int ksocknal_close_peer_conns_locked (ksock_peer_t *peer, + __u32 ipaddr, int why); extern int 
ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr); @@ -511,7 +516,6 @@ extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); extern int ksocknal_scheduler (void *arg); extern int ksocknal_connd (void *arg); extern int ksocknal_reaper (void *arg); -extern ksock_protocol_t * ksocknal_compat_protocol(ksock_hello_msg_t *); extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, lnet_nid_t peer_nid, ksock_hello_msg_t *hello); extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 507b719..71d08ae 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -613,7 +613,7 @@ ksocknal_find_conn_locked (int payload_nob, ksock_peer_t *peer) SOCK_WMEM_QUEUED(c->ksnc_sock); #endif LASSERT (!c->ksnc_closing); - LASSERT(c->ksnc_proto != NULL); + LASSERT (c->ksnc_proto != NULL); if (fallback == NULL || nob < fnob) { fallback = c; @@ -1739,14 +1739,19 @@ void ksocknal_write_callback (ksock_conn_t *conn) EXIT; } -ksock_protocol_t * -ksocknal_compat_protocol (ksock_hello_msg_t *hello) +ksock_proto_t * +ksocknal_parse_proto_version (ksock_hello_msg_t *hello) { if ((hello->kshm_magic == LNET_PROTO_MAGIC && hello->kshm_version == KSOCK_PROTO_V2) || (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC) && - hello->kshm_version == __swab32(KSOCK_PROTO_V2))) + hello->kshm_version == __swab32(KSOCK_PROTO_V2))) { +#if SOCKNAL_VERSION_DEBUG + if (*ksocknal_tunables.ksnd_protocol != 2) + return NULL; +#endif return &ksocknal_protocol_v2x; + } if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello; @@ -2071,7 +2076,7 @@ ksocknal_unpack_msg_v2(ksock_msg_t *msg) return; /* Do nothing */ } -ksock_protocol_t ksocknal_protocol_v1x = +ksock_proto_t ksocknal_protocol_v1x = { KSOCK_PROTO_V1, ksocknal_send_hello_v1, 
@@ -2080,7 +2085,7 @@ ksock_protocol_t ksocknal_protocol_v1x = ksocknal_unpack_msg_v1 }; -ksock_protocol_t ksocknal_protocol_v2x = +ksock_proto_t ksocknal_protocol_v2x = { KSOCK_PROTO_V2, ksocknal_send_hello_v2, @@ -2137,15 +2142,22 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, ksock_hello_msg_t *hello, lnet_process_id_t *peerid, __u64 *incarnation) { + /* Return < 0 fatal error + * 0 success + * EALREADY lost connection race + * EPROTO protocol version mismatch + */ cfs_socket_t *sock = conn->ksnc_sock; - int active; + int active = (conn->ksnc_proto != NULL); int timeout; - int match = 0; + int proto_match; int rc; - ksock_protocol_t *proto; + ksock_proto_t *proto; lnet_process_id_t recv_id; - active = (peerid->nid != LNET_NID_ANY); + /* socket type set on active connections - not set on passive */ + LASSERT (!active == !(conn->ksnc_type != SOCKLND_CONN_NONE)); + timeout = active ? *ksocknal_tunables.ksnd_timeout : lnet_acceptor_timeout(); @@ -2153,7 +2165,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); + LASSERT (rc < 0); return rc; } @@ -2185,7 +2197,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); + LASSERT (rc < 0); return rc; } @@ -2204,15 +2216,19 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); + LASSERT (rc < 0); return rc; } - proto = ksocknal_compat_protocol(hello); + proto = ksocknal_parse_proto_version(hello); if (proto == NULL) { if (!active) { /* unknown protocol from peer, tell peer my protocol */ conn->ksnc_proto = &ksocknal_protocol_v2x; +#if SOCKNAL_VERSION_DEBUG + if 
(*ksocknal_tunables.ksnd_protocol != 2) + conn->ksnc_proto = &ksocknal_protocol_v1x; +#endif hello->kshm_nips = 0; ksocknal_send_hello(ni, conn, ni->ni_nid, hello); } @@ -2225,9 +2241,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, return -EPROTO; } - if (conn->ksnc_proto == proto) - match = 1; - + proto_match = (conn->ksnc_proto == proto); conn->ksnc_proto = proto; /* receive the rest of hello message anyway */ @@ -2235,9 +2249,12 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, if (rc != 0) { CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); + LASSERT (rc < 0); return rc; } + *incarnation = hello->kshm_src_incarnation; + if (hello->kshm_src_nid == LNET_NID_ANY) { CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY" "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr)); @@ -2259,10 +2276,23 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, } - if (!active) { /* don't know peer's nid yet */ + if (!active) { *peerid = recv_id; - } else if (peerid->pid != recv_id.pid || - !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) { + + /* peer determines type */ + conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); + if (conn->ksnc_type == SOCKLND_CONN_NONE) { + CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n", + hello->kshm_ctype, libcfs_id2str(*peerid), + HIPQUAD(conn->ksnc_ipaddr)); + return -EPROTO; + } + + return 0; + } + + if (peerid->pid != recv_id.pid || + !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) { LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host" " %u.%u.%u.%u, but they claimed they were " "%s; please check your Lustre " @@ -2273,22 +2303,12 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, return -EPROTO; } - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - /* I've accepted this connection; peer determines type */ - conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - CERROR 
("Unexpected type %d from %s ip %u.%u.%u.%u\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } else if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - if (match) { - /* lost a connection race */ - return -EALREADY; - } - /* unmatched protocol get SOCKLND_CONN_NONE anyway */ - } else if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { + if (hello->kshm_ctype == SOCKLND_CONN_NONE) { + /* Possible protocol mismatch or I lost the connection race */ + return proto_match ? EALREADY : EPROTO; + } + + if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n", conn->ksnc_type, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr), @@ -2296,8 +2316,6 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, return -EPROTO; } - *incarnation = hello->kshm_src_incarnation; - return 0; } @@ -2373,7 +2391,6 @@ ksocknal_connect (ksock_route_t *route) goto failed; rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - if (rc < 0) { lnet_connect_console_error(rc, peer->ksnp_id.nid, route->ksnr_ipaddr, @@ -2381,13 +2398,9 @@ ksocknal_connect (ksock_route_t *route) goto failed; } - /* rc == EALREADY means I lost a connection race and my - * peer is connecting to me. - * rc == EPROTO means my peer is speaking an older - * protocol version. 
*/ - LASSERT (rc == 0 || rc == EALREADY || rc == EPROTO); - - retry_later = rc != 0; + /* A +ve RC means I have to retry because I lost the connection + * race or I have to renegotiate protocol version */ + retry_later = (rc != 0); if (retry_later) CDEBUG(D_NET, "peer %s: conn race, retry later.\n", libcfs_nid2str(peer->ksnp_id.nid)); diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c index d4f0502..e702476 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -25,182 +25,172 @@ ksocknal_lib_tunables_init () int i = 0; int j = 1; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "timeout", - .data = ksocknal_tunables.ksnd_timeout, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "credits", - .data = ksocknal_tunables.ksnd_credits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "peer_credits", - .data = ksocknal_tunables.ksnd_peercredits, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "nconnds", - .data = ksocknal_tunables.ksnd_nconnds, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "min_reconnectms", - .data = ksocknal_tunables.ksnd_min_reconnectms, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "max_reconnectms", - .data = ksocknal_tunables.ksnd_max_reconnectms, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - 
ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "eager_ack", - .data = ksocknal_tunables.ksnd_eager_ack, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "zero_copy", - .data = ksocknal_tunables.ksnd_zc_min_frag, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "typed", - .data = ksocknal_tunables.ksnd_typed_conns, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "min_bulk", - .data = ksocknal_tunables.ksnd_min_bulk, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "rx_buffer_size", - .data = ksocknal_tunables.ksnd_rx_buffer_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "tx_buffer_size", - .data = ksocknal_tunables.ksnd_tx_buffer_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "nagle", - .data = ksocknal_tunables.ksnd_nagle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "timeout", + .data = ksocknal_tunables.ksnd_timeout, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "credits", + .data = ksocknal_tunables.ksnd_credits, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; 
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "peer_credits", + .data = ksocknal_tunables.ksnd_peercredits, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "nconnds", + .data = ksocknal_tunables.ksnd_nconnds, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "min_reconnectms", + .data = ksocknal_tunables.ksnd_min_reconnectms, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "max_reconnectms", + .data = ksocknal_tunables.ksnd_max_reconnectms, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "eager_ack", + .data = ksocknal_tunables.ksnd_eager_ack, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "zero_copy", + .data = ksocknal_tunables.ksnd_zc_min_frag, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "typed", + .data = ksocknal_tunables.ksnd_typed_conns, + .maxlen = sizeof (int), + .mode = 0444, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "min_bulk", + .data = ksocknal_tunables.ksnd_min_bulk, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "rx_buffer_size", + .data = ksocknal_tunables.ksnd_rx_buffer_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler 
= &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "tx_buffer_size", + .data = ksocknal_tunables.ksnd_tx_buffer_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "nagle", + .data = ksocknal_tunables.ksnd_nagle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; #if CPU_AFFINITY - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "irq_affinity", - .data = ksocknal_tunables.ksnd_irq_affinity, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "irq_affinity", + .data = ksocknal_tunables.ksnd_irq_affinity, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; #endif - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "keepalive_idle", - .data = ksocknal_tunables.ksnd_keepalive_idle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "keepalive_count", - .data = ksocknal_tunables.ksnd_keepalive_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "keepalive_intvl", - .data = ksocknal_tunables.ksnd_keepalive_intvl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "keepalive_idle", + .data = ksocknal_tunables.ksnd_keepalive_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "keepalive_count", + .data = 
ksocknal_tunables.ksnd_keepalive_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "keepalive_intvl", + .data = ksocknal_tunables.ksnd_keepalive_intvl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; #ifdef SOCKNAL_BACKOFF - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "backoff_init", - .data = ksocknal_tunables.ksnd_backoff_init, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; - ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) - { - .ctl_name = j++, - .procname = "backoff_max", - .data = ksocknal_tunables.ksnd_backoff_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "backoff_init", + .data = ksocknal_tunables.ksnd_backoff_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "backoff_max", + .data = ksocknal_tunables.ksnd_backoff_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; +#endif +#if SOCKNAL_VERSION_DEBUG + ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) { + .ctl_name = j++, + .procname = "protocol", + .data = ksocknal_tunables.ksnd_protocol, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }; #endif - LASSERT (j == i+1); LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0])); diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.c b/lnet/klnds/socklnd/socklnd_lib-winnt.c index 7669c77..c51129b 100755 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.c +++ b/lnet/klnds/socklnd/socklnd_lib-winnt.c @@ -656,6 +656,7 @@ ksocknal_sched_conn (ksock_conn_t *conn, int mode, ksock_tx_t *tx) if (mode) { /* transmission can continue ... 
*/ +#error "This is out of date - we should be calling ksocknal_write_callback()" conn->ksnc_tx_ready = 1; if (tx) { diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 917d4d7..d9c9fc9 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -126,6 +126,12 @@ CFS_MODULE_PARM(backoff_max, "i", int, 0644, "seconds for maximum tcp backoff"); #endif +#if SOCKNAL_VERSION_DEBUG +static int protocol = 2; +CFS_MODULE_PARM(protocol, "i", int, 0644, + "protocol version"); +#endif + ksock_tunables_t ksocknal_tunables = { .ksnd_timeout = &sock_timeout, .ksnd_credits = &credits, @@ -152,5 +158,8 @@ ksock_tunables_t ksocknal_tunables = { .ksnd_backoff_init = &backoff_init, .ksnd_backoff_max = &backoff_max, #endif +#if SOCKNAL_VERSION_DEBUG + .ksnd_protocol = &protocol, +#endif }; -- 1.8.3.1