From 73cd68d6e298abd792a2507103933d412f1529fd Mon Sep 17 00:00:00 2001 From: eeb Date: Mon, 3 Nov 2003 16:48:18 +0000 Subject: [PATCH] * Stale socknal connections culled when a rebooted node reconnects (Bug 2062) * Removed all userspace HELLO handshake (== nid exchange) code. It's all done by 1 procedure in the kernel now. * Portals protocol version up: 0.3 --- lnet/include/lnet/lib-types.h | 8 +- lnet/include/lnet/socklnd.h | 1 + lnet/klnds/socklnd/socklnd.c | 71 ++++++++++++++-- lnet/klnds/socklnd/socklnd.h | 9 ++- lnet/klnds/socklnd/socklnd_cb.c | 73 ++++++++++++----- lnet/lnet/lib-move.c | 12 ++- lnet/utils/acceptor.c | 125 ++--------------------------- lnet/utils/portals.c | 100 +---------------------- lnet/utils/wirecheck.c | 5 ++ lustre/portals/include/portals/lib-types.h | 8 +- lustre/portals/include/portals/socknal.h | 1 + lustre/portals/knals/socknal/socknal.c | 71 ++++++++++++++-- lustre/portals/knals/socknal/socknal.h | 9 ++- lustre/portals/knals/socknal/socknal_cb.c | 73 ++++++++++++----- lustre/portals/portals/lib-move.c | 12 ++- lustre/portals/utils/acceptor.c | 125 ++--------------------------- lustre/portals/utils/portals.c | 100 +---------------------- lustre/portals/utils/wirecheck.c | 5 ++ 18 files changed, 310 insertions(+), 498 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 047628b..30e56af 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -84,6 +84,11 @@ typedef struct ptl_reply { ptl_handle_wire_t dst_wmd; } WIRE_ATTR ptl_reply_t; +typedef struct ptl_hello { + __u64 incarnation; + __u32 type; +} WIRE_ATTR ptl_hello_t; + typedef struct { ptl_nid_t dest_nid; ptl_nid_t src_nid; @@ -97,6 +102,7 @@ typedef struct { ptl_put_t put; ptl_get_t get; ptl_reply_t reply; + ptl_hello_t hello; } msg; } WIRE_ATTR ptl_hdr_t; @@ -119,7 +125,7 @@ typedef struct { #define PORTALS_PROTO_MAGIC 0xeebc0ded #define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 2 
+#define PORTALS_PROTO_VERSION_MINOR 3 typedef struct { long recv_count, recv_length, send_count, send_length, drop_count, diff --git a/lnet/include/lnet/socklnd.h b/lnet/include/lnet/socklnd.h index 6d75e5f..27e6f8e 100644 --- a/lnet/include/lnet/socklnd.h +++ b/lnet/include/lnet/socklnd.h @@ -6,6 +6,7 @@ * #defines shared between socknal implementation and utilities */ +#define SOCKNAL_CONN_NONE (-1) #define SOCKNAL_CONN_ANY 0 #define SOCKNAL_CONN_CONTROL 1 #define SOCKNAL_CONN_BULK_IN 2 diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index bb8e247..da47785 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -680,9 +680,11 @@ ksocknal_choose_scheduler_locked (unsigned int irq) } int -ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq, int type) +ksocknal_create_conn (ksock_route_t *route, struct socket *sock, + int bind_irq, int type) { + ptl_nid_t nid; + __u64 incarnation; unsigned long flags; ksock_conn_t *conn; ksock_peer_t *peer; @@ -703,6 +705,19 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, if (rc != 0) return (rc); + if (route == NULL) { + /* acceptor or explicit connect */ + nid = PTL_NID_ANY; + } else { + LASSERT (type != SOCKNAL_CONN_NONE); + /* autoconnect: expect this nid on exchange */ + nid = route->ksnr_peer->ksnp_nid; + } + + rc = ksocknal_hello (sock, &nid, &type, &incarnation); + if (rc != 0) + return (rc); + peer = NULL; if (route == NULL) { /* not autoconnect */ /* Assume this socket connects to a brand new peer */ @@ -723,6 +738,7 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, conn->ksnc_route = NULL; conn->ksnc_sock = sock; conn->ksnc_type = type; + conn->ksnc_incarnation = incarnation; conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; conn->ksnc_saved_write_space = sock->sk->sk_write_space; atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ @@ -815,8 +831,13 @@ ksocknal_create_conn (ptl_nid_t nid, 
ksock_route_t *route, ksocknal_queue_tx_locked (tx, conn); } + rc = ksocknal_close_stale_conns_locked (peer, incarnation); + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + if (rc != 0) + CERROR ("Closed %d stale conns to "LPX64"\n", rc, nid); + if (bind_irq) /* irq binding required */ ksocknal_bind_irq (irq); @@ -1031,6 +1052,27 @@ ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) } int +ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) +{ + ksock_conn_t *conn; + struct list_head *ctmp; + struct list_head *cnxt; + int count = 0; + + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + + if (conn->ksnc_incarnation == incarnation) + continue; + + count++; + ksocknal_close_conn_locked (conn, -ESTALE); + } + + return (count); +} + +int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) { ksock_peer_t *peer = conn->ksnc_peer; @@ -1307,12 +1349,12 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) break; switch (type) { + case SOCKNAL_CONN_NONE: case SOCKNAL_CONN_ANY: case SOCKNAL_CONN_CONTROL: case SOCKNAL_CONN_BULK_IN: case SOCKNAL_CONN_BULK_OUT: - rc = ksocknal_create_conn(pcfg->pcfg_nid, NULL, sock, - pcfg->pcfg_flags, type); + rc = ksocknal_create_conn(NULL, sock, pcfg->pcfg_flags, type); default: break; } @@ -1373,7 +1415,7 @@ ksocknal_free_buffers (void) ksocknal_data.ksnd_peer_hash_size); } -void /*__exit*/ +void ksocknal_module_fini (void) { int i; @@ -1457,6 +1499,22 @@ ksocknal_module_fini (void) } +void __init +ksocknal_init_incarnation (void) +{ + struct timeval tv; + + /* The incarnation number is the time this module loaded and it + * identifies this particular instance of the socknal. 
Hopefully + * we won't be able to reboot more frequently than 1MHz for the + * foreseeable future :) */ + + do_gettimeofday(&tv); + + ksocknal_data.ksnd_incarnation = + (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; +} + int __init ksocknal_module_init (void) { @@ -1494,7 +1552,8 @@ ksocknal_module_init (void) #if SOCKNAL_ZC ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; #endif - + ksocknal_init_incarnation(); + ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 7bfc009..8c906e2 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -152,6 +152,7 @@ typedef struct { unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ + __u64 ksnd_incarnation; /* my epoch */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ struct list_head *ksnd_peers; /* hash table of all my known peers */ @@ -307,8 +308,9 @@ typedef struct ksock_conn int ksnc_port; /* peer's port */ int ksnc_closing; /* being shut down */ int ksnc_type; /* type of connection */ + __u64 ksnc_incarnation; /* peer's incarnation */ - /* READER */ + /* reader */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */ int ksnc_rx_started; /* started receiving a message */ @@ -411,12 +413,13 @@ extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int single, int keep_conn); -extern int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, +extern int ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int bind_irq, int type); extern void ksocknal_close_conn_locked
(ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); extern void ksocknal_put_conn (ksock_conn_t *conn); +extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); @@ -433,3 +436,5 @@ extern void ksocknal_write_space(struct sock *sk); extern int ksocknal_autoconnectd (void *arg); extern int ksocknal_reaper (void *arg); extern int ksocknal_setup_sock (struct socket *sock); +extern int ksocknal_hello (struct socket *sock, + ptl_nid_t *nid, int *type, __u64 *incarnation); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index dde434a..346d60e 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1995,7 +1995,7 @@ ksocknal_sock_read (struct socket *sock, void *buffer, int nob) } int -ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) +ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type, __u64 *incarnation) { int rc; ptl_hdr_t hdr; @@ -2011,25 +2011,27 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - *(__u32 *)&hdr.msg = __cpu_to_le32 (type); - + hdr.msg.hello.type = __cpu_to_le32 (*type); + hdr.msg.hello.incarnation = + __cpu_to_le64 (ksocknal_data.ksnd_incarnation); + /* Assume sufficient socket buffering for this message */ rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr)); if (rc != 0) { - CERROR ("Error %d sending HELLO to "LPX64"\n", rc, nid); + CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); return (rc); } rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv)); if (rc != 0) { - CERROR ("Error %d reading HELLO from "LPX64"\n", rc, nid); + CERROR ("Error %d reading HELLO from "LPX64"\n", 
rc, *nid); return (rc); } if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, nid); - return (-EINVAL); + __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); + return (-EPROTO); } if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || @@ -2040,8 +2042,8 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) __le16_to_cpu (hmv->version_minor), PORTALS_PROTO_VERSION_MAJOR, PORTALS_PROTO_VERSION_MINOR, - nid); - return (-EINVAL); + *nid); + return (-EPROTO); } #if (PORTALS_PROTO_VERSION_MAJOR != 0) @@ -2053,7 +2055,7 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv)); if (rc != 0) { CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", - rc, nid); + rc, *nid); return (rc); } @@ -2063,16 +2065,48 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) CERROR ("Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload from "LPX64"\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length), nid); - return (-EINVAL); + __le32_to_cpu (hdr.payload_length), *nid); + return (-EPROTO); } - - if (__le64_to_cpu (hdr.src_nid) != nid) { + + if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { + CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); + return (-EPROTO); + } + + if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ + *nid = __le64_to_cpu(hdr.src_nid); + } else if (*nid != __le64_to_cpu (hdr.src_nid)) { CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - __le64_to_cpu (hdr.src_nid), nid); - return (-EINVAL); + __le64_to_cpu (hdr.src_nid), *nid); + return (-EPROTO); } + if (*type == SOCKNAL_CONN_NONE) { + /* I've accepted this connection; peer determines type */ + *type = __le32_to_cpu(hdr.msg.hello.type); + switch (*type) { + case SOCKNAL_CONN_ANY: + case 
SOCKNAL_CONN_CONTROL: + break; + case SOCKNAL_CONN_BULK_IN: + *type = SOCKNAL_CONN_BULK_OUT; + break; + case SOCKNAL_CONN_BULK_OUT: + *type = SOCKNAL_CONN_BULK_IN; + break; + default: + CERROR ("Unexpected type %d from "LPX64"\n", *type, *nid); + return (-EPROTO); + } + } else if (__le32_to_cpu(hdr.msg.hello.type) != SOCKNAL_CONN_NONE) { + CERROR ("Mismatched types: me %d "LPX64" %d\n", + *type, *nid, __le32_to_cpu(hdr.msg.hello.type)); + return (-EPROTO); + } + + *incarnation = __le64_to_cpu(hdr.msg.hello.incarnation); + return (0); } @@ -2262,12 +2296,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) goto out; } - rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid, type); - if (rc != 0) - goto out; - - rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, route, sock, - route->ksnr_irq_affinity, type); + rc = ksocknal_create_conn (route, sock, route->ksnr_irq_affinity, type); if (rc == 0) { /* Take an extra ref on sock->file to compensate for the * upcoming close which will lose fd's ref on it. */ diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 491bb87..6e904ba 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1347,14 +1347,14 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) void lib_assert_wire_constants (void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux mdev2 2.4.18-p4smp-15llp #1 SMP Wed Oct 8 11:01:07 PDT 2003 i686 unknown - * with gcc version 2.96 20000731 (Red Hat Linux 7.3 2.96-113) */ + * running on Linux robert.bartonsoftware.com 2.4.20-18.9 #1 Thu May 29 06:54:41 EDT 2003 i68 + * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */ /* Constants... 
*/ LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 2); + LASSERT (PORTALS_PROTO_VERSION_MINOR == 3); LASSERT (PTL_MSG_ACK == 0); LASSERT (PTL_MSG_PUT == 1); LASSERT (PTL_MSG_GET == 2); @@ -1429,4 +1429,10 @@ void lib_assert_wire_constants (void) /* Reply */ LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); + + /* Hello */ + LASSERT (offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.hello.type) == 40); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4); } diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index 54a57d4..626a2eb 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -101,7 +101,7 @@ parse_size (int *sizep, char *str) } void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) +show_connection (int fd, __u32 net_ip) { struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); __u32 host_ip = ntohl (net_ip); @@ -129,12 +129,8 @@ show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) else snprintf (host, sizeof(host), "%s", h->h_name); - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s type %s\n", - host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled", - (type == SOCKNAL_CONN_ANY) ? "A" : - (type == SOCKNAL_CONN_CONTROL) ? "C" : - (type == SOCKNAL_CONN_BULK_IN) ? "I" : - (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); + syslog (LOG_INFO, "Accepted host: %s snd: %d rcv %d nagle: %s\n", + host, txmem, rxmem, nonagle ? 
"disabled" : "enabled"); } int @@ -193,96 +189,6 @@ sock_read (int cfd, void *buffer, int nob) return (0); } -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid, int *type) -{ - int rc; - int t; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. */ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... 
*/ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (hdr.payload_length) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (hdr.payload_length)); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - - t = __le32_to_cpu (*(__u32 *)&hdr.msg); - switch (t) { /* swap sense of connection type */ - case SOCKNAL_CONN_CONTROL: - break; - case SOCKNAL_CONN_BULK_IN: - t = SOCKNAL_CONN_BULK_OUT; - break; - case SOCKNAL_CONN_BULK_OUT: - t = SOCKNAL_CONN_BULK_IN; - break; - default: - t = SOCKNAL_CONN_ANY; - break; - } - *type = t; - - return (0); -} - void usage (char *myname) { @@ -301,7 +207,6 @@ int main(int argc, char **argv) int nonagle = 1; int nal = SOCKNAL; int bind_irq = 0; - int type = 0; while ((c = getopt (argc, argv, "N:r:s:nli")) != -1) switch (c) @@ -429,7 +334,6 @@ int main(int argc, char **argv) int cfd; struct portal_ioctl_data data; struct portals_cfg pcfg; - ptl_nid_t peer_nid; cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); if ( cfd < 0 ) { @@ -438,37 +342,20 @@ int main(int argc, char **argv) continue; } - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids (cfd, data.ioc_nid, &peer_nid, &type); - if (rc != 0) { - close (cfd); - continue; - } - - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid, type); + show_connection (cfd, clntaddr.sin_addr.s_addr); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_nal = nal; pcfg.pcfg_fd = cfd; - pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; - pcfg.pcfg_misc = type; - + pcfg.pcfg_misc = 
SOCKNAL_CONN_NONE; /* == incoming connection */ + PORTAL_IOC_INIT(data); data.ioc_pbuf1 = (char*)&pcfg; data.ioc_plen1 = sizeof(pcfg); if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { perror("ioctl failed"); - } else { printf("client registered\n"); } diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 270a840..af34cba 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -628,84 +628,8 @@ jt_ptl_print_connections (int argc, char **argv) return 0; } -int -exchange_nids (int cfd, ptl_nid_t my_nid, int type, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - *(__u32 *)&hdr.msg = __cpu_to_le32(type); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. 
*/ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (hmv->magic != __cpu_to_le32 (PORTALS_PROTO_MAGIC)) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __le32_to_cpu (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __le16_to_cpu (hmv->version_major), - __le16_to_cpu (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... */ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - hdr.payload_length != __cpu_to_le32 (0)) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length)); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - int jt_ptl_connect(int argc, char **argv) { - ptl_nid_t peer_nid; - struct portal_ioctl_data data; struct portals_cfg pcfg; struct sockaddr_in srvaddr; __u32 ipaddr; @@ -829,32 +753,16 @@ int jt_ptl_connect(int argc, char **argv) if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } - - rc = exchange_nids (fd, 
data.ioc_nid, type, &peer_nid); - if (rc != 0) { - close (fd); - return (-1); - } - - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s type: %s\n", - argv[1], peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled", + printf("Connected host: %s snd: %d rcv: %d nagle: %s type: %s\n", + argv[1], txmem, rxmem, nonagle ? "Disabled" : "Enabled", (type == SOCKNAL_CONN_ANY) ? "A" : (type == SOCKNAL_CONN_CONTROL) ? "C" : (type == SOCKNAL_CONN_BULK_IN) ? "I" : (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); + pcfg.pcfg_nal = g_nal; pcfg.pcfg_fd = fd; - pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; pcfg.pcfg_misc = type; @@ -866,7 +774,7 @@ int jt_ptl_connect(int argc, char **argv) return -1; } - printf("Connection to "LPX64" registered with socknal\n", peer_nid); + printf("Connection to %s registered with socknal\n", argv[1]); rc = close(fd); if (rc) diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c index 21b3dda..77ad126 100644 --- a/lnet/utils/wirecheck.c +++ b/lnet/utils/wirecheck.c @@ -109,6 +109,11 @@ check_ptl_hdr (void) BLANK_LINE (); COMMENT ("Reply"); CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); + + BLANK_LINE (); + COMMENT ("Hello"); + CHECK_MEMBER (ptl_hdr_t, msg.hello.incarnation); + CHECK_MEMBER (ptl_hdr_t, msg.hello.type); } void diff --git a/lustre/portals/include/portals/lib-types.h b/lustre/portals/include/portals/lib-types.h index 047628b..30e56af 100644 --- a/lustre/portals/include/portals/lib-types.h +++ b/lustre/portals/include/portals/lib-types.h @@ -84,6 +84,11 @@ typedef struct ptl_reply { ptl_handle_wire_t dst_wmd; } WIRE_ATTR ptl_reply_t; +typedef struct ptl_hello { + __u64 incarnation; + __u32 type; +} WIRE_ATTR ptl_hello_t; + typedef struct { ptl_nid_t dest_nid; ptl_nid_t src_nid; @@ -97,6 +102,7 @@ typedef struct { ptl_put_t put; ptl_get_t get; ptl_reply_t reply; + ptl_hello_t hello; } msg; } WIRE_ATTR ptl_hdr_t; @@ -119,7 +125,7 @@ typedef struct { #define 
PORTALS_PROTO_MAGIC 0xeebc0ded #define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 2 +#define PORTALS_PROTO_VERSION_MINOR 3 typedef struct { long recv_count, recv_length, send_count, send_length, drop_count, diff --git a/lustre/portals/include/portals/socknal.h b/lustre/portals/include/portals/socknal.h index 6d75e5f..27e6f8e 100644 --- a/lustre/portals/include/portals/socknal.h +++ b/lustre/portals/include/portals/socknal.h @@ -6,6 +6,7 @@ * #defines shared between socknal implementation and utilities */ +#define SOCKNAL_CONN_NONE (-1) #define SOCKNAL_CONN_ANY 0 #define SOCKNAL_CONN_CONTROL 1 #define SOCKNAL_CONN_BULK_IN 2 diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index bb8e247..da47785 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -680,9 +680,11 @@ ksocknal_choose_scheduler_locked (unsigned int irq) } int -ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq, int type) +ksocknal_create_conn (ksock_route_t *route, struct socket *sock, + int bind_irq, int type) { + ptl_nid_t nid; + __u64 incarnation; unsigned long flags; ksock_conn_t *conn; ksock_peer_t *peer; @@ -703,6 +705,19 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, if (rc != 0) return (rc); + if (route == NULL) { + /* acceptor or explicit connect */ + nid = PTL_NID_ANY; + } else { + LASSERT (type != SOCKNAL_CONN_NONE); + /* autoconnect: expect this nid on exchange */ + nid = route->ksnr_peer->ksnp_nid; + } + + rc = ksocknal_hello (sock, &nid, &type, &incarnation); + if (rc != 0) + return (rc); + peer = NULL; if (route == NULL) { /* not autoconnect */ /* Assume this socket connects to a brand new peer */ @@ -723,6 +738,7 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, conn->ksnc_route = NULL; conn->ksnc_sock = sock; conn->ksnc_type = type; + conn->ksnc_incarnation = incarnation; conn->ksnc_saved_data_ready = 
sock->sk->sk_data_ready; conn->ksnc_saved_write_space = sock->sk->sk_write_space; atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ @@ -815,8 +831,13 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, ksocknal_queue_tx_locked (tx, conn); } + rc = ksocknal_close_stale_conns_locked (peer, incarnation); + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + if (rc != 0) + CERROR ("Closed %d stale conns to "LPX64"\n", rc, nid); + if (bind_irq) /* irq binding required */ ksocknal_bind_irq (irq); @@ -1031,6 +1052,27 @@ ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) } int +ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) +{ + ksock_conn_t *conn; + struct list_head *ctmp; + struct list_head *cnxt; + int count = 0; + + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + + if (conn->ksnc_incarnation == incarnation) + continue; + + count++; + ksocknal_close_conn_locked (conn, -ESTALE); + } + + return (count); +} + +int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) { ksock_peer_t *peer = conn->ksnc_peer; @@ -1307,12 +1349,12 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) break; switch (type) { + case SOCKNAL_CONN_NONE: case SOCKNAL_CONN_ANY: case SOCKNAL_CONN_CONTROL: case SOCKNAL_CONN_BULK_IN: case SOCKNAL_CONN_BULK_OUT: - rc = ksocknal_create_conn(pcfg->pcfg_nid, NULL, sock, - pcfg->pcfg_flags, type); + rc = ksocknal_create_conn(NULL, sock, pcfg->pcfg_flags, type); default: break; } @@ -1373,7 +1415,7 @@ ksocknal_free_buffers (void) ksocknal_data.ksnd_peer_hash_size); } -void /*__exit*/ +void ksocknal_module_fini (void) { int i; @@ -1457,6 +1499,22 @@ ksocknal_module_fini (void) } +void __init +ksocknal_init_incarnation (void) +{ + struct timeval tv; + + /* The incarnation number is the time this module loaded and it + * identifies this particular instance of the socknal. 
Hopefully + * we won't be able to reboot more frequently than 1MHz for the + * foreseeable future :) */ + + do_gettimeofday(&tv); + + ksocknal_data.ksnd_incarnation = + (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; +} + int __init ksocknal_module_init (void) { @@ -1494,7 +1552,8 @@ ksocknal_module_init (void) #if SOCKNAL_ZC ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; #endif - + ksocknal_init_incarnation(); + ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 7bfc009..8c906e2 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -152,6 +152,7 @@ typedef struct { unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ + __u64 ksnd_incarnation; /* my epoch */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ struct list_head *ksnd_peers; /* hash table of all my known peers */ @@ -307,8 +308,9 @@ typedef struct ksock_conn int ksnc_port; /* peer's port */ int ksnc_closing; /* being shut down */ int ksnc_type; /* type of connection */ + __u64 ksnc_incarnation; /* peer's incarnation */ - /* READER */ + /* reader */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */ int ksnc_rx_started; /* started receiving a message */ @@ -411,12 +413,13 @@ extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int single, int keep_conn); -extern int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, +extern int ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int bind_irq, int type);
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); extern void ksocknal_put_conn (ksock_conn_t *conn); +extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); @@ -433,3 +436,5 @@ extern void ksocknal_write_space(struct sock *sk); extern int ksocknal_autoconnectd (void *arg); extern int ksocknal_reaper (void *arg); extern int ksocknal_setup_sock (struct socket *sock); +extern int ksocknal_hello (struct socket *sock, + ptl_nid_t *nid, int *type, __u64 *incarnation); diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index dde434a..346d60e 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -1995,7 +1995,7 @@ ksocknal_sock_read (struct socket *sock, void *buffer, int nob) } int -ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) +ksocknal_hello (struct socket *sock, ptl_nid_t *nid, int *type, __u64 *incarnation) { int rc; ptl_hdr_t hdr; @@ -2011,25 +2011,27 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - *(__u32 *)&hdr.msg = __cpu_to_le32 (type); - + hdr.msg.hello.type = __cpu_to_le32 (*type); + hdr.msg.hello.incarnation = + __cpu_to_le64 (ksocknal_data.ksnd_incarnation); + /* Assume sufficient socket buffering for this message */ rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr)); if (rc != 0) { - CERROR ("Error %d sending HELLO to "LPX64"\n", rc, nid); + CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); return (rc); } rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv)); if (rc != 0) { - CERROR ("Error %d reading 
HELLO from "LPX64"\n", rc, nid); + CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid); return (rc); } if (hmv->magic != __le32_to_cpu (PORTALS_PROTO_MAGIC)) { CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, nid); - return (-EINVAL); + __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC, *nid); + return (-EPROTO); } if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || @@ -2040,8 +2042,8 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) __le16_to_cpu (hmv->version_minor), PORTALS_PROTO_VERSION_MAJOR, PORTALS_PROTO_VERSION_MINOR, - nid); - return (-EINVAL); + *nid); + return (-EPROTO); } #if (PORTALS_PROTO_VERSION_MAJOR != 0) @@ -2053,7 +2055,7 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv)); if (rc != 0) { CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", - rc, nid); + rc, *nid); return (rc); } @@ -2063,16 +2065,48 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) CERROR ("Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload from "LPX64"\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length), nid); - return (-EINVAL); + __le32_to_cpu (hdr.payload_length), *nid); + return (-EPROTO); } - - if (__le64_to_cpu (hdr.src_nid) != nid) { + + if (__le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { + CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); + return (-EPROTO); + } + + if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ + *nid = __le64_to_cpu(hdr.src_nid); + } else if (*nid != __le64_to_cpu (hdr.src_nid)) { CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - __le64_to_cpu (hdr.src_nid), nid); - return (-EINVAL); + __le64_to_cpu (hdr.src_nid), *nid); + return (-EPROTO); } + if (*type == SOCKNAL_CONN_NONE) { + /* I've accepted this connection; peer determines type */ + *type = 
__le32_to_cpu(hdr.msg.hello.type); + switch (*type) { + case SOCKNAL_CONN_ANY: + case SOCKNAL_CONN_CONTROL: + break; + case SOCKNAL_CONN_BULK_IN: + *type = SOCKNAL_CONN_BULK_OUT; + break; + case SOCKNAL_CONN_BULK_OUT: + *type = SOCKNAL_CONN_BULK_IN; + break; + default: + CERROR ("Unexpected type %d from "LPX64"\n", *type, *nid); + return (-EPROTO); + } + } else if (__le32_to_cpu(hdr.msg.hello.type) != SOCKNAL_CONN_NONE) { + CERROR ("Mismatched types: me %d "LPX64" %d\n", + *type, *nid, __le32_to_cpu(hdr.msg.hello.type)); + return (-EPROTO); + } + + *incarnation = __le64_to_cpu(hdr.msg.hello.incarnation); + return (0); } @@ -2262,12 +2296,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) goto out; } - rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid, type); - if (rc != 0) - goto out; - - rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, route, sock, - route->ksnr_irq_affinity, type); + rc = ksocknal_create_conn (route, sock, route->ksnr_irq_affinity, type); if (rc == 0) { /* Take an extra ref on sock->file to compensate for the * upcoming close which will lose fd's ref on it. */ diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c index 491bb87..6e904ba 100644 --- a/lustre/portals/portals/lib-move.c +++ b/lustre/portals/portals/lib-move.c @@ -1347,14 +1347,14 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) void lib_assert_wire_constants (void) { /* Wire protocol assertions generated by 'wirecheck' - * running on Linux mdev2 2.4.18-p4smp-15llp #1 SMP Wed Oct 8 11:01:07 PDT 2003 i686 unknown - * with gcc version 2.96 20000731 (Red Hat Linux 7.3 2.96-113) */ + * running on Linux robert.bartonsoftware.com 2.4.20-18.9 #1 Thu May 29 06:54:41 EDT 2003 i68 + * with gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5) */ /* Constants... 
*/ LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 2); + LASSERT (PORTALS_PROTO_VERSION_MINOR == 3); LASSERT (PTL_MSG_ACK == 0); LASSERT (PTL_MSG_PUT == 1); LASSERT (PTL_MSG_GET == 2); @@ -1429,4 +1429,10 @@ void lib_assert_wire_constants (void) /* Reply */ LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); + + /* Hello */ + LASSERT (offsetof(ptl_hdr_t, msg.hello.incarnation) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.hello.type) == 40); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4); } diff --git a/lustre/portals/utils/acceptor.c b/lustre/portals/utils/acceptor.c index 54a57d4..626a2eb 100644 --- a/lustre/portals/utils/acceptor.c +++ b/lustre/portals/utils/acceptor.c @@ -101,7 +101,7 @@ parse_size (int *sizep, char *str) } void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) +show_connection (int fd, __u32 net_ip) { struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); __u32 host_ip = ntohl (net_ip); @@ -129,12 +129,8 @@ show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) else snprintf (host, sizeof(host), "%s", h->h_name); - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s type %s\n", - host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled", - (type == SOCKNAL_CONN_ANY) ? "A" : - (type == SOCKNAL_CONN_CONTROL) ? "C" : - (type == SOCKNAL_CONN_BULK_IN) ? "I" : - (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); + syslog (LOG_INFO, "Accepted host: %s snd: %d rcv %d nagle: %s\n", + host, txmem, rxmem, nonagle ? 
"disabled" : "enabled"); } int @@ -193,96 +189,6 @@ sock_read (int cfd, void *buffer, int nob) return (0); } -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid, int *type) -{ - int rc; - int t; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. */ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... 
*/ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (hdr.payload_length) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (hdr.payload_length)); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - - t = __le32_to_cpu (*(__u32 *)&hdr.msg); - switch (t) { /* swap sense of connection type */ - case SOCKNAL_CONN_CONTROL: - break; - case SOCKNAL_CONN_BULK_IN: - t = SOCKNAL_CONN_BULK_OUT; - break; - case SOCKNAL_CONN_BULK_OUT: - t = SOCKNAL_CONN_BULK_IN; - break; - default: - t = SOCKNAL_CONN_ANY; - break; - } - *type = t; - - return (0); -} - void usage (char *myname) { @@ -301,7 +207,6 @@ int main(int argc, char **argv) int nonagle = 1; int nal = SOCKNAL; int bind_irq = 0; - int type = 0; while ((c = getopt (argc, argv, "N:r:s:nli")) != -1) switch (c) @@ -429,7 +334,6 @@ int main(int argc, char **argv) int cfd; struct portal_ioctl_data data; struct portals_cfg pcfg; - ptl_nid_t peer_nid; cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); if ( cfd < 0 ) { @@ -438,37 +342,20 @@ int main(int argc, char **argv) continue; } - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids (cfd, data.ioc_nid, &peer_nid, &type); - if (rc != 0) { - close (cfd); - continue; - } - - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid, type); + show_connection (cfd, clntaddr.sin_addr.s_addr); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_nal = nal; pcfg.pcfg_fd = cfd; - pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; - pcfg.pcfg_misc = type; - + pcfg.pcfg_misc = 
SOCKNAL_CONN_NONE; /* == incoming connection */ + PORTAL_IOC_INIT(data); data.ioc_pbuf1 = (char*)&pcfg; data.ioc_plen1 = sizeof(pcfg); if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { perror("ioctl failed"); - } else { printf("client registered\n"); } diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 270a840..af34cba 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -628,84 +628,8 @@ jt_ptl_print_connections (int argc, char **argv) return 0; } -int -exchange_nids (int cfd, ptl_nid_t my_nid, int type, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - *(__u32 *)&hdr.msg = __cpu_to_le32(type); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. 
*/ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (hmv->magic != __cpu_to_le32 (PORTALS_PROTO_MAGIC)) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __le32_to_cpu (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (hmv->version_major != __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) || - hmv->version_minor != __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __le16_to_cpu (hmv->version_major), - __le16_to_cpu (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... */ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - hdr.payload_length != __cpu_to_le32 (0)) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __le32_to_cpu (hdr.type), - __le32_to_cpu (hdr.payload_length)); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - int jt_ptl_connect(int argc, char **argv) { - ptl_nid_t peer_nid; - struct portal_ioctl_data data; struct portals_cfg pcfg; struct sockaddr_in srvaddr; __u32 ipaddr; @@ -829,32 +753,16 @@ int jt_ptl_connect(int argc, char **argv) if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } - - rc = exchange_nids (fd, 
data.ioc_nid, type, &peer_nid); - if (rc != 0) { - close (fd); - return (-1); - } - - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s type: %s\n", - argv[1], peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled", + printf("Connected host: %s snd: %d rcv: %d nagle: %s type: %s\n", + argv[1], txmem, rxmem, nonagle ? "Disabled" : "Enabled", (type == SOCKNAL_CONN_ANY) ? "A" : (type == SOCKNAL_CONN_CONTROL) ? "C" : (type == SOCKNAL_CONN_BULK_IN) ? "I" : (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); + pcfg.pcfg_nal = g_nal; pcfg.pcfg_fd = fd; - pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; pcfg.pcfg_misc = type; @@ -866,7 +774,7 @@ int jt_ptl_connect(int argc, char **argv) return -1; } - printf("Connection to "LPX64" registered with socknal\n", peer_nid); + printf("Connection to %s registered with socknal\n", argv[1]); rc = close(fd); if (rc) diff --git a/lustre/portals/utils/wirecheck.c b/lustre/portals/utils/wirecheck.c index 21b3dda..77ad126 100644 --- a/lustre/portals/utils/wirecheck.c +++ b/lustre/portals/utils/wirecheck.c @@ -109,6 +109,11 @@ check_ptl_hdr (void) BLANK_LINE (); COMMENT ("Reply"); CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); + + BLANK_LINE (); + COMMENT ("Hello"); + CHECK_MEMBER (ptl_hdr_t, msg.hello.incarnation); + CHECK_MEMBER (ptl_hdr_t, msg.hello.type); } void -- 1.8.3.1