From 6ab8555d13c77d86c1b5c37b964a500116497aba Mon Sep 17 00:00:00 2001 From: eeb Date: Wed, 29 Oct 2003 20:15:37 +0000 Subject: [PATCH] * Cleaned up ptl_hdr_t definition; removed unused fields and moved the common length field into the common part of the header. * Socknal connection handshake no longer optional (it was --nid_exchange in lconf, the 'x' option to lctl::[auto_]connect, the -x option in acceptor etc). * Added 'uname -a' output and gcc version info in a comment to the output of portals wirecheck. * Bumped up the portals protocol version number. * Added "typed" socknal connections to establish either a single connection... 'A' bidirectional control + bulk ...or 3 connections... 'C' bidirectional control 'I' bulk in 'O' bulk out Individual connections can be created using the relevant flag in lctl::connect. Autoconnect defaults to creating three CIO connections, but can create a single A connection if /proc/sys/socknal/typed is zero when the connection is established (CAVEAT EMPTOR this also causes ALL existing connections to be driven bi-directionally). Note that connection types are determined by the initiator. The connection handshake tells the acceptor what type of connection she has just accepted, and she tells her socknal. * /proc/sys/socknal/eager_ack is now a bitmap of socket types to do eager ack on.... 1 enable on 'A' connections 2 enable on 'C' connections 4 enable on 'I' connections 8 enable on 'O' connections (if any messages are ever received) Default: 0 I doubt this facility will be of much use... * /proc/sys/socknal/min_bulk controls the size at which messages are considered 'bulk' and preferentially sent on 'O' rather than 'C' connections. * Changed error handling on connections so that when an error is detected on a connection... (a) ALL connections to the same NID at the same IP are removed. 
(b) The autoconnect route that generated the connection becomes the least favourite so that the next autoconnect attempt will use a different IP if redundant autoconnect routes have been specified. --- lnet/include/lnet/lib-types.h | 78 ++++---- lnet/include/lnet/socklnd.h | 13 ++ lnet/klnds/qswlnd/qswlnd.c | 2 +- lnet/klnds/qswlnd/qswlnd_cb.c | 13 +- lnet/klnds/socklnd/socklnd.c | 212 ++++++++++++--------- lnet/klnds/socklnd/socklnd.h | 30 ++- lnet/klnds/socklnd/socklnd_cb.c | 288 ++++++++++++++++++----------- lnet/klnds/toelnd/toenal_cb.c | 21 +-- lnet/lnet/lib-move.c | 195 +++++++++---------- lnet/lnet/lib-msg.c | 2 +- lnet/utils/acceptor.c | 79 ++++---- lnet/utils/portals.c | 107 ++++++----- lnet/utils/ptlctl.c | 4 +- lnet/utils/wirecheck.c | 89 +++++++-- lustre/portals/include/portals/lib-types.h | 78 ++++---- lustre/portals/include/portals/socknal.h | 13 ++ lustre/portals/knals/qswnal/qswnal.c | 2 +- lustre/portals/knals/qswnal/qswnal_cb.c | 13 +- lustre/portals/knals/socknal/socknal.c | 212 ++++++++++++--------- lustre/portals/knals/socknal/socknal.h | 30 ++- lustre/portals/knals/socknal/socknal_cb.c | 288 ++++++++++++++++++----------- lustre/portals/knals/toenal/toenal_cb.c | 21 +-- lustre/portals/portals/lib-move.c | 195 +++++++++---------- lustre/portals/portals/lib-msg.c | 2 +- lustre/portals/utils/acceptor.c | 79 ++++---- lustre/portals/utils/portals.c | 107 ++++++----- lustre/portals/utils/ptlctl.c | 4 +- lustre/portals/utils/wirecheck.c | 89 +++++++-- 28 files changed, 1312 insertions(+), 954 deletions(-) create mode 100644 lnet/include/lnet/socklnd.h create mode 100644 lustre/portals/include/portals/socknal.h diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index e5447d7..047628b 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -54,72 +54,62 @@ typedef enum { PTL_MSG_HELLO, } ptl_msg_type_t; -/* Each of these structs should start with an odd number of - * __u32, or the compiler could add its 
own padding and confuse - * everyone. - * - * Also, "length" needs to be at offset 28 of each struct. - */ +/* The variant fields of the portals message header are aligned on an 8 + * byte boundary in the message header. Note that all types used in these + * wire structs MUST be fixed size and the smaller types are placed at the + * end. */ typedef struct ptl_ack { - ptl_size_t mlength; - ptl_handle_wire_t dst_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for acks) moving out RSN */ + ptl_handle_wire_t dst_wmd; + ptl_match_bits_t match_bits; + ptl_size_t mlength; } WIRE_ATTR ptl_ack_t; typedef struct ptl_put { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t ack_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length moving out RSN */ - ptl_size_t offset; - ptl_hdr_data_t hdr_data; + ptl_handle_wire_t ack_wmd; + ptl_match_bits_t match_bits; + ptl_hdr_data_t hdr_data; + ptl_pt_index_t ptl_index; + ptl_size_t offset; } WIRE_ATTR ptl_put_t; typedef struct ptl_get { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t return_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for gets) moving out RSN */ - ptl_size_t src_offset; - ptl_size_t return_offset; /* unused: going RSN */ - ptl_size_t sink_length; + ptl_handle_wire_t return_wmd; + ptl_match_bits_t match_bits; + ptl_pt_index_t ptl_index; + ptl_size_t src_offset; + ptl_size_t sink_length; } WIRE_ATTR ptl_get_t; typedef struct ptl_reply { - __u32 unused1; /* unused fields going RSN */ - ptl_handle_wire_t dst_wmd; - ptl_size_t dst_offset; /* unused: going RSN */ - __u32 unused2; - ptl_size_t length; /* common length moving out RSN */ + ptl_handle_wire_t dst_wmd; } WIRE_ATTR ptl_reply_t; typedef struct { - ptl_nid_t dest_nid; - ptl_nid_t src_nid; - ptl_pid_t dest_pid; - ptl_pid_t src_pid; - __u32 type; /* ptl_msg_type_t */ + ptl_nid_t dest_nid; + ptl_nid_t src_nid; + ptl_pid_t dest_pid; + ptl_pid_t src_pid; + __u32 type; /* ptl_msg_type_t */ + __u32 
payload_length; /* payload data to follow */ + /*<------__u64 aligned------->*/ union { - ptl_ack_t ack; - ptl_put_t put; - ptl_get_t get; + ptl_ack_t ack; + ptl_put_t put; + ptl_get_t get; ptl_reply_t reply; } msg; } WIRE_ATTR ptl_hdr_t; -/* All length fields in individual unions at same offset */ -/* LASSERT for same in lib-move.c */ -#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length) - /* A HELLO message contains the portals magic number and protocol version * code in the header's dest_nid, the peer's NID in the src_nid, and - * PTL_MSG_HELLO in the type field. All other fields are zero (including - * PTL_HDR_LENGTH; i.e. no payload). + * PTL_MSG_HELLO in the type field. All other common fields are zero + * (including payload_size; i.e. no payload). * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is * running the same protocol and to find out its NID, so that hosts with * multiple IP interfaces can have a single NID. These NALs should exchange - * HELLO messages when a connection is first established. */ + * HELLO messages when a connection is first established. + * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg. 
+ */ typedef struct { __u32 magic; /* PORTALS_PROTO_MAGIC */ __u16 version_major; /* increment on incompatible change */ @@ -129,7 +119,7 @@ typedef struct { #define PORTALS_PROTO_MAGIC 0xeebc0ded #define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 1 +#define PORTALS_PROTO_VERSION_MINOR 2 typedef struct { long recv_count, recv_length, send_count, send_length, drop_count, diff --git a/lnet/include/lnet/socklnd.h b/lnet/include/lnet/socklnd.h new file mode 100644 index 0000000..6d75e5f --- /dev/null +++ b/lnet/include/lnet/socklnd.h @@ -0,0 +1,13 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * + * + * #defines shared between socknal implementation and utilities + */ + +#define SOCKNAL_CONN_ANY 0 +#define SOCKNAL_CONN_CONTROL 1 +#define SOCKNAL_CONN_BULK_IN 2 +#define SOCKNAL_CONN_BULK_OUT 3 +#define SOCKNAL_CONN_NTYPES 4 diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index 4472e30..9caf381 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -131,7 +131,7 @@ kqswnal_get_tx_desc (struct portals_cfg *pcfg) pcfg->pcfg_pbuf1 = (char *)ktx; pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type); - pcfg->pcfg_size = NTOH__u32(PTL_HDR_LENGTH(ktx->ktx_wire_hdr)); + pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length); pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid); pcfg->pcfg_nid2 = ktx->ktx_nid; pcfg->pcfg_misc = ktx->ktx_launcher; diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 006ea49..43926c9 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -542,8 +542,9 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) { char *type_str = hdr_type_string (hdr); - CERROR("P3 Header at %p of type %s\n", hdr, type_str); - CERROR(" From nid/pid "LPU64"/%u", NTOH__u64(hdr->src_nid), + CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str, + NTOH__u32(hdr->payload_length)); + 
CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid), NTOH__u32(hdr->src_pid)); CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid), NTOH__u32(hdr->dest_pid)); @@ -556,8 +557,7 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) hdr->msg.put.ack_wmd.wh_interface_cookie, hdr->msg.put.ack_wmd.wh_object_cookie, NTOH__u64 (hdr->msg.put.match_bits)); - CERROR(" Length %d, offset %d, hdr data "LPX64"\n", - NTOH__u32(PTL_HDR_LENGTH(hdr)), + CERROR(" offset %d, hdr data "LPX64"\n", NTOH__u32(hdr->msg.put.offset), hdr->msg.put.hdr_data); break; @@ -582,10 +582,9 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) break; case PTL_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64", length %d\n", + CERROR(" dst md "LPX64"."LPX64"\n", hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - NTOH__u32 (PTL_HDR_LENGTH(hdr))); + hdr->msg.reply.dst_wmd.wh_object_cookie); } } /* end of print_hdr() */ diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 3d0c758..bb8e247 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -45,6 +45,8 @@ kpr_nal_interface_t ksocknal_router_interface = { #define SOCKNAL_SYSCTL_TIMEOUT 1 #define SOCKNAL_SYSCTL_EAGER_ACK 2 #define SOCKNAL_SYSCTL_ZERO_COPY 3 +#define SOCKNAL_SYSCTL_TYPED 4 +#define SOCKNAL_SYSCTL_MIN_BULK 5 static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", @@ -58,6 +60,12 @@ static ctl_table ksocknal_ctl_table[] = { &ksocknal_data.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif + {SOCKNAL_SYSCTL_TYPED, "typed", + &ksocknal_data.ksnd_typed_conns, sizeof (int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", + &ksocknal_data.ksnd_min_bulk, sizeof (int), + 0644, NULL, &proc_dointvec}, { 0 } }; @@ -86,7 +94,7 @@ ksocknal_api_shutdown(nal_t *nal, int ni) CDEBUG (D_NET, "closing all connections\n"); ksocknal_del_route (PTL_NID_ANY, 0, 0, 0); - ksocknal_close_conn (PTL_NID_ANY, 0); + 
ksocknal_close_matching_conns (PTL_NID_ANY, 0); return PTL_OK; } @@ -198,7 +206,7 @@ ksocknal_bind_irq (unsigned int irq) ksock_route_t * ksocknal_create_route (__u32 ipaddr, int port, int buffer_size, - int nonagel, int xchange_nids, int irq_affinity, int eager) + int nonagel, int irq_affinity, int eager) { ksock_route_t *route; @@ -215,13 +223,12 @@ ksocknal_create_route (__u32 ipaddr, int port, int buffer_size, route->ksnr_port = port; route->ksnr_buffer_size = buffer_size; route->ksnr_irq_affinity = irq_affinity; - route->ksnr_xchange_nids = xchange_nids; route->ksnr_nonagel = nonagel; route->ksnr_eager = eager; route->ksnr_connecting = 0; + route->ksnr_connected = 0; route->ksnr_deleted = 0; - route->ksnr_generation = 0; - route->ksnr_conn = NULL; + route->ksnr_conn_count = 0; return (route); } @@ -230,7 +237,6 @@ void ksocknal_destroy_route (ksock_route_t *route) { LASSERT (route->ksnr_sharecount == 0); - LASSERT (route->ksnr_conn == NULL); if (route->ksnr_peer != NULL) ksocknal_put_peer (route->ksnr_peer); @@ -397,8 +403,7 @@ ksocknal_get_route_by_idx (int index) int ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, - int nonagle, int xchange_nids, int bind_irq, - int share, int eager) + int nonagle, int bind_irq, int share, int eager) { unsigned long flags; ksock_peer_t *peer; @@ -415,8 +420,8 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, if (peer == NULL) return (-ENOMEM); - route = ksocknal_create_route (ipaddr, port, bufnob, nonagle, - xchange_nids, bind_irq, eager); + route = ksocknal_create_route (ipaddr, port, bufnob, + nonagle, bind_irq, eager); if (route == NULL) { ksocknal_put_peer (peer); return (-ENOMEM); @@ -455,7 +460,7 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, route->ksnr_peer = peer; atomic_inc (&peer->ksnp_refcount); /* peer's route list takes existing ref on route */ - list_add (&route->ksnr_list, &peer->ksnp_routes); + list_add_tail (&route->ksnr_list, 
&peer->ksnp_routes); } route->ksnr_sharecount++; @@ -468,8 +473,10 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, void ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn) { - ksock_peer_t *peer = route->ksnr_peer; - ksock_conn_t *conn = route->ksnr_conn; + ksock_peer_t *peer = route->ksnr_peer; + ksock_conn_t *conn; + struct list_head *ctmp; + struct list_head *cnxt; if (!share) route->ksnr_sharecount = 0; @@ -479,18 +486,22 @@ ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn) return; } - if (conn != NULL) { - if (!keep_conn) + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = list_entry(ctmp, ksock_conn_t, ksnc_list); + + if (conn->ksnc_route != route) + continue; + + if (!keep_conn) { ksocknal_close_conn_locked (conn, 0); - else { - /* keeping the conn; just dissociate it and route... */ - conn->ksnc_route = NULL; - route->ksnr_conn = NULL; - ksocknal_put_route (route); /* drop conn's ref on route */ - ksocknal_put_conn (conn); /* drop route's ref on conn */ + continue; } + + /* keeping the conn; just dissociate it and route... 
*/ + conn->ksnc_route = NULL; + ksocknal_put_route (route); /* drop conn's ref on route */ } - + route->ksnr_deleted = 1; list_del (&route->ksnr_list); ksocknal_put_route (route); /* drop peer's ref */ @@ -670,7 +681,7 @@ ksocknal_choose_scheduler_locked (unsigned int irq) int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq) + struct socket *sock, int bind_irq, int type) { unsigned long flags; ksock_conn_t *conn; @@ -711,6 +722,7 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; + conn->ksnc_type = type; conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; conn->ksnc_saved_write_space = sock->sk->sk_write_space; atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ @@ -732,7 +744,8 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, if (route != NULL) { /* Autoconnected! */ - LASSERT (route->ksnr_conn == NULL && route->ksnr_connecting); + LASSERT ((route->ksnr_connected & (1 << type)) == 0); + LASSERT ((route->ksnr_connecting & (1 << type)) != 0); if (route->ksnr_deleted) { /* This conn was autoconnected, but the autoconnect @@ -745,14 +758,13 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, } - /* associate conn/route for auto-reconnect */ - route->ksnr_conn = conn; - atomic_inc (&conn->ksnc_refcount); + /* associate conn/route */ conn->ksnc_route = route; atomic_inc (&route->ksnr_refcount); - route->ksnr_connecting = 0; - route->ksnr_generation++; + route->ksnr_connecting &= ~(1 << type); + route->ksnr_connected |= (1 << type); + route->ksnr_conn_count++; route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; peer = route->ksnr_peer; @@ -836,14 +848,17 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) route = conn->ksnc_route; if (route != NULL) { /* dissociate conn from route... 
*/ - LASSERT (!route->ksnr_connecting && - !route->ksnr_deleted); + LASSERT (!route->ksnr_deleted); + LASSERT ((route->ksnr_connecting & (1 << conn->ksnc_type)) == 0); + LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); - route->ksnr_conn = NULL; + route->ksnr_connected &= ~(1 << conn->ksnc_type); conn->ksnc_route = NULL; + list_del (&route->ksnr_list); /* make route least favourite */ + list_add_tail (&route->ksnr_list, &peer->ksnp_routes); + ksocknal_put_route (route); /* drop conn's ref on route */ - ksocknal_put_conn (conn); /* drop route's ref on conn */ } /* ksnd_deathrow_conns takes over peer's ref */ @@ -869,24 +884,6 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) spin_unlock (&ksocknal_data.ksnd_reaper_lock); } -int -ksocknal_close_conn_unlocked (ksock_conn_t *conn, int why) -{ - unsigned long flags; - int did_it = 0; - - write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - - if (!conn->ksnc_closing) { - did_it = 1; - ksocknal_close_conn_locked (conn, why); - } - - write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); - - return (did_it); -} - void ksocknal_terminate_conn (ksock_conn_t *conn) { @@ -958,9 +955,10 @@ ksocknal_destroy_conn (ksock_conn_t *conn) ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - CERROR ("Deleting packet type %d len %d ("LPX64"->"LPX64")\n", + CERROR ("Deleting packet %p type %d len %d ("LPX64"->"LPX64")\n", + tx, NTOH__u32 (tx->tx_hdr->type), - NTOH__u32 (PTL_HDR_LENGTH(tx->tx_hdr)), + NTOH__u32 (tx->tx_hdr->payload_length), NTOH__u64 (tx->tx_hdr->src_nid), NTOH__u64 (tx->tx_hdr->dest_nid)); @@ -1012,19 +1010,54 @@ ksocknal_put_conn (ksock_conn_t *conn) } int -ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr) +ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) { - unsigned long flags; ksock_conn_t *conn; struct list_head *ctmp; struct list_head *cnxt; + int count = 0; + + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { 
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + + if (ipaddr == 0 || + conn->ksnc_ipaddr == ipaddr) { + count++; + ksocknal_close_conn_locked (conn, why); + } + } + + return (count); +} + +int +ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) +{ + ksock_peer_t *peer = conn->ksnc_peer; + __u32 ipaddr = conn->ksnc_ipaddr; + unsigned long flags; + int count; + + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); + + count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); + + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + + return (count); +} + +int +ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) +{ + unsigned long flags; ksock_peer_t *peer; struct list_head *ptmp; struct list_head *pnxt; int lo; int hi; int i; - int rc = -ENOENT; + int count = 0; write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); @@ -1043,24 +1076,17 @@ ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr) if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid)) continue; - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - - conn = list_entry (ctmp, ksock_conn_t, - ksnc_list); - - if (!(ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr)) - continue; - - rc = 0; - ksocknal_close_conn_locked (conn, 0); - } + count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); } } write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); - return (rc); + /* wildcards always succeed */ + if (nid == PTL_NID_ANY || ipaddr == 0) + return (0); + + return (count == 0 ? -ENOENT : 0); } void @@ -1073,7 +1099,7 @@ ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive) if (!alive) { /* If the gateway crashed, close all open connections... 
*/ - ksocknal_close_conn (gw_nid, 0); + ksocknal_close_matching_conns (gw_nid, 0); return; } @@ -1233,13 +1259,12 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_nid = route->ksnr_peer->ksnp_nid; pcfg->pcfg_id = route->ksnr_ipaddr; pcfg->pcfg_misc = route->ksnr_port; - pcfg->pcfg_count = route->ksnr_generation; + pcfg->pcfg_count = route->ksnr_conn_count; pcfg->pcfg_size = route->ksnr_buffer_size; pcfg->pcfg_wait = route->ksnr_sharecount; pcfg->pcfg_flags = (route->ksnr_nonagel ? 1 : 0) | - (route->ksnr_xchange_nids ? 2 : 0) | - (route->ksnr_irq_affinity ? 4 : 0) | - (route->ksnr_eager ? 8 : 0); + (route->ksnr_irq_affinity ? 2 : 0) | + (route->ksnr_eager ? 4 : 0); ksocknal_put_route (route); } break; @@ -1250,8 +1275,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) (pcfg->pcfg_flags & 0x01) != 0, (pcfg->pcfg_flags & 0x02) != 0, (pcfg->pcfg_flags & 0x04) != 0, - (pcfg->pcfg_flags & 0x08) != 0, - (pcfg->pcfg_flags & 0x10) != 0); + (pcfg->pcfg_flags & 0x08) != 0); break; } case NAL_CMD_DEL_AUTOCONN: { @@ -1267,26 +1291,38 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) rc = -ENOENT; else { rc = 0; - pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; - pcfg->pcfg_id = conn->ksnc_ipaddr; - pcfg->pcfg_misc = conn->ksnc_port; + pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; + pcfg->pcfg_id = conn->ksnc_ipaddr; + pcfg->pcfg_misc = conn->ksnc_port; + pcfg->pcfg_flags = conn->ksnc_type; ksocknal_put_conn (conn); } break; } case NAL_CMD_REGISTER_PEER_FD: { struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc); + int type = pcfg->pcfg_misc; + + if (sock == NULL) + break; - if (sock != NULL) { - rc = ksocknal_create_conn (pcfg->pcfg_nid, NULL, - sock, pcfg->pcfg_flags); - if (rc != 0) - fput (sock->file); + switch (type) { + case SOCKNAL_CONN_ANY: + case SOCKNAL_CONN_CONTROL: + case SOCKNAL_CONN_BULK_IN: + case SOCKNAL_CONN_BULK_OUT: + rc = ksocknal_create_conn(pcfg->pcfg_nid, NULL, sock, + pcfg->pcfg_flags, type); + default: + break; } + if 
(rc != 0) + fput (sock->file); break; } case NAL_CMD_CLOSE_CONNECTION: { - rc = ksocknal_close_conn (pcfg->pcfg_nid, pcfg->pcfg_id); + rc = ksocknal_close_matching_conns (pcfg->pcfg_nid, + pcfg->pcfg_id); break; } case NAL_CMD_REGISTER_MYNID: { @@ -1434,7 +1470,9 @@ ksocknal_module_init (void) /* the following must be sizeof(int) for proc_dointvec() */ LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); ksocknal_api.forward = ksocknal_api_forward; @@ -1451,6 +1489,8 @@ ksocknal_module_init (void) ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; #if SOCKNAL_ZC ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; #endif diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 1c73ae8..7bfc009 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -25,7 +25,9 @@ */ #define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif #include #include @@ -58,6 +60,7 @@ #include #include #include +#include #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ @@ -71,9 +74,10 @@ /* default vals for runtime tunables */ #define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ -#define SOCKNAL_EAGER_ACK 1 /* default eager ack (boolean) */ +#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */ +#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? 
*/ #define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */ - +#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */ #define SOCKNAL_USE_KEEPALIVES 0 /* use tcp/ip keepalive? */ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ @@ -142,6 +146,8 @@ typedef struct { int ksnd_init; /* initialisation state */ int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ int ksnd_eager_ack; /* make TCP ack eagerly? */ + int ksnd_typed_conns; /* drive sockets by type? */ + int ksnd_min_bulk; /* smallest "large" message */ #if SOCKNAL_ZC unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif @@ -300,6 +306,7 @@ typedef struct ksock_conn __u32 ksnc_ipaddr; /* peer's IP */ int ksnc_port; /* peer's port */ int ksnc_closing; /* being shut down */ + int ksnc_type; /* type of connection */ /* READER */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ @@ -327,6 +334,10 @@ typedef struct ksock_conn int ksnc_tx_scheduled; /* being progressed */ } ksock_conn_t; +#define KSNR_TYPED_ROUTES ((1 << SOCKNAL_CONN_CONTROL) | \ + (1 << SOCKNAL_CONN_BULK_IN) | \ + (1 << SOCKNAL_CONN_BULK_OUT)) + typedef struct ksock_route { struct list_head ksnr_list; /* chain on peer route list */ @@ -340,13 +351,12 @@ typedef struct ksock_route int ksnr_port; /* port to connect to */ int ksnr_buffer_size; /* size of socket buffers */ unsigned int ksnr_irq_affinity:1; /* set affinity? */ - unsigned int ksnr_xchange_nids:1; /* do hello protocol? */ unsigned int ksnr_nonagel:1; /* disable nagle? */ unsigned int ksnr_eager:1; /* connect eagery? */ - unsigned int ksnr_connecting:1; /* autoconnect in progress? */ + unsigned int ksnr_connecting:4; /* autoconnects in progress by type */ + unsigned int ksnr_connected:4; /* connections established by type */ unsigned int ksnr_deleted:1; /* been removed from peer? 
*/ - int ksnr_generation; /* connection incarnation # */ - ksock_conn_t *ksnr_conn; /* NULL/active connection */ + int ksnr_conn_count; /* # conns established by this route */ } ksock_route_t; typedef struct ksock_peer @@ -402,13 +412,13 @@ extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int single, int keep_conn); extern int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq); + struct socket *sock, int bind_irq, int type); extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); -extern int ksocknal_close_conn_unlocked (ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); extern void ksocknal_put_conn (ksock_conn_t *conn); -extern int ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr); +extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); +extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 8ce6777..dde434a 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -218,9 +218,9 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *iov = tx->tx_iov; int fragsize = iov->iov_len; unsigned long vaddr = (unsigned long)iov->iov_base; - int more = (!list_empty (&conn->ksnc_tx_queue)) | - (tx->tx_niov > 1) | - (tx->tx_nkiov > 1); + int more = (tx->tx_niov > 1) || + (tx->tx_nkiov > 0) || + (!list_empty (&conn->ksnc_tx_queue)); #if SOCKNAL_ZC int offset = vaddr & (PAGE_SIZE - 1); int zcsize = MIN (fragsize, PAGE_SIZE - offset); @@ -266,7 +266,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT }; mm_segment_t oldmm = get_fs(); - + set_fs (KERNEL_DS); rc = sock_sendmsg(sock, &msg, fragsize); set_fs (oldmm); @@ -298,8 +298,8 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) int fragsize = kiov->kiov_len; struct page *page = kiov->kiov_page; int offset = kiov->kiov_offset; - int more = (!list_empty (&conn->ksnc_tx_queue)) | - (tx->tx_nkiov > 1); + int more = (tx->tx_nkiov > 1) || + (!list_empty (&conn->ksnc_tx_queue)); int rc; /* NB we can't trust socket ops to either consume our iovs @@ -464,7 +464,7 @@ ksocknal_recv_iov (ksock_conn_t *conn) * or leave them alone, so we only receive 1 frag at a time. */ LASSERT (conn->ksnc_rx_niov > 0); LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - + set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); /* NB this is just a boolean............................^ */ @@ -521,7 +521,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); LASSERT (conn->ksnc_rx_nkiov > 0); LASSERT (offset + fragsize <= PAGE_SIZE); - + set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); /* NB this is just a boolean............................^ */ @@ -597,7 +597,7 @@ ksocknal_recvmsg (ksock_conn_t *conn) if (conn->ksnc_rx_nob_wanted == 0) { /* Completed a message segment (header or payload) */ - if (ksocknal_data.ksnd_eager_ack && + if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... 
*/ @@ -721,12 +721,12 @@ ksocknal_process_transmit (ksock_sched_t *sched, unsigned long *irq_flags) CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); if (rc != 0) { - if (ksocknal_close_conn_unlocked (conn, rc)) { - /* I'm the first to close */ + if (!conn->ksnc_closing) CERROR ("[%p] Error %d on write to "LPX64" ip %08x:%d\n", conn, rc, conn->ksnc_peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port); - } + ksocknal_close_conn_and_siblings (conn, rc); + ksocknal_tx_launched (tx); spin_lock_irqsave (&sched->kss_lock, *irq_flags); @@ -764,10 +764,17 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) /* called holding write lock on ksnd_global_lock */ - LASSERT (route->ksnr_conn == NULL); - LASSERT (!route->ksnr_deleted && !route->ksnr_connecting); + LASSERT (!route->ksnr_deleted); + LASSERT ((route->ksnr_connected & (1 << SOCKNAL_CONN_ANY)) == 0); + LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); + LASSERT (!route->ksnr_connecting); - route->ksnr_connecting = 1; + if (ksocknal_data.ksnd_typed_conns) + route->ksnr_connecting = + KSNR_TYPED_ROUTES & ~route->ksnr_connected; + else + route->ksnr_connecting = (1 << SOCKNAL_CONN_ANY); + atomic_inc (&route->ksnr_refcount); /* extra ref for asynchd */ spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); @@ -814,21 +821,51 @@ ksock_conn_t * ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) { struct list_head *tmp; - ksock_conn_t *conn = NULL; - + ksock_conn_t *typed = NULL; + int tnob = 0; + ksock_conn_t *fallback = NULL; + int fnob = 0; + /* Find the conn with the shortest tx queue */ list_for_each (tmp, &peer->ksnp_conns) { - ksock_conn_t *c = list_entry (tmp, ksock_conn_t, ksnc_list); + ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); + int nob = atomic_read(&c->ksnc_tx_nob); LASSERT (!c->ksnc_closing); - - if (conn == NULL || - atomic_read (&conn->ksnc_tx_nob) > - atomic_read (&c->ksnc_tx_nob)) - conn = c; + + if (fallback == NULL || nob < fnob) { 
+ fallback = c; + fnob = nob; + } + + if (!ksocknal_data.ksnd_typed_conns) + continue; + + switch (c->ksnc_type) { + default: + LBUG(); + case SOCKNAL_CONN_ANY: + break; + case SOCKNAL_CONN_BULK_IN: + continue; + case SOCKNAL_CONN_BULK_OUT: + if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) + continue; + break; + case SOCKNAL_CONN_CONTROL: + if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) + continue; + break; + } + + if (typed == NULL || nob < tnob) { + typed = c; + tnob = nob; + } } - return (conn); + /* prefer the typed selection */ + return ((typed != NULL) ? typed : fallback); } void @@ -870,22 +907,46 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) } ksock_route_t * -ksocknal_find_connectable_route_locked (ksock_peer_t *peer, int eager_only) +ksocknal_find_connectable_route_locked (ksock_peer_t *peer) { struct list_head *tmp; ksock_route_t *route; + ksock_route_t *candidate = NULL; + int found = 0; + int bits; list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); + bits = route->ksnr_connected; - if (route->ksnr_conn == NULL && /* not connected */ - !route->ksnr_connecting && /* not connecting */ - (!eager_only || route->ksnr_eager) && /* wants to connect */ - time_after_eq (jiffies, route->ksnr_timeout)) /* OK to retry */ + if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES || + (bits & (1 << SOCKNAL_CONN_ANY)) != 0 || + route->ksnr_connecting != 0) { + /* All typed connections have been established, or + * an untyped connection has been established, or + * connections are currently being established */ + found = 1; + continue; + } + + /* too soon to retry this guy? */ + if (!time_after_eq (jiffies, route->ksnr_timeout)) + continue; + + /* always do eager routes */ + if (route->ksnr_eager) return (route); + + if (candidate == NULL) { + /* If we don't find any other route that is fully + * connected or connecting, the first connectable + * route is returned. 
If it fails to connect, it + * will get placed at the end of the list */ + candidate = route; + } } - - return (NULL); + + return (found ? NULL : candidate); } ksock_route_t * @@ -897,7 +958,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer) list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); - if (route->ksnr_connecting) + if (route->ksnr_connecting != 0) return (route); } @@ -912,7 +973,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) ksock_conn_t *conn; ksock_route_t *route; rwlock_t *g_lock; - + /* Ensure the frags we've been given EXACTLY match the number of * bytes we want to send. Many TCP/IP stacks disregard any total * size parameters passed to them and just look at the frags. @@ -936,18 +997,17 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) peer = ksocknal_find_target_peer_locked (tx, nid); if (peer == NULL) { read_unlock (g_lock); - return (PTL_FAIL); + return (-EHOSTUNREACH); } - if (ksocknal_find_connectable_route_locked(peer, 1) == NULL) { + if (ksocknal_find_connectable_route_locked(peer) == NULL) { conn = ksocknal_find_conn_locked (tx, peer); if (conn != NULL) { - /* I've got no unconnected autoconnect routes that - * need to be connected, and I do have an actual - * connection... */ + /* I've got no autoconnect routes that need to be + * connecting and I do have an actual connection... */ ksocknal_queue_tx_locked (tx, conn); read_unlock (g_lock); - return (PTL_OK); + return (0); } } @@ -960,14 +1020,13 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) if (peer->ksnp_closing) { /* peer deleted as I blocked! 
*/ write_unlock_irqrestore (g_lock, flags); ksocknal_put_peer (peer); - return (PTL_FAIL); + return (-EHOSTUNREACH); } ksocknal_put_peer (peer); /* drop ref I got above */ - for (;;) { - /* launch all eager autoconnections */ - route = ksocknal_find_connectable_route_locked (peer, 1); + /* launch any/all autoconnections that need it */ + route = ksocknal_find_connectable_route_locked (peer); if (route == NULL) break; @@ -979,26 +1038,20 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) /* Connection exists; queue message on it */ ksocknal_queue_tx_locked (tx, conn); write_unlock_irqrestore (g_lock, flags); - return (PTL_OK); + return (0); } - if (ksocknal_find_connecting_route_locked (peer) == NULL) { - /* no autoconnect routes actually connecting now. Scrape - * the barrel for non-eager autoconnects */ - route = ksocknal_find_connectable_route_locked (peer, 0); - if (route != NULL) { - ksocknal_launch_autoconnect_locked (route); - } else { - write_unlock_irqrestore (g_lock, flags); - return (PTL_FAIL); - } + route = ksocknal_find_connecting_route_locked (peer); + if (route != NULL) { + /* At least 1 connection is being established; queue the + * message... */ + list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); + write_unlock_irqrestore (g_lock, flags); + return (0); } - - /* At least 1 connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); - + write_unlock_irqrestore (g_lock, flags); - return (PTL_OK); + return (-EHOSTUNREACH); } ksock_ltx_t * @@ -1069,10 +1122,11 @@ ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; rc = ksocknal_launch_packet (<x->ltx_tx, nid); - if (rc != PTL_OK) - ksocknal_put_ltx (ltx); + if (rc == 0) + return (PTL_OK); - return (rc); + ksocknal_put_ltx (ltx); + return (PTL_FAIL); } int @@ -1104,10 +1158,11 @@ ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; rc = ksocknal_launch_packet (<x->ltx_tx, nid); - if (rc != PTL_OK) - ksocknal_put_ltx (ltx); - - return (rc); + if (rc == 0) + return (PTL_OK); + + ksocknal_put_ltx (ltx); + return (PTL_FAIL); } void @@ -1133,10 +1188,8 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); - if (rc != 0) { - /* FIXME, could pass a better completion error */ - kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH); - } + if (rc != 0) + kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc); } int @@ -1353,7 +1406,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) { ksock_peer_t *peer; ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr)); + int body_len = NTOH__u32 (conn->ksnc_hdr.payload_length); CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, NTOH__u64 (conn->ksnc_hdr.src_nid), @@ -1368,7 +1421,6 @@ ksocknal_fwd_parse (ksock_conn_t *conn) dest_nid, body_len); ksocknal_new_packet (conn, 0); /* on to new packet */ - ksocknal_close_conn_unlocked (conn, -EINVAL); /* give up on conn */ return; } @@ -1512,17 +1564,15 @@ ksocknal_process_receive (ksock_sched_t *sched, unsigned long *irq_flags) rc = ksocknal_recvmsg(conn); if (rc <= 0) { - if (ksocknal_close_conn_unlocked (conn, rc)) { - /* 
I'm the first to close */ - if (rc < 0) - CERROR ("[%p] Error %d on read from "LPX64" ip %08x:%d\n", - conn, rc, conn->ksnc_peer->ksnp_nid, - conn->ksnc_ipaddr, conn->ksnc_port); - else - CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", - conn, conn->ksnc_peer->ksnp_nid, - conn->ksnc_ipaddr, conn->ksnc_port); - } + if (rc == 0) + CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", + conn, conn->ksnc_peer->ksnp_nid, + conn->ksnc_ipaddr, conn->ksnc_port); + else if (!conn->ksnc_closing) + CERROR ("[%p] Error %d on read from "LPX64" ip %08x:%d\n", + conn, rc, conn->ksnc_peer->ksnp_nid, + conn->ksnc_ipaddr, conn->ksnc_port); + ksocknal_close_conn_and_siblings (conn, rc); goto out; } @@ -1945,7 +1995,7 @@ ksocknal_sock_read (struct socket *sock, void *buffer, int nob) } int -ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) +ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) { int rc; ptl_hdr_t hdr; @@ -1960,6 +2010,8 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + *(__u32 *)&hdr.msg = __cpu_to_le32 (type); /* Assume sufficient socket buffering for this message */ rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr)); @@ -1992,7 +2044,9 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) return (-EINVAL); } - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); +#if (PORTALS_PROTO_VERSION_MAJOR != 0) +# error "This code only understands protocol version 0.x" +#endif /* version 0 sends magic/version as the dest_nid of a 'hello' header, * so read the rest of it in now... 
*/ @@ -2005,11 +2059,11 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) /* ...and check we got what we expected */ if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - PTL_HDR_LENGTH (&hdr) != __cpu_to_le32 (0)) { + hdr.payload_length != __cpu_to_le32 (0)) { CERROR ("Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload from "LPX64"\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (PTL_HDR_LENGTH (&hdr)), nid); + __le32_to_cpu (hdr.payload_length), nid); return (-EINVAL); } @@ -2103,7 +2157,7 @@ ksocknal_setup_sock (struct socket *sock) } int -ksocknal_connect_peer (ksock_route_t *route) +ksocknal_connect_peer (ksock_route_t *route, int type) { struct sockaddr_in peer_addr; mm_segment_t oldmm = get_fs(); @@ -2208,14 +2262,12 @@ ksocknal_connect_peer (ksock_route_t *route) goto out; } - if (route->ksnr_xchange_nids) { - rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid); - if (rc != 0) - goto out; - } + rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid, type); + if (rc != 0) + goto out; - rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, - route, sock, route->ksnr_irq_affinity); + rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, route, sock, + route->ksnr_irq_affinity, type); if (rc == 0) { /* Take an extra ref on sock->file to compensate for the * upcoming close which will lose fd's ref on it. */ @@ -2235,20 +2287,36 @@ ksocknal_autoconnect (ksock_route_t *route) ksock_peer_t *peer; unsigned long flags; int rc; + int type; - rc = ksocknal_connect_peer (route); - if (rc == 0) { + for (;;) { + for (type = 0; type < SOCKNAL_CONN_NTYPES; type++) + if ((route->ksnr_connecting & (1 << type)) != 0) + break; + LASSERT (type < SOCKNAL_CONN_NTYPES); + + rc = ksocknal_connect_peer (route, type); + + if (rc != 0) + break; + /* successfully autoconnected: create_conn did the - * route/conn binding and scheduled any blocked packets, - * so there's nothing left to do now. 
*/ - return; + * route/conn binding and scheduled any blocked packets */ + + if (route->ksnr_connecting == 0) { + /* No more connections required */ + return; + } } + /* Connection attempt failed */ + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); peer = route->ksnr_peer; route->ksnr_connecting = 0; + /* This is a retry rather than a new connection */ LASSERT (route->ksnr_retry_interval != 0); route->ksnr_timeout = jiffies + route->ksnr_retry_interval; route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2, @@ -2268,6 +2336,12 @@ ksocknal_autoconnect (ksock_route_t *route) } while (!list_empty (&peer->ksnp_tx_queue)); } + /* make this route least-favourite for re-selection */ + if (!route->ksnr_deleted) { + list_del(&route->ksnr_list); + list_add_tail(&route->ksnr_list, &peer->ksnp_routes); + } + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); while (!list_empty (&zombies)) { @@ -2275,7 +2349,7 @@ ksocknal_autoconnect (ksock_route_t *route) CERROR ("Deleting packet type %d len %d ("LPX64"->"LPX64")\n", NTOH__u32 (tx->tx_hdr->type), - NTOH__u32 (PTL_HDR_LENGTH(tx->tx_hdr)), + NTOH__u32 (tx->tx_hdr->payload_length), NTOH__u64 (tx->tx_hdr->src_nid), NTOH__u64 (tx->tx_hdr->dest_nid)); @@ -2393,13 +2467,11 @@ ksocknal_check_peer_timeouts (int idx) if (conn != NULL) { read_unlock (&ksocknal_data.ksnd_global_lock); - if (ksocknal_close_conn_unlocked (conn, -ETIMEDOUT)) { - /* I actually closed... */ - CERROR ("Timeout out conn->"LPX64" ip %x:%d\n", - peer->ksnp_nid, conn->ksnc_ipaddr, - conn->ksnc_port); - } - + CERROR ("Timeout out conn->"LPX64" ip %x:%d\n", + peer->ksnp_nid, conn->ksnc_ipaddr, + conn->ksnc_port); + ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); + /* NB we won't find this one again, but we can't * just proceed with the next peer, since we dropped * ksnd_global_lock and it might be dead already! 
*/ diff --git a/lnet/klnds/toelnd/toenal_cb.c b/lnet/klnds/toelnd/toenal_cb.c index 983fa71..37e3f1e 100644 --- a/lnet/klnds/toelnd/toenal_cb.c +++ b/lnet/klnds/toelnd/toenal_cb.c @@ -708,26 +708,7 @@ ktoenal_fwd_parse (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); LASSERT (conn->ksnc_rx_scheduled); - switch (conn->ksnc_hdr.type) - { - case PTL_MSG_GET: - case PTL_MSG_ACK: - body_len = 0; - break; - case PTL_MSG_PUT: - body_len = conn->ksnc_hdr.msg.put.length; - break; - case PTL_MSG_REPLY: - body_len = conn->ksnc_hdr.msg.reply.length; - break; - default: - /* Unrecognised packet type */ - CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n", - conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - /* Ignore this header and go back to reading a new packet. */ - ktoenal_new_packet (conn, 0); - return; - } + body_len = conn->ksnc_hdr.payload_length; if (body_len < 0) /* length corrupt */ { diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index e73cbb8..491bb87 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -583,7 +583,7 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, hdr->src_nid, hdr->src_pid, - PTL_HDR_LENGTH (hdr), hdr->msg.put.offset, + hdr->payload_length, hdr->msg.put.offset, hdr->msg.put.match_bits, &mlength, &offset, &unlink); if (me == NULL) @@ -592,7 +592,7 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) md = me->md; CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d " "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), + hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, md->md_lh.lh_cookie, md->md_niov, offset); msg = get_new_msg (nal, md); @@ -617,7 +617,7 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) msg->ev.initiator.pid = hdr->src_pid; msg->ev.portal = 
hdr->msg.put.ptl_index; msg->ev.match_bits = hdr->msg.put.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); + msg->ev.rlength = hdr->payload_length; msg->ev.mlength = mlength; msg->ev.offset = offset; msg->ev.hdr_data = hdr->msg.put.hdr_data; @@ -646,14 +646,14 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) state_unlock(nal, &flags); - lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, msg, md, offset, mlength, hdr->payload_length); return 0; drop: nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -676,11 +676,6 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length); hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset); - /* compatibility check until field is deleted */ - if (hdr->msg.get.return_offset != 0) - CERROR("Unexpected non-zero get.return_offset %x from " - LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid); - state_lock(nal, &flags); me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, @@ -694,7 +689,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) md = me->md; CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d " "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), + hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, md->md_lh.lh_cookie, md->md_niov, offset); msg = get_new_msg (nal, md); @@ -710,7 +705,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) msg->ev.initiator.pid = hdr->src_pid; msg->ev.portal = hdr->msg.get.ptl_index; msg->ev.match_bits = hdr->msg.get.match_bits; - 
msg->ev.rlength = PTL_HDR_LENGTH(hdr); + msg->ev.rlength = hdr->payload_length; msg->ev.mlength = mlength; msg->ev.offset = offset; msg->ev.hdr_data = 0; @@ -745,7 +740,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) reply.src_nid = HTON__u64 (ni->nid); reply.dest_pid = HTON__u32 (hdr->src_pid); reply.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength); + reply.payload_length = HTON__u32 (mlength); reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; @@ -763,13 +758,13 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) } /* Complete the incoming message */ - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (rc); drop: ni->counters.drop_count++; ni->counters.drop_length += hdr->msg.get.sink_length; state_unlock(nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -782,11 +777,6 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) lib_msg_t *msg; unsigned long flags; - /* compatibility check until field is deleted */ - if (hdr->msg.reply.dst_offset != 0) - CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n", - hdr->msg.reply.dst_offset, hdr->src_nid); - state_lock(nal, &flags); /* NB handles only looked up by creator (no flips) */ @@ -802,7 +792,7 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) LASSERT (md->offset == 0); - length = rlength = PTL_HDR_LENGTH(hdr); + length = rlength = hdr->payload_length; if (length > md->length) { if ((md->options & PTL_MD_TRUNCATE) == 0) { @@ -848,9 +838,9 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) drop: nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv 
(nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -901,13 +891,13 @@ static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ni->counters.recv_count++; state_unlock(nal, &flags); - lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, msg, NULL, 0, 0, hdr->payload_length); return 0; drop: nal->ni.counters.drop_count++; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -954,7 +944,7 @@ void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) hdr->msg.put.match_bits); nal->cb_printf(nal, " Length %d, offset %d, hdr data "LPX64"\n", - PTL_HDR_LENGTH(hdr), hdr->msg.put.offset, + hdr->payload_length, hdr->msg.put.offset, hdr->msg.put.hdr_data); break; @@ -984,7 +974,7 @@ void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) "length %d\n", hdr->msg.reply.dst_wmd.wh_interface_cookie, hdr->msg.reply.dst_wmd.wh_object_cookie, - PTL_HDR_LENGTH(hdr)); + hdr->payload_length); } } /* end of print_hdr() */ @@ -994,21 +984,13 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) { unsigned long flags; - /* NB static check; optimizer will elide this if it's right */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.put.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.get.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.reply.length)); - /* convert common fields to host byte order */ hdr->dest_nid = NTOH__u64 (hdr->dest_nid); hdr->src_nid = NTOH__u64 (hdr->src_nid); hdr->dest_pid = NTOH__u32 (hdr->dest_pid); hdr->src_pid = NTOH__u32 (hdr->src_pid); hdr->type = NTOH__u32 (hdr->type); - PTL_HDR_LENGTH(hdr) = NTOH__u32 (PTL_HDR_LENGTH(hdr)); + hdr->payload_length = NTOH__u32(hdr->payload_length); #if 0 nal->cb_printf(nal, 
"%d: lib_parse: nal=%p hdr=%p type=%d\n", nal->ni.nid, nal, hdr, hdr->type); @@ -1023,7 +1005,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) nal->ni.nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1034,10 +1016,10 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) state_lock (nal, &flags); nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1048,7 +1030,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ": simulated failure\n", nal->ni.nid, hdr_type_string (hdr), hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1069,7 +1051,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid, hdr->type); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } } @@ -1126,7 +1108,7 @@ int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret) hdr.src_nid = HTON__u64 (ni->nid); hdr.dest_pid = HTON__u32 (id->pid); hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length); + hdr.payload_length = HTON__u32 (md->length); /* NB handles only looked up by creator (no flips) */ if (args->ack_req_in == PTL_ACK_REQ) { @@ -1303,7 +1285,7 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) hdr.src_nid = HTON__u64 (ni->nid); hdr.dest_pid = HTON__u32 (id->pid); hdr.src_pid = HTON__u32 
(ni->pid); - PTL_HDR_LENGTH(&hdr) = 0; + hdr.payload_length = 0; /* NB handles only looked up by creator (no flips) */ hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; @@ -1364,12 +1346,15 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) void lib_assert_wire_constants (void) { - /* Wire protocol assertions generated by 'wirecheck' */ + /* Wire protocol assertions generated by 'wirecheck' + * running on Linux mdev2 2.4.18-p4smp-15llp #1 SMP Wed Oct 8 11:01:07 PDT 2003 i686 unknown + * with gcc version 2.96 20000731 (Red Hat Linux 7.3 2.96-113) */ + /* Constants... */ LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 1); + LASSERT (PORTALS_PROTO_VERSION_MINOR == 2); LASSERT (PTL_MSG_ACK == 0); LASSERT (PTL_MSG_PUT == 1); LASSERT (PTL_MSG_GET == 2); @@ -1377,79 +1362,71 @@ void lib_assert_wire_constants (void) LASSERT (PTL_MSG_HELLO == 4); /* Checks for struct ptl_handle_wire_t */ - LASSERT (sizeof (ptl_handle_wire_t) == 16); - LASSERT (offsetof (ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT (offsetof (ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); + LASSERT ((int)sizeof(ptl_handle_wire_t) == 16); + LASSERT (offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); + LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); + LASSERT (offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); + LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); /* Checks for struct ptl_magicversion_t */ - LASSERT (sizeof (ptl_magicversion_t) == 8); - LASSERT (offsetof (ptl_magicversion_t, magic) == 0); - LASSERT (sizeof (((ptl_magicversion_t *)0)->magic) == 4); - LASSERT (offsetof (ptl_magicversion_t, version_major) == 4); - LASSERT (sizeof (((ptl_magicversion_t 
*)0)->version_major) == 2); - LASSERT (offsetof (ptl_magicversion_t, version_minor) == 6); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_minor) == 2); + LASSERT ((int)sizeof(ptl_magicversion_t) == 8); + LASSERT (offsetof(ptl_magicversion_t, magic) == 0); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4); + LASSERT (offsetof(ptl_magicversion_t, version_major) == 4); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2); + LASSERT (offsetof(ptl_magicversion_t, version_minor) == 6); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2); /* Checks for struct ptl_hdr_t */ - LASSERT (sizeof (ptl_hdr_t) == 72); - LASSERT (offsetof (ptl_hdr_t, dest_nid) == 0); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, src_nid) == 8); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, dest_pid) == 16); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, src_pid) == 20); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, type) == 24); - LASSERT (sizeof (((ptl_hdr_t *)0)->type) == 4); + LASSERT ((int)sizeof(ptl_hdr_t) == 72); + LASSERT (offsetof(ptl_hdr_t, dest_nid) == 0); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8); + LASSERT (offsetof(ptl_hdr_t, src_nid) == 8); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8); + LASSERT (offsetof(ptl_hdr_t, dest_pid) == 16); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4); + LASSERT (offsetof(ptl_hdr_t, src_pid) == 20); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4); + LASSERT (offsetof(ptl_hdr_t, type) == 24); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4); + LASSERT (offsetof(ptl_hdr_t, payload_length) == 28); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40); /* Ack */ - LASSERT (offsetof (ptl_hdr_t, 
msg.ack.mlength) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.mlength) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.ack.mlength) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4); /* Put */ - LASSERT (offsetof (ptl_hdr_t, msg.put.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.put.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.hdr_data) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.put.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.put.ptl_index) == 64); + LASSERT 
((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.put.offset) == 68); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4); /* Get */ - LASSERT (offsetof (ptl_hdr_t, msg.get.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.get.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_offset) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.sink_length) == 68); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.sink_length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.get.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.src_offset) == 60); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.sink_length) == 64); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4); /* Reply */ - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_offset) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, 
msg.reply.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); } diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 9363251..9840ff5 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -59,7 +59,7 @@ int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg) ack.src_nid = HTON__u64 (nal->ni.nid); ack.dest_pid = HTON__u32 (msg->pid); ack.src_pid = HTON__u32 (nal->ni.pid); - PTL_HDR_LENGTH(&ack) = 0; + ack.payload_length = 0; ack.msg.ack.dst_wmd = msg->ack_wmd; ack.msg.ack.match_bits = msg->ev.match_bits; diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index 9fb2759..54a57d4 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -19,6 +19,7 @@ #include #include #include +#include /* should get this from autoconf somehow */ #ifndef PIDFILE_DIR @@ -100,7 +101,7 @@ parse_size (int *sizep, char *str) } void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid) +show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) { struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); __u32 host_ip = ntohl (net_ip); @@ -128,8 +129,12 @@ show_connection (int fd, __u32 net_ip, ptl_nid_t nid) else snprintf (host, sizeof(host), "%s", h->h_name); - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n", - host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled"); + syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s type %s\n", + host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled", + (type == SOCKNAL_CONN_ANY) ? "A" : + (type == SOCKNAL_CONN_CONTROL) ? "C" : + (type == SOCKNAL_CONN_BULK_IN) ? "I" : + (type == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); } int @@ -189,9 +194,10 @@ sock_read (int cfd, void *buffer, int nob) } int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) +exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid, int *type) { int rc; + int t; ptl_hdr_t hdr; ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; @@ -248,15 +254,32 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) /* ...and check we got what we expected */ if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { + __cpu_to_le32 (hdr.payload_length) != 0) { fprintf (stderr, "Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload\n", __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); + __cpu_to_le32 (hdr.payload_length)); return (-1); } *peer_nid = __le64_to_cpu (hdr.src_nid); + + t = __le32_to_cpu (*(__u32 *)&hdr.msg); + switch (t) { /* swap sense of connection type */ + case SOCKNAL_CONN_CONTROL: + break; + case SOCKNAL_CONN_BULK_IN: + t = SOCKNAL_CONN_BULK_OUT; + break; + case SOCKNAL_CONN_BULK_OUT: + t = SOCKNAL_CONN_BULK_IN; + break; + default: + t = SOCKNAL_CONN_ANY; + break; + } + *type = t; + return (0); } @@ -277,10 +300,10 @@ int main(int argc, char **argv) int noclose = 0; int nonagle = 1; int nal = SOCKNAL; - int xchg_nids = 0; int bind_irq = 0; + int type = 0; - while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1) + while ((c = getopt (argc, argv, "N:r:s:nli")) != -1) switch (c) { case 'r': @@ -301,10 +324,6 @@ int main(int argc, char **argv) noclose = 1; break; - case 'x': - xchg_nids = 1; - break; - case 'i': bind_irq = 1; break; @@ -419,35 +438,29 @@ int main(int argc, char **argv) continue; } - if (!xchg_nids) - peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */ - else - { - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids 
(cfd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (cfd); - continue; - } + PORTAL_IOC_INIT (data); + data.ioc_nal = nal; + rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); + if (rc < 0) { + perror ("Can't get my NID"); + close (cfd); + continue; + } + + rc = exchange_nids (cfd, data.ioc_nid, &peer_nid, &type); + if (rc != 0) { + close (cfd); + continue; } - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid); + show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid, type); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_nal = nal; pcfg.pcfg_fd = cfd; pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; + pcfg.pcfg_misc = type; PORTAL_IOC_INIT(data); data.ioc_pbuf1 = (char*)&pcfg; diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index b444115..23f2a5f 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "parser.h" unsigned int portal_debug; @@ -435,14 +436,13 @@ jt_ptl_print_autoconnects (int argc, char **argv) if (rc != 0) break; - printf (LPX64"@%s:%d #%d buffer %d nonagle %s xchg %s " - "affinity %s eager %s share %d\n", + printf (LPX64"@%s:%d #%d buffer %d " + "nonagle %s affinity %s eager %s share %d\n", pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), pcfg.pcfg_misc, pcfg.pcfg_count, pcfg.pcfg_size, (pcfg.pcfg_flags & 1) ? "on" : "off", (pcfg.pcfg_flags & 2) ? "on" : "off", (pcfg.pcfg_flags & 4) ? "on" : "off", - (pcfg.pcfg_flags & 8) ? 
"on" : "off", pcfg.pcfg_wait); } @@ -458,14 +458,13 @@ jt_ptl_add_autoconnect (int argc, char **argv) ptl_nid_t nid; __u32 ip; int port; - int xchange_nids = 0; int irq_affinity = 0; int share = 0; int eager = 0; int rc; if (argc < 4 || argc > 5) { - fprintf (stderr, "usage: %s nid ipaddr port [ixse]\n", argv[0]); + fprintf (stderr, "usage: %s nid ipaddr port [ise]\n", argv[0]); return 0; } @@ -493,9 +492,6 @@ jt_ptl_add_autoconnect (int argc, char **argv) while (*opts != 0) switch (*opts++) { - case 'x': - xchange_nids = 1; - break; case 'i': irq_affinity = 1; break; @@ -519,10 +515,9 @@ jt_ptl_add_autoconnect (int argc, char **argv) /* only passing one buffer size! */ pcfg.pcfg_size = MAX (g_socket_rxmem, g_socket_txmem); pcfg.pcfg_flags = (g_socket_nonagle ? 0x01 : 0) | - (xchange_nids ? 0x02 : 0) | - (irq_affinity ? 0x04 : 0) | - (share ? 0x08 : 0) | - (eager ? 0x10 : 0); + (irq_affinity ? 0x02 : 0) | + (share ? 0x04 : 0) | + (eager ? 0x08 : 0); rc = pcfg_ioctl (&pcfg); if (rc != 0) { @@ -618,10 +613,14 @@ jt_ptl_print_connections (int argc, char **argv) if (rc != 0) break; - printf (LPX64"@%s:%d\n", + printf (LPX64"@%s:%d:%s\n", pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), - pcfg.pcfg_misc); + pcfg.pcfg_misc, + (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" : + (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" : + (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" : + (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); } if (index == 0) @@ -630,7 +629,7 @@ jt_ptl_print_connections (int argc, char **argv) } int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) +exchange_nids (int cfd, ptl_nid_t my_nid, int type, ptl_nid_t *peer_nid) { int rc; ptl_hdr_t hdr; @@ -646,6 +645,8 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) hdr.src_nid = __cpu_to_le64 (my_nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + *(__u32 *)&hdr.msg = __cpu_to_le32(type); /* Assume there's sufficient socket buffering for a portals HELLO header */ rc = sock_write (cfd, &hdr, sizeof (hdr)); @@ -689,11 +690,11 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) /* ...and check we got what we expected */ if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - PTL_HDR_LENGTH (&hdr) != __cpu_to_le32 (0)) { + hdr.payload_length != __cpu_to_le32 (0)) { fprintf (stderr, "Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (PTL_HDR_LENGTH (&hdr))); + __le32_to_cpu (hdr.payload_length)); return (-1); } @@ -714,13 +715,13 @@ int jt_ptl_connect(int argc, char **argv) int rxmem = 0; int txmem = 0; int bind_irq = 0; - int xchange_nids = 0; + int type = SOCKNAL_CONN_ANY; int port; int o; int olen; if (argc < 3) { - fprintf(stderr, "usage: %s ip port [xi]\n", argv[0]); + fprintf(stderr, "usage: %s ip port [xibctr]\n", argv[0]); return 0; } @@ -746,8 +747,28 @@ int jt_ptl_connect(int argc, char **argv) bind_irq = 1; break; - case 'x': - xchange_nids = 1; + case 'I': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_BULK_IN; + break; + + case 'O': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_BULK_OUT; + break; + + case 'C': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_CONTROL; break; default: @@ 
-808,33 +829,35 @@ int jt_ptl_connect(int argc, char **argv) if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - if (!xchange_nids) - peer_nid = ipaddr; - else { - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); + if (rc != 0) { + fprintf (stderr, "failed to get my nid: %s\n", + strerror (errno)); + close (fd); + return (-1); + } - rc = exchange_nids (fd, data.ioc_nid, &peer_nid); - if (rc != 0) { - close (fd); - return (-1); - } - } - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1], - peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled"); + rc = exchange_nids (fd, data.ioc_nid, type, &peer_nid); + if (rc != 0) { + close (fd); + return (-1); + } + + printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s type: %s\n", + argv[1], peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled", + (type == SOCKNAL_CONN_ANY) ? "A" : + (type == SOCKNAL_CONN_CONTROL) ? "C" : + (type == SOCKNAL_CONN_BULK_IN) ? "I" : + (type == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_fd = fd; pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; - + pcfg.pcfg_misc = type; + rc = pcfg_ioctl(&pcfg); if (rc) { fprintf(stderr, "failed to register fd with portals: %s\n", diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index 1a8e637..c65ecb2 100644 --- a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -31,10 +31,10 @@ command_t list[] = { {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"}, {"print_autoconns", jt_ptl_print_autoconnects, 0, "print autoconnect entries (no args)"}, - {"add_autoconn", jt_ptl_add_autoconnect, 0, "add autoconnect entry (args: nid host [ixse])"}, + {"add_autoconn", jt_ptl_add_autoconnect, 0, "add autoconnect entry (args: nid host [ise])"}, {"del_autoconn", jt_ptl_del_autoconnect, 0, "delete autoconnect entry (args: [nid] [host] [ks])"}, {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [xi])"}, + {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [iIOC])"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c index 6a4377b..21b3dda 100644 --- a/lnet/utils/wirecheck.c +++ b/lnet/utils/wirecheck.c @@ -2,10 +2,14 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ #include +#include +#include #include #include #include +extern size_t strnlen(const char *, size_t); + #define BLANK_LINE() \ do { \ printf ("\n"); \ @@ -77,47 +81,104 @@ check_ptl_hdr (void) CHECK_MEMBER (ptl_hdr_t, dest_pid); CHECK_MEMBER (ptl_hdr_t, src_pid); CHECK_MEMBER (ptl_hdr_t, type); - + CHECK_MEMBER (ptl_hdr_t, payload_length); + CHECK_MEMBER (ptl_hdr_t, msg); + BLANK_LINE (); COMMENT ("Ack"); - 
CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd); CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.ack.length); + CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); BLANK_LINE (); COMMENT ("Put"); - CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd); CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.put.length); - CHECK_MEMBER (ptl_hdr_t, msg.put.offset); CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data); + CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); + CHECK_MEMBER (ptl_hdr_t, msg.put.offset); BLANK_LINE (); COMMENT ("Get"); - CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd); CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.get.length); + CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_offset); CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length); BLANK_LINE (); COMMENT ("Reply"); CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_offset); - CHECK_MEMBER (ptl_hdr_t, msg.reply.length); +} + +void +system_string (char *cmdline, char *str, int len) +{ + int fds[2]; + int rc; + pid_t pid; + + rc = pipe (fds); + if (rc != 0) + abort (); + + pid = fork (); + if (pid == 0) { + /* child */ + int fd = fileno(stdout); + + rc = dup2(fds[1], fd); + if (rc != fd) + abort(); + + exit(system(cmdline)); + /* notreached */ + } else if ((int)pid < 0) { + abort(); + } else { + FILE *f = fdopen (fds[0], "r"); + + if (f == NULL) + abort(); + + close(fds[1]); + + if (fgets(str, len, f) == NULL) + abort(); + + if (waitpid(pid, &rc, 0) != pid) + abort(); + + if (!WIFEXITED(rc) || + WEXITSTATUS(rc) != 0) + abort(); + + if (strnlen(str, len) == len) + str[len - 1] = 0; + + if (str[strlen(str) - 1] == '\n') + str[strlen(str) - 1] = 0; + + fclose(f); + } } int main (int argc, char **argv) { + char 
unameinfo[80]; + char gccinfo[80]; + + system_string("uname -a", unameinfo, sizeof(unameinfo)); + system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); + printf ("void lib_assert_wire_constants (void)\n" - "{\n"); - - COMMENT ("Wire protocol assertions generated by 'wirecheck'"); + "{\n" + " /* Wire protocol assertions generated by 'wirecheck'\n" + " * running on %s\n" + " * with %s */\n" + "\n", unameinfo, gccinfo); + BLANK_LINE (); COMMENT ("Constants..."); diff --git a/lustre/portals/include/portals/lib-types.h b/lustre/portals/include/portals/lib-types.h index e5447d7..047628b 100644 --- a/lustre/portals/include/portals/lib-types.h +++ b/lustre/portals/include/portals/lib-types.h @@ -54,72 +54,62 @@ typedef enum { PTL_MSG_HELLO, } ptl_msg_type_t; -/* Each of these structs should start with an odd number of - * __u32, or the compiler could add its own padding and confuse - * everyone. - * - * Also, "length" needs to be at offset 28 of each struct. - */ +/* The variant fields of the portals message header are aligned on an 8 + * byte boundary in the message header. Note that all types used in these + * wire structs MUST be fixed size and the smaller types are placed at the + * end. 
*/ typedef struct ptl_ack { - ptl_size_t mlength; - ptl_handle_wire_t dst_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for acks) moving out RSN */ + ptl_handle_wire_t dst_wmd; + ptl_match_bits_t match_bits; + ptl_size_t mlength; } WIRE_ATTR ptl_ack_t; typedef struct ptl_put { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t ack_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length moving out RSN */ - ptl_size_t offset; - ptl_hdr_data_t hdr_data; + ptl_handle_wire_t ack_wmd; + ptl_match_bits_t match_bits; + ptl_hdr_data_t hdr_data; + ptl_pt_index_t ptl_index; + ptl_size_t offset; } WIRE_ATTR ptl_put_t; typedef struct ptl_get { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t return_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for gets) moving out RSN */ - ptl_size_t src_offset; - ptl_size_t return_offset; /* unused: going RSN */ - ptl_size_t sink_length; + ptl_handle_wire_t return_wmd; + ptl_match_bits_t match_bits; + ptl_pt_index_t ptl_index; + ptl_size_t src_offset; + ptl_size_t sink_length; } WIRE_ATTR ptl_get_t; typedef struct ptl_reply { - __u32 unused1; /* unused fields going RSN */ - ptl_handle_wire_t dst_wmd; - ptl_size_t dst_offset; /* unused: going RSN */ - __u32 unused2; - ptl_size_t length; /* common length moving out RSN */ + ptl_handle_wire_t dst_wmd; } WIRE_ATTR ptl_reply_t; typedef struct { - ptl_nid_t dest_nid; - ptl_nid_t src_nid; - ptl_pid_t dest_pid; - ptl_pid_t src_pid; - __u32 type; /* ptl_msg_type_t */ + ptl_nid_t dest_nid; + ptl_nid_t src_nid; + ptl_pid_t dest_pid; + ptl_pid_t src_pid; + __u32 type; /* ptl_msg_type_t */ + __u32 payload_length; /* payload data to follow */ + /*<------__u64 aligned------->*/ union { - ptl_ack_t ack; - ptl_put_t put; - ptl_get_t get; + ptl_ack_t ack; + ptl_put_t put; + ptl_get_t get; ptl_reply_t reply; } msg; } WIRE_ATTR ptl_hdr_t; -/* All length fields in individual unions at same offset */ -/* LASSERT for same in lib-move.c */ 
-#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length) - /* A HELLO message contains the portals magic number and protocol version * code in the header's dest_nid, the peer's NID in the src_nid, and - * PTL_MSG_HELLO in the type field. All other fields are zero (including - * PTL_HDR_LENGTH; i.e. no payload). + * PTL_MSG_HELLO in the type field. All other common fields are zero + * (including payload_length; i.e. no payload). * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is * running the same protocol and to find out its NID, so that hosts with * multiple IP interfaces can have a single NID. These NALs should exchange - * HELLO messages when a connection is first established. */ + * HELLO messages when a connection is first established. + * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg. + */ typedef struct { __u32 magic; /* PORTALS_PROTO_MAGIC */ __u16 version_major; /* increment on incompatible change */ @@ -129,7 +119,7 @@ typedef struct { #define PORTALS_PROTO_MAGIC 0xeebc0ded #define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 1 +#define PORTALS_PROTO_VERSION_MINOR 2 typedef struct { long recv_count, recv_length, send_count, send_length, drop_count, diff --git a/lustre/portals/include/portals/socknal.h b/lustre/portals/include/portals/socknal.h new file mode 100644 index 0000000..6d75e5f --- /dev/null +++ b/lustre/portals/include/portals/socknal.h @@ -0,0 +1,13 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * + * + * #defines shared between socknal implementation and utilities + */ + +#define SOCKNAL_CONN_ANY 0 +#define SOCKNAL_CONN_CONTROL 1 +#define SOCKNAL_CONN_BULK_IN 2 +#define SOCKNAL_CONN_BULK_OUT 3 +#define SOCKNAL_CONN_NTYPES 4 diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index 4472e30..9caf381 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ 
b/lustre/portals/knals/qswnal/qswnal.c @@ -131,7 +131,7 @@ kqswnal_get_tx_desc (struct portals_cfg *pcfg) pcfg->pcfg_pbuf1 = (char *)ktx; pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type); - pcfg->pcfg_size = NTOH__u32(PTL_HDR_LENGTH(ktx->ktx_wire_hdr)); + pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length); pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid); pcfg->pcfg_nid2 = ktx->ktx_nid; pcfg->pcfg_misc = ktx->ktx_launcher; diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index 006ea49..43926c9 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -542,8 +542,9 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) { char *type_str = hdr_type_string (hdr); - CERROR("P3 Header at %p of type %s\n", hdr, type_str); - CERROR(" From nid/pid "LPU64"/%u", NTOH__u64(hdr->src_nid), + CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str, + NTOH__u32(hdr->payload_length)); + CERROR(" From nid/pid "LPU64"/%u\n", NTOH__u64(hdr->src_nid), NTOH__u32(hdr->src_pid)); CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid), NTOH__u32(hdr->dest_pid)); @@ -556,8 +557,7 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) hdr->msg.put.ack_wmd.wh_interface_cookie, hdr->msg.put.ack_wmd.wh_object_cookie, NTOH__u64 (hdr->msg.put.match_bits)); - CERROR(" Length %d, offset %d, hdr data "LPX64"\n", - NTOH__u32(PTL_HDR_LENGTH(hdr)), + CERROR(" offset %d, hdr data "LPX64"\n", NTOH__u32(hdr->msg.put.offset), hdr->msg.put.hdr_data); break; @@ -582,10 +582,9 @@ kqswnal_cerror_hdr(ptl_hdr_t * hdr) break; case PTL_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64", length %d\n", + CERROR(" dst md "LPX64"."LPX64"\n", hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - NTOH__u32 (PTL_HDR_LENGTH(hdr))); + hdr->msg.reply.dst_wmd.wh_object_cookie); } } /* end of print_hdr() */ diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 
3d0c758..bb8e247 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -45,6 +45,8 @@ kpr_nal_interface_t ksocknal_router_interface = { #define SOCKNAL_SYSCTL_TIMEOUT 1 #define SOCKNAL_SYSCTL_EAGER_ACK 2 #define SOCKNAL_SYSCTL_ZERO_COPY 3 +#define SOCKNAL_SYSCTL_TYPED 4 +#define SOCKNAL_SYSCTL_MIN_BULK 5 static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", @@ -58,6 +60,12 @@ static ctl_table ksocknal_ctl_table[] = { &ksocknal_data.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif + {SOCKNAL_SYSCTL_TYPED, "typed", + &ksocknal_data.ksnd_typed_conns, sizeof (int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", + &ksocknal_data.ksnd_min_bulk, sizeof (int), + 0644, NULL, &proc_dointvec}, { 0 } }; @@ -86,7 +94,7 @@ ksocknal_api_shutdown(nal_t *nal, int ni) CDEBUG (D_NET, "closing all connections\n"); ksocknal_del_route (PTL_NID_ANY, 0, 0, 0); - ksocknal_close_conn (PTL_NID_ANY, 0); + ksocknal_close_matching_conns (PTL_NID_ANY, 0); return PTL_OK; } @@ -198,7 +206,7 @@ ksocknal_bind_irq (unsigned int irq) ksock_route_t * ksocknal_create_route (__u32 ipaddr, int port, int buffer_size, - int nonagel, int xchange_nids, int irq_affinity, int eager) + int nonagel, int irq_affinity, int eager) { ksock_route_t *route; @@ -215,13 +223,12 @@ ksocknal_create_route (__u32 ipaddr, int port, int buffer_size, route->ksnr_port = port; route->ksnr_buffer_size = buffer_size; route->ksnr_irq_affinity = irq_affinity; - route->ksnr_xchange_nids = xchange_nids; route->ksnr_nonagel = nonagel; route->ksnr_eager = eager; route->ksnr_connecting = 0; + route->ksnr_connected = 0; route->ksnr_deleted = 0; - route->ksnr_generation = 0; - route->ksnr_conn = NULL; + route->ksnr_conn_count = 0; return (route); } @@ -230,7 +237,6 @@ void ksocknal_destroy_route (ksock_route_t *route) { LASSERT (route->ksnr_sharecount == 0); - LASSERT (route->ksnr_conn == NULL); if (route->ksnr_peer != NULL) 
ksocknal_put_peer (route->ksnr_peer); @@ -397,8 +403,7 @@ ksocknal_get_route_by_idx (int index) int ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, - int nonagle, int xchange_nids, int bind_irq, - int share, int eager) + int nonagle, int bind_irq, int share, int eager) { unsigned long flags; ksock_peer_t *peer; @@ -415,8 +420,8 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, if (peer == NULL) return (-ENOMEM); - route = ksocknal_create_route (ipaddr, port, bufnob, nonagle, - xchange_nids, bind_irq, eager); + route = ksocknal_create_route (ipaddr, port, bufnob, + nonagle, bind_irq, eager); if (route == NULL) { ksocknal_put_peer (peer); return (-ENOMEM); @@ -455,7 +460,7 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, route->ksnr_peer = peer; atomic_inc (&peer->ksnp_refcount); /* peer's route list takes existing ref on route */ - list_add (&route->ksnr_list, &peer->ksnp_routes); + list_add_tail (&route->ksnr_list, &peer->ksnp_routes); } route->ksnr_sharecount++; @@ -468,8 +473,10 @@ ksocknal_add_route (ptl_nid_t nid, __u32 ipaddr, int port, int bufnob, void ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn) { - ksock_peer_t *peer = route->ksnr_peer; - ksock_conn_t *conn = route->ksnr_conn; + ksock_peer_t *peer = route->ksnr_peer; + ksock_conn_t *conn; + struct list_head *ctmp; + struct list_head *cnxt; if (!share) route->ksnr_sharecount = 0; @@ -479,18 +486,22 @@ ksocknal_del_route_locked (ksock_route_t *route, int share, int keep_conn) return; } - if (conn != NULL) { - if (!keep_conn) + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = list_entry(ctmp, ksock_conn_t, ksnc_list); + + if (conn->ksnc_route != route) + continue; + + if (!keep_conn) { ksocknal_close_conn_locked (conn, 0); - else { - /* keeping the conn; just dissociate it and route... 
*/ - conn->ksnc_route = NULL; - route->ksnr_conn = NULL; - ksocknal_put_route (route); /* drop conn's ref on route */ - ksocknal_put_conn (conn); /* drop route's ref on conn */ + continue; } + + /* keeping the conn; just dissociate it and route... */ + conn->ksnc_route = NULL; + ksocknal_put_route (route); /* drop conn's ref on route */ } - + route->ksnr_deleted = 1; list_del (&route->ksnr_list); ksocknal_put_route (route); /* drop peer's ref */ @@ -670,7 +681,7 @@ ksocknal_choose_scheduler_locked (unsigned int irq) int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq) + struct socket *sock, int bind_irq, int type) { unsigned long flags; ksock_conn_t *conn; @@ -711,6 +722,7 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; + conn->ksnc_type = type; conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; conn->ksnc_saved_write_space = sock->sk->sk_write_space; atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ @@ -732,7 +744,8 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, if (route != NULL) { /* Autoconnected! 
*/ - LASSERT (route->ksnr_conn == NULL && route->ksnr_connecting); + LASSERT ((route->ksnr_connected & (1 << type)) == 0); + LASSERT ((route->ksnr_connecting & (1 << type)) != 0); if (route->ksnr_deleted) { /* This conn was autoconnected, but the autoconnect @@ -745,14 +758,13 @@ ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, } - /* associate conn/route for auto-reconnect */ - route->ksnr_conn = conn; - atomic_inc (&conn->ksnc_refcount); + /* associate conn/route */ conn->ksnc_route = route; atomic_inc (&route->ksnr_refcount); - route->ksnr_connecting = 0; - route->ksnr_generation++; + route->ksnr_connecting &= ~(1 << type); + route->ksnr_connected |= (1 << type); + route->ksnr_conn_count++; route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; peer = route->ksnr_peer; @@ -836,14 +848,17 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) route = conn->ksnc_route; if (route != NULL) { /* dissociate conn from route... */ - LASSERT (!route->ksnr_connecting && - !route->ksnr_deleted); + LASSERT (!route->ksnr_deleted); + LASSERT ((route->ksnr_connecting & (1 << conn->ksnc_type)) == 0); + LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); - route->ksnr_conn = NULL; + route->ksnr_connected &= ~(1 << conn->ksnc_type); conn->ksnc_route = NULL; + list_del (&route->ksnr_list); /* make route least favourite */ + list_add_tail (&route->ksnr_list, &peer->ksnp_routes); + ksocknal_put_route (route); /* drop conn's ref on route */ - ksocknal_put_conn (conn); /* drop route's ref on conn */ } /* ksnd_deathrow_conns takes over peer's ref */ @@ -869,24 +884,6 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) spin_unlock (&ksocknal_data.ksnd_reaper_lock); } -int -ksocknal_close_conn_unlocked (ksock_conn_t *conn, int why) -{ - unsigned long flags; - int did_it = 0; - - write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - - if (!conn->ksnc_closing) { - did_it = 1; - ksocknal_close_conn_locked (conn, why); - } - - 
write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); - - return (did_it); -} - void ksocknal_terminate_conn (ksock_conn_t *conn) { @@ -958,9 +955,10 @@ ksocknal_destroy_conn (ksock_conn_t *conn) ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - CERROR ("Deleting packet type %d len %d ("LPX64"->"LPX64")\n", + CERROR ("Deleting packet %p type %d len %d ("LPX64"->"LPX64")\n", + tx, NTOH__u32 (tx->tx_hdr->type), - NTOH__u32 (PTL_HDR_LENGTH(tx->tx_hdr)), + NTOH__u32 (tx->tx_hdr->payload_length), NTOH__u64 (tx->tx_hdr->src_nid), NTOH__u64 (tx->tx_hdr->dest_nid)); @@ -1012,19 +1010,54 @@ ksocknal_put_conn (ksock_conn_t *conn) } int -ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr) +ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) { - unsigned long flags; ksock_conn_t *conn; struct list_head *ctmp; struct list_head *cnxt; + int count = 0; + + list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + + if (ipaddr == 0 || + conn->ksnc_ipaddr == ipaddr) { + count++; + ksocknal_close_conn_locked (conn, why); + } + } + + return (count); +} + +int +ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) +{ + ksock_peer_t *peer = conn->ksnc_peer; + __u32 ipaddr = conn->ksnc_ipaddr; + unsigned long flags; + int count; + + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); + + count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); + + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + + return (count); +} + +int +ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) +{ + unsigned long flags; ksock_peer_t *peer; struct list_head *ptmp; struct list_head *pnxt; int lo; int hi; int i; - int rc = -ENOENT; + int count = 0; write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); @@ -1043,24 +1076,17 @@ ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr) if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid)) continue; - 
list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - - conn = list_entry (ctmp, ksock_conn_t, - ksnc_list); - - if (!(ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr)) - continue; - - rc = 0; - ksocknal_close_conn_locked (conn, 0); - } + count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); } } write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); - return (rc); + /* wildcards always succeed */ + if (nid == PTL_NID_ANY || ipaddr == 0) + return (0); + + return (count == 0 ? -ENOENT : 0); } void @@ -1073,7 +1099,7 @@ ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive) if (!alive) { /* If the gateway crashed, close all open connections... */ - ksocknal_close_conn (gw_nid, 0); + ksocknal_close_matching_conns (gw_nid, 0); return; } @@ -1233,13 +1259,12 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_nid = route->ksnr_peer->ksnp_nid; pcfg->pcfg_id = route->ksnr_ipaddr; pcfg->pcfg_misc = route->ksnr_port; - pcfg->pcfg_count = route->ksnr_generation; + pcfg->pcfg_count = route->ksnr_conn_count; pcfg->pcfg_size = route->ksnr_buffer_size; pcfg->pcfg_wait = route->ksnr_sharecount; pcfg->pcfg_flags = (route->ksnr_nonagel ? 1 : 0) | - (route->ksnr_xchange_nids ? 2 : 0) | - (route->ksnr_irq_affinity ? 4 : 0) | - (route->ksnr_eager ? 8 : 0); + (route->ksnr_irq_affinity ? 2 : 0) | + (route->ksnr_eager ? 
4 : 0); ksocknal_put_route (route); } break; @@ -1250,8 +1275,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) (pcfg->pcfg_flags & 0x01) != 0, (pcfg->pcfg_flags & 0x02) != 0, (pcfg->pcfg_flags & 0x04) != 0, - (pcfg->pcfg_flags & 0x08) != 0, - (pcfg->pcfg_flags & 0x10) != 0); + (pcfg->pcfg_flags & 0x08) != 0); break; } case NAL_CMD_DEL_AUTOCONN: { @@ -1267,26 +1291,38 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) rc = -ENOENT; else { rc = 0; - pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; - pcfg->pcfg_id = conn->ksnc_ipaddr; - pcfg->pcfg_misc = conn->ksnc_port; + pcfg->pcfg_nid = conn->ksnc_peer->ksnp_nid; + pcfg->pcfg_id = conn->ksnc_ipaddr; + pcfg->pcfg_misc = conn->ksnc_port; + pcfg->pcfg_flags = conn->ksnc_type; ksocknal_put_conn (conn); } break; } case NAL_CMD_REGISTER_PEER_FD: { struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc); + int type = pcfg->pcfg_misc; + + if (sock == NULL) + break; - if (sock != NULL) { - rc = ksocknal_create_conn (pcfg->pcfg_nid, NULL, - sock, pcfg->pcfg_flags); - if (rc != 0) - fput (sock->file); + switch (type) { + case SOCKNAL_CONN_ANY: + case SOCKNAL_CONN_CONTROL: + case SOCKNAL_CONN_BULK_IN: + case SOCKNAL_CONN_BULK_OUT: + rc = ksocknal_create_conn(pcfg->pcfg_nid, NULL, sock, + pcfg->pcfg_flags, type); + default: + break; } + if (rc != 0) + fput (sock->file); break; } case NAL_CMD_CLOSE_CONNECTION: { - rc = ksocknal_close_conn (pcfg->pcfg_nid, pcfg->pcfg_id); + rc = ksocknal_close_matching_conns (pcfg->pcfg_nid, + pcfg->pcfg_id); break; } case NAL_CMD_REGISTER_MYNID: { @@ -1434,7 +1470,9 @@ ksocknal_module_init (void) /* the following must be sizeof(int) for proc_dointvec() */ LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); ksocknal_api.forward = ksocknal_api_forward; 
@@ -1451,6 +1489,8 @@ ksocknal_module_init (void) ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; #if SOCKNAL_ZC ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; #endif diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index 1c73ae8..7bfc009 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -25,7 +25,9 @@ */ #define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif #include #include @@ -58,6 +60,7 @@ #include #include #include +#include #if CONFIG_SMP # define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ @@ -71,9 +74,10 @@ /* default vals for runtime tunables */ #define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ -#define SOCKNAL_EAGER_ACK 1 /* default eager ack (boolean) */ +#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */ +#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */ #define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */ - +#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */ #define SOCKNAL_USE_KEEPALIVES 0 /* use tcp/ip keepalive? */ #define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ @@ -142,6 +146,8 @@ typedef struct { int ksnd_init; /* initialisation state */ int ksnd_io_timeout; /* "stuck" socket timeout (seconds) */ int ksnd_eager_ack; /* make TCP ack eagerly? */ + int ksnd_typed_conns; /* drive sockets by type? 
*/ + int ksnd_min_bulk; /* smallest "large" message */ #if SOCKNAL_ZC unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif @@ -300,6 +306,7 @@ typedef struct ksock_conn __u32 ksnc_ipaddr; /* peer's IP */ int ksnc_port; /* peer's port */ int ksnc_closing; /* being shut down */ + int ksnc_type; /* type of connection */ /* READER */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ @@ -327,6 +334,10 @@ typedef struct ksock_conn int ksnc_tx_scheduled; /* being progressed */ } ksock_conn_t; +#define KSNR_TYPED_ROUTES ((1 << SOCKNAL_CONN_CONTROL) | \ + (1 << SOCKNAL_CONN_BULK_IN) | \ + (1 << SOCKNAL_CONN_BULK_OUT)) + typedef struct ksock_route { struct list_head ksnr_list; /* chain on peer route list */ @@ -340,13 +351,12 @@ typedef struct ksock_route int ksnr_port; /* port to connect to */ int ksnr_buffer_size; /* size of socket buffers */ unsigned int ksnr_irq_affinity:1; /* set affinity? */ - unsigned int ksnr_xchange_nids:1; /* do hello protocol? */ unsigned int ksnr_nonagel:1; /* disable nagle? */ unsigned int ksnr_eager:1; /* connect eagery? */ - unsigned int ksnr_connecting:1; /* autoconnect in progress? */ + unsigned int ksnr_connecting:4; /* autoconnects in progress by type */ + unsigned int ksnr_connected:4; /* connections established by type */ unsigned int ksnr_deleted:1; /* been removed from peer? 
*/ - int ksnr_generation; /* connection incarnation # */ - ksock_conn_t *ksnr_conn; /* NULL/active connection */ + int ksnr_conn_count; /* # conns established by this route */ } ksock_route_t; typedef struct ksock_peer @@ -402,13 +412,13 @@ extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, int single, int keep_conn); extern int ksocknal_create_conn (ptl_nid_t nid, ksock_route_t *route, - struct socket *sock, int bind_irq); + struct socket *sock, int bind_irq, int type); extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); -extern int ksocknal_close_conn_unlocked (ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); extern void ksocknal_put_conn (ksock_conn_t *conn); -extern int ksocknal_close_conn (ptl_nid_t nid, __u32 ipaddr); +extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); +extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch); diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c index 8ce6777..dde434a 100644 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ b/lustre/portals/knals/socknal/socknal_cb.c @@ -218,9 +218,9 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *iov = tx->tx_iov; int fragsize = iov->iov_len; unsigned long vaddr = (unsigned long)iov->iov_base; - int more = (!list_empty (&conn->ksnc_tx_queue)) | - (tx->tx_niov > 1) | - (tx->tx_nkiov > 1); + int more = (tx->tx_niov > 1) || + (tx->tx_nkiov > 0) || + (!list_empty (&conn->ksnc_tx_queue)); #if SOCKNAL_ZC int offset = vaddr & (PAGE_SIZE - 1); int zcsize = MIN (fragsize, PAGE_SIZE - offset); @@ -266,7 +266,7 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT }; mm_segment_t oldmm = get_fs(); - + set_fs (KERNEL_DS); rc = sock_sendmsg(sock, &msg, fragsize); set_fs (oldmm); @@ -298,8 +298,8 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) int fragsize = kiov->kiov_len; struct page *page = kiov->kiov_page; int offset = kiov->kiov_offset; - int more = (!list_empty (&conn->ksnc_tx_queue)) | - (tx->tx_nkiov > 1); + int more = (tx->tx_nkiov > 1) || + (!list_empty (&conn->ksnc_tx_queue)); int rc; /* NB we can't trust socket ops to either consume our iovs @@ -464,7 +464,7 @@ ksocknal_recv_iov (ksock_conn_t *conn) * or leave them alone, so we only receive 1 frag at a time. */ LASSERT (conn->ksnc_rx_niov > 0); LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - + set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); /* NB this is just a boolean............................^ */ @@ -521,7 +521,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn) LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); LASSERT (conn->ksnc_rx_nkiov > 0); LASSERT (offset + fragsize <= PAGE_SIZE); - + set_fs (KERNEL_DS); rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); /* NB this is just a boolean............................^ */ @@ -597,7 +597,7 @@ ksocknal_recvmsg (ksock_conn_t *conn) if (conn->ksnc_rx_nob_wanted == 0) { /* Completed a message segment (header or payload) */ - if (ksocknal_data.ksnd_eager_ack && + if ((ksocknal_data.ksnd_eager_ack & conn->ksnc_type) != 0 && (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... 
*/ @@ -721,12 +721,12 @@ ksocknal_process_transmit (ksock_sched_t *sched, unsigned long *irq_flags) CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); if (rc != 0) { - if (ksocknal_close_conn_unlocked (conn, rc)) { - /* I'm the first to close */ + if (!conn->ksnc_closing) CERROR ("[%p] Error %d on write to "LPX64" ip %08x:%d\n", conn, rc, conn->ksnc_peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port); - } + ksocknal_close_conn_and_siblings (conn, rc); + ksocknal_tx_launched (tx); spin_lock_irqsave (&sched->kss_lock, *irq_flags); @@ -764,10 +764,17 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) /* called holding write lock on ksnd_global_lock */ - LASSERT (route->ksnr_conn == NULL); - LASSERT (!route->ksnr_deleted && !route->ksnr_connecting); + LASSERT (!route->ksnr_deleted); + LASSERT ((route->ksnr_connected & (1 << SOCKNAL_CONN_ANY)) == 0); + LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES); + LASSERT (!route->ksnr_connecting); - route->ksnr_connecting = 1; + if (ksocknal_data.ksnd_typed_conns) + route->ksnr_connecting = + KSNR_TYPED_ROUTES & ~route->ksnr_connected; + else + route->ksnr_connecting = (1 << SOCKNAL_CONN_ANY); + atomic_inc (&route->ksnr_refcount); /* extra ref for asynchd */ spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); @@ -814,21 +821,51 @@ ksock_conn_t * ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) { struct list_head *tmp; - ksock_conn_t *conn = NULL; - + ksock_conn_t *typed = NULL; + int tnob = 0; + ksock_conn_t *fallback = NULL; + int fnob = 0; + /* Find the conn with the shortest tx queue */ list_for_each (tmp, &peer->ksnp_conns) { - ksock_conn_t *c = list_entry (tmp, ksock_conn_t, ksnc_list); + ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); + int nob = atomic_read(&c->ksnc_tx_nob); LASSERT (!c->ksnc_closing); - - if (conn == NULL || - atomic_read (&conn->ksnc_tx_nob) > - atomic_read (&c->ksnc_tx_nob)) - conn = c; + + if (fallback == NULL || nob < fnob) { 
+ fallback = c; + fnob = nob; + } + + if (!ksocknal_data.ksnd_typed_conns) + continue; + + switch (c->ksnc_type) { + default: + LBUG(); + case SOCKNAL_CONN_ANY: + break; + case SOCKNAL_CONN_BULK_IN: + continue; + case SOCKNAL_CONN_BULK_OUT: + if (tx->tx_nob < ksocknal_data.ksnd_min_bulk) + continue; + break; + case SOCKNAL_CONN_CONTROL: + if (tx->tx_nob >= ksocknal_data.ksnd_min_bulk) + continue; + break; + } + + if (typed == NULL || nob < tnob) { + typed = c; + tnob = nob; + } } - return (conn); + /* prefer the typed selection */ + return ((typed != NULL) ? typed : fallback); } void @@ -870,22 +907,46 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) } ksock_route_t * -ksocknal_find_connectable_route_locked (ksock_peer_t *peer, int eager_only) +ksocknal_find_connectable_route_locked (ksock_peer_t *peer) { struct list_head *tmp; ksock_route_t *route; + ksock_route_t *candidate = NULL; + int found = 0; + int bits; list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); + bits = route->ksnr_connected; - if (route->ksnr_conn == NULL && /* not connected */ - !route->ksnr_connecting && /* not connecting */ - (!eager_only || route->ksnr_eager) && /* wants to connect */ - time_after_eq (jiffies, route->ksnr_timeout)) /* OK to retry */ + if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES || + (bits & (1 << SOCKNAL_CONN_ANY)) != 0 || + route->ksnr_connecting != 0) { + /* All typed connections have been established, or + * an untyped connection has been established, or + * connections are currently being established */ + found = 1; + continue; + } + + /* too soon to retry this guy? */ + if (!time_after_eq (jiffies, route->ksnr_timeout)) + continue; + + /* always do eager routes */ + if (route->ksnr_eager) return (route); + + if (candidate == NULL) { + /* If we don't find any other route that is fully + * connected or connecting, the first connectable + * route is returned. 
If it fails to connect, it + * will get placed at the end of the list */ + candidate = route; + } } - - return (NULL); + + return (found ? NULL : candidate); } ksock_route_t * @@ -897,7 +958,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer) list_for_each (tmp, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); - if (route->ksnr_connecting) + if (route->ksnr_connecting != 0) return (route); } @@ -912,7 +973,7 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) ksock_conn_t *conn; ksock_route_t *route; rwlock_t *g_lock; - + /* Ensure the frags we've been given EXACTLY match the number of * bytes we want to send. Many TCP/IP stacks disregard any total * size parameters passed to them and just look at the frags. @@ -936,18 +997,17 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) peer = ksocknal_find_target_peer_locked (tx, nid); if (peer == NULL) { read_unlock (g_lock); - return (PTL_FAIL); + return (-EHOSTUNREACH); } - if (ksocknal_find_connectable_route_locked(peer, 1) == NULL) { + if (ksocknal_find_connectable_route_locked(peer) == NULL) { conn = ksocknal_find_conn_locked (tx, peer); if (conn != NULL) { - /* I've got no unconnected autoconnect routes that - * need to be connected, and I do have an actual - * connection... */ + /* I've got no autoconnect routes that need to be + * connecting and I do have an actual connection... */ ksocknal_queue_tx_locked (tx, conn); read_unlock (g_lock); - return (PTL_OK); + return (0); } } @@ -960,14 +1020,13 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) if (peer->ksnp_closing) { /* peer deleted as I blocked! 
*/ write_unlock_irqrestore (g_lock, flags); ksocknal_put_peer (peer); - return (PTL_FAIL); + return (-EHOSTUNREACH); } ksocknal_put_peer (peer); /* drop ref I got above */ - for (;;) { - /* launch all eager autoconnections */ - route = ksocknal_find_connectable_route_locked (peer, 1); + /* launch any/all autoconnections that need it */ + route = ksocknal_find_connectable_route_locked (peer); if (route == NULL) break; @@ -979,26 +1038,20 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) /* Connection exists; queue message on it */ ksocknal_queue_tx_locked (tx, conn); write_unlock_irqrestore (g_lock, flags); - return (PTL_OK); + return (0); } - if (ksocknal_find_connecting_route_locked (peer) == NULL) { - /* no autoconnect routes actually connecting now. Scrape - * the barrel for non-eager autoconnects */ - route = ksocknal_find_connectable_route_locked (peer, 0); - if (route != NULL) { - ksocknal_launch_autoconnect_locked (route); - } else { - write_unlock_irqrestore (g_lock, flags); - return (PTL_FAIL); - } + route = ksocknal_find_connecting_route_locked (peer); + if (route != NULL) { + /* At least 1 connection is being established; queue the + * message... */ + list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); + write_unlock_irqrestore (g_lock, flags); + return (0); } - - /* At least 1 connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); - + write_unlock_irqrestore (g_lock, flags); - return (PTL_OK); + return (-EHOSTUNREACH); } ksock_ltx_t * @@ -1069,10 +1122,11 @@ ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; rc = ksocknal_launch_packet (<x->ltx_tx, nid); - if (rc != PTL_OK) - ksocknal_put_ltx (ltx); + if (rc == 0) + return (PTL_OK); - return (rc); + ksocknal_put_ltx (ltx); + return (PTL_FAIL); } int @@ -1104,10 +1158,11 @@ ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; rc = ksocknal_launch_packet (<x->ltx_tx, nid); - if (rc != PTL_OK) - ksocknal_put_ltx (ltx); - - return (rc); + if (rc == 0) + return (PTL_OK); + + ksocknal_put_ltx (ltx); + return (PTL_FAIL); } void @@ -1133,10 +1188,8 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) tx->tx_hdr = (ptl_hdr_t *)fwd->kprfd_iov[0].iov_base; rc = ksocknal_launch_packet (tx, nid); - if (rc != 0) { - /* FIXME, could pass a better completion error */ - kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH); - } + if (rc != 0) + kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc); } int @@ -1353,7 +1406,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) { ksock_peer_t *peer; ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr)); + int body_len = NTOH__u32 (conn->ksnc_hdr.payload_length); CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, NTOH__u64 (conn->ksnc_hdr.src_nid), @@ -1368,7 +1421,6 @@ ksocknal_fwd_parse (ksock_conn_t *conn) dest_nid, body_len); ksocknal_new_packet (conn, 0); /* on to new packet */ - ksocknal_close_conn_unlocked (conn, -EINVAL); /* give up on conn */ return; } @@ -1512,17 +1564,15 @@ ksocknal_process_receive (ksock_sched_t *sched, unsigned long *irq_flags) rc = ksocknal_recvmsg(conn); if (rc <= 0) { - if (ksocknal_close_conn_unlocked (conn, rc)) { - /* 
I'm the first to close */ - if (rc < 0) - CERROR ("[%p] Error %d on read from "LPX64" ip %08x:%d\n", - conn, rc, conn->ksnc_peer->ksnp_nid, - conn->ksnc_ipaddr, conn->ksnc_port); - else - CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", - conn, conn->ksnc_peer->ksnp_nid, - conn->ksnc_ipaddr, conn->ksnc_port); - } + if (rc == 0) + CWARN ("[%p] EOF from "LPX64" ip %08x:%d\n", + conn, conn->ksnc_peer->ksnp_nid, + conn->ksnc_ipaddr, conn->ksnc_port); + else if (!conn->ksnc_closing) + CERROR ("[%p] Error %d on read from "LPX64" ip %08x:%d\n", + conn, rc, conn->ksnc_peer->ksnp_nid, + conn->ksnc_ipaddr, conn->ksnc_port); + ksocknal_close_conn_and_siblings (conn, rc); goto out; } @@ -1945,7 +1995,7 @@ ksocknal_sock_read (struct socket *sock, void *buffer, int nob) } int -ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) +ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid, int type) { int rc; ptl_hdr_t hdr; @@ -1960,6 +2010,8 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) hdr.src_nid = __cpu_to_le64 (ksocknal_lib.ni.nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + *(__u32 *)&hdr.msg = __cpu_to_le32 (type); /* Assume sufficient socket buffering for this message */ rc = ksocknal_sock_write (sock, &hdr, sizeof (hdr)); @@ -1992,7 +2044,9 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) return (-EINVAL); } - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); +#if (PORTALS_PROTO_VERSION_MAJOR != 0) +# error "This code only understands protocol version 0.x" +#endif /* version 0 sends magic/version as the dest_nid of a 'hello' header, * so read the rest of it in now... 
*/ @@ -2005,11 +2059,11 @@ ksocknal_exchange_nids (struct socket *sock, ptl_nid_t nid) /* ...and check we got what we expected */ if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - PTL_HDR_LENGTH (&hdr) != __cpu_to_le32 (0)) { + hdr.payload_length != __cpu_to_le32 (0)) { CERROR ("Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload from "LPX64"\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (PTL_HDR_LENGTH (&hdr)), nid); + __le32_to_cpu (hdr.payload_length), nid); return (-EINVAL); } @@ -2103,7 +2157,7 @@ ksocknal_setup_sock (struct socket *sock) } int -ksocknal_connect_peer (ksock_route_t *route) +ksocknal_connect_peer (ksock_route_t *route, int type) { struct sockaddr_in peer_addr; mm_segment_t oldmm = get_fs(); @@ -2208,14 +2262,12 @@ ksocknal_connect_peer (ksock_route_t *route) goto out; } - if (route->ksnr_xchange_nids) { - rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid); - if (rc != 0) - goto out; - } + rc = ksocknal_exchange_nids (sock, route->ksnr_peer->ksnp_nid, type); + if (rc != 0) + goto out; - rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, - route, sock, route->ksnr_irq_affinity); + rc = ksocknal_create_conn (route->ksnr_peer->ksnp_nid, route, sock, + route->ksnr_irq_affinity, type); if (rc == 0) { /* Take an extra ref on sock->file to compensate for the * upcoming close which will lose fd's ref on it. */ @@ -2235,20 +2287,36 @@ ksocknal_autoconnect (ksock_route_t *route) ksock_peer_t *peer; unsigned long flags; int rc; + int type; - rc = ksocknal_connect_peer (route); - if (rc == 0) { + for (;;) { + for (type = 0; type < SOCKNAL_CONN_NTYPES; type++) + if ((route->ksnr_connecting & (1 << type)) != 0) + break; + LASSERT (type < SOCKNAL_CONN_NTYPES); + + rc = ksocknal_connect_peer (route, type); + + if (rc != 0) + break; + /* successfully autoconnected: create_conn did the - * route/conn binding and scheduled any blocked packets, - * so there's nothing left to do now. 
*/ - return; + * route/conn binding and scheduled any blocked packets */ + + if (route->ksnr_connecting == 0) { + /* No more connections required */ + return; + } } + /* Connection attempt failed */ + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); peer = route->ksnr_peer; route->ksnr_connecting = 0; + /* This is a retry rather than a new connection */ LASSERT (route->ksnr_retry_interval != 0); route->ksnr_timeout = jiffies + route->ksnr_retry_interval; route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2, @@ -2268,6 +2336,12 @@ ksocknal_autoconnect (ksock_route_t *route) } while (!list_empty (&peer->ksnp_tx_queue)); } + /* make this route least-favourite for re-selection */ + if (!route->ksnr_deleted) { + list_del(&route->ksnr_list); + list_add_tail(&route->ksnr_list, &peer->ksnp_routes); + } + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); while (!list_empty (&zombies)) { @@ -2275,7 +2349,7 @@ ksocknal_autoconnect (ksock_route_t *route) CERROR ("Deleting packet type %d len %d ("LPX64"->"LPX64")\n", NTOH__u32 (tx->tx_hdr->type), - NTOH__u32 (PTL_HDR_LENGTH(tx->tx_hdr)), + NTOH__u32 (tx->tx_hdr->payload_length), NTOH__u64 (tx->tx_hdr->src_nid), NTOH__u64 (tx->tx_hdr->dest_nid)); @@ -2393,13 +2467,11 @@ ksocknal_check_peer_timeouts (int idx) if (conn != NULL) { read_unlock (&ksocknal_data.ksnd_global_lock); - if (ksocknal_close_conn_unlocked (conn, -ETIMEDOUT)) { - /* I actually closed... */ - CERROR ("Timeout out conn->"LPX64" ip %x:%d\n", - peer->ksnp_nid, conn->ksnc_ipaddr, - conn->ksnc_port); - } - + CERROR ("Timeout out conn->"LPX64" ip %x:%d\n", + peer->ksnp_nid, conn->ksnc_ipaddr, + conn->ksnc_port); + ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); + /* NB we won't find this one again, but we can't * just proceed with the next peer, since we dropped * ksnd_global_lock and it might be dead already! 
*/ diff --git a/lustre/portals/knals/toenal/toenal_cb.c b/lustre/portals/knals/toenal/toenal_cb.c index 983fa71..37e3f1e 100644 --- a/lustre/portals/knals/toenal/toenal_cb.c +++ b/lustre/portals/knals/toenal/toenal_cb.c @@ -708,26 +708,7 @@ ktoenal_fwd_parse (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); LASSERT (conn->ksnc_rx_scheduled); - switch (conn->ksnc_hdr.type) - { - case PTL_MSG_GET: - case PTL_MSG_ACK: - body_len = 0; - break; - case PTL_MSG_PUT: - body_len = conn->ksnc_hdr.msg.put.length; - break; - case PTL_MSG_REPLY: - body_len = conn->ksnc_hdr.msg.reply.length; - break; - default: - /* Unrecognised packet type */ - CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n", - conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - /* Ignore this header and go back to reading a new packet. */ - ktoenal_new_packet (conn, 0); - return; - } + body_len = conn->ksnc_hdr.payload_length; if (body_len < 0) /* length corrupt */ { diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c index e73cbb8..491bb87 100644 --- a/lustre/portals/portals/lib-move.c +++ b/lustre/portals/portals/lib-move.c @@ -583,7 +583,7 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, hdr->src_nid, hdr->src_pid, - PTL_HDR_LENGTH (hdr), hdr->msg.put.offset, + hdr->payload_length, hdr->msg.put.offset, hdr->msg.put.match_bits, &mlength, &offset, &unlink); if (me == NULL) @@ -592,7 +592,7 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) md = me->md; CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d " "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), + hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, md->md_lh.lh_cookie, md->md_niov, offset); msg = get_new_msg (nal, md); @@ -617,7 +617,7 @@ static int parse_put(nal_cb_t * 
nal, ptl_hdr_t * hdr, void *private) msg->ev.initiator.pid = hdr->src_pid; msg->ev.portal = hdr->msg.put.ptl_index; msg->ev.match_bits = hdr->msg.put.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); + msg->ev.rlength = hdr->payload_length; msg->ev.mlength = mlength; msg->ev.offset = offset; msg->ev.hdr_data = hdr->msg.put.hdr_data; @@ -646,14 +646,14 @@ static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) state_unlock(nal, &flags); - lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, msg, md, offset, mlength, hdr->payload_length); return 0; drop: nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -676,11 +676,6 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length); hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset); - /* compatibility check until field is deleted */ - if (hdr->msg.get.return_offset != 0) - CERROR("Unexpected non-zero get.return_offset %x from " - LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid); - state_lock(nal, &flags); me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, @@ -694,7 +689,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) md = me->md; CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d " "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), + hdr->src_nid, hdr->src_pid, mlength, hdr->payload_length, md->md_lh.lh_cookie, md->md_niov, offset); msg = get_new_msg (nal, md); @@ -710,7 +705,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) msg->ev.initiator.pid = hdr->src_pid; 
msg->ev.portal = hdr->msg.get.ptl_index; msg->ev.match_bits = hdr->msg.get.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); + msg->ev.rlength = hdr->payload_length; msg->ev.mlength = mlength; msg->ev.offset = offset; msg->ev.hdr_data = 0; @@ -745,7 +740,7 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) reply.src_nid = HTON__u64 (ni->nid); reply.dest_pid = HTON__u32 (hdr->src_pid); reply.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength); + reply.payload_length = HTON__u32 (mlength); reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; @@ -763,13 +758,13 @@ static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) } /* Complete the incoming message */ - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (rc); drop: ni->counters.drop_count++; ni->counters.drop_length += hdr->msg.get.sink_length; state_unlock(nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -782,11 +777,6 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) lib_msg_t *msg; unsigned long flags; - /* compatibility check until field is deleted */ - if (hdr->msg.reply.dst_offset != 0) - CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n", - hdr->msg.reply.dst_offset, hdr->src_nid); - state_lock(nal, &flags); /* NB handles only looked up by creator (no flips) */ @@ -802,7 +792,7 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) LASSERT (md->offset == 0); - length = rlength = PTL_HDR_LENGTH(hdr); + length = rlength = hdr->payload_length; if (length > md->length) { if ((md->options & PTL_MD_TRUNCATE) == 0) { @@ -848,9 +838,9 @@ static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) drop: nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + 
nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -901,13 +891,13 @@ static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ni->counters.recv_count++; state_unlock(nal, &flags); - lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, msg, NULL, 0, 0, hdr->payload_length); return 0; drop: nal->ni.counters.drop_count++; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return -1; } @@ -954,7 +944,7 @@ void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) hdr->msg.put.match_bits); nal->cb_printf(nal, " Length %d, offset %d, hdr data "LPX64"\n", - PTL_HDR_LENGTH(hdr), hdr->msg.put.offset, + hdr->payload_length, hdr->msg.put.offset, hdr->msg.put.hdr_data); break; @@ -984,7 +974,7 @@ void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) "length %d\n", hdr->msg.reply.dst_wmd.wh_interface_cookie, hdr->msg.reply.dst_wmd.wh_object_cookie, - PTL_HDR_LENGTH(hdr)); + hdr->payload_length); } } /* end of print_hdr() */ @@ -994,21 +984,13 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) { unsigned long flags; - /* NB static check; optimizer will elide this if it's right */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.put.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.get.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.reply.length)); - /* convert common fields to host byte order */ hdr->dest_nid = NTOH__u64 (hdr->dest_nid); hdr->src_nid = NTOH__u64 (hdr->src_nid); hdr->dest_pid = NTOH__u32 (hdr->dest_pid); hdr->src_pid = NTOH__u32 (hdr->src_pid); hdr->type = NTOH__u32 (hdr->type); - PTL_HDR_LENGTH(hdr) = NTOH__u32 
(PTL_HDR_LENGTH(hdr)); + hdr->payload_length = NTOH__u32(hdr->payload_length); #if 0 nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n", nal->ni.nid, nal, hdr, hdr->type); @@ -1023,7 +1005,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) nal->ni.nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1034,10 +1016,10 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) state_lock (nal, &flags); nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); + nal->ni.counters.drop_length += hdr->payload_length; state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1048,7 +1030,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ": simulated failure\n", nal->ni.nid, hdr_type_string (hdr), hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } @@ -1069,7 +1051,7 @@ int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid, hdr->type); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); + lib_recv (nal, private, NULL, NULL, 0, 0, hdr->payload_length); return (-1); } } @@ -1126,7 +1108,7 @@ int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret) hdr.src_nid = HTON__u64 (ni->nid); hdr.dest_pid = HTON__u32 (id->pid); hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length); + hdr.payload_length = HTON__u32 (md->length); /* NB handles only looked up by creator (no flips) */ if (args->ack_req_in == PTL_ACK_REQ) { @@ -1303,7 +1285,7 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void 
*v_ret) hdr.src_nid = HTON__u64 (ni->nid); hdr.dest_pid = HTON__u32 (id->pid); hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = 0; + hdr.payload_length = 0; /* NB handles only looked up by creator (no flips) */ hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; @@ -1364,12 +1346,15 @@ int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) void lib_assert_wire_constants (void) { - /* Wire protocol assertions generated by 'wirecheck' */ + /* Wire protocol assertions generated by 'wirecheck' + * running on Linux mdev2 2.4.18-p4smp-15llp #1 SMP Wed Oct 8 11:01:07 PDT 2003 i686 unknown + * with gcc version 2.96 20000731 (Red Hat Linux 7.3 2.96-113) */ + /* Constants... */ LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 1); + LASSERT (PORTALS_PROTO_VERSION_MINOR == 2); LASSERT (PTL_MSG_ACK == 0); LASSERT (PTL_MSG_PUT == 1); LASSERT (PTL_MSG_GET == 2); @@ -1377,79 +1362,71 @@ void lib_assert_wire_constants (void) LASSERT (PTL_MSG_HELLO == 4); /* Checks for struct ptl_handle_wire_t */ - LASSERT (sizeof (ptl_handle_wire_t) == 16); - LASSERT (offsetof (ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT (offsetof (ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); + LASSERT ((int)sizeof(ptl_handle_wire_t) == 16); + LASSERT (offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0); + LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); + LASSERT (offsetof(ptl_handle_wire_t, wh_object_cookie) == 8); + LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); /* Checks for struct ptl_magicversion_t */ - LASSERT (sizeof (ptl_magicversion_t) == 8); - LASSERT (offsetof (ptl_magicversion_t, magic) == 0); - LASSERT (sizeof (((ptl_magicversion_t *)0)->magic) == 4); - LASSERT (offsetof 
(ptl_magicversion_t, version_major) == 4); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_major) == 2); - LASSERT (offsetof (ptl_magicversion_t, version_minor) == 6); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_minor) == 2); + LASSERT ((int)sizeof(ptl_magicversion_t) == 8); + LASSERT (offsetof(ptl_magicversion_t, magic) == 0); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4); + LASSERT (offsetof(ptl_magicversion_t, version_major) == 4); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2); + LASSERT (offsetof(ptl_magicversion_t, version_minor) == 6); + LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2); /* Checks for struct ptl_hdr_t */ - LASSERT (sizeof (ptl_hdr_t) == 72); - LASSERT (offsetof (ptl_hdr_t, dest_nid) == 0); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, src_nid) == 8); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, dest_pid) == 16); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, src_pid) == 20); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, type) == 24); - LASSERT (sizeof (((ptl_hdr_t *)0)->type) == 4); + LASSERT ((int)sizeof(ptl_hdr_t) == 72); + LASSERT (offsetof(ptl_hdr_t, dest_nid) == 0); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8); + LASSERT (offsetof(ptl_hdr_t, src_nid) == 8); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8); + LASSERT (offsetof(ptl_hdr_t, dest_pid) == 16); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4); + LASSERT (offsetof(ptl_hdr_t, src_pid) == 20); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4); + LASSERT (offsetof(ptl_hdr_t, type) == 24); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4); + LASSERT (offsetof(ptl_hdr_t, payload_length) == 28); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg) == 32); + LASSERT 
((int)sizeof(((ptl_hdr_t *)0)->msg) == 40); /* Ack */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.mlength) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.mlength) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.ack.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.ack.mlength) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4); /* Put */ - LASSERT (offsetof (ptl_hdr_t, msg.put.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.put.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.hdr_data) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.put.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.put.hdr_data) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); 
+ LASSERT (offsetof(ptl_hdr_t, msg.put.ptl_index) == 64); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.put.offset) == 68); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4); /* Get */ - LASSERT (offsetof (ptl_hdr_t, msg.get.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.get.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_offset) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.sink_length) == 68); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.sink_length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.return_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); + LASSERT (offsetof(ptl_hdr_t, msg.get.match_bits) == 48); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8); + LASSERT (offsetof(ptl_hdr_t, msg.get.ptl_index) == 56); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.src_offset) == 60); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.get.sink_length) == 64); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4); /* Reply */ - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_offset) == 48); - LASSERT (sizeof (((ptl_hdr_t 
*)0)->msg.reply.dst_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.reply.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.length) == 4); + LASSERT (offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32); + LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); } diff --git a/lustre/portals/portals/lib-msg.c b/lustre/portals/portals/lib-msg.c index 9363251..9840ff5 100644 --- a/lustre/portals/portals/lib-msg.c +++ b/lustre/portals/portals/lib-msg.c @@ -59,7 +59,7 @@ int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg) ack.src_nid = HTON__u64 (nal->ni.nid); ack.dest_pid = HTON__u32 (msg->pid); ack.src_pid = HTON__u32 (nal->ni.pid); - PTL_HDR_LENGTH(&ack) = 0; + ack.payload_length = 0; ack.msg.ack.dst_wmd = msg->ack_wmd; ack.msg.ack.match_bits = msg->ev.match_bits; diff --git a/lustre/portals/utils/acceptor.c b/lustre/portals/utils/acceptor.c index 9fb2759..54a57d4 100644 --- a/lustre/portals/utils/acceptor.c +++ b/lustre/portals/utils/acceptor.c @@ -19,6 +19,7 @@ #include #include #include +#include /* should get this from autoconf somehow */ #ifndef PIDFILE_DIR @@ -100,7 +101,7 @@ parse_size (int *sizep, char *str) } void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid) +show_connection (int fd, __u32 net_ip, ptl_nid_t nid, int type) { struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); __u32 host_ip = ntohl (net_ip); @@ -128,8 +129,12 @@ show_connection (int fd, __u32 net_ip, ptl_nid_t nid) else snprintf (host, sizeof(host), "%s", h->h_name); - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n", - host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled"); + syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s type %s\n", + host, nid, txmem, rxmem, nonagle ? "disabled" : "enabled", + (type == SOCKNAL_CONN_ANY) ? "A" : + (type == SOCKNAL_CONN_CONTROL) ? "C" : + (type == SOCKNAL_CONN_BULK_IN) ? "I" : + (type == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); } int @@ -189,9 +194,10 @@ sock_read (int cfd, void *buffer, int nob) } int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) +exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid, int *type) { int rc; + int t; ptl_hdr_t hdr; ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; @@ -248,15 +254,32 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) /* ...and check we got what we expected */ if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { + __cpu_to_le32 (hdr.payload_length) != 0) { fprintf (stderr, "Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload\n", __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); + __cpu_to_le32 (hdr.payload_length)); return (-1); } *peer_nid = __le64_to_cpu (hdr.src_nid); + + t = __le32_to_cpu (*(__u32 *)&hdr.msg); + switch (t) { /* swap sense of connection type */ + case SOCKNAL_CONN_CONTROL: + break; + case SOCKNAL_CONN_BULK_IN: + t = SOCKNAL_CONN_BULK_OUT; + break; + case SOCKNAL_CONN_BULK_OUT: + t = SOCKNAL_CONN_BULK_IN; + break; + default: + t = SOCKNAL_CONN_ANY; + break; + } + *type = t; + return (0); } @@ -277,10 +300,10 @@ int main(int argc, char **argv) int noclose = 0; int nonagle = 1; int nal = SOCKNAL; - int xchg_nids = 0; int bind_irq = 0; + int type = 0; - while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1) + while ((c = getopt (argc, argv, "N:r:s:nli")) != -1) switch (c) { case 'r': @@ -301,10 +324,6 @@ int main(int argc, char **argv) noclose = 1; break; - case 'x': - xchg_nids = 1; - break; - case 'i': bind_irq = 1; break; @@ -419,35 +438,29 @@ int main(int argc, char **argv) continue; } - if (!xchg_nids) - peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */ - else - { - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids 
(cfd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (cfd); - continue; - } + PORTAL_IOC_INIT (data); + data.ioc_nal = nal; + rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); + if (rc < 0) { + perror ("Can't get my NID"); + close (cfd); + continue; + } + + rc = exchange_nids (cfd, data.ioc_nid, &peer_nid, &type); + if (rc != 0) { + close (cfd); + continue; } - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid); + show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid, type); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_nal = nal; pcfg.pcfg_fd = cfd; pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; + pcfg.pcfg_misc = type; PORTAL_IOC_INIT(data); data.ioc_pbuf1 = (char*)&pcfg; diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index b444115..23f2a5f 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "parser.h" unsigned int portal_debug; @@ -435,14 +436,13 @@ jt_ptl_print_autoconnects (int argc, char **argv) if (rc != 0) break; - printf (LPX64"@%s:%d #%d buffer %d nonagle %s xchg %s " - "affinity %s eager %s share %d\n", + printf (LPX64"@%s:%d #%d buffer %d " + "nonagle %s affinity %s eager %s share %d\n", pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), pcfg.pcfg_misc, pcfg.pcfg_count, pcfg.pcfg_size, (pcfg.pcfg_flags & 1) ? "on" : "off", (pcfg.pcfg_flags & 2) ? "on" : "off", (pcfg.pcfg_flags & 4) ? "on" : "off", - (pcfg.pcfg_flags & 8) ? 
"on" : "off", pcfg.pcfg_wait); } @@ -458,14 +458,13 @@ jt_ptl_add_autoconnect (int argc, char **argv) ptl_nid_t nid; __u32 ip; int port; - int xchange_nids = 0; int irq_affinity = 0; int share = 0; int eager = 0; int rc; if (argc < 4 || argc > 5) { - fprintf (stderr, "usage: %s nid ipaddr port [ixse]\n", argv[0]); + fprintf (stderr, "usage: %s nid ipaddr port [ise]\n", argv[0]); return 0; } @@ -493,9 +492,6 @@ jt_ptl_add_autoconnect (int argc, char **argv) while (*opts != 0) switch (*opts++) { - case 'x': - xchange_nids = 1; - break; case 'i': irq_affinity = 1; break; @@ -519,10 +515,9 @@ jt_ptl_add_autoconnect (int argc, char **argv) /* only passing one buffer size! */ pcfg.pcfg_size = MAX (g_socket_rxmem, g_socket_txmem); pcfg.pcfg_flags = (g_socket_nonagle ? 0x01 : 0) | - (xchange_nids ? 0x02 : 0) | - (irq_affinity ? 0x04 : 0) | - (share ? 0x08 : 0) | - (eager ? 0x10 : 0); + (irq_affinity ? 0x02 : 0) | + (share ? 0x04 : 0) | + (eager ? 0x08 : 0); rc = pcfg_ioctl (&pcfg); if (rc != 0) { @@ -618,10 +613,14 @@ jt_ptl_print_connections (int argc, char **argv) if (rc != 0) break; - printf (LPX64"@%s:%d\n", + printf (LPX64"@%s:%d:%s\n", pcfg.pcfg_nid, ptl_ipaddr_2_str (pcfg.pcfg_id, buffer), - pcfg.pcfg_misc); + pcfg.pcfg_misc, + (pcfg.pcfg_flags == SOCKNAL_CONN_ANY) ? "A" : + (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL) ? "C" : + (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN) ? "I" : + (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); } if (index == 0) @@ -630,7 +629,7 @@ jt_ptl_print_connections (int argc, char **argv) } int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) +exchange_nids (int cfd, ptl_nid_t my_nid, int type, ptl_nid_t *peer_nid) { int rc; ptl_hdr_t hdr; @@ -646,6 +645,8 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) hdr.src_nid = __cpu_to_le64 (my_nid); hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); + + *(__u32 *)&hdr.msg = __cpu_to_le32(type); /* Assume there's sufficient socket buffering for a portals HELLO header */ rc = sock_write (cfd, &hdr, sizeof (hdr)); @@ -689,11 +690,11 @@ exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) /* ...and check we got what we expected */ if (hdr.type != __cpu_to_le32 (PTL_MSG_HELLO) || - PTL_HDR_LENGTH (&hdr) != __cpu_to_le32 (0)) { + hdr.payload_length != __cpu_to_le32 (0)) { fprintf (stderr, "Expecting a HELLO hdr with 0 payload," " but got type %d with %d payload\n", __le32_to_cpu (hdr.type), - __le32_to_cpu (PTL_HDR_LENGTH (&hdr))); + __le32_to_cpu (hdr.payload_length)); return (-1); } @@ -714,13 +715,13 @@ int jt_ptl_connect(int argc, char **argv) int rxmem = 0; int txmem = 0; int bind_irq = 0; - int xchange_nids = 0; + int type = SOCKNAL_CONN_ANY; int port; int o; int olen; if (argc < 3) { - fprintf(stderr, "usage: %s ip port [xi]\n", argv[0]); + fprintf(stderr, "usage: %s ip port [xibctr]\n", argv[0]); return 0; } @@ -746,8 +747,28 @@ int jt_ptl_connect(int argc, char **argv) bind_irq = 1; break; - case 'x': - xchange_nids = 1; + case 'I': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_BULK_IN; + break; + + case 'O': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_BULK_OUT; + break; + + case 'C': + if (type != SOCKNAL_CONN_ANY) { + fprintf(stderr, "Can't flag type twice\n"); + return -1; + } + type = SOCKNAL_CONN_CONTROL; break; default: @@ 
-808,33 +829,35 @@ int jt_ptl_connect(int argc, char **argv) if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - if (!xchange_nids) - peer_nid = ipaddr; - else { - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); + if (rc != 0) { + fprintf (stderr, "failed to get my nid: %s\n", + strerror (errno)); + close (fd); + return (-1); + } - rc = exchange_nids (fd, data.ioc_nid, &peer_nid); - if (rc != 0) { - close (fd); - return (-1); - } - } - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1], - peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled"); + rc = exchange_nids (fd, data.ioc_nid, type, &peer_nid); + if (rc != 0) { + close (fd); + return (-1); + } + + printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s type: %s\n", + argv[1], peer_nid, txmem, rxmem, nonagle ? "Disabled" : "Enabled", + (type == SOCKNAL_CONN_ANY) ? "A" : + (type == SOCKNAL_CONN_CONTROL) ? "C" : + (type == SOCKNAL_CONN_BULK_IN) ? "I" : + (type == SOCKNAL_CONN_BULK_OUT) ? 
"O" : "?"); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); pcfg.pcfg_fd = fd; pcfg.pcfg_nid = peer_nid; pcfg.pcfg_flags = bind_irq; - + pcfg.pcfg_misc = type; + rc = pcfg_ioctl(&pcfg); if (rc) { fprintf(stderr, "failed to register fd with portals: %s\n", diff --git a/lustre/portals/utils/ptlctl.c b/lustre/portals/utils/ptlctl.c index 1a8e637..c65ecb2 100644 --- a/lustre/portals/utils/ptlctl.c +++ b/lustre/portals/utils/ptlctl.c @@ -31,10 +31,10 @@ command_t list[] = { {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"}, {"print_autoconns", jt_ptl_print_autoconnects, 0, "print autoconnect entries (no args)"}, - {"add_autoconn", jt_ptl_add_autoconnect, 0, "add autoconnect entry (args: nid host [ixse])"}, + {"add_autoconn", jt_ptl_add_autoconnect, 0, "add autoconnect entry (args: nid host [ise])"}, {"del_autoconn", jt_ptl_del_autoconnect, 0, "delete autoconnect entry (args: [nid] [host] [ks])"}, {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [xi])"}, + {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [iIOC])"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, diff --git a/lustre/portals/utils/wirecheck.c b/lustre/portals/utils/wirecheck.c index 6a4377b..21b3dda 100644 --- a/lustre/portals/utils/wirecheck.c +++ b/lustre/portals/utils/wirecheck.c @@ -2,10 +2,14 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ #include +#include +#include #include #include #include +extern size_t strnlen(const char *, size_t); + #define BLANK_LINE() \ do { \ printf ("\n"); \ @@ -77,47 +81,104 @@ check_ptl_hdr (void) CHECK_MEMBER (ptl_hdr_t, dest_pid); CHECK_MEMBER (ptl_hdr_t, src_pid); CHECK_MEMBER (ptl_hdr_t, type); - + CHECK_MEMBER (ptl_hdr_t, 
payload_length); + CHECK_MEMBER (ptl_hdr_t, msg); + BLANK_LINE (); COMMENT ("Ack"); - CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd); CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.ack.length); + CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); BLANK_LINE (); COMMENT ("Put"); - CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd); CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.put.length); - CHECK_MEMBER (ptl_hdr_t, msg.put.offset); CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data); + CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); + CHECK_MEMBER (ptl_hdr_t, msg.put.offset); BLANK_LINE (); COMMENT ("Get"); - CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd); CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.get.length); + CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_offset); CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length); BLANK_LINE (); COMMENT ("Reply"); CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_offset); - CHECK_MEMBER (ptl_hdr_t, msg.reply.length); +} + +void +system_string (char *cmdline, char *str, int len) +{ + int fds[2]; + int rc; + pid_t pid; + + rc = pipe (fds); + if (rc != 0) + abort (); + + pid = fork (); + if (pid == 0) { + /* child */ + int fd = fileno(stdout); + + rc = dup2(fds[1], fd); + if (rc != fd) + abort(); + + exit(system(cmdline)); + /* notreached */ + } else if ((int)pid < 0) { + abort(); + } else { + FILE *f = fdopen (fds[0], "r"); + + if (f == NULL) + abort(); + + close(fds[1]); + + if (fgets(str, len, f) == NULL) + abort(); + + if (waitpid(pid, &rc, 0) != pid) + abort(); + + if (!WIFEXITED(rc) || + WEXITSTATUS(rc) != 0) + abort(); + + if (strnlen(str, len) == len) + str[len - 1] = 0; + + if (str[strlen(str) - 1] == '\n') + 
str[strlen(str) - 1] = 0; + + fclose(f); + } } int main (int argc, char **argv) { + char unameinfo[80]; + char gccinfo[80]; + + system_string("uname -a", unameinfo, sizeof(unameinfo)); + system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); + printf ("void lib_assert_wire_constants (void)\n" - "{\n"); - - COMMENT ("Wire protocol assertions generated by 'wirecheck'"); + "{\n" + " /* Wire protocol assertions generated by 'wirecheck'\n" + " * running on %s\n" + " * with %s */\n" + "\n", unameinfo, gccinfo); + BLANK_LINE (); COMMENT ("Constants..."); -- 1.8.3.1