From f81b68e6bbd956e000016bac453e53ca48584eab Mon Sep 17 00:00:00 2001 From: eeb Date: Wed, 11 May 2005 20:12:29 +0000 Subject: [PATCH] * multiple TCP networks pass initial tests --- lnet/include/lnet/lib-lnet.h | 2 +- lnet/include/lnet/lib-p30.h | 2 +- lnet/klnds/socklnd/socklnd.c | 35 +++++++++++++++++++++-------------- lnet/klnds/socklnd/socklnd.h | 7 ++++--- lnet/klnds/socklnd/socklnd_cb.c | 22 +++++++++++----------- lnet/libcfs/nidstrings.c | 12 +----------- lnet/lnet/api-ni.c | 14 +++++++++----- lnet/lnet/config.c | 11 ++++++++--- lnet/lnet/lib-move.c | 31 +++++++++++++++++++------------ lnet/lnet/lo.c | 6 +++--- lnet/lnet/module.c | 8 +++++--- 11 files changed, 83 insertions(+), 67 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index f81e488..e6a8e72 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -412,7 +412,7 @@ ptl_ni_decref(ptl_ni_t *ni) } extern ptl_nal_t ptl_lonal; -extern ptl_ni_t ptl_loni; +extern ptl_ni_t *ptl_loni; extern ptl_err_t ptl_get_apinih (ptl_handle_ni_t *nih); diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index f81e488..e6a8e72 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -412,7 +412,7 @@ ptl_ni_decref(ptl_ni_t *ni) } extern ptl_nal_t ptl_lonal; -extern ptl_ni_t ptl_loni; +extern ptl_ni_t *ptl_loni; extern ptl_err_t ptl_get_apinih (ptl_handle_ni_t *nih); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 5bccc6f..f60c567 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -636,9 +636,9 @@ ksocknal_local_ipvec (ptl_ni_t *ni, __u32 *ipaddrs) read_lock (&ksocknal_data.ksnd_global_lock); nip = net->ksnn_ninterfaces; - for (i = 0; i < nip; i++) { - LASSERT (i < PTL_MAX_INTERFACES); + LASSERT (nip < PTL_MAX_INTERFACES); + for (i = 0; i < nip; i++) { ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; LASSERT (ipaddrs[i] != 0); } @@ -990,7 +990,6 @@ ksocknal_stop_listener(void) int ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) { - int passive = (type == SOCKNAL_CONN_NONE); rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; __u32 ipaddrs[PTL_MAX_INTERFACES]; int nipaddrs; @@ -1007,7 +1006,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) ksock_tx_t *tx; int rc; - LASSERT (route == NULL || !passive); + LASSERT (route == NULL == (type == SOCKNAL_CONN_NONE)); rc = ksocknal_lib_setup_sock (sock); if (rc != 0) @@ -1043,14 +1042,16 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) if (rc != 0) goto failed_1; - if (!passive) { + if (route != NULL) { + ptl_ni_t *ni = route->ksnr_peer->ksnp_ni; + ksock_net_t *net = ni->ni_data; + /* Active connection sends HELLO eagerly */ - rc = ksocknal_local_ipvec(route->ksnr_peer->ksnp_ni, ipaddrs); - if (rc < 0) - goto failed_1; - nipaddrs = rc; + nipaddrs = ksocknal_local_ipvec(ni, ipaddrs); - rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs); + rc = ksocknal_send_hello (conn, ni->ni_nid, + net->ksnn_incarnation, + ipaddrs, nipaddrs); if (rc != 0) goto failed_1; } @@ -1108,14 +1109,20 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) write_unlock_irqrestore(global_lock, flags); } - if (!passive) { + if (route != NULL) { + /* additional routes after interface exchange? */ ksocknal_create_routes(peer, conn->ksnc_port, ipaddrs, nipaddrs); rc = 0; } else { - rc = ksocknal_select_ips(peer, ipaddrs, nipaddrs); - LASSERT (rc >= 0); - rc = ksocknal_send_hello (conn, ipaddrs, rc); + ptl_ni_t *ni = peer->ksnp_ni; + ksock_net_t *net = ni->ni_data; + + nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs); + + rc = ksocknal_send_hello (conn, ni->ni_nid, + net->ksnn_incarnation, + ipaddrs, nipaddrs); } if (rc < 0) goto failed_2; diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index de3cb63..5936445 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -528,9 +528,10 @@ extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); extern int ksocknal_scheduler (void *arg); extern int ksocknal_connd (void *arg); extern int ksocknal_reaper (void *arg); -extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs); -extern int ksocknal_recv_hello (ksock_conn_t *conn, - ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs); +extern int ksocknal_send_hello (ksock_conn_t *conn, ptl_nid_t nid, + __u64 incarnation, __u32 *ipaddrs, int nipaddrs); +extern int ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, + __u64 *incarnation, __u32 *ipaddrs); extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn); extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 1172e72..1ff7bd4 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1683,11 +1683,11 @@ void ksocknal_write_callback (ksock_conn_t *conn) } int -ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) +ksocknal_send_hello (ksock_conn_t *conn, + ptl_nid_t srcnid, __u64 incarnation, + __u32 *ipaddrs, int nipaddrs) { /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - ptl_ni_t *ni = conn->ksnc_peer->ksnp_ni; - ksock_net_t *net = ni->ni_data; struct socket *sock = conn->ksnc_sock; ptl_hdr_t hdr; ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; @@ -1695,7 +1695,7 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) int rc; LASSERT (conn->ksnc_type != SOCKNAL_CONN_NONE); - LASSERT (nipaddrs <= PTL_MAX_INTERFACES); + LASSERT (0 <= nipaddrs && nipaddrs <= PTL_MAX_INTERFACES); /* No need for getconnsock/putconnsock */ LASSERT (!conn->ksnc_closing); @@ -1705,14 +1705,18 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - hdr.src_nid = cpu_to_le64 (ni->ni_nid); + hdr.src_nid = cpu_to_le64 (srcnid); hdr.type = cpu_to_le32 (PTL_MSG_HELLO); hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs)); hdr.msg.hello.type = cpu_to_le32 (conn->ksnc_type); - hdr.msg.hello.incarnation = cpu_to_le64 (net->ksnn_incarnation); + hdr.msg.hello.incarnation = cpu_to_le64 (incarnation); - /* Receiver is eager */ + for (i = 0; i < nipaddrs; i++) { + ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]); + } + + /* Receiver should be eager */ rc = ksocknal_lib_sock_write (sock, &hdr, sizeof(hdr)); if (rc != 0) { CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", @@ -1723,10 +1727,6 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) if (nipaddrs == 0) return (0); - for (i = 0; i < nipaddrs; i++) { - ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]); - } - rc = ksocknal_lib_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs)); if (rc != 0) CERROR ("Error %d sending HELLO payload (%d)" diff --git a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c index 1d2be09..afbe53d 100644 --- a/lnet/libcfs/nidstrings.c +++ b/lnet/libcfs/nidstrings.c @@ -80,7 +80,6 @@ libcfs_next_nidstring (void) } #if !CRAY_PORTALS -static void libcfs_lo_addr2str(__u32 addr, char *str); static int libcfs_lo_str2addr(char *str, int nob, __u32 *addr); static void libcfs_ip_addr2str(__u32 addr, char *str); static int libcfs_ip_str2addr(char *str, int nob, __u32 *addr); @@ -99,7 +98,7 @@ static struct nalstrfns libcfs_nalstrfns[] = { {.nf_nal = LONAL, .nf_name = "lo", .nf_modname = "klonal", - .nf_addr2str = libcfs_lo_addr2str, + .nf_addr2str = libcfs_num_addr2str, .nf_str2addr = libcfs_lo_str2addr}, {.nf_nal = SOCKNAL, .nf_name = "tcp", @@ -140,18 +139,9 @@ static struct nalstrfns libcfs_nalstrfns[] = { const int libcfs_nnalstrfns = sizeof(libcfs_nalstrfns)/sizeof(libcfs_nalstrfns[0]); -void -libcfs_lo_addr2str(__u32 addr, char *str) -{ - /* don't print anything */ -} - int libcfs_lo_str2addr(char *str, int nob, __u32 *addr) { - if (nob != 0) /* expecting the empty string */ - return 0; - *addr = 0; return 1; } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 79b47cc..89e075d 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -644,6 +644,7 @@ ptl_shutdown_nalnis (void) LASSERT (!in_interrupt()); atomic_dec(&ni->ni_nal->nal_refcount); (ni->ni_nal->nal_shutdown)(ni); + PORTAL_FREE(ni, sizeof(*ni)); PTL_LOCK(flags); @@ -763,6 +764,11 @@ PtlInit(int *max_interfaces) pthread_mutex_init(&ptl_apini.apini_api_mutex); #endif + ptl_apini.apini_init = 1; + + if (max_interfaces != NULL) + *max_interfaces = 1; + /* NALs in separate modules register themselves when their module * loads, and unregister themselves when their module is unloaded. * Otherwise they are plugged in explicitly here... */ @@ -771,10 +777,6 @@ PtlInit(int *max_interfaces) #ifndef __KERNEL__ ptl_register_nal (&tcpnal_nal); #endif - ptl_apini.apini_init = 1; - - if (max_interfaces != NULL) - *max_interfaces = 1; return PTL_OK; } @@ -785,10 +787,12 @@ PtlFini(void) LASSERT (ptl_apini.apini_init); LASSERT (ptl_apini.apini_refcount == 0); -#ifndef __KERNEL__ /* See comment where tcpnal_nal registers itself */ +#ifndef __KERNEL__ ptl_unregister_nal(&tcpnal_nal); #endif + ptl_unregister_nal(&ptl_lonal); + LASSERT (list_empty(&ptl_apini.apini_nals)); ptl_apini.apini_init = 0; diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 1f8d105..3834c16 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -208,11 +208,16 @@ ptl_parse_networks(struct list_head *nilist, char *networks) memcpy (tokens, networks, tokensize); str = tokens; + PORTAL_ALLOC(ptl_loni, sizeof(*ptl_loni)); + if (ptl_loni == NULL) { + CERROR("Can't allocate LO NI\n"); + goto failed; + } /* Add in the loopback network */ /* zero counters/flags, NULL pointers... */ - memset(&ptl_loni, 0, sizeof(ptl_loni)); - ptl_loni.ni_nid = PTL_MKNID(PTL_MKNET(LONAL, 0), 0); - list_add_tail(&ptl_loni.ni_list, nilist); + memset(ptl_loni, 0, sizeof(*ptl_loni)); + ptl_loni->ni_nid = PTL_MKNID(PTL_MKNET(LONAL, 0), 0); + list_add_tail(&ptl_loni->ni_list, nilist); while (str != NULL && *str != 0) { char *comma = strchr(str, ','); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 408bded..6dd787d 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -576,7 +576,7 @@ ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t len) { ptl_nid_t gw_nid; - int routing; + int routing = 0; ptl_err_t rc; /* CAVEAT EMPTOR! ni != NULL == interface pre-determined (ACK) */ @@ -587,17 +587,23 @@ ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, return PTL_FAIL; } - if (target.nid != ni->ni_nid) { - /* will gateway have to forward? */ - routing = (gw_nid != ni->ni_nid); - } else { - /* it's for me! */ - ptl_ni_addref(&ptl_loni); - ptl_ni_decref(ni); - ni = &ptl_loni; - routing = 0; + if (PTL_NETNAL(PTL_NIDNET(ni->ni_nid)) == LONAL) { + if (target.nid != ni->ni_nid) { + /* will gateway have to forward? */ + routing = (gw_nid != ni->ni_nid); + } else if (allow_destination_aliases) { + /* it's for me (force lonal) */ + ptl_ni_addref(ptl_loni); + ptl_ni_decref(ni); + ni = ptl_loni; + } else { + ptl_ni_decref(ni); + CERROR("Attempt to send to self via %s, not LONAL\n", + libcfs_nid2str(target.nid)); + return PTL_FAIL; + } } - + hdr->type = cpu_to_le32(type); hdr->dest_nid = cpu_to_le64(target.nid); hdr->dest_pid = cpu_to_le32(target.pid); @@ -1047,7 +1053,8 @@ ptl_parse(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private) if (!allow_destination_aliases) { /* dest is another local NI; sender should have used * this node's NID on its own network */ - CERROR ("%s: Dropping message from %s: nid %s is a local alias\n", + CERROR ("%s: Dropping message from %s: nid %s " + "is a local alias\n", libcfs_nid2str(ni->ni_nid), libcfs_nid2str(le64_to_cpu(hdr->src_nid)), libcfs_nid2str(dest_nid)); diff --git a/lnet/lnet/lo.c b/lnet/lnet/lo.c index b58df75..4727e54 100644 --- a/lnet/lnet/lo.c +++ b/lnet/lnet/lo.c @@ -255,7 +255,7 @@ void lonal_shutdown(ptl_ni_t *ni) { CDEBUG (D_NET, "shutdown\n"); - LASSERT (ni == &ptl_loni); + LASSERT (ni == ptl_loni); LASSERT (lonal_instanced); lonal_instanced = 0; @@ -265,7 +265,7 @@ ptl_err_t lonal_startup (ptl_ni_t *ni) { LASSERT (ni->ni_nal == &ptl_lonal); - LASSERT (ni == &ptl_loni); + LASSERT (ni == ptl_loni); LASSERT (!lonal_instanced); lonal_instanced = 1; @@ -284,4 +284,4 @@ ptl_nal_t ptl_lonal = { #endif }; -ptl_ni_t ptl_loni; +ptl_ni_t *ptl_loni; diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 7b9bd0c..a435fa2 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -78,7 +78,7 @@ static int init_kportals_module(void) ENTRY; rc = PtlInit(NULL); - if (rc) { + if (rc != PTL_OK) { CERROR("PtlInit: error %d\n", rc); RETURN(rc); } @@ -93,8 +93,10 @@ static int init_kportals_module(void) rc = PtlNIInit(PTL_IFACE_DEFAULT, LUSTRE_SRV_PTL_PID, NULL, NULL, &nih); if (rc != PTL_OK) { - PtlFini(); - return -ENETDOWN; + /* Can't PtlFini or fail now if I loaded NALs */ + PTL_MUTEX_DOWN(&ptl_apini.apini_api_mutex); + ptl_apini.apini_niinit_self = 0; + PTL_MUTEX_UP(&ptl_apini.apini_api_mutex); } } -- 1.8.3.1