From 4e215b719b184a1ccddcff5c0ec5c36fe00562d9 Mon Sep 17 00:00:00 2001 From: eeb Date: Tue, 5 Jul 2005 18:35:31 +0000 Subject: [PATCH] * Compiles after merging b1_4 * Changed socknal... - use PID as well as NID to match connections so userspace (tcpnal) clients can be distinguished without changing the NID format. - unprivileged port == userspace client - don't create new connections to userspace clients - derive the NID/PID of a userspace client from the remote IP/port * Changed tcpnal... - use non-privileged ports - no concept of own NID (peer assigns) - don't accept connections --- lnet/include/libcfs/darwin/kp30.h | 2 +- lnet/include/libcfs/darwin/libcfs.h | 2 +- lnet/include/libcfs/kp30.h | 6 +- lnet/include/libcfs/libcfs.h | 8 + lnet/include/libcfs/linux/kp30.h | 2 + lnet/include/libcfs/linux/libcfs.h | 2 +- lnet/include/lnet/lib-lnet.h | 2 + lnet/include/lnet/lib-p30.h | 2 + lnet/klnds/openiblnd/openiblnd_cb.c | 2 +- lnet/klnds/qswlnd/qswlnd.h | 2 +- lnet/klnds/qswlnd/qswlnd_cb.c | 3 +- lnet/klnds/socklnd/socklnd.c | 200 +++++++++-------- lnet/klnds/socklnd/socklnd.h | 14 +- lnet/klnds/socklnd/socklnd_cb.c | 115 ++++++---- lnet/klnds/socklnd/socklnd_lib-darwin.c | 8 +- lnet/libcfs/darwin/darwin-proc.c | 1 + lnet/libcfs/debug.c | 3 + lnet/libcfs/linux/linux-proc.c | 3 + lnet/libcfs/nidstrings.c | 32 +-- lnet/lnet/acceptor.c | 12 +- lnet/lnet/api-ni.c | 2 +- lnet/lnet/lib-msg.c | 5 + lnet/lnet/module.c | 1 + lnet/lnet/router.c | 7 +- lnet/ulnds/Makefile.am | 7 +- lnet/ulnds/address.c | 147 ------------ lnet/ulnds/connection.c | 386 ++++++++++++++++++-------------- lnet/ulnds/connection.h | 26 +-- lnet/ulnds/ipmap.h | 38 ---- lnet/ulnds/procapi.c | 22 +- lnet/ulnds/procbridge.h | 2 - lnet/ulnds/proclib.c | 4 - lnet/ulnds/socklnd/Makefile.am | 7 +- lnet/ulnds/socklnd/address.c | 147 ------------ lnet/ulnds/socklnd/connection.c | 386 ++++++++++++++++++-------------- lnet/ulnds/socklnd/connection.h | 26 +-- lnet/ulnds/socklnd/ipmap.h | 38 ---- 
lnet/ulnds/socklnd/procapi.c | 22 +- lnet/ulnds/socklnd/procbridge.h | 2 - lnet/ulnds/socklnd/proclib.c | 4 - lnet/ulnds/socklnd/table.c | 2 +- lnet/ulnds/socklnd/table.h | 5 +- lnet/ulnds/socklnd/tcplnd.c | 8 +- lnet/ulnds/table.c | 2 +- lnet/ulnds/table.h | 5 +- lnet/ulnds/tcplnd.c | 8 +- lnet/utils/portals.c | 24 +- 47 files changed, 797 insertions(+), 957 deletions(-) delete mode 100644 lnet/ulnds/address.c delete mode 100644 lnet/ulnds/ipmap.h delete mode 100644 lnet/ulnds/socklnd/address.c delete mode 100644 lnet/ulnds/socklnd/ipmap.h diff --git a/lnet/include/libcfs/darwin/kp30.h b/lnet/include/libcfs/darwin/kp30.h index a95d769..fa1e10e 100644 --- a/lnet/include/libcfs/darwin/kp30.h +++ b/lnet/include/libcfs/darwin/kp30.h @@ -32,7 +32,7 @@ #define LASSERT_SPIN_LOCKED(lock) do {} while(0) #endif -#define LBUG_WITH_LOC(file, func, line) do {} while(0) +#define LBUG_WITH_LOC(file, func, line) do {portals_catastrophe = 1;} while(0) /* --------------------------------------------------------------------- */ diff --git a/lnet/include/libcfs/darwin/libcfs.h b/lnet/include/libcfs/darwin/libcfs.h index 8e4eb89..6ec2486 100644 --- a/lnet/include/libcfs/darwin/libcfs.h +++ b/lnet/include/libcfs/darwin/libcfs.h @@ -155,7 +155,7 @@ __entry_nesting(&__cdd); /* ENTRY_NESTING_SUPPORT */ #endif -#define LUSTRE_PTL_PID 12345 +#define LUSTRE_PTL_PID 456 /* <= 1023 (TCP reserved port) */ #define _XNU_LIBCFS_H diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h index d99a977..0a7d73c 100644 --- a/lnet/include/libcfs/kp30.h +++ b/lnet/include/libcfs/kp30.h @@ -252,7 +252,7 @@ struct portals_device_userstate * USER LEVEL STUFF BELOW */ -#define PORTAL_IOCTL_VERSION 0x00010009 +#define PORTAL_IOCTL_VERSION 0x0001000a struct portal_ioctl_data { __u32 ioc_len; @@ -264,7 +264,7 @@ struct portal_ioctl_data { __u32 ioc_flags; __u32 ioc_count; __u32 ioc_net; - __u32 ioc_u32[6]; + __u32 ioc_u32[7]; __u32 ioc_inllen1; char *ioc_inlbuf1; @@ -452,8 +452,6 @@ enum { RANAL = 
8, }; -#define PTL_NALFMT_SIZE 32 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+5+1) */ - enum { DEBUG_DAEMON_START = 1, DEBUG_DAEMON_STOP = 2, diff --git a/lnet/include/libcfs/libcfs.h b/lnet/include/libcfs/libcfs.h index e75e31e..bbd77f3 100644 --- a/lnet/include/libcfs/libcfs.h +++ b/lnet/include/libcfs/libcfs.h @@ -38,6 +38,9 @@ extern unsigned int portal_stack; extern unsigned int portal_debug; extern unsigned int portal_printk; +/* Has there been an LBUG? */ +extern unsigned int portals_catastrophe; + /* * struct ptldebug_header is defined in libcfs/<os>/libcfs.h */ @@ -221,6 +224,7 @@ do { \ #endif /* !CDEBUG_ENTRY_EXIT */ #else /* !1 */ +#define CDEBUG_LIMIT(mask, format, a...) do { } while (0) #define CDEBUG(mask, format, a...) do { } while (0) #define CWARN(format, a...) printk(KERN_WARNING format, ## a) #define CERROR(format, a...) printk(KERN_ERR format, ## a) @@ -236,6 +240,7 @@ do { \ #define EXIT do { } while (0) #endif /* !1 */ #else /* !__KERNEL__ */ +#define CDEBUG_LIMIT(mask, format, a...) do { } while (0) #define CDEBUG(mask, format, a...) do { } while (0) #define LCONSOLE(mask, format, a...) fprintf(stderr, format, ## a) #define CWARN(format, a...) 
fprintf(stderr, format, ## a) @@ -274,6 +279,9 @@ int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); /* libcfs tcpip */ +#define PTL_ACCEPTOR_MIN_RESERVED_PORT 512 +#define PTL_ACCEPTOR_MAX_RESERVED_PORT 1023 + int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); int libcfs_ipif_enumerate(char ***names); void libcfs_ipif_free_enumeration(char **names, int n); diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h index e495cf7..01052fc 100644 --- a/lnet/include/libcfs/linux/kp30.h +++ b/lnet/include/libcfs/linux/kp30.h @@ -93,6 +93,7 @@ static inline void our_cond_resched(void) #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ + portals_catastrophe = 1; \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ panic("LBUG"); \ @@ -101,6 +102,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ + portals_catastrophe = 1; \ portals_debug_dumpstack(NULL); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ diff --git a/lnet/include/libcfs/linux/libcfs.h b/lnet/include/libcfs/linux/libcfs.h index e62ac48..12d6204 100644 --- a/lnet/include/libcfs/linux/libcfs.h +++ b/lnet/include/libcfs/linux/libcfs.h @@ -127,7 +127,7 @@ struct ptldebug_header { * */ #define LUSTRE_PTL_PID 9 # else -#define LUSTRE_PTL_PID 12345 +#define LUSTRE_PTL_PID 456 /* <= 1023 (TCP reserved port) */ # endif #define ENTRY_NESTING_SUPPORT (0) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 7f5a6dc..7781d22 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -467,6 +467,8 @@ extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len); +extern ptl_pid_t ptl_getpid(void); + extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t 
*msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index 7f5a6dc..7781d22 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -467,6 +467,8 @@ extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len); +extern ptl_pid_t ptl_getpid(void); + extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md, ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg, diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c index d05ba14..e629c53 100644 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ b/lnet/klnds/openiblnd/openiblnd_cb.c @@ -964,7 +964,7 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid) write_unlock_irqrestore (g_lock, flags); if (retry) { - CERROR("Can't find per %s\n", libcfs_nid2str(nid)); + CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); tx->tx_status = -EHOSTUNREACH; kibnal_tx_done (tx); return; diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 77ae7f6..36ff15e 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -290,7 +290,7 @@ kqswnal_elanid2nid (int elanid) static inline int kqswnal_nid2elanid (ptl_nid_t nid) { - int elanid = PTL_NIDADDR(nid); + __u32 elanid = PTL_NIDADDR(nid); /* not in this cluster? */ return (elanid >= kqswnal_data.kqn_nnodes) ? 
-1 : elanid; diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 6001e06..67a52d6 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -1513,8 +1513,7 @@ kqswnal_parse (kqswnal_rx_t *krx) dest_nid = le64_to_cpu(hdr->dest_nid); /* final dest */ src_nid = le64_to_cpu(hdr->src_nid); /* original source */ - sender_nid = PTL_MKNID(PTL_NIDNET(kqswnal_data.kqn_ni->ni_nid), - ep_rxd_node(krx->krx_rxd)); /* who sent it to me */ + sender_nid = kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)); /* who sent it to me */ #if KQSW_CHECKSUM LASSERTF (0, "checksums for forwarded packets not implemented\n"); #endif diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index a552472..09138c6 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -94,13 +94,14 @@ ksocknal_destroy_route (ksock_route_t *route) } int -ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid) +ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_process_id_t id) { ksock_net_t *net = ni->ni_data; ksock_peer_t *peer; unsigned long flags; - LASSERT (nid != PTL_NID_ANY); + LASSERT (id.nid != PTL_NID_ANY); + LASSERT (id.pid != PTL_PID_ANY); LASSERT (!in_interrupt()); PORTAL_ALLOC (peer, sizeof (*peer)); @@ -110,7 +111,7 @@ ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid) memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */ peer->ksnp_ni = ni; - peer->ksnp_nid = nid; + peer->ksnp_id = id; atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ peer->ksnp_closing = 0; CFS_INIT_LIST_HEAD (&peer->ksnp_conns); @@ -142,7 +143,7 @@ ksocknal_destroy_peer (ksock_peer_t *peer) unsigned long flags; CDEBUG (D_NET, "peer %s %p deleted\n", - libcfs_nid2str(peer->ksnp_nid), peer); + libcfs_id2str(peer->ksnp_id), peer); LASSERT (atomic_read (&peer->ksnp_refcount) == 0); LASSERT (list_empty (&peer->ksnp_conns)); @@ -161,9 +162,9 @@ ksocknal_destroy_peer 
(ksock_peer_t *peer) } ksock_peer_t * -ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid) +ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id) { - struct list_head *peer_list = ksocknal_nid2peerlist (nid); + struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); struct list_head *tmp; ksock_peer_t *peer; @@ -176,11 +177,12 @@ ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid) if (peer->ksnp_ni != ni) continue; - if (peer->ksnp_nid != nid) + if (peer->ksnp_id.nid != id.nid || + peer->ksnp_id.pid != id.pid) continue; CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), + peer, libcfs_id2str(id), atomic_read(&peer->ksnp_refcount)); return (peer); } @@ -188,12 +190,12 @@ ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid) } ksock_peer_t * -ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid) +ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id) { ksock_peer_t *peer; read_lock (&ksocknal_data.ksnd_global_lock); - peer = ksocknal_find_peer_locked (ni, nid); + peer = ksocknal_find_peer_locked (ni, id); if (peer != NULL) /* +1 ref for caller? 
*/ ksocknal_peer_addref(peer); read_unlock (&ksocknal_data.ksnd_global_lock); @@ -225,7 +227,7 @@ ksocknal_unlink_peer_locked (ksock_peer_t *peer) int ksocknal_get_peer_info (ptl_ni_t *ni, int index, - ptl_nid_t *nid, __u32 *myip, __u32 *peer_ip, int *port, + ptl_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port, int *conn_count, int *share_count) { ksock_peer_t *peer; @@ -251,7 +253,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index, if (index-- > 0) continue; - *nid = peer->ksnp_nid; + *id = peer->ksnp_id; *myip = 0; *peer_ip = 0; *port = 0; @@ -265,7 +267,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index, if (index-- > 0) continue; - *nid = peer->ksnp_nid; + *id = peer->ksnp_id; *myip = peer->ksnp_passive_ips[j]; *peer_ip = 0; *port = 0; @@ -282,7 +284,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index, route = list_entry(rtmp, ksock_route_t, ksnr_list); - *nid = peer->ksnp_nid; + *id = peer->ksnp_id; *myip = route->ksnr_myipaddr; *peer_ip = route->ksnr_ipaddr; *port = route->ksnr_port; @@ -312,13 +314,13 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) if (route->ksnr_myipaddr == 0) { /* route wasn't bound locally yet (the initial route) */ CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), HIPQUAD(route->ksnr_ipaddr), HIPQUAD(conn->ksnc_myipaddr)); } else { CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from " "%u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), HIPQUAD(route->ksnr_ipaddr), HIPQUAD(route->ksnr_myipaddr), HIPQUAD(conn->ksnc_myipaddr)); @@ -361,7 +363,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { CERROR ("Duplicate route %s %u.%u.%u.%u\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), HIPQUAD(route->ksnr_ipaddr)); LBUG(); } @@ -425,7 +427,7 @@ ksocknal_del_route_locked (ksock_route_t *route) } 
int -ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port) +ksocknal_add_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ipaddr, int port) { unsigned long flags; struct list_head *tmp; @@ -435,11 +437,12 @@ ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port) ksock_route_t *route2; int rc; - if (nid == PTL_NID_ANY) + if (id.nid == PTL_NID_ANY || + id.pid == PTL_PID_ANY) return (-EINVAL); /* Have a brand new peer ready... */ - rc = ksocknal_create_peer(&peer, ni, nid); + rc = ksocknal_create_peer(&peer, ni, id); if (rc != 0) return rc; @@ -451,14 +454,14 @@ ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port) write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - peer2 = ksocknal_find_peer_locked (ni, nid); + peer2 = ksocknal_find_peer_locked (ni, id); if (peer2 != NULL) { ksocknal_peer_decref(peer); peer = peer2; } else { /* peer table takes my ref on peer */ list_add_tail (&peer->ksnp_list, - ksocknal_nid2peerlist (nid)); + ksocknal_nid2peerlist (id.nid)); } route2 = NULL; @@ -539,7 +542,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) } int -ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip) +ksocknal_del_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ip) { unsigned long flags; struct list_head *ptmp; @@ -552,8 +555,8 @@ ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip) write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - if (nid != PTL_NID_ANY) - lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers; + if (id.nid != PTL_NID_ANY) + lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; @@ -566,7 +569,8 @@ ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip) if (peer->ksnp_ni != ni) continue; - if (!(nid == PTL_NID_ANY || peer->ksnp_nid == nid)) + if (!((id.nid == PTL_NID_ANY || peer->ksnp_id.nid == id.nid) && + (id.pid == PTL_PID_ANY || peer->ksnp_id.pid == id.pid))) continue; 
ksocknal_del_peer_locked (peer, ip); @@ -953,7 +957,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, ksock_net_t *net = (ksock_net_t *)ni->ni_data; __u32 ipaddrs[PTL_MAX_INTERFACES]; int nipaddrs; - ptl_nid_t nid; + ptl_process_id_t peerid; struct list_head *tmp; __u64 incarnation; unsigned long flags; @@ -1017,12 +1021,18 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, * vector of interfaces she's willing to let me connect to. * Passive connections use the listener timeout since the peer sends * eagerly */ - nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid; - rc = ksocknal_recv_hello (ni, conn, &nid, &incarnation, ipaddrs); + if (route == NULL) { + peerid.nid = PTL_NID_ANY; + peerid.pid = PTL_PID_ANY; + } else { + peerid = route->ksnr_peer->ksnp_id; + } + + rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs); if (rc < 0) goto failed_1; nipaddrs = rc; - LASSERT (nid != PTL_NID_ANY); + LASSERT (peerid.nid != PTL_NID_ANY); if (route != NULL) { peer = route->ksnr_peer; @@ -1033,18 +1043,18 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, ipaddrs, nipaddrs); rc = 0; } else { - rc = ksocknal_create_peer(&peer, ni, nid); + rc = ksocknal_create_peer(&peer, ni, peerid); if (rc != 0) goto failed_1; write_lock_irqsave(global_lock, flags); - peer2 = ksocknal_find_peer_locked(ni, nid); + peer2 = ksocknal_find_peer_locked(ni, peerid); if (peer2 == NULL) { /* NB this puts an "empty" peer in the peer * table (which takes my ref) */ list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(nid)); + ksocknal_nid2peerlist(peerid.nid)); } else { ksocknal_peer_decref(peer); peer = peer2; @@ -1095,7 +1105,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, if (route != NULL && route->ksnr_ipaddr != conn->ksnc_ipaddr) { CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), HIPQUAD(route->ksnr_ipaddr), HIPQUAD(conn->ksnc_ipaddr)); } @@ 
-1147,8 +1157,8 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, rc = ksocknal_close_stale_conns_locked(peer, incarnation); if (rc != 0) - CERROR ("Closed %d stale conns to nid %s ip %d.%d.%d.%d\n", - rc, libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n", + rc, libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr)); write_unlock_irqrestore (global_lock, flags); @@ -1163,7 +1173,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d" " incarnation:"LPD64" sched[%d]/%d\n", - libcfs_nid2str(nid), HIPQUAD(conn->ksnc_myipaddr), + libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); @@ -1320,7 +1330,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) ksocknal_connsock_decref(conn); if (notify) - kpr_notify (peer->ksnp_ni, peer->ksnp_nid, 0, then); + kpr_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, then); } void @@ -1357,7 +1367,7 @@ ksocknal_destroy_conn (ksock_conn_t *conn) case SOCKNAL_RX_BODY: CERROR("Completing partial receive from %s" ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); ptl_finalize (conn->ksnc_peer->ksnp_ni, NULL, conn->ksnc_cookie, PTL_FAIL); @@ -1416,7 +1426,7 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d " "incarnation:"LPD64"("LPD64")\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), conn->ksnc_ipaddr, conn->ksnc_port, conn->ksnc_incarnation, incarnation); @@ -1445,7 +1455,7 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) } int -ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) +ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr) { 
unsigned long flags; ksock_peer_t *peer; @@ -1458,8 +1468,8 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - if (nid != PTL_NID_ANY) - lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers; + if (id.nid != PTL_NID_ANY) + lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; @@ -1470,7 +1480,8 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid)) + if (!((id.nid == PTL_NID_ANY || id.nid == peer->ksnp_id.nid) && + (id.pid == PTL_PID_ANY || id.pid == peer->ksnp_id.pid))) continue; count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); @@ -1480,7 +1491,7 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); /* wildcards always succeed */ - if (nid == PTL_NID_ANY || ipaddr == 0) + if (id.nid == PTL_NID_ANY || id.pid == PTL_PID_ANY || ipaddr == 0) return (0); return (count == 0 ? -ENOENT : 0); @@ -1491,13 +1502,14 @@ ksocknal_notify (ptl_ni_t *ni, ptl_nid_t gw_nid, int alive) { /* The router is telling me she's been notified of a change in * gateway state.... */ + ptl_process_id_t id = {.nid = gw_nid, .pid = PTL_PID_ANY}; CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), alive ? "up" : "down"); if (!alive) { /* If the gateway crashed, close all open connections... 
*/ - ksocknal_close_matching_conns (gw_nid, 0); + ksocknal_close_matching_conns (id, 0); return; } @@ -1538,7 +1550,7 @@ ksocknal_push_peer (ksock_peer_t *peer) } int -ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid) +ksocknal_push (ptl_ni_t *ni, ptl_process_id_t id) { ksock_peer_t *peer; struct list_head *tmp; @@ -1547,17 +1559,6 @@ ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid) int j; int rc = -ENOENT; - if (nid != PTL_NID_ANY) { - peer = ksocknal_find_peer (ni, nid); - - if (peer != NULL) { - rc = 0; - ksocknal_push_peer (peer); - ksocknal_peer_decref(peer); - } - return (rc); - } - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { for (j = 0; ; j++) { read_lock (&ksocknal_data.ksnd_global_lock); @@ -1566,9 +1567,18 @@ ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid) peer = NULL; list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { + peer = list_entry(tmp, ksock_peer_t, + ksnp_list); + + if (!((id.nid == PTL_NID_ANY || + id.nid == peer->ksnp_id.nid) && + (id.pid == PTL_PID_ANY || + id.pid == peer->ksnp_id.pid))) { + peer = NULL; + continue; + } + if (index++ == j) { - peer = list_entry(tmp, ksock_peer_t, - ksnp_list); ksocknal_peer_addref(peer); break; } @@ -1776,39 +1786,42 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg) data->ioc_u32[0]); /* IP address */ case IOC_PORTAL_GET_PEER: { - ptl_nid_t nid = 0; - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; + ptl_process_id_t id = {0,}; + __u32 myip = 0; + __u32 ip = 0; + int port = 0; + int conn_count = 0; + int share_count = 0; rc = ksocknal_get_peer_info(ni, data->ioc_count, - &nid, &myip, &ip, &port, + &id, &myip, &ip, &port, &conn_count, &share_count); if (rc != 0) return rc; - data->ioc_nid = nid; + data->ioc_nid = id.nid; data->ioc_count = share_count; data->ioc_u32[0] = ip; data->ioc_u32[1] = port; data->ioc_u32[2] = myip; data->ioc_u32[3] = conn_count; + data->ioc_u32[4] = id.pid; return 0; } - case IOC_PORTAL_ADD_PEER: - return ksocknal_add_peer (ni, - 
data->ioc_nid, + case IOC_PORTAL_ADD_PEER: { + ptl_process_id_t id = {.nid = data->ioc_nid, + .pid = LUSTRE_SRV_PTL_PID}; + return ksocknal_add_peer (ni, id, data->ioc_u32[0], /* IP */ data->ioc_u32[1]); /* port */ - - case IOC_PORTAL_DEL_PEER: - return ksocknal_del_peer (ni, - data->ioc_nid, + } + case IOC_PORTAL_DEL_PEER: { + ptl_process_id_t id = {.nid = data->ioc_nid, + .pid = PTL_PID_ANY}; + return ksocknal_del_peer (ni, id, data->ioc_u32[0]); /* IP */ - + } case IOC_PORTAL_GET_CONN: { int txmem; int rxmem; @@ -1821,7 +1834,7 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg) ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_nid; + data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; data->ioc_flags = nagle; data->ioc_u32[0] = conn->ksnc_ipaddr; data->ioc_u32[1] = conn->ksnc_port; @@ -1830,14 +1843,18 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg) data->ioc_u32[4] = conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers; data->ioc_u32[5] = rxmem; + data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; ksocknal_conn_decref(conn); return 0; } - case IOC_PORTAL_CLOSE_CONNECTION: - return ksocknal_close_matching_conns (data->ioc_nid, - data->ioc_u32[0]); + case IOC_PORTAL_CLOSE_CONNECTION: { + ptl_process_id_t id = {.nid = data->ioc_nid, + .pid = PTL_PID_ANY}; + return ksocknal_close_matching_conns (id, + data->ioc_u32[0]); + } case IOC_PORTAL_REGISTER_MYNID: /* Ignore if this is a noop */ if (data->ioc_nid == ni->ni_nid) @@ -1848,9 +1865,12 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg) libcfs_nid2str(ni->ni_nid)); return -EINVAL; - case IOC_PORTAL_PUSH_CONNECTION: - return ksocknal_push (ni, data->ioc_nid); - + case IOC_PORTAL_PUSH_CONNECTION: { + ptl_process_id_t id = {.nid = data->ioc_nid, + .pid = PTL_PID_ANY}; + + return ksocknal_push(ni, id); + } default: return -EINVAL; } @@ -2146,9 +2166,11 @@ ksocknal_base_startup (void) void ksocknal_shutdown (ptl_ni_t *ni) { 
- ksock_net_t *net = ni->ni_data; - int i; - unsigned long flags; + ksock_net_t *net = ni->ni_data; + int i; + unsigned long flags; + ptl_process_id_t anyid = {.nid = PTL_NID_ANY, + .pid = PTL_PID_ANY}; LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); LASSERT(ksocknal_data.ksnd_nnets > 0); @@ -2158,7 +2180,7 @@ ksocknal_shutdown (ptl_ni_t *ni) spin_unlock_irqrestore(&net->ksnn_lock, flags); /* Delete all peers */ - ksocknal_del_peer(ni, PTL_NID_ANY, 0); + ksocknal_del_peer(ni, anyid, 0); /* Wait for all peer state to clean up */ i = 2; @@ -2333,7 +2355,9 @@ ksocknal_module_init (void) CLASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); /* check ksnr_connected/connecting field large enough */ CLASSERT(SOCKNAL_CONN_NTYPES <= 4); - + /* kernel PID should be in the "secure" TCP port range */ + CLASSERT(LUSTRE_SRV_PTL_PID <= PTL_ACCEPTOR_MAX_RESERVED_PORT); + rc = ksocknal_lib_tunables_init(); if (rc != 0) return rc; diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index d2789de..539c6de 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -365,7 +365,7 @@ typedef struct ksock_route typedef struct ksock_peer { struct list_head ksnp_list; /* stash on global peer list */ - ptl_nid_t ksnp_nid; /* who's on the other end(s) */ + ptl_process_id_t ksnp_id; /* who's on the other end(s) */ atomic_t ksnp_refcount; /* # users */ int ksnp_sharecount; /* lconf usage counter */ int ksnp_closing; /* being closed */ @@ -499,9 +499,9 @@ ptl_err_t ksocknal_recv_pages(ptl_ni_t *ni, void *private, size_t mlen, size_t rlen); ptl_err_t ksocknal_accept(ptl_ni_t *ni, struct socket *sock); -extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_nid_t nid, __u32 ip, int port); -extern ksock_peer_t *ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid); -extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid); +extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_process_id_t id, __u32 ip, int port); +extern ksock_peer_t 
*ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id); +extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id); extern int ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route, struct socket *sock, int type); extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); @@ -509,7 +509,7 @@ extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); -extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); +extern int ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr); extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); extern void ksocknal_tx_done (ksock_peer_t *peer, ksock_tx_t *tx, int asynch); @@ -525,8 +525,8 @@ extern int ksocknal_reaper (void *arg); extern int ksocknal_send_hello (ptl_ni_t *ni, ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs); extern int ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, - ptl_nid_t *nid, __u64 *incarnation, - __u32 *ipaddrs); + ptl_process_id_t *id, + __u64 *incarnation, __u32 *ipaddrs); extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn); extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 3105543..2d12fe4 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -480,7 +480,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) } CERROR("[%p] Error %d on write to %s" " ip %d.%d.%d.%d:%d\n", conn, rc, - libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); } @@ -590,7 +590,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) LASSERT(tx->tx_resid 
== tx->tx_nob); CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n", - libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); @@ -682,7 +682,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer) } int -ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid) +ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_process_id_t id) { unsigned long flags; ksock_peer_t *peer; @@ -716,7 +716,7 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid) for (retry = 0;; retry = 1) { #if !SOCKNAL_ROUND_ROBIN read_lock (g_lock); - peer = ksocknal_find_peer_locked(ni, nid); + peer = ksocknal_find_peer_locked(ni, id); if (peer != NULL) { if (ksocknal_find_connectable_route_locked(peer) == NULL) { conn = ksocknal_find_conn_locked (tx, peer); @@ -736,22 +736,29 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid) #endif write_lock_irqsave(g_lock, flags); - peer = ksocknal_find_peer_locked(ni, nid); + peer = ksocknal_find_peer_locked(ni, id); if (peer != NULL) break; write_unlock_irqrestore(g_lock, flags); + if (id.pid > PTL_ACCEPTOR_MAX_RESERVED_PORT) { + CERROR("Refusing to create a connection to " + "userspace process %s\n", libcfs_id2str(id)); + return -EHOSTUNREACH; + } + if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); + CERROR("Can't find peer %s\n", libcfs_id2str(id)); return -EHOSTUNREACH; } - rc = ksocknal_add_peer(ni, nid, - PTL_NIDADDR(nid), ptl_acceptor_port()); + rc = ksocknal_add_peer(ni, id, + PTL_NIDADDR(id.nid), + ptl_acceptor_port()); if (rc != 0) { CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); + libcfs_id2str(id), rc); return rc; } } @@ -784,7 +791,7 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid) write_unlock_irqrestore (g_lock, flags); - CERROR("Peer entry with no routes: %s\n", libcfs_nid2str(nid)); + CERROR("Peer entry with no routes: %s\n", libcfs_id2str(id)); 
return (-EHOSTUNREACH); } @@ -876,7 +883,7 @@ ksocknal_sendmsg(ptl_ni_t *ni, payload_offset, payload_nob); } - rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target.nid); + rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target); if (rc == 0) return (PTL_OK); @@ -911,13 +918,18 @@ ksocknal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie, void ksocknal_fwd_packet (ptl_ni_t *ni, kpr_fwd_desc_t *fwd) { - ptl_nid_t nid = fwd->kprfd_gateway_nid; + ptl_process_id_t id = {.nid = fwd->kprfd_gateway_nid, + .pid = LUSTRE_SRV_PTL_PID}; + /* CAVEAT EMPTOR: + * LUSTRE_SRV_PTL_PID assumes my target is another socknal instance and + * not a tcpnal (userspace/liblustre) instance. These can't route in + * any case until we sort out how to make the RPC replies use the same + * connections as RPC requests. */ ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch; int rc; CDEBUG (D_NET, "Forwarding [%p] -> %s (%s))\n", fwd, - libcfs_nid2str(fwd->kprfd_gateway_nid), - libcfs_nid2str(fwd->kprfd_target_nid)); + libcfs_id2str(id), libcfs_nid2str(fwd->kprfd_target_nid)); /* setup iov for hdr */ ftx->ftx_iov.iov_base = fwd->kprfd_hdr; @@ -930,7 +942,7 @@ ksocknal_fwd_packet (ptl_ni_t *ni, kpr_fwd_desc_t *fwd) ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov; ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov; - rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, nid); + rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, id); if (rc != 0) kpr_fwd_done (ni, fwd, rc); } @@ -1059,7 +1071,7 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) int payload_nob = conn->ksnc_rx_nob_left; ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid); ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid); - ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_nid; + ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_id.nid; int niov = 0; int nob = payload_nob; @@ -1260,14 +1272,14 @@ ksocknal_process_receive (ksock_conn_t *conn) if (rc == 0) CDEBUG (D_NET, "[%p] EOF from %s ip %d.%d.%d.%d:%d\n", conn, - 
libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); else if (!conn->ksnc_closing) CERROR ("[%p] Error %d on read from %s" " ip %d.%d.%d.%d:%d\n", conn, rc, - libcfs_nid2str(conn->ksnc_peer->ksnp_nid), + libcfs_id2str(conn->ksnc_peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); @@ -1282,6 +1294,14 @@ ksocknal_process_receive (ksock_conn_t *conn) switch (conn->ksnc_rx_state) { case SOCKNAL_RX_HEADER: + if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) { + /* Userspace NAL */ + ptl_process_id_t *id = &conn->ksnc_peer->ksnp_id; + + /* Substitute process ID assigned at connection time */ + conn->ksnc_hdr.src_pid = cpu_to_le32(id->pid); + conn->ksnc_hdr.src_nid = cpu_to_le64(id->nid); + } rc = ptl_parse(conn->ksnc_peer->ksnp_ni, &conn->ksnc_hdr, conn); switch (rc) { @@ -1681,6 +1701,7 @@ ksocknal_send_hello (ptl_ni_t *ni, ksock_conn_t *conn, hmv->version_minor = cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR); hdr.src_nid = cpu_to_le64 (ni->ni_nid); + hdr.src_pid = cpu_to_le32 (ptl_getpid()); hdr.type = cpu_to_le32 (PTL_MSG_HELLO); hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs)); @@ -1730,7 +1751,8 @@ ksocknal_invert_type(int type) int ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, - ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs) + ptl_process_id_t *peerid, + __u64 *incarnation, __u32 *ipaddrs) { struct socket *sock = conn->ksnc_sock; int active; @@ -1740,9 +1762,10 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, int i; int type; ptl_hdr_t hdr; + ptl_process_id_t recv_id; ptl_magicversion_t *hmv; - active = (*nid != PTL_NID_ANY); + active = (peerid->nid != PTL_NID_ANY); timeout = active ? 
*ksocknal_tunables.ksnd_timeout : ptl_acceptor_timeout(); @@ -1831,21 +1854,32 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, return (-EPROTO); } + if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) { + /* Userspace NAL assigns peer process ID from socket */ + recv_id.pid = conn->ksnc_port; + recv_id.nid = PTL_MKNID(PTL_NIDNET(ni->ni_nid), conn->ksnc_ipaddr); + } else { + recv_id.pid = le32_to_cpu(hdr.src_pid); + recv_id.nid = le64_to_cpu (hdr.src_nid); + } + if (!active) { /* don't know peer's nid yet */ - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { - LCONSOLE_ERROR("Connected successfully to nid %s on host " + *peerid = recv_id; + } else if (peerid->pid != recv_id.pid || + peerid->nid != recv_id.nid) { + LCONSOLE_ERROR("Connected successfully to %s on host " "%u.%u.%u.%u, but they claimed they were " - "nid %s; please check your Lustre " + "%s; please check your Lustre " "configuration.\n", - libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr), - libcfs_nid2str(le64_to_cpu(hdr.src_nid))); + libcfs_id2str(*peerid), + HIPQUAD(conn->ksnc_ipaddr), + libcfs_id2str(recv_id)); - CERROR ("Connected to nid %s ip %u.%u.%u.%u " + CERROR ("Connected to %s ip %u.%u.%u.%u " "but expecting %s\n", - libcfs_nid2str(le64_to_cpu (hdr.src_nid)), + libcfs_id2str(recv_id), HIPQUAD(conn->ksnc_ipaddr), - libcfs_nid2str(*nid)); + libcfs_id2str(*peerid)); return (-EPROTO); } @@ -1856,13 +1890,13 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, conn->ksnc_type = ksocknal_invert_type(type); if (conn->ksnc_type == SOCKNAL_CONN_NONE) { CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n", - type, libcfs_nid2str(*nid), + type, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr)); return (-EPROTO); } } else if (ksocknal_invert_type(type) != conn->ksnc_type) { CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n", - conn->ksnc_type, libcfs_nid2str(*nid), + conn->ksnc_type, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr), 
le32_to_cpu(hdr.msg.hello.type)); return (-EPROTO); @@ -1876,7 +1910,7 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) { CERROR("Bad payload length %d from %s ip %u.%u.%u.%u\n", __le32_to_cpu (hdr.payload_length), - libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr)); + libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr)); } if (nips == 0) @@ -1885,7 +1919,7 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, rc = libcfs_sock_read(sock, ipaddrs, nips * sizeof(*ipaddrs), timeout); if (rc != 0) { CERROR ("Error %d reading IPs from %s ip %u.%u.%u.%u\n", - rc, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr)); + rc, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr)); return (rc); } @@ -1894,7 +1928,8 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, if (ipaddrs[i] == 0) { CERROR("Zero IP[%d] from %s ip %u.%u.%u.%u\n", - i, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr)); + i, libcfs_id2str(*peerid), + HIPQUAD(conn->ksnc_ipaddr)); return (-EPROTO); } } @@ -1934,7 +1969,7 @@ ksocknal_connect (ksock_route_t *route) write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); - rc = ptl_connect(&sock, peer->ksnp_nid, + rc = ptl_connect(&sock, peer->ksnp_id.nid, route->ksnr_myipaddr, route->ksnr_ipaddr, route->ksnr_port); if (rc != PTL_OK) @@ -1942,7 +1977,7 @@ ksocknal_connect (ksock_route_t *route) rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); if (rc != 0) { - ptl_connect_console_error(rc, peer->ksnp_nid, + ptl_connect_console_error(rc, peer->ksnp_id.nid, route->ksnr_ipaddr, route->ksnr_port); goto failed; @@ -2129,7 +2164,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) /* Something (e.g. 
failed keepalive) set the socket error */ CERROR ("Socket error %d: %s %p %d.%d.%d.%d\n", SOCK_ERROR(conn->ksnc_sock), - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), conn, HIPQUAD(conn->ksnc_ipaddr)); return (conn); @@ -2145,7 +2180,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) "may be down.\n", HIPQUAD(conn->ksnc_ipaddr)); CERROR ("Timed out RX from %s %p %d.%d.%d.%d\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), conn, HIPQUAD(conn->ksnc_ipaddr)); return (conn); } @@ -2162,7 +2197,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) "may be down.\n", HIPQUAD(conn->ksnc_ipaddr)); CERROR ("Timed out TX to %s %s%d %p %d.%d.%d.%d\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), list_empty (&conn->ksnc_tx_queue) ? "" : "Q ", SOCK_WMEM_QUEUED(conn->ksnc_sock), conn, HIPQUAD(conn->ksnc_ipaddr)); @@ -2195,7 +2230,7 @@ ksocknal_check_peer_timeouts (int idx) read_unlock (&ksocknal_data.ksnd_global_lock); CERROR ("Timeout out conn->%s ip %d.%d.%d.%d:%d\n", - libcfs_nid2str(peer->ksnp_nid), + libcfs_id2str(peer->ksnp_id), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c index 08c88dc..9660780 100644 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.c @@ -821,10 +821,10 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *fatal, if (rc != 0) { CFS_NET_EX; if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) - CERROR ("Can't connect to nid "LPX64 + CERROR ("Can't connect to %s" " local IP: %u.%u.%u.%u," " remote IP: %u.%u.%u.%u/%d: %d\n", - route->ksnr_peer->ksnp_nid, + libcfs_id2str(route->ksnr_peer->ksnp_id), HIPQUAD(route->ksnr_myipaddr), HIPQUAD(route->ksnr_ipaddr), route->ksnr_port, rc); @@ -842,10 +842,10 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *fatal, rc = so->so_error; if (rc != 0) { - CERROR ("Error %d 
waiting for connection to nid "LPX64 + CERROR ("Error %d waiting for connection to %s" " local IP: %u.%u.%u.%u," " remote IP: %u.%u.%u.%u/%d: %d\n", rc, - route->ksnr_peer->ksnp_nid, + libcfs_id2str(route->ksnr_peer->ksnp_id), HIPQUAD(route->ksnr_myipaddr), HIPQUAD(route->ksnr_ipaddr), route->ksnr_port, rc); diff --git a/lnet/libcfs/darwin/darwin-proc.c b/lnet/libcfs/darwin/darwin-proc.c index efa51aa..01251cf 100644 --- a/lnet/libcfs/darwin/darwin-proc.c +++ b/lnet/libcfs/darwin/darwin-proc.c @@ -36,6 +36,7 @@ extern unsigned int portal_debug; extern char debug_file_path[1024]; extern unsigned int portal_subsystem_debug; extern unsigned int portal_printk; +extern unsigned int portals_catastrophe; extern atomic_t portal_kmemory; extern long max_debug_mb; diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 5f02708..59d7997 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -44,6 +44,9 @@ EXPORT_SYMBOL(portal_printk); unsigned int portal_stack; EXPORT_SYMBOL(portal_stack); +unsigned int portals_catastrophe; +EXPORT_SYMBOL(portals_catastrophe); + #ifdef __KERNEL__ atomic_t portal_kmemory = ATOMIC_INIT(0); EXPORT_SYMBOL(portal_kmemory); diff --git a/lnet/libcfs/linux/linux-proc.c b/lnet/libcfs/linux/linux-proc.c index 9d43be3..deb79e3 100644 --- a/lnet/libcfs/linux/linux-proc.c +++ b/lnet/libcfs/linux/linux-proc.c @@ -71,6 +71,7 @@ enum { PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ PSDEV_PORTALS_UPCALL, /* User mode upcall script */ PSDEV_PORTALS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ + PSDEV_PORTALS_CATASTROPHE,/* if we have LBUGged or panic'd */ }; static struct ctl_table portals_table[] = { @@ -87,6 +88,8 @@ static struct ctl_table portals_table[] = { &sysctl_string}, {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter, sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_PORTALS_CATASTROPHE, "catastrophe", &portals_catastrophe, + sizeof(int), 0444, NULL, &proc_dointvec}, {0} }; diff --git 
a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c index db73ccd..b0ed502 100644 --- a/lnet/libcfs/nidstrings.c +++ b/lnet/libcfs/nidstrings.c @@ -44,7 +44,10 @@ * between getting its string and using it. */ -static char libcfs_nidstrings[128][PTL_NALFMT_SIZE]; +#define PTL_NIDSTR_COUNT 128 /* # of nidstrings */ +#define PTL_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +static char libcfs_nidstrings[PTL_NIDSTR_COUNT][PTL_NIDSTR_SIZE]; static int libcfs_nidstring_idx = 0; #ifdef __KERNEL__ @@ -153,13 +156,12 @@ libcfs_ip_addr2str(__u32 addr, char *str) __u32 netip = htonl(addr); struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET); - if (he != NULL && - strlen(he->h_name) < PTL_NALFMT_SIZE) { - strcpy(str, he->h_name); + if (he != NULL) { + snprintf(str, PTL_NIDSTR_SIZE, "%s", he->h_name); return; } #endif - snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u", + snprintf(str, PTL_NIDSTR_SIZE, "%u.%u.%u.%u", (addr >> 24) & 0xff, (addr >> 16) & 0xff, (addr >> 8) & 0xff, addr & 0xff); } @@ -221,7 +223,7 @@ libcfs_ip_str2addr(char *str, int nob, __u32 *addr) void libcfs_num_addr2str(__u32 addr, char *str) { - snprintf(str, PTL_NALFMT_SIZE, "%u", addr); + snprintf(str, PTL_NIDSTR_SIZE, "%u", addr); } int @@ -286,7 +288,7 @@ libcfs_nal2str(int nal) return nf->nf_name; str = libcfs_next_nidstring(); - snprintf(str, PTL_NALFMT_SIZE, "?%u?", nal); + snprintf(str, PTL_NIDSTR_SIZE, "?%u?", nal); return str; } @@ -310,11 +312,11 @@ libcfs_net2str(__u32 net) char *str = libcfs_next_nidstring(); if (nf == NULL) - snprintf(str, PTL_NALFMT_SIZE, "<%u:%u>", nal, num); + snprintf(str, PTL_NIDSTR_SIZE, "<%u:%u>", nal, num); else if (num == 0) - snprintf(str, PTL_NALFMT_SIZE, "%s", nf->nf_name); + snprintf(str, PTL_NIDSTR_SIZE, "%s", nf->nf_name); else - snprintf(str, PTL_NALFMT_SIZE, "%s%u", nf->nf_name, num); + snprintf(str, PTL_NIDSTR_SIZE, "%s%u", nf->nf_name, num); return str; } @@ -337,15 +339,15 @@ libcfs_nid2str(ptl_nid_t nid) str = 
libcfs_next_nidstring(); if (nf == NULL) - snprintf(str, PTL_NALFMT_SIZE, "%x@<%u:%u>", addr, nal, nnum); + snprintf(str, PTL_NIDSTR_SIZE, "%x@<%u:%u>", addr, nal, nnum); else { nf->nf_addr2str(addr, str); nob = strlen(str); if (nnum == 0) - snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s", + snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s", nf->nf_name); else - snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s%u", + snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s%u", nf->nf_name, nnum); } @@ -460,7 +462,7 @@ libcfs_nid2str(ptl_nid_t nid) { char *str = libcfs_next_nidstring(); - snprintf(str, PTL_NALFMT_SIZE, "%llx", (unsigned long long)nid); + snprintf(str, PTL_NIDSTR_SIZE, "%llx", (unsigned long long)nid); } __u32 @@ -500,7 +502,7 @@ libcfs_id2str(ptl_process_id_t id) char *str = libcfs_nid2str(id.nid); int len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); + snprintf(str + len, PTL_NIDSTR_SIZE - len, "-%u", id.pid); return str; } diff --git a/lnet/lnet/acceptor.c b/lnet/lnet/acceptor.c index b65ea5c..c311164 100644 --- a/lnet/lnet/acceptor.c +++ b/lnet/lnet/acceptor.c @@ -23,9 +23,6 @@ #define DEBUG_SUBSYSTEM S_PORTALS #include -#define MIN_RESERVED_PORT 512 -#define MAX_RESERVED_PORT 1023 - #ifdef __KERNEL__ static int acceptor_port = 988; CFS_MODULE_PARM(acceptor_port, "i", int, 0444, @@ -139,7 +136,9 @@ ptl_connect(struct socket **sockp, ptl_nid_t peer_nid, CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */ - for (port = MAX_RESERVED_PORT; port >= MIN_RESERVED_PORT; --port) { + for (port = PTL_ACCEPTOR_MAX_RESERVED_PORT; + port >= PTL_ACCEPTOR_MIN_RESERVED_PORT; + --port) { /* Iterate through reserved ports. 
*/ rc = libcfs_sock_connect(&sock, &fatal, @@ -375,9 +374,10 @@ ptl_acceptor(void *arg) } if (accept_secure_only && - peer_port > MAX_RESERVED_PORT) { + peer_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) { CERROR("Refusing connection from %u.%u.%u.%u: " - "insecure port %d\n", HIPQUAD(peer_ip), peer_port); + "insecure port %d\n", + HIPQUAD(peer_ip), peer_port); goto failed; } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index c51f882..2314c9e 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -995,7 +995,7 @@ PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) LASSERT (ptl_apini.apini_refcount > 0); /* pretty useless; just return the NID of the first local interface, - * that isn't LONAL; it has the same NID on all nodes */ + * that isn't LONAL (it has the same NID on all nodes) */ PTL_LOCK(flags); diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index edd0ce4..d93f92f 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -130,3 +130,8 @@ ptl_finalize (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_err_t status) PTL_UNLOCK(flags); } + +ptl_pid_t ptl_getpid(void) +{ + return ptl_apini.apini_pid; +} diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index 0a96659..a8310d6 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -151,6 +151,7 @@ EXPORT_SYMBOL(ptl_finalize); EXPORT_SYMBOL(ptl_parse); EXPORT_SYMBOL(ptl_create_reply_msg); EXPORT_SYMBOL(ptl_net2ni); +EXPORT_SYMBOL(ptl_getpid); MODULE_AUTHOR("Peter J. 
Braam "); MODULE_DESCRIPTION("Portals v3.1"); diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index aebf507..62a7fe4 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -45,7 +45,7 @@ void kpr_do_upcall (void *arg) { kpr_upcall_t *u = (kpr_upcall_t *)arg; - char nidstr[PTL_NALFMT_SIZE]; + char nidstr[36]; char whenstr[36]; char *argv[] = { NULL, @@ -55,7 +55,7 @@ kpr_do_upcall (void *arg) whenstr, NULL}; - strcpy(nidstr, libcfs_nid2str(u->kpru_nid)); + snprintf (nidstr, sizeof(nidstr), "%s", libcfs_nid2str(u->kpru_nid)); snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when); portals_run_upcall (argv); @@ -902,7 +902,8 @@ kpr_lookup (ptl_ni_t **nip, ptl_nid_t target_nid, int nob) } } - CERROR("Nid %s is not on a local network\n", + CERROR("Nid %s is not on a local network and " + "userspace portals does not support routing\n", libcfs_nid2str(target_nid)); return PTL_NID_ANY; diff --git a/lnet/ulnds/Makefile.am b/lnet/ulnds/Makefile.am index 3437d39..5b84424 100644 --- a/lnet/ulnds/Makefile.am +++ b/lnet/ulnds/Makefile.am @@ -4,7 +4,10 @@ noinst_LIBRARIES = libtcpnal.a endif endif -noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h +noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h \ + connection.h bridge.h procbridge.h +libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \ + dispatch.h table.h timer.h procapi.c proclib.c \ + connection.c tcpnal.c connection.h libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) libtcpnal_a_CFLAGS = $(LLCFLAGS) diff --git a/lnet/ulnds/address.c b/lnet/ulnds/address.c deleted file mode 100644 index f47964c..0000000 --- a/lnet/ulnds/address.c +++ /dev/null @@ -1,147 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* address.c: - * this file provides functions to aquire the IP address of the node - * and translate them into a NID/PID pair which supports a static - * mapping of virtual nodes into the port range of an IP socket. -*/ - -#define DEBUG_SUBSYSTEM S_NAL - -#include -#include -#include -#include -#include -#include -#include - - -/* Function: get_node_id - * Returns: a 32 bit id for this node, actually a big-endian IP address - * - * get_node_id() determines the host name and uses the resolver to - * find out its ip address. This is fairly fragile and inflexible, but - * explicitly asking about interfaces and their addresses is very - * complicated and nonportable. 
- */ -static unsigned int get_node_id(void) -{ - char buffer[255]; - unsigned int x; - struct hostent *he; - char * host_envp; - - if (!(host_envp = getenv("PTL_HOSTID"))) - { - gethostname(buffer,sizeof(buffer)); - he=gethostbyname(buffer); - if (he) - x=*(unsigned int *)he->h_addr_list[0]; - else - x = 0; - return(ntohl(x)); - } - else - { - if (host_envp[1] != 'x') - { - int a, b, c, d; - sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); - return ((a<<24) | (b<<16) | (c<<8) | d); - } - else - { - long long hostid = strtoll(host_envp, 0, 0); - return((unsigned int) hostid); - } - } -} - - -/* Function: set_address - * Arugments: t: a procnal structure to populate with the request - * - * set_address performs the bit manipulations to set the nid, pid, and - * iptop8 fields of the procnal structures. - * - * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY - */ - -#ifdef DIRECT_IP_MODE -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int port; - if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; - else port=pidrequest; - t->b_ni->ni_nid=get_node_id(); - ptl_apini.apini_pid=port; -} -#else - -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int virtnode, in_addr, port; - ptl_pid_t pid; - - /* get and remember my node id*/ - if (!getenv("PTL_VIRTNODE")) - virtnode = 0; - else - { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT - >> PNAL_VNODE_SHIFT); - virtnode = atoi(getenv("PTL_VIRTNODE")); - if (virtnode > maxvnode) - { - fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", - virtnode, maxvnode); - return; - } - } - - in_addr = get_node_id(); - - t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - pid=pidrequest; - /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. 
*/ -#ifdef notyet - if (pid==(unsigned short)PTL_PID_ANY) port = 0; -#endif - if (pid==(unsigned short)PTL_PID_ANY) - { - fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); - return; - } - else if (pid > PNAL_PID_MASK) - { - fprintf(stderr, "portal pid of %d is too large - max %d\n", - pid, PNAL_PID_MASK); - return; - } - else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - ptl_apini->apini_pid=pid; -} -#endif diff --git a/lnet/ulnds/connection.c b/lnet/ulnds/connection.c index 0828fd0..67abfe3 100644 --- a/lnet/ulnds/connection.c +++ b/lnet/ulnds/connection.c @@ -22,8 +22,7 @@ /* connection.c: This file provides a simple stateful connection manager which builds tcp connections on demand and leaves them open for - future use. It also provides the machinery to allow peers - to connect to it + future use. */ #include @@ -49,9 +48,40 @@ #include #endif -/* global variable: acceptor port */ -unsigned short tcpnal_acceptor_port = 988; +/* tunables (via environment) */ +int tcpnal_acceptor_port = 988; +int tcpnal_buffer_size = 2 * (PTL_MTU + sizeof(ptl_hdr_t)); +int tcpnal_nagle = 0; +int +tcpnal_env_param (char *name, int *val) +{ + char *env = getenv(name); + int n; + + if (env == NULL) + return 1; + + n = strlen(env); /* scanf may not assign on EOS */ + if (sscanf(env, "%i%n", val, &n) >= 1 && + n == strlen(env)) + return 1; + + CERROR("Can't parse environment variable '%s=%s'\n", + name, env); + return 0; +} + +int +tcpnal_set_global_params (void) +{ + return tcpnal_env_param("TCPNAL_ACCEPTOR_PORT", + &tcpnal_acceptor_port) && + tcpnal_env_param("TCPNAL_BUFFER_SIZE", + &tcpnal_buffer_size) && + tcpnal_env_param("TCPNAL_NAGLE", + &tcpnal_nagle); +} /* Function: compare_connection * Arguments: connection c: a connection in the hash table @@ -63,29 +93,21 @@ unsigned short tcpnal_acceptor_port = 988; static int compare_connection(void *arg1, void *arg2) { connection c = arg1; - unsigned int * id = arg2; -#if 0 - 
return((c->ip==id[0]) && (c->port==id[1])); -#else - /* CFS specific hacking */ - return (c->ip == id[0]); -#endif -} + ptl_nid_t *nid = arg2; + return (c->peer_nid == *nid); +} /* Function: connection_key * Arguments: ptl_process_id_t id: an id to hash * Returns: a not-particularily-well-distributed hash * of the id */ -static unsigned int connection_key(unsigned int *id) +static unsigned int connection_key(void *arg) { -#if 0 - return(id[0]^id[1]); -#else - /* CFS specific hacking */ - return (unsigned int) id[0]; -#endif + ptl_nid_t *nid = arg; + + return (unsigned int)(*nid); } @@ -95,11 +117,8 @@ static unsigned int connection_key(unsigned int *id) void remove_connection(void *arg) { connection c = arg; - unsigned int id[2]; - id[0]=c->ip; - id[1]=c->port; - hash_table_remove(c->m->connections,id); + hash_table_remove(c->m->connections,&c->peer_nid); close(c->fd); free(c); } @@ -149,111 +168,131 @@ static int connection_input(void *d) } -/* Function: allocate_connection - * Arguments: t: tcpnal the allocation is occuring in the context of - * dest: portal endpoint address for this connection - * fd: open file descriptor for the socket - * Returns: an allocated connection structure - * - * just encompasses the action common to active and passive - * connections of allocation and placement in the global table - */ -static connection allocate_connection(manager m, - unsigned int ip, - unsigned short port, - int fd) +static connection +allocate_connection(manager m, + ptl_nid_t nid, + int fd) { connection c=malloc(sizeof(struct connection)); - unsigned int id[2]; + c->m=m; c->fd=fd; - c->ip=ip; - c->port=port; - id[0]=ip; - id[1]=port; + c->peer_nid = nid; + register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,id); + hash_table_insert(m->connections,c,&nid); return(c); } - -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * 
- * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) +int +tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob) { - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - pthread_mutex_lock(&m->conn_lock); - allocate_connection(m,htonl(nid),0/*pid*/,fd); - pthread_mutex_unlock(&m->conn_lock); - return(1); + int rc = syscall(SYS_write, sockfd, buffer, nob); + + /* NB called on an 'empty' socket with huge buffering! */ + if (rc == nob) + return 0; + + if (rc < 0) { + CERROR("Failed to send to %s: %s\n", + libcfs_nid2str(nid), strerror(errno)); + return -1; + } + + CERROR("Short send to %s: %d/%d\n", + libcfs_nid2str(nid), rc, nob); + return -1; } -extern ptl_nid_t tcpnal_mynid; +int +tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob) +{ + int rc; + + while (nob > 0) { + rc = syscall(SYS_read, sockfd, buffer, nob); + + if (rc == 0) { + CERROR("Unexpected EOF from %s\n", + libcfs_nid2str(nid)); + return -1; + } + + if (rc < 0) { + CERROR("Failed to receive from %s: %s\n", + libcfs_nid2str(nid), strerror(errno)); + return -1; + } + + nob -= rc; + } + return 0; +} int -tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) +tcpnal_hello (int sockfd, ptl_nid_t nid) { - int rc; - int nob; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; + struct timeval tv; + __u64 incarnation; + int rc; + int nob; + ptl_acceptor_connreq_t cr; + ptl_hdr_t hdr; + ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); + gettimeofday(&tv, NULL); + incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; + + memset(&cr, 0, sizeof(cr)); + cr.acr_magic = 
PTL_PROTO_ACCEPTOR_MAGIC; + cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION; + cr.acr_nid = nid; + + CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); memset (&hdr, 0, sizeof (hdr)); hmv->magic = cpu_to_le32(PTL_PROTO_TCP_MAGIC); hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR); hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR); - hdr.src_nid = cpu_to_le64(tcpnal_mynid); - hdr.type = cpu_to_le32(PTL_MSG_HELLO); + /* hdr.src_nid/src_pid are ignored at dest */ - hdr.msg.hello.type = cpu_to_le32(type); + hdr.type = cpu_to_le32(PTL_MSG_HELLO); + hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY); hdr.msg.hello.incarnation = cpu_to_le64(incarnation); /* I don't send any interface info */ - /* Assume sufficient socket buffering for this message */ - rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); - if (rc <= 0) { - CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); - return (rc); - } + /* Assume sufficient socket buffering for these messages... */ + rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr)); + if (rc != 0) + return -1; - rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); - if (rc <= 0) { - CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid); - return (rc); - } + rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr)); + if (rc != 0) + return -1; + + rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv)); + if (rc != 0) + return -1; if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) { - CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid); - return (-EPROTO); + CERROR ("Bad magic %#08x (%#08x expected) from %s\n", + cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, + libcfs_nid2str(nid)); + return -1; } if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) || hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) { CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" - " from "LPX64"\n", + " from %s\n", le16_to_cpu (hmv->version_major), le16_to_cpu 
(hmv->version_minor), PTL_PROTO_TCP_VERSION_MAJOR, PTL_PROTO_TCP_VERSION_MINOR, - *nid); - return (-EPROTO); + libcfs_nid2str(nid)); + return -1; } #if (PTL_PROTO_TCP_VERSION_MAJOR != 1) @@ -262,59 +301,40 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) /* version 1 sends magic/version as the dest_nid of a 'hello' header, * so read the rest of it in now... */ - rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); - if (rc <= 0) { - CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", - rc, *nid); - return (rc); - } + rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); + if (rc != 0) + return -1; /* ...and check we got what we expected */ if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) { CERROR ("Expecting a HELLO hdr " - " but got type %d with %d payload from "LPX64"\n", + " but got type %d with %d payload from %s\n", le32_to_cpu (hdr.type), - le32_to_cpu (hdr.payload_length), *nid); - return (-EPROTO); + le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid)); + return -1; } if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); - return (-EPROTO); + return -1; } - if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { - CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - le64_to_cpu (hdr.src_nid), *nid); - return (-EPROTO); + if (nid != le64_to_cpu (hdr.src_nid)) { + CERROR ("Connected to %s, but expecting %s\n", + libcfs_nid2str(le64_to_cpu (hdr.src_nid)), + libcfs_nid2str(nid)); + return -1; } /* Ignore any interface info in the payload */ nob = le32_to_cpu(hdr.payload_length); - if (nob > getpagesize()) { - CERROR("Unexpected HELLO payload %d from "LPX64"\n", - nob, *nid); - return (-EPROTO); - } - if (nob > 0) { - char *space = (char *)malloc(nob); - - if (space == NULL) { - CERROR("Can't allocate scratch buffer %d\n", nob); - return 
(-ENOMEM); - } - - rc = syscall(SYS_read, sockfd, space, nob); - if (rc <= 0) { - CERROR("Error %d skipping HELLO payload from " - LPX64"\n", rc, *nid); - return (rc); - } + if (nob != 0) { + CERROR("Unexpected HELLO payload %d from %s\n", + nob, libcfs_nid2str(nid)); + return -1; } - return (0); + return 0; } /* Function: force_tcp_connection @@ -324,43 +344,56 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) * a pre-existing one, or a new connection */ connection force_tcp_connection(manager m, - unsigned int ip, - unsigned short port, + ptl_nid_t nid, procbridge pb) { - connection conn; + unsigned int ip = PTL_NIDADDR(nid); + connection conn; struct sockaddr_in addr; struct sockaddr_in locaddr; - unsigned int id[2]; - struct timeval tv; - __u64 incarnation; - - int fd; - int option; - int rc; - int rport; - ptl_nid_t peernid = PTL_NID_ANY; - - port = tcpnal_acceptor_port; - - id[0] = ip; - id[1] = port; + int fd; + int option; + int rc; pthread_mutex_lock(&m->conn_lock); - conn = hash_table_find(m->connections, id); + conn = hash_table_find(m->connections, &nid); if (conn) goto out; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); + addr.sin_port = htons(tcpnal_acceptor_port); memset(&locaddr, 0, sizeof(locaddr)); locaddr.sin_family = AF_INET; locaddr.sin_addr.s_addr = INADDR_ANY; +#if 1 /* tcpnal connects from a non-privileged port */ + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + perror("tcpnal socket failed"); + goto out; + } + + option = 1; + rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + &option, sizeof(option)); + if (rc != 0) { + perror ("Can't set SO_REUSEADDR for socket"); + close(fd); + goto out; + } + + rc = connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if (rc != 0) { + perror("Error connecting to remote host"); + close(fd); + goto out; + } +#else for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; 
--rport) { fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { @@ -401,24 +434,20 @@ connection force_tcp_connection(manager m, fprintf(stderr, "Out of ports trying to bind to a reserved port\n"); goto out; } +#endif -#if 1 - option = 1; + option = tcpnal_nagle ? 0 : 1; setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; + option = tcpnal_buffer_size; setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; + option = tcpnal_buffer_size; setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); -#endif - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) - exit(-1); + if (tcpnal_hello(fd, nid)) + goto out; - conn = allocate_connection(m, ip, port, fd); + conn = allocate_connection(m, nid, fd); /* let nal thread know this event right away */ if (conn) @@ -430,6 +459,30 @@ out: } +#if 0 /* we don't accept connections */ +/* Function: new_connection + * Arguments: t: opaque argument holding the tcpname + * Returns: 1 in order to reregister for new connection requests + * + * called when the bound service socket recieves + * a new connection request, it always accepts and + * installs a new connection + */ +static int new_connection(void *z) +{ + manager m=z; + struct sockaddr_in s; + int len=sizeof(struct sockaddr_in); + int fd=accept(m->bound,(struct sockaddr *)&s,&len); + unsigned int nid=*((unsigned int *)&s.sin_addr); + /* cfs specific hack */ + //unsigned short pid=s.sin_port; + pthread_mutex_lock(&m->conn_lock); + allocate_connection(m,htonl(nid),0/*pid*/,fd); + pthread_mutex_unlock(&m->conn_lock); + return(1); +} + /* Function: bind_socket * Arguments: t: the nal state for this interface * port: the port to attempt to bind to @@ -467,6 +520,7 @@ static int bind_socket(manager m,unsigned short port) m->port=addr.sin_port; return(1); } +#endif /* Function: shutdown_connections @@ -476,32 
+530,36 @@ static int bind_socket(manager m,unsigned short port) */ void shutdown_connections(manager m) { - close(m->bound); - remove_io_handler(m->bound_handler); - hash_destroy_table(m->connections,remove_connection); - free(m); +#if 0 + /* we don't accept connections */ + close(m->bound); + remove_io_handler(m->bound_handler); +#endif + hash_destroy_table(m->connections,remove_connection); + free(m); } /* Function: init_connections * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to * Returns: a newly allocated manager structure, or * zero if the fixed port could not be bound */ -manager init_connections(unsigned short pid, - int (*input)(void *, void *), - void *a) +manager init_connections(int (*input)(void *, void *), void *a) { manager m = (manager)malloc(sizeof(struct manager)); + m->connections = hash_create_table(compare_connection,connection_key); m->handler = input; m->handler_arg = a; pthread_mutex_init(&m->conn_lock, 0); + return m; +#if 0 if (bind_socket(m,pid)) return(m); free(m); return(0); +#endif } diff --git a/lnet/ulnds/connection.h b/lnet/ulnds/connection.h index 343ffa6..f4e8544 100644 --- a/lnet/ulnds/connection.h +++ b/lnet/ulnds/connection.h @@ -10,26 +10,26 @@ #include typedef struct manager { - table connections; + table connections; pthread_mutex_t conn_lock; /* protect connections table */ - int bound; - io_handler bound_handler; - int (*handler)(void *, void *); - void *handler_arg; - unsigned short port; +#if 0 /* we don't accept connections */ + int bound; + io_handler bound_handler; +#endif + int (*handler)(void *, void *); + void *handler_arg; + unsigned short port; } *manager; typedef struct connection { - unsigned int ip; - unsigned short port; - int fd; - manager m; + ptl_nid_t peer_nid; + int fd; + manager m; } *connection; -connection force_tcp_connection(manager m, unsigned int ip, unsigned int short, - procbridge pb); -manager init_connections(unsigned short, int (*f)(void *, void *), 
void *); +connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb); +manager init_connections(int (*f)(void *, void *), void *); void remove_connection(void *arg); void shutdown_connections(manager m); int read_connection(connection c, unsigned char *dest, int len); diff --git a/lnet/ulnds/ipmap.h b/lnet/ulnds/ipmap.h deleted file mode 100644 index 85b1e18..0000000 --- a/lnet/ulnds/ipmap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#define DIRECT_IP_MODE -#ifdef DIRECT_IP_MODE -#define PNAL_NID(in_addr, port) (in_addr) -#define PNAL_PID(pid) (pid) -#define PNAL_IP(in_addr, port) (in_addr) -#define PNAL_PORT(nid, pid) (pid) -#else - -#define PNAL_BASE_PORT 4096 -#define PNAL_HOSTID_SHIFT 24 -#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) -#define PNAL_VNODE_SHIFT 8 -#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) -#define PNAL_PID_SHIFT 8 -#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) - -#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ - << PNAL_VNODE_SHIFT) \ - | (((ntohs(port)-PNAL_BASE_PORT) >>\ - PNAL_PID_SHIFT))) -#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) - -#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ - >> PNAL_VNODE_SHIFT)\ - | (t->iptop8 << PNAL_HOSTID_SHIFT))) -#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ - << PNAL_VNODE_SHIFT) \ - | ((pid) & PNAL_PID_MASK)) \ - + PNAL_BASE_PORT)) -#endif diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c index 0538920..a13b6b7 100644 --- a/lnet/ulnds/procapi.c +++ b/lnet/ulnds/procapi.c @@ -33,13 +33,18 @@ #include #include #ifndef __CYGWIN__ -#include +# include #endif +#include #include +#include #include #include #include #include +#ifdef HAVE_GETHOSTBYNAME +# include +#endif /* XXX CFS 
workaround, to give a chance to let nal thread wake up * from waiting in select @@ -67,8 +72,6 @@ ptl_nal_t tcpnal_nal = { .nal_recv = tcpnal_recv, }; int tcpnal_running; -ptl_nid_t tcpnal_mynid; - /* Function: shutdown * Arguments: ni: the instance of me @@ -117,10 +120,15 @@ ptl_err_t procbridge_startup (ptl_ni_t *ni) { procbridge p; - bridge b; + bridge b; + + /* NB The local NID is not assigned. We only ever connect to the socknal, + * which assigns the src nid/pid on incoming non-privileged connections + * (i.e. us), and we don't accept connections. */ - LASSERT(ni->ni_nal == &tcpnal_nal); - LASSERT (!tcpnal_running); /* only single instance supported */ + LASSERT (ni->ni_nal == &tcpnal_nal); + LASSERT (!tcpnal_running); /* only single instance supported */ + LASSERT (ni->ni_interfaces[0] == NULL); /* explicit interface(s) not supported */ init_unix_timer(); @@ -170,8 +178,6 @@ procbridge_startup (ptl_ni_t *ni) if (p->nal_flags & NAL_FLAG_STOPPED) return PTL_FAIL; - /* so what a load of bollocks set_address() is... */ - ni->ni_nid = tcpnal_mynid; tcpnal_running = 1; return PTL_OK; diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h index f2de984..017db57 100644 --- a/lnet/ulnds/procbridge.h +++ b/lnet/ulnds/procbridge.h @@ -12,7 +12,6 @@ #include #include -#include #define NAL_FLAG_RUNNING 1 @@ -39,7 +38,6 @@ typedef struct nal_init_args { extern void *nal_thread(void *); -extern void set_address(bridge t,ptl_pid_t pidrequest); extern void procbridge_wakeup_nal(procbridge p); extern ptl_err_t procbridge_startup (ptl_ni_t *); diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c index a93004b..c2dda30 100644 --- a/lnet/ulnds/proclib.c +++ b/lnet/ulnds/proclib.c @@ -82,10 +82,6 @@ void *nal_thread(void *z) procbridge p=b->local; int rc; - /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass - * that to set_address... 
*/ - set_address(b, ptl_apini.apini_pid); - rc = tcpnal_init(b); /* diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am index 3437d39..5b84424 100644 --- a/lnet/ulnds/socklnd/Makefile.am +++ b/lnet/ulnds/socklnd/Makefile.am @@ -4,7 +4,10 @@ noinst_LIBRARIES = libtcpnal.a endif endif -noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h +noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h \ + connection.h bridge.h procbridge.h +libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \ + dispatch.h table.h timer.h procapi.c proclib.c \ + connection.c tcpnal.c connection.h libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS) libtcpnal_a_CFLAGS = $(LLCFLAGS) diff --git a/lnet/ulnds/socklnd/address.c b/lnet/ulnds/socklnd/address.c deleted file mode 100644 index f47964c..0000000 --- a/lnet/ulnds/socklnd/address.c +++ /dev/null @@ -1,147 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* address.c: - * this file provides functions to aquire the IP address of the node - * and translate them into a NID/PID pair which supports a static - * mapping of virtual nodes into the port range of an IP socket. -*/ - -#define DEBUG_SUBSYSTEM S_NAL - -#include -#include -#include -#include -#include -#include -#include - - -/* Function: get_node_id - * Returns: a 32 bit id for this node, actually a big-endian IP address - * - * get_node_id() determines the host name and uses the resolver to - * find out its ip address. This is fairly fragile and inflexible, but - * explicitly asking about interfaces and their addresses is very - * complicated and nonportable. - */ -static unsigned int get_node_id(void) -{ - char buffer[255]; - unsigned int x; - struct hostent *he; - char * host_envp; - - if (!(host_envp = getenv("PTL_HOSTID"))) - { - gethostname(buffer,sizeof(buffer)); - he=gethostbyname(buffer); - if (he) - x=*(unsigned int *)he->h_addr_list[0]; - else - x = 0; - return(ntohl(x)); - } - else - { - if (host_envp[1] != 'x') - { - int a, b, c, d; - sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); - return ((a<<24) | (b<<16) | (c<<8) | d); - } - else - { - long long hostid = strtoll(host_envp, 0, 0); - return((unsigned int) hostid); - } - } -} - - -/* Function: set_address - * Arugments: t: a procnal structure to populate with the request - * - * set_address performs the bit manipulations to set the nid, pid, and - * iptop8 fields of the procnal structures. 
- * - * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY - */ - -#ifdef DIRECT_IP_MODE -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int port; - if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; - else port=pidrequest; - t->b_ni->ni_nid=get_node_id(); - ptl_apini.apini_pid=port; -} -#else - -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int virtnode, in_addr, port; - ptl_pid_t pid; - - /* get and remember my node id*/ - if (!getenv("PTL_VIRTNODE")) - virtnode = 0; - else - { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT - >> PNAL_VNODE_SHIFT); - virtnode = atoi(getenv("PTL_VIRTNODE")); - if (virtnode > maxvnode) - { - fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", - virtnode, maxvnode); - return; - } - } - - in_addr = get_node_id(); - - t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - pid=pidrequest; - /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ -#ifdef notyet - if (pid==(unsigned short)PTL_PID_ANY) port = 0; -#endif - if (pid==(unsigned short)PTL_PID_ANY) - { - fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); - return; - } - else if (pid > PNAL_PID_MASK) - { - fprintf(stderr, "portal pid of %d is too large - max %d\n", - pid, PNAL_PID_MASK); - return; - } - else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - ptl_apini->apini_pid=pid; -} -#endif diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c index 0828fd0..67abfe3 100644 --- a/lnet/ulnds/socklnd/connection.c +++ b/lnet/ulnds/socklnd/connection.c @@ -22,8 +22,7 @@ /* connection.c: This file provides a simple stateful connection manager which builds tcp connections on demand and leaves them open for - future use. It also provides the machinery to allow peers - to connect to it + future use. 
*/ #include @@ -49,9 +48,40 @@ #include #endif -/* global variable: acceptor port */ -unsigned short tcpnal_acceptor_port = 988; +/* tunables (via environment) */ +int tcpnal_acceptor_port = 988; +int tcpnal_buffer_size = 2 * (PTL_MTU + sizeof(ptl_hdr_t)); +int tcpnal_nagle = 0; +int +tcpnal_env_param (char *name, int *val) +{ + char *env = getenv(name); + int n; + + if (env == NULL) + return 1; + + n = strlen(env); /* scanf may not assign on EOS */ + if (sscanf(env, "%i%n", val, &n) >= 1 && + n == strlen(env)) + return 1; + + CERROR("Can't parse environment variable '%s=%s'\n", + name, env); + return 0; +} + +int +tcpnal_set_global_params (void) +{ + return tcpnal_env_param("TCPNAL_ACCEPTOR_PORT", + &tcpnal_acceptor_port) && + tcpnal_env_param("TCPNAL_BUFFER_SIZE", + &tcpnal_buffer_size) && + tcpnal_env_param("TCPNAL_NAGLE", + &tcpnal_nagle); +} /* Function: compare_connection * Arguments: connection c: a connection in the hash table @@ -63,29 +93,21 @@ unsigned short tcpnal_acceptor_port = 988; static int compare_connection(void *arg1, void *arg2) { connection c = arg1; - unsigned int * id = arg2; -#if 0 - return((c->ip==id[0]) && (c->port==id[1])); -#else - /* CFS specific hacking */ - return (c->ip == id[0]); -#endif -} + ptl_nid_t *nid = arg2; + return (c->peer_nid == *nid); +} /* Function: connection_key * Arguments: ptl_process_id_t id: an id to hash * Returns: a not-particularily-well-distributed hash * of the id */ -static unsigned int connection_key(unsigned int *id) +static unsigned int connection_key(void *arg) { -#if 0 - return(id[0]^id[1]); -#else - /* CFS specific hacking */ - return (unsigned int) id[0]; -#endif + ptl_nid_t *nid = arg; + + return (unsigned int)(*nid); } @@ -95,11 +117,8 @@ static unsigned int connection_key(unsigned int *id) void remove_connection(void *arg) { connection c = arg; - unsigned int id[2]; - id[0]=c->ip; - id[1]=c->port; - hash_table_remove(c->m->connections,id); + hash_table_remove(c->m->connections,&c->peer_nid); 
close(c->fd); free(c); } @@ -149,111 +168,131 @@ static int connection_input(void *d) } -/* Function: allocate_connection - * Arguments: t: tcpnal the allocation is occuring in the context of - * dest: portal endpoint address for this connection - * fd: open file descriptor for the socket - * Returns: an allocated connection structure - * - * just encompasses the action common to active and passive - * connections of allocation and placement in the global table - */ -static connection allocate_connection(manager m, - unsigned int ip, - unsigned short port, - int fd) +static connection +allocate_connection(manager m, + ptl_nid_t nid, + int fd) { connection c=malloc(sizeof(struct connection)); - unsigned int id[2]; + c->m=m; c->fd=fd; - c->ip=ip; - c->port=port; - id[0]=ip; - id[1]=port; + c->peer_nid = nid; + register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,id); + hash_table_insert(m->connections,c,&nid); return(c); } - -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * - * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) +int +tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob) { - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - pthread_mutex_lock(&m->conn_lock); - allocate_connection(m,htonl(nid),0/*pid*/,fd); - pthread_mutex_unlock(&m->conn_lock); - return(1); + int rc = syscall(SYS_write, sockfd, buffer, nob); + + /* NB called on an 'empty' socket with huge buffering! 
*/ + if (rc == nob) + return 0; + + if (rc < 0) { + CERROR("Failed to send to %s: %s\n", + libcfs_nid2str(nid), strerror(errno)); + return -1; + } + + CERROR("Short send to %s: %d/%d\n", + libcfs_nid2str(nid), rc, nob); + return -1; } -extern ptl_nid_t tcpnal_mynid; +int +tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob) +{ + int rc; + + while (nob > 0) { + rc = syscall(SYS_read, sockfd, buffer, nob); + + if (rc == 0) { + CERROR("Unexpected EOF from %s\n", + libcfs_nid2str(nid)); + return -1; + } + + if (rc < 0) { + CERROR("Failed to receive from %s: %s\n", + libcfs_nid2str(nid), strerror(errno)); + return -1; + } + + nob -= rc; + } + return 0; +} int -tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) +tcpnal_hello (int sockfd, ptl_nid_t nid) { - int rc; - int nob; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; + struct timeval tv; + __u64 incarnation; + int rc; + int nob; + ptl_acceptor_connreq_t cr; + ptl_hdr_t hdr; + ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); + gettimeofday(&tv, NULL); + incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; + + memset(&cr, 0, sizeof(cr)); + cr.acr_magic = PTL_PROTO_ACCEPTOR_MAGIC; + cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION; + cr.acr_nid = nid; + + CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); memset (&hdr, 0, sizeof (hdr)); hmv->magic = cpu_to_le32(PTL_PROTO_TCP_MAGIC); hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR); hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR); - hdr.src_nid = cpu_to_le64(tcpnal_mynid); - hdr.type = cpu_to_le32(PTL_MSG_HELLO); + /* hdr.src_nid/src_pid are ignored at dest */ - hdr.msg.hello.type = cpu_to_le32(type); + hdr.type = cpu_to_le32(PTL_MSG_HELLO); + hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY); hdr.msg.hello.incarnation = cpu_to_le64(incarnation); /* I don't send any interface info */ - /* Assume sufficient socket 
buffering for this message */ - rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr)); - if (rc <= 0) { - CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid); - return (rc); - } + /* Assume sufficient socket buffering for these messages... */ + rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr)); + if (rc != 0) + return -1; - rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv)); - if (rc <= 0) { - CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid); - return (rc); - } + rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr)); + if (rc != 0) + return -1; + + rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv)); + if (rc != 0) + return -1; if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) { - CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n", - cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid); - return (-EPROTO); + CERROR ("Bad magic %#08x (%#08x expected) from %s\n", + cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, + libcfs_nid2str(nid)); + return -1; } if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) || hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) { CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" - " from "LPX64"\n", + " from %s\n", le16_to_cpu (hmv->version_major), le16_to_cpu (hmv->version_minor), PTL_PROTO_TCP_VERSION_MAJOR, PTL_PROTO_TCP_VERSION_MINOR, - *nid); - return (-EPROTO); + libcfs_nid2str(nid)); + return -1; } #if (PTL_PROTO_TCP_VERSION_MAJOR != 1) @@ -262,59 +301,40 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation) /* version 1 sends magic/version as the dest_nid of a 'hello' header, * so read the rest of it in now... 
*/ - rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); - if (rc <= 0) { - CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n", - rc, *nid); - return (rc); - } + rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv)); + if (rc != 0) + return -1; /* ...and check we got what we expected */ if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) { CERROR ("Expecting a HELLO hdr " - " but got type %d with %d payload from "LPX64"\n", + " but got type %d with %d payload from %s\n", le32_to_cpu (hdr.type), - le32_to_cpu (hdr.payload_length), *nid); - return (-EPROTO); + le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid)); + return -1; } if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) { CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n"); - return (-EPROTO); + return -1; } - if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */ - *nid = le64_to_cpu(hdr.src_nid); - } else if (*nid != le64_to_cpu (hdr.src_nid)) { - CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n", - le64_to_cpu (hdr.src_nid), *nid); - return (-EPROTO); + if (nid != le64_to_cpu (hdr.src_nid)) { + CERROR ("Connected to %s, but expecting %s\n", + libcfs_nid2str(le64_to_cpu (hdr.src_nid)), + libcfs_nid2str(nid)); + return -1; } /* Ignore any interface info in the payload */ nob = le32_to_cpu(hdr.payload_length); - if (nob > getpagesize()) { - CERROR("Unexpected HELLO payload %d from "LPX64"\n", - nob, *nid); - return (-EPROTO); - } - if (nob > 0) { - char *space = (char *)malloc(nob); - - if (space == NULL) { - CERROR("Can't allocate scratch buffer %d\n", nob); - return (-ENOMEM); - } - - rc = syscall(SYS_read, sockfd, space, nob); - if (rc <= 0) { - CERROR("Error %d skipping HELLO payload from " - LPX64"\n", rc, *nid); - return (rc); - } + if (nob != 0) { + CERROR("Unexpected HELLO payload %d from %s\n", + nob, libcfs_nid2str(nid)); + return -1; } - return (0); + return 0; } /* Function: force_tcp_connection @@ -324,43 +344,56 @@ tcpnal_hello (int sockfd, 
ptl_nid_t *nid, int type, __u64 incarnation) * a pre-existing one, or a new connection */ connection force_tcp_connection(manager m, - unsigned int ip, - unsigned short port, + ptl_nid_t nid, procbridge pb) { - connection conn; + unsigned int ip = PTL_NIDADDR(nid); + connection conn; struct sockaddr_in addr; struct sockaddr_in locaddr; - unsigned int id[2]; - struct timeval tv; - __u64 incarnation; - - int fd; - int option; - int rc; - int rport; - ptl_nid_t peernid = PTL_NID_ANY; - - port = tcpnal_acceptor_port; - - id[0] = ip; - id[1] = port; + int fd; + int option; + int rc; pthread_mutex_lock(&m->conn_lock); - conn = hash_table_find(m->connections, id); + conn = hash_table_find(m->connections, &nid); if (conn) goto out; memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); + addr.sin_port = htons(tcpnal_acceptor_port); memset(&locaddr, 0, sizeof(locaddr)); locaddr.sin_family = AF_INET; locaddr.sin_addr.s_addr = INADDR_ANY; +#if 1 /* tcpnal connects from a non-privileged port */ + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + perror("tcpnal socket failed"); + goto out; + } + + option = 1; + rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + &option, sizeof(option)); + if (rc != 0) { + perror ("Can't set SO_REUSEADDR for socket"); + close(fd); + goto out; + } + + rc = connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if (rc != 0) { + perror("Error connecting to remote host"); + close(fd); + goto out; + } +#else for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) { fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { @@ -401,24 +434,20 @@ connection force_tcp_connection(manager m, fprintf(stderr, "Out of ports trying to bind to a reserved port\n"); goto out; } +#endif -#if 1 - option = 1; + option = tcpnal_nagle ? 
0 : 1; setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = 1<<20; + option = tcpnal_buffer_size; setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = 1<<20; + option = tcpnal_buffer_size; setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); -#endif - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - /* say hello */ - if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation)) - exit(-1); + if (tcpnal_hello(fd, nid)) + goto out; - conn = allocate_connection(m, ip, port, fd); + conn = allocate_connection(m, nid, fd); /* let nal thread know this event right away */ if (conn) @@ -430,6 +459,30 @@ out: } +#if 0 /* we don't accept connections */ +/* Function: new_connection + * Arguments: t: opaque argument holding the tcpname + * Returns: 1 in order to reregister for new connection requests + * + * called when the bound service socket recieves + * a new connection request, it always accepts and + * installs a new connection + */ +static int new_connection(void *z) +{ + manager m=z; + struct sockaddr_in s; + int len=sizeof(struct sockaddr_in); + int fd=accept(m->bound,(struct sockaddr *)&s,&len); + unsigned int nid=*((unsigned int *)&s.sin_addr); + /* cfs specific hack */ + //unsigned short pid=s.sin_port; + pthread_mutex_lock(&m->conn_lock); + allocate_connection(m,htonl(nid),0/*pid*/,fd); + pthread_mutex_unlock(&m->conn_lock); + return(1); +} + /* Function: bind_socket * Arguments: t: the nal state for this interface * port: the port to attempt to bind to @@ -467,6 +520,7 @@ static int bind_socket(manager m,unsigned short port) m->port=addr.sin_port; return(1); } +#endif /* Function: shutdown_connections @@ -476,32 +530,36 @@ static int bind_socket(manager m,unsigned short port) */ void shutdown_connections(manager m) { - close(m->bound); - remove_io_handler(m->bound_handler); - hash_destroy_table(m->connections,remove_connection); - free(m); +#if 0 + /* we don't accept 
connections */ + close(m->bound); + remove_io_handler(m->bound_handler); +#endif + hash_destroy_table(m->connections,remove_connection); + free(m); } /* Function: init_connections * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to * Returns: a newly allocated manager structure, or * zero if the fixed port could not be bound */ -manager init_connections(unsigned short pid, - int (*input)(void *, void *), - void *a) +manager init_connections(int (*input)(void *, void *), void *a) { manager m = (manager)malloc(sizeof(struct manager)); + m->connections = hash_create_table(compare_connection,connection_key); m->handler = input; m->handler_arg = a; pthread_mutex_init(&m->conn_lock, 0); + return m; +#if 0 if (bind_socket(m,pid)) return(m); free(m); return(0); +#endif } diff --git a/lnet/ulnds/socklnd/connection.h b/lnet/ulnds/socklnd/connection.h index 343ffa6..f4e8544 100644 --- a/lnet/ulnds/socklnd/connection.h +++ b/lnet/ulnds/socklnd/connection.h @@ -10,26 +10,26 @@ #include typedef struct manager { - table connections; + table connections; pthread_mutex_t conn_lock; /* protect connections table */ - int bound; - io_handler bound_handler; - int (*handler)(void *, void *); - void *handler_arg; - unsigned short port; +#if 0 /* we don't accept connections */ + int bound; + io_handler bound_handler; +#endif + int (*handler)(void *, void *); + void *handler_arg; + unsigned short port; } *manager; typedef struct connection { - unsigned int ip; - unsigned short port; - int fd; - manager m; + ptl_nid_t peer_nid; + int fd; + manager m; } *connection; -connection force_tcp_connection(manager m, unsigned int ip, unsigned int short, - procbridge pb); -manager init_connections(unsigned short, int (*f)(void *, void *), void *); +connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb); +manager init_connections(int (*f)(void *, void *), void *); void remove_connection(void *arg); void shutdown_connections(manager m); int 
read_connection(connection c, unsigned char *dest, int len); diff --git a/lnet/ulnds/socklnd/ipmap.h b/lnet/ulnds/socklnd/ipmap.h deleted file mode 100644 index 85b1e18..0000000 --- a/lnet/ulnds/socklnd/ipmap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#define DIRECT_IP_MODE -#ifdef DIRECT_IP_MODE -#define PNAL_NID(in_addr, port) (in_addr) -#define PNAL_PID(pid) (pid) -#define PNAL_IP(in_addr, port) (in_addr) -#define PNAL_PORT(nid, pid) (pid) -#else - -#define PNAL_BASE_PORT 4096 -#define PNAL_HOSTID_SHIFT 24 -#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) -#define PNAL_VNODE_SHIFT 8 -#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) -#define PNAL_PID_SHIFT 8 -#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) - -#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ - << PNAL_VNODE_SHIFT) \ - | (((ntohs(port)-PNAL_BASE_PORT) >>\ - PNAL_PID_SHIFT))) -#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) - -#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ - >> PNAL_VNODE_SHIFT)\ - | (t->iptop8 << PNAL_HOSTID_SHIFT))) -#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ - << PNAL_VNODE_SHIFT) \ - | ((pid) & PNAL_PID_MASK)) \ - + PNAL_BASE_PORT)) -#endif diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c index 0538920..a13b6b7 100644 --- a/lnet/ulnds/socklnd/procapi.c +++ b/lnet/ulnds/socklnd/procapi.c @@ -33,13 +33,18 @@ #include #include #ifndef __CYGWIN__ -#include +# include #endif +#include #include +#include #include #include #include #include +#ifdef HAVE_GETHOSTBYNAME +# include +#endif /* XXX CFS workaround, to give a chance to let nal thread wake up * from waiting in select @@ -67,8 +72,6 @@ ptl_nal_t tcpnal_nal = { .nal_recv = tcpnal_recv, }; int 
tcpnal_running; -ptl_nid_t tcpnal_mynid; - /* Function: shutdown * Arguments: ni: the instance of me @@ -117,10 +120,15 @@ ptl_err_t procbridge_startup (ptl_ni_t *ni) { procbridge p; - bridge b; + bridge b; + + /* NB The local NID is not assigned. We only ever connect to the socknal, + * which assigns the src nid/pid on incoming non-privileged connections + * (i.e. us), and we don't accept connections. */ - LASSERT(ni->ni_nal == &tcpnal_nal); - LASSERT (!tcpnal_running); /* only single instance supported */ + LASSERT (ni->ni_nal == &tcpnal_nal); + LASSERT (!tcpnal_running); /* only single instance supported */ + LASSERT (ni->ni_interfaces[0] == NULL); /* explicit interface(s) not supported */ init_unix_timer(); @@ -170,8 +178,6 @@ procbridge_startup (ptl_ni_t *ni) if (p->nal_flags & NAL_FLAG_STOPPED) return PTL_FAIL; - /* so what a load of bollocks set_address() is... */ - ni->ni_nid = tcpnal_mynid; tcpnal_running = 1; return PTL_OK; diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h index f2de984..017db57 100644 --- a/lnet/ulnds/socklnd/procbridge.h +++ b/lnet/ulnds/socklnd/procbridge.h @@ -12,7 +12,6 @@ #include #include -#include #define NAL_FLAG_RUNNING 1 @@ -39,7 +38,6 @@ typedef struct nal_init_args { extern void *nal_thread(void *); -extern void set_address(bridge t,ptl_pid_t pidrequest); extern void procbridge_wakeup_nal(procbridge p); extern ptl_err_t procbridge_startup (ptl_ni_t *); diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c index a93004b..c2dda30 100644 --- a/lnet/ulnds/socklnd/proclib.c +++ b/lnet/ulnds/socklnd/proclib.c @@ -82,10 +82,6 @@ void *nal_thread(void *z) procbridge p=b->local; int rc; - /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass - * that to set_address... 
*/ - set_address(b, ptl_apini.apini_pid); - rc = tcpnal_init(b); /* diff --git a/lnet/ulnds/socklnd/table.c b/lnet/ulnds/socklnd/table.c index 662775a..eb390c4 100644 --- a/lnet/ulnds/socklnd/table.c +++ b/lnet/ulnds/socklnd/table.c @@ -110,7 +110,7 @@ unsigned int key_from_string(char *s) * Returns: a pointer to the new table */ table hash_create_table (int (*compare_function)(void *, void *), - unsigned int (*key_function)(unsigned int *)) + unsigned int (*key_function)(void *)) { table new=(table)malloc(sizeof(struct table)); memset(new, 0, sizeof(struct table)); diff --git a/lnet/ulnds/socklnd/table.h b/lnet/ulnds/socklnd/table.h index 7fab586..0cb9669 100644 --- a/lnet/ulnds/socklnd/table.h +++ b/lnet/ulnds/socklnd/table.h @@ -22,13 +22,14 @@ typedef struct table { int number_of_entries; table_entry *entries; int (*compare_function)(void *, void *); - unsigned int (*key_function)(unsigned int *); + unsigned int (*key_function)(void *); } *table; /* table.c */ unsigned int key_from_int(int i); unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); +table hash_create_table(int (*compare_function)(void *, void *), + unsigned int (*key_function)(void *)); void *hash_table_find(table t, void *comparator); void hash_table_insert(table t, void *value, void *comparator); void hash_table_remove(table t, void *comparator); diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c index 8573a3d..3bbc1ec 100644 --- a/lnet/ulnds/socklnd/tcplnd.c +++ b/lnet/ulnds/socklnd/tcplnd.c @@ -80,9 +80,7 @@ ptl_err_t tcpnal_send(ptl_ni_t *ni, return PTL_FAIL; } - if (!(c=force_tcp_connection((manager)b->lower, - PNAL_IP(target.nid,b), - PNAL_PORT(target.nid,target.pid), + if (!(c=force_tcp_connection((manager)b->lower, target.nid, b->local))) return(PTL_FAIL); @@ -242,9 +240,7 @@ int tcpnal_init(bridge b) { manager m; - if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid, - 
ptl_apini.apini_pid), - from_connection,b))){ + if (!(m=init_connections(from_connection,b))){ /* TODO: this needs to shut down the newly created junk */ return(PTL_NAL_FAILED); diff --git a/lnet/ulnds/table.c b/lnet/ulnds/table.c index 662775a..eb390c4 100644 --- a/lnet/ulnds/table.c +++ b/lnet/ulnds/table.c @@ -110,7 +110,7 @@ unsigned int key_from_string(char *s) * Returns: a pointer to the new table */ table hash_create_table (int (*compare_function)(void *, void *), - unsigned int (*key_function)(unsigned int *)) + unsigned int (*key_function)(void *)) { table new=(table)malloc(sizeof(struct table)); memset(new, 0, sizeof(struct table)); diff --git a/lnet/ulnds/table.h b/lnet/ulnds/table.h index 7fab586..0cb9669 100644 --- a/lnet/ulnds/table.h +++ b/lnet/ulnds/table.h @@ -22,13 +22,14 @@ typedef struct table { int number_of_entries; table_entry *entries; int (*compare_function)(void *, void *); - unsigned int (*key_function)(unsigned int *); + unsigned int (*key_function)(void *); } *table; /* table.c */ unsigned int key_from_int(int i); unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); +table hash_create_table(int (*compare_function)(void *, void *), + unsigned int (*key_function)(void *)); void *hash_table_find(table t, void *comparator); void hash_table_insert(table t, void *value, void *comparator); void hash_table_remove(table t, void *comparator); diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c index 8573a3d..3bbc1ec 100644 --- a/lnet/ulnds/tcplnd.c +++ b/lnet/ulnds/tcplnd.c @@ -80,9 +80,7 @@ ptl_err_t tcpnal_send(ptl_ni_t *ni, return PTL_FAIL; } - if (!(c=force_tcp_connection((manager)b->lower, - PNAL_IP(target.nid,b), - PNAL_PORT(target.nid,target.pid), + if (!(c=force_tcp_connection((manager)b->lower, target.nid, b->local))) return(PTL_FAIL); @@ -242,9 +240,7 @@ int tcpnal_init(bridge b) { manager m; - if 
(!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid, - ptl_apini.apini_pid), - from_connection,b))){ + if (!(m=init_connections(from_connection,b))){ /* TODO: this needs to shut down the newly created junk */ return(PTL_NAL_FAILED); diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 057b760..886e5f7 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -512,6 +512,7 @@ int jt_ptl_print_peers (int argc, char **argv) { struct portal_ioctl_data data; + ptl_process_id_t id; char buffer[2][64]; int index; int rc; @@ -529,23 +530,26 @@ jt_ptl_print_peers (int argc, char **argv) if (rc != 0) break; - if (g_net_is_compatible(NULL, SOCKNAL, 0)) + if (g_net_is_compatible(NULL, SOCKNAL, 0)) { + id.nid = data.ioc_nid; + id.pid = data.ioc_u32[4]; printf ("%-20s [%d]%s->%s:%d #%d\n", - libcfs_nid2str(data.ioc_nid), + libcfs_id2str(id), data.ioc_count, /* persistence */ ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */ ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ data.ioc_u32[1], /* peer port */ data.ioc_u32[3]); /* conn_count */ - else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0)) + } else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0)) { printf ("%-20s [%d]@%s:%d\n", libcfs_nid2str(data.ioc_nid), data.ioc_count, ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ data.ioc_u32[1]); /* peer port */ - else + } else { printf ("%-20s [%d]\n", libcfs_nid2str(data.ioc_nid), data.ioc_count); + } } if (index == 0) { @@ -681,6 +685,7 @@ int jt_ptl_print_connections (int argc, char **argv) { struct portal_ioctl_data data; + ptl_process_id_t id; char buffer[2][64]; int index; int rc; @@ -698,9 +703,11 @@ jt_ptl_print_connections (int argc, char **argv) if (rc != 0) break; - if (g_net_is_compatible (NULL, SOCKNAL, 0)) + if (g_net_is_compatible (NULL, SOCKNAL, 0)) { + id.nid = data.ioc_nid; + id.pid = data.ioc_u32[6]; printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n", - libcfs_nid2str(data.ioc_nid), + 
libcfs_id2str(id), (data.ioc_u32[3] == SOCKNAL_CONN_ANY) ? "A" : (data.ioc_u32[3] == SOCKNAL_CONN_CONTROL) ? "C" : (data.ioc_u32[3] == SOCKNAL_CONN_BULK_IN) ? "I" : @@ -712,12 +719,13 @@ jt_ptl_print_connections (int argc, char **argv) data.ioc_count, /* tx buffer size */ data.ioc_u32[5], /* rx buffer size */ data.ioc_flags ? "nagle" : "nonagle"); - else if (g_net_is_compatible (NULL, RANAL, 0)) + } else if (g_net_is_compatible (NULL, RANAL, 0)) { printf ("%-20s [%d]\n", libcfs_nid2str(data.ioc_nid), data.ioc_u32[0] /* device id */); - else + } else { printf ("%s\n", libcfs_nid2str(data.ioc_nid)); + } } if (index == 0) { -- 1.8.3.1