* Changed socknal...
- use PID as well as NID to match connections so userspace (tcpnal) clients
can be distinguished without changing the NID format.
- unprivileged port == userspace client
- don't create new connections to userspace clients
- derive the NID/PID of a userspace client from the remote IP/port
* Changed tcpnal...
- use non-privileged ports
- no concept of own NID (peer assigns)
- don't accept connections
#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
#endif
-#define LBUG_WITH_LOC(file, func, line) do {} while(0)
+#define LBUG_WITH_LOC(file, func, line) do {portals_catastrophe = 1;} while(0)
/* --------------------------------------------------------------------- */
/* ENTRY_NESTING_SUPPORT */
#endif
-#define LUSTRE_PTL_PID 12345
+#define LUSTRE_PTL_PID 456 /* <= 1023 (TCP reserved port) */
#define _XNU_LIBCFS_H
* USER LEVEL STUFF BELOW
*/
-#define PORTAL_IOCTL_VERSION 0x00010009
+#define PORTAL_IOCTL_VERSION 0x0001000a
struct portal_ioctl_data {
__u32 ioc_len;
__u32 ioc_flags;
__u32 ioc_count;
__u32 ioc_net;
- __u32 ioc_u32[6];
+ __u32 ioc_u32[7];
__u32 ioc_inllen1;
char *ioc_inlbuf1;
RANAL = 8,
};
-#define PTL_NALFMT_SIZE 32 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+5+1) */
-
enum {
DEBUG_DAEMON_START = 1,
DEBUG_DAEMON_STOP = 2,
extern unsigned int portal_debug;
extern unsigned int portal_printk;
+/* Has there been an LBUG? */
+extern unsigned int portals_catastrophe;
+
/*
* struct ptldebug_header is defined in libcfs/<os>/libcfs.h
*/
#endif /* !CDEBUG_ENTRY_EXIT */
#else /* !1 */
+#define CDEBUG_LIMIT(mask, format, a...) do { } while (0)
#define CDEBUG(mask, format, a...) do { } while (0)
#define CWARN(format, a...) printk(KERN_WARNING format, ## a)
#define CERROR(format, a...) printk(KERN_ERR format, ## a)
#define EXIT do { } while (0)
#endif /* !1 */
#else /* !__KERNEL__ */
+#define CDEBUG_LIMIT(mask, format, a...) do { } while (0)
#define CDEBUG(mask, format, a...) do { } while (0)
#define LCONSOLE(mask, format, a...) fprintf(stderr, format, ## a)
#define CWARN(format, a...) fprintf(stderr, format, ## a)
int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
/* libcfs tcpip */
+#define PTL_ACCEPTOR_MIN_RESERVED_PORT 512
+#define PTL_ACCEPTOR_MAX_RESERVED_PORT 1023
+
int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
int libcfs_ipif_enumerate(char ***names);
void libcfs_ipif_free_enumeration(char **names, int n);
#define LBUG_WITH_LOC(file, func, line) \
do { \
CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
+ portals_catastrophe = 1; \
portals_debug_dumplog(); \
portals_run_lbug_upcall(file, func, line); \
panic("LBUG"); \
#define LBUG_WITH_LOC(file, func, line) \
do { \
CEMERG("LBUG\n"); \
+ portals_catastrophe = 1; \
portals_debug_dumpstack(NULL); \
portals_debug_dumplog(); \
portals_run_lbug_upcall(file, func, line); \
* */
#define LUSTRE_PTL_PID 9
# else
-#define LUSTRE_PTL_PID 12345
+#define LUSTRE_PTL_PID 456 /* <= 1023 (TCP reserved port) */
# endif
#define ENTRY_NESTING_SUPPORT (0)
int src_niov, ptl_kiov_t *src,
ptl_size_t offset, ptl_size_t len);
+extern ptl_pid_t ptl_getpid(void);
+
extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
int src_niov, ptl_kiov_t *src,
ptl_size_t offset, ptl_size_t len);
+extern ptl_pid_t ptl_getpid(void);
+
extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md,
ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
write_unlock_irqrestore (g_lock, flags);
if (retry) {
- CERROR("Can't find per %s\n", libcfs_nid2str(nid));
+ CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
tx->tx_status = -EHOSTUNREACH;
kibnal_tx_done (tx);
return;
static inline int
kqswnal_nid2elanid (ptl_nid_t nid)
{
- int elanid = PTL_NIDADDR(nid);
+ __u32 elanid = PTL_NIDADDR(nid);
/* not in this cluster? */
return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid;
dest_nid = le64_to_cpu(hdr->dest_nid); /* final dest */
src_nid = le64_to_cpu(hdr->src_nid); /* original source */
- sender_nid = PTL_MKNID(PTL_NIDNET(kqswnal_data.kqn_ni->ni_nid),
- ep_rxd_node(krx->krx_rxd)); /* who sent it to me */
+ sender_nid = kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)); /* who sent it to me */
#if KQSW_CHECKSUM
LASSERTF (0, "checksums for forwarded packets not implemented\n");
#endif
}
int
-ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_process_id_t id)
{
ksock_net_t *net = ni->ni_data;
ksock_peer_t *peer;
unsigned long flags;
- LASSERT (nid != PTL_NID_ANY);
+ LASSERT (id.nid != PTL_NID_ANY);
+ LASSERT (id.pid != PTL_PID_ANY);
LASSERT (!in_interrupt());
PORTAL_ALLOC (peer, sizeof (*peer));
memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
peer->ksnp_ni = ni;
- peer->ksnp_nid = nid;
+ peer->ksnp_id = id;
atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
peer->ksnp_closing = 0;
CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
unsigned long flags;
CDEBUG (D_NET, "peer %s %p deleted\n",
- libcfs_nid2str(peer->ksnp_nid), peer);
+ libcfs_id2str(peer->ksnp_id), peer);
LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
LASSERT (list_empty (&peer->ksnp_conns));
}
ksock_peer_t *
-ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id)
{
- struct list_head *peer_list = ksocknal_nid2peerlist (nid);
+ struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
struct list_head *tmp;
ksock_peer_t *peer;
if (peer->ksnp_ni != ni)
continue;
- if (peer->ksnp_nid != nid)
+ if (peer->ksnp_id.nid != id.nid ||
+ peer->ksnp_id.pid != id.pid)
continue;
CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_nid2str(nid),
+ peer, libcfs_id2str(id),
atomic_read(&peer->ksnp_refcount));
return (peer);
}
}
ksock_peer_t *
-ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id)
{
ksock_peer_t *peer;
read_lock (&ksocknal_data.ksnd_global_lock);
- peer = ksocknal_find_peer_locked (ni, nid);
+ peer = ksocknal_find_peer_locked (ni, id);
if (peer != NULL) /* +1 ref for caller? */
ksocknal_peer_addref(peer);
read_unlock (&ksocknal_data.ksnd_global_lock);
int
ksocknal_get_peer_info (ptl_ni_t *ni, int index,
- ptl_nid_t *nid, __u32 *myip, __u32 *peer_ip, int *port,
+ ptl_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
int *conn_count, int *share_count)
{
ksock_peer_t *peer;
if (index-- > 0)
continue;
- *nid = peer->ksnp_nid;
+ *id = peer->ksnp_id;
*myip = 0;
*peer_ip = 0;
*port = 0;
if (index-- > 0)
continue;
- *nid = peer->ksnp_nid;
+ *id = peer->ksnp_id;
*myip = peer->ksnp_passive_ips[j];
*peer_ip = 0;
*port = 0;
route = list_entry(rtmp, ksock_route_t,
ksnr_list);
- *nid = peer->ksnp_nid;
+ *id = peer->ksnp_id;
*myip = route->ksnr_myipaddr;
*peer_ip = route->ksnr_ipaddr;
*port = route->ksnr_port;
if (route->ksnr_myipaddr == 0) {
/* route wasn't bound locally yet (the initial route) */
CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
HIPQUAD(route->ksnr_ipaddr),
HIPQUAD(conn->ksnc_myipaddr));
} else {
CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
"%u.%u.%u.%u to %u.%u.%u.%u\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
HIPQUAD(route->ksnr_ipaddr),
HIPQUAD(route->ksnr_myipaddr),
HIPQUAD(conn->ksnc_myipaddr));
if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
CERROR ("Duplicate route %s %u.%u.%u.%u\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
HIPQUAD(route->ksnr_ipaddr));
LBUG();
}
}
int
-ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port)
+ksocknal_add_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ipaddr, int port)
{
unsigned long flags;
struct list_head *tmp;
ksock_route_t *route2;
int rc;
- if (nid == PTL_NID_ANY)
+ if (id.nid == PTL_NID_ANY ||
+ id.pid == PTL_PID_ANY)
return (-EINVAL);
/* Have a brand new peer ready... */
- rc = ksocknal_create_peer(&peer, ni, nid);
+ rc = ksocknal_create_peer(&peer, ni, id);
if (rc != 0)
return rc;
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- peer2 = ksocknal_find_peer_locked (ni, nid);
+ peer2 = ksocknal_find_peer_locked (ni, id);
if (peer2 != NULL) {
ksocknal_peer_decref(peer);
peer = peer2;
} else {
/* peer table takes my ref on peer */
list_add_tail (&peer->ksnp_list,
- ksocknal_nid2peerlist (nid));
+ ksocknal_nid2peerlist (id.nid));
}
route2 = NULL;
}
int
-ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip)
+ksocknal_del_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ip)
{
unsigned long flags;
struct list_head *ptmp;
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- if (nid != PTL_NID_ANY)
- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
+ if (id.nid != PTL_NID_ANY)
+ lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
else {
lo = 0;
hi = ksocknal_data.ksnd_peer_hash_size - 1;
if (peer->ksnp_ni != ni)
continue;
- if (!(nid == PTL_NID_ANY || peer->ksnp_nid == nid))
+ if (!((id.nid == PTL_NID_ANY || peer->ksnp_id.nid == id.nid) &&
+ (id.pid == PTL_PID_ANY || peer->ksnp_id.pid == id.pid)))
continue;
ksocknal_del_peer_locked (peer, ip);
ksock_net_t *net = (ksock_net_t *)ni->ni_data;
__u32 ipaddrs[PTL_MAX_INTERFACES];
int nipaddrs;
- ptl_nid_t nid;
+ ptl_process_id_t peerid;
struct list_head *tmp;
__u64 incarnation;
unsigned long flags;
* vector of interfaces she's willing to let me connect to.
* Passive connections use the listener timeout since the peer sends
* eagerly */
- nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid;
- rc = ksocknal_recv_hello (ni, conn, &nid, &incarnation, ipaddrs);
+ if (route == NULL) {
+ peerid.nid = PTL_NID_ANY;
+ peerid.pid = PTL_PID_ANY;
+ } else {
+ peerid = route->ksnr_peer->ksnp_id;
+ }
+
+ rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs);
if (rc < 0)
goto failed_1;
nipaddrs = rc;
- LASSERT (nid != PTL_NID_ANY);
+ LASSERT (peerid.nid != PTL_NID_ANY);
if (route != NULL) {
peer = route->ksnr_peer;
ipaddrs, nipaddrs);
rc = 0;
} else {
- rc = ksocknal_create_peer(&peer, ni, nid);
+ rc = ksocknal_create_peer(&peer, ni, peerid);
if (rc != 0)
goto failed_1;
write_lock_irqsave(global_lock, flags);
- peer2 = ksocknal_find_peer_locked(ni, nid);
+ peer2 = ksocknal_find_peer_locked(ni, peerid);
if (peer2 == NULL) {
/* NB this puts an "empty" peer in the peer
* table (which takes my ref) */
list_add_tail(&peer->ksnp_list,
- ksocknal_nid2peerlist(nid));
+ ksocknal_nid2peerlist(peerid.nid));
} else {
ksocknal_peer_decref(peer);
peer = peer2;
if (route != NULL &&
route->ksnr_ipaddr != conn->ksnc_ipaddr) {
CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
HIPQUAD(route->ksnr_ipaddr),
HIPQUAD(conn->ksnc_ipaddr));
}
rc = ksocknal_close_stale_conns_locked(peer, incarnation);
if (rc != 0)
- CERROR ("Closed %d stale conns to nid %s ip %d.%d.%d.%d\n",
- rc, libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n",
+ rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr));
write_unlock_irqrestore (global_lock, flags);
CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d"
" incarnation:"LPD64" sched[%d]/%d\n",
- libcfs_nid2str(nid), HIPQUAD(conn->ksnc_myipaddr),
+ libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr),
HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
(int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
ksocknal_connsock_decref(conn);
if (notify)
- kpr_notify (peer->ksnp_ni, peer->ksnp_nid, 0, then);
+ kpr_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, then);
}
void
case SOCKNAL_RX_BODY:
CERROR("Completing partial receive from %s"
", ip %d.%d.%d.%d:%d, with error\n",
- libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
ptl_finalize (conn->ksnc_peer->ksnp_ni, NULL,
conn->ksnc_cookie, PTL_FAIL);
CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d "
"incarnation:"LPD64"("LPD64")\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
conn->ksnc_ipaddr, conn->ksnc_port,
conn->ksnc_incarnation, incarnation);
}
int
-ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
+ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr)
{
unsigned long flags;
ksock_peer_t *peer;
write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
- if (nid != PTL_NID_ANY)
- lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
+ if (id.nid != PTL_NID_ANY)
+ lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
else {
lo = 0;
hi = ksocknal_data.ksnd_peer_hash_size - 1;
peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
- if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid))
+ if (!((id.nid == PTL_NID_ANY || id.nid == peer->ksnp_id.nid) &&
+ (id.pid == PTL_PID_ANY || id.pid == peer->ksnp_id.pid)))
continue;
count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
/* wildcards always succeed */
- if (nid == PTL_NID_ANY || ipaddr == 0)
+ if (id.nid == PTL_NID_ANY || id.pid == PTL_PID_ANY || ipaddr == 0)
return (0);
return (count == 0 ? -ENOENT : 0);
{
/* The router is telling me she's been notified of a change in
* gateway state.... */
+ ptl_process_id_t id = {.nid = gw_nid, .pid = PTL_PID_ANY};
CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
alive ? "up" : "down");
if (!alive) {
/* If the gateway crashed, close all open connections... */
- ksocknal_close_matching_conns (gw_nid, 0);
+ ksocknal_close_matching_conns (id, 0);
return;
}
}
int
-ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_push (ptl_ni_t *ni, ptl_process_id_t id)
{
ksock_peer_t *peer;
struct list_head *tmp;
int j;
int rc = -ENOENT;
- if (nid != PTL_NID_ANY) {
- peer = ksocknal_find_peer (ni, nid);
-
- if (peer != NULL) {
- rc = 0;
- ksocknal_push_peer (peer);
- ksocknal_peer_decref(peer);
- }
- return (rc);
- }
-
for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
for (j = 0; ; j++) {
read_lock (&ksocknal_data.ksnd_global_lock);
peer = NULL;
list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry(tmp, ksock_peer_t,
+ ksnp_list);
+
+ if (!((id.nid == PTL_NID_ANY ||
+ id.nid == peer->ksnp_id.nid) &&
+ (id.pid == PTL_PID_ANY ||
+ id.pid == peer->ksnp_id.pid))) {
+ peer = NULL;
+ continue;
+ }
+
if (index++ == j) {
- peer = list_entry(tmp, ksock_peer_t,
- ksnp_list);
ksocknal_peer_addref(peer);
break;
}
data->ioc_u32[0]); /* IP address */
case IOC_PORTAL_GET_PEER: {
- ptl_nid_t nid = 0;
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
+ ptl_process_id_t id = {0,};
+ __u32 myip = 0;
+ __u32 ip = 0;
+ int port = 0;
+ int conn_count = 0;
+ int share_count = 0;
rc = ksocknal_get_peer_info(ni, data->ioc_count,
- &nid, &myip, &ip, &port,
+ &id, &myip, &ip, &port,
&conn_count, &share_count);
if (rc != 0)
return rc;
- data->ioc_nid = nid;
+ data->ioc_nid = id.nid;
data->ioc_count = share_count;
data->ioc_u32[0] = ip;
data->ioc_u32[1] = port;
data->ioc_u32[2] = myip;
data->ioc_u32[3] = conn_count;
+ data->ioc_u32[4] = id.pid;
return 0;
}
- case IOC_PORTAL_ADD_PEER:
- return ksocknal_add_peer (ni,
- data->ioc_nid,
+ case IOC_PORTAL_ADD_PEER: {
+ ptl_process_id_t id = {.nid = data->ioc_nid,
+ .pid = LUSTRE_SRV_PTL_PID};
+ return ksocknal_add_peer (ni, id,
data->ioc_u32[0], /* IP */
data->ioc_u32[1]); /* port */
-
- case IOC_PORTAL_DEL_PEER:
- return ksocknal_del_peer (ni,
- data->ioc_nid,
+ }
+ case IOC_PORTAL_DEL_PEER: {
+ ptl_process_id_t id = {.nid = data->ioc_nid,
+ .pid = PTL_PID_ANY};
+ return ksocknal_del_peer (ni, id,
data->ioc_u32[0]); /* IP */
-
+ }
case IOC_PORTAL_GET_CONN: {
int txmem;
int rxmem;
ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
data->ioc_count = txmem;
- data->ioc_nid = conn->ksnc_peer->ksnp_nid;
+ data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
data->ioc_flags = nagle;
data->ioc_u32[0] = conn->ksnc_ipaddr;
data->ioc_u32[1] = conn->ksnc_port;
data->ioc_u32[4] = conn->ksnc_scheduler -
ksocknal_data.ksnd_schedulers;
data->ioc_u32[5] = rxmem;
+ data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
ksocknal_conn_decref(conn);
return 0;
}
- case IOC_PORTAL_CLOSE_CONNECTION:
- return ksocknal_close_matching_conns (data->ioc_nid,
- data->ioc_u32[0]);
+ case IOC_PORTAL_CLOSE_CONNECTION: {
+ ptl_process_id_t id = {.nid = data->ioc_nid,
+ .pid = PTL_PID_ANY};
+ return ksocknal_close_matching_conns (id,
+ data->ioc_u32[0]);
+ }
case IOC_PORTAL_REGISTER_MYNID:
/* Ignore if this is a noop */
if (data->ioc_nid == ni->ni_nid)
libcfs_nid2str(ni->ni_nid));
return -EINVAL;
- case IOC_PORTAL_PUSH_CONNECTION:
- return ksocknal_push (ni, data->ioc_nid);
-
+ case IOC_PORTAL_PUSH_CONNECTION: {
+ ptl_process_id_t id = {.nid = data->ioc_nid,
+ .pid = PTL_PID_ANY};
+
+ return ksocknal_push(ni, id);
+ }
default:
return -EINVAL;
}
void
ksocknal_shutdown (ptl_ni_t *ni)
{
- ksock_net_t *net = ni->ni_data;
- int i;
- unsigned long flags;
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ unsigned long flags;
+ ptl_process_id_t anyid = {.nid = PTL_NID_ANY,
+ .pid = PTL_PID_ANY};
LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
LASSERT(ksocknal_data.ksnd_nnets > 0);
spin_unlock_irqrestore(&net->ksnn_lock, flags);
/* Delete all peers */
- ksocknal_del_peer(ni, PTL_NID_ANY, 0);
+ ksocknal_del_peer(ni, anyid, 0);
/* Wait for all peer state to clean up */
i = 2;
CLASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
/* check ksnr_connected/connecting field large enough */
CLASSERT(SOCKNAL_CONN_NTYPES <= 4);
-
+ /* kernel PID should be in the "secure" TCP port range */
+ CLASSERT(LUSTRE_SRV_PTL_PID <= PTL_ACCEPTOR_MAX_RESERVED_PORT);
+
rc = ksocknal_lib_tunables_init();
if (rc != 0)
return rc;
typedef struct ksock_peer
{
struct list_head ksnp_list; /* stash on global peer list */
- ptl_nid_t ksnp_nid; /* who's on the other end(s) */
+ ptl_process_id_t ksnp_id; /* who's on the other end(s) */
atomic_t ksnp_refcount; /* # users */
int ksnp_sharecount; /* lconf usage counter */
int ksnp_closing; /* being closed */
size_t mlen, size_t rlen);
ptl_err_t ksocknal_accept(ptl_ni_t *ni, struct socket *sock);
-extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_nid_t nid, __u32 ip, int port);
-extern ksock_peer_t *ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid);
-extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid);
+extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_process_id_t id, __u32 ip, int port);
+extern ksock_peer_t *ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id);
+extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id);
extern int ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
struct socket *sock, int type);
extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
extern void ksocknal_destroy_conn (ksock_conn_t *conn);
extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
-extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr);
+extern int ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr);
extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
extern void ksocknal_tx_done (ksock_peer_t *peer, ksock_tx_t *tx, int asynch);
extern int ksocknal_send_hello (ptl_ni_t *ni, ksock_conn_t *conn,
__u32 *ipaddrs, int nipaddrs);
extern int ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
- ptl_nid_t *nid, __u64 *incarnation,
- __u32 *ipaddrs);
+ ptl_process_id_t *id,
+ __u64 *incarnation, __u32 *ipaddrs);
extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn);
}
CERROR("[%p] Error %d on write to %s"
" ip %d.%d.%d.%d:%d\n", conn, rc,
- libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr),
conn->ksnc_port);
}
LASSERT(tx->tx_resid == tx->tx_nob);
CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n",
- libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr),
conn->ksnc_port);
}
int
-ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid)
+ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_process_id_t id)
{
unsigned long flags;
ksock_peer_t *peer;
for (retry = 0;; retry = 1) {
#if !SOCKNAL_ROUND_ROBIN
read_lock (g_lock);
- peer = ksocknal_find_peer_locked(ni, nid);
+ peer = ksocknal_find_peer_locked(ni, id);
if (peer != NULL) {
if (ksocknal_find_connectable_route_locked(peer) == NULL) {
conn = ksocknal_find_conn_locked (tx, peer);
#endif
write_lock_irqsave(g_lock, flags);
- peer = ksocknal_find_peer_locked(ni, nid);
+ peer = ksocknal_find_peer_locked(ni, id);
if (peer != NULL)
break;
write_unlock_irqrestore(g_lock, flags);
+ if (id.pid > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
+ CERROR("Refusing to create a connection to "
+ "userspace process %s\n", libcfs_id2str(id));
+ return -EHOSTUNREACH;
+ }
+
if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
+ CERROR("Can't find peer %s\n", libcfs_id2str(id));
return -EHOSTUNREACH;
}
- rc = ksocknal_add_peer(ni, nid,
- PTL_NIDADDR(nid), ptl_acceptor_port());
+ rc = ksocknal_add_peer(ni, id,
+ PTL_NIDADDR(id.nid),
+ ptl_acceptor_port());
if (rc != 0) {
CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
+ libcfs_id2str(id), rc);
return rc;
}
}
write_unlock_irqrestore (g_lock, flags);
- CERROR("Peer entry with no routes: %s\n", libcfs_nid2str(nid));
+ CERROR("Peer entry with no routes: %s\n", libcfs_id2str(id));
return (-EHOSTUNREACH);
}
payload_offset, payload_nob);
}
- rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target.nid);
+ rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target);
if (rc == 0)
return (PTL_OK);
void
ksocknal_fwd_packet (ptl_ni_t *ni, kpr_fwd_desc_t *fwd)
{
- ptl_nid_t nid = fwd->kprfd_gateway_nid;
+ ptl_process_id_t id = {.nid = fwd->kprfd_gateway_nid,
+ .pid = LUSTRE_SRV_PTL_PID};
+ /* CAVEAT EMPTOR:
+ * LUSTRE_SRV_PTL_PID assumes my target is another socknal instance and
+ * not a tcpnal (userspace/liblustre) instance. These can't route in
+ * any case until we sort out how to make the RPC replies use the same
+ * connections as RPC requests. */
ksock_ftx_t *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
int rc;
CDEBUG (D_NET, "Forwarding [%p] -> %s (%s))\n", fwd,
- libcfs_nid2str(fwd->kprfd_gateway_nid),
- libcfs_nid2str(fwd->kprfd_target_nid));
+ libcfs_id2str(id), libcfs_nid2str(fwd->kprfd_target_nid));
/* setup iov for hdr */
ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
ftx->ftx_tx.tx_kiov = fwd->kprfd_kiov;
- rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, nid);
+ rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, id);
if (rc != 0)
kpr_fwd_done (ni, fwd, rc);
}
int payload_nob = conn->ksnc_rx_nob_left;
ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid);
ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
- ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_nid;
+ ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_id.nid;
int niov = 0;
int nob = payload_nob;
if (rc == 0)
CDEBUG (D_NET, "[%p] EOF from %s ip %d.%d.%d.%d:%d\n",
conn,
- libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr),
conn->ksnc_port);
else if (!conn->ksnc_closing)
CERROR ("[%p] Error %d on read from %s"
" ip %d.%d.%d.%d:%d\n",
conn, rc,
- libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr),
conn->ksnc_port);
switch (conn->ksnc_rx_state) {
case SOCKNAL_RX_HEADER:
+ if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
+ /* Userspace NAL */
+ ptl_process_id_t *id = &conn->ksnc_peer->ksnp_id;
+
+ /* Substitute process ID assigned at connection time */
+ conn->ksnc_hdr.src_pid = cpu_to_le32(id->pid);
+ conn->ksnc_hdr.src_nid = cpu_to_le64(id->nid);
+ }
rc = ptl_parse(conn->ksnc_peer->ksnp_ni, &conn->ksnc_hdr, conn);
switch (rc) {
hmv->version_minor = cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR);
hdr.src_nid = cpu_to_le64 (ni->ni_nid);
+ hdr.src_pid = cpu_to_le32 (ptl_getpid());
hdr.type = cpu_to_le32 (PTL_MSG_HELLO);
hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
int
ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
- ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs)
+ ptl_process_id_t *peerid,
+ __u64 *incarnation, __u32 *ipaddrs)
{
struct socket *sock = conn->ksnc_sock;
int active;
int i;
int type;
ptl_hdr_t hdr;
+ ptl_process_id_t recv_id;
ptl_magicversion_t *hmv;
- active = (*nid != PTL_NID_ANY);
+ active = (peerid->nid != PTL_NID_ANY);
timeout = active ? *ksocknal_tunables.ksnd_timeout :
ptl_acceptor_timeout();
return (-EPROTO);
}
+ if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
+ /* Userspace NAL assigns peer process ID from socket */
+ recv_id.pid = conn->ksnc_port;
+ recv_id.nid = PTL_MKNID(PTL_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+ } else {
+ recv_id.pid = le32_to_cpu(hdr.src_pid);
+ recv_id.nid = le64_to_cpu (hdr.src_nid);
+ }
+
if (!active) { /* don't know peer's nid yet */
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
- LCONSOLE_ERROR("Connected successfully to nid %s on host "
+ *peerid = recv_id;
+ } else if (peerid->pid != recv_id.pid ||
+ peerid->nid != recv_id.nid) {
+ LCONSOLE_ERROR("Connected successfully to %s on host "
"%u.%u.%u.%u, but they claimed they were "
- "nid %s; please check your Lustre "
+ "%s; please check your Lustre "
"configuration.\n",
- libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr),
- libcfs_nid2str(le64_to_cpu(hdr.src_nid)));
+ libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr),
+ libcfs_id2str(recv_id));
- CERROR ("Connected to nid %s ip %u.%u.%u.%u "
+ CERROR ("Connected to %s ip %u.%u.%u.%u "
"but expecting %s\n",
- libcfs_nid2str(le64_to_cpu (hdr.src_nid)),
+ libcfs_id2str(recv_id),
HIPQUAD(conn->ksnc_ipaddr),
- libcfs_nid2str(*nid));
+ libcfs_id2str(*peerid));
return (-EPROTO);
}
conn->ksnc_type = ksocknal_invert_type(type);
if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
- type, libcfs_nid2str(*nid),
+ type, libcfs_id2str(*peerid),
HIPQUAD(conn->ksnc_ipaddr));
return (-EPROTO);
}
} else if (ksocknal_invert_type(type) != conn->ksnc_type) {
CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
- conn->ksnc_type, libcfs_nid2str(*nid),
+ conn->ksnc_type, libcfs_id2str(*peerid),
HIPQUAD(conn->ksnc_ipaddr),
le32_to_cpu(hdr.msg.hello.type));
return (-EPROTO);
nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) {
CERROR("Bad payload length %d from %s ip %u.%u.%u.%u\n",
__le32_to_cpu (hdr.payload_length),
- libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+ libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
}
if (nips == 0)
rc = libcfs_sock_read(sock, ipaddrs, nips * sizeof(*ipaddrs), timeout);
if (rc != 0) {
CERROR ("Error %d reading IPs from %s ip %u.%u.%u.%u\n",
- rc, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+ rc, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
return (rc);
}
if (ipaddrs[i] == 0) {
CERROR("Zero IP[%d] from %s ip %u.%u.%u.%u\n",
- i, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+ i, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr));
return (-EPROTO);
}
}
write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
- rc = ptl_connect(&sock, peer->ksnp_nid,
+ rc = ptl_connect(&sock, peer->ksnp_id.nid,
route->ksnr_myipaddr,
route->ksnr_ipaddr, route->ksnr_port);
if (rc != PTL_OK)
rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
if (rc != 0) {
- ptl_connect_console_error(rc, peer->ksnp_nid,
+ ptl_connect_console_error(rc, peer->ksnp_id.nid,
route->ksnr_ipaddr,
route->ksnr_port);
goto failed;
/* Something (e.g. failed keepalive) set the socket error */
CERROR ("Socket error %d: %s %p %d.%d.%d.%d\n",
SOCK_ERROR(conn->ksnc_sock),
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
conn, HIPQUAD(conn->ksnc_ipaddr));
return (conn);
"may be down.\n",
HIPQUAD(conn->ksnc_ipaddr));
CERROR ("Timed out RX from %s %p %d.%d.%d.%d\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
conn, HIPQUAD(conn->ksnc_ipaddr));
return (conn);
}
"may be down.\n",
HIPQUAD(conn->ksnc_ipaddr));
CERROR ("Timed out TX to %s %s%d %p %d.%d.%d.%d\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
list_empty (&conn->ksnc_tx_queue) ? "" : "Q ",
SOCK_WMEM_QUEUED(conn->ksnc_sock), conn,
HIPQUAD(conn->ksnc_ipaddr));
read_unlock (&ksocknal_data.ksnd_global_lock);
CERROR ("Timeout out conn->%s ip %d.%d.%d.%d:%d\n",
- libcfs_nid2str(peer->ksnp_nid),
+ libcfs_id2str(peer->ksnp_id),
HIPQUAD(conn->ksnc_ipaddr),
conn->ksnc_port);
ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
if (rc != 0) {
CFS_NET_EX;
if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
- CERROR ("Can't connect to nid "LPX64
+ CERROR ("Can't connect to %s"
" local IP: %u.%u.%u.%u,"
" remote IP: %u.%u.%u.%u/%d: %d\n",
- route->ksnr_peer->ksnp_nid,
+ libcfs_id2str(route->ksnr_peer->ksnp_id),
HIPQUAD(route->ksnr_myipaddr),
HIPQUAD(route->ksnr_ipaddr),
route->ksnr_port, rc);
rc = so->so_error;
if (rc != 0) {
- CERROR ("Error %d waiting for connection to nid "LPX64
+ CERROR ("Error %d waiting for connection to %s"
" local IP: %u.%u.%u.%u,"
" remote IP: %u.%u.%u.%u/%d: %d\n", rc,
- route->ksnr_peer->ksnp_nid,
+ libcfs_id2str(route->ksnr_peer->ksnp_id),
HIPQUAD(route->ksnr_myipaddr),
HIPQUAD(route->ksnr_ipaddr),
route->ksnr_port, rc);
extern char debug_file_path[1024];
extern unsigned int portal_subsystem_debug;
extern unsigned int portal_printk;
+extern unsigned int portals_catastrophe;
extern atomic_t portal_kmemory;
extern long max_debug_mb;
unsigned int portal_stack;
EXPORT_SYMBOL(portal_stack);
+unsigned int portals_catastrophe;
+EXPORT_SYMBOL(portals_catastrophe);
+
#ifdef __KERNEL__
atomic_t portal_kmemory = ATOMIC_INIT(0);
EXPORT_SYMBOL(portal_kmemory);
PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
PSDEV_PORTALS_UPCALL, /* User mode upcall script */
PSDEV_PORTALS_MEMUSED, /* bytes currently PORTAL_ALLOCated */
+ PSDEV_PORTALS_CATASTROPHE,/* if we have LBUGged or panic'd */
};
static struct ctl_table portals_table[] = {
&sysctl_string},
{PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter,
sizeof(int), 0644, NULL, &proc_dointvec},
+ {PSDEV_PORTALS_CATASTROPHE, "catastrophe", &portals_catastrophe,
+ sizeof(int), 0444, NULL, &proc_dointvec},
{0}
};
* between getting its string and using it.
*/
-static char libcfs_nidstrings[128][PTL_NALFMT_SIZE];
+#define PTL_NIDSTR_COUNT 128 /* # of nidstrings */
+#define PTL_NIDSTR_SIZE 32 /* size of each one (see below for usage) */
+
+static char libcfs_nidstrings[PTL_NIDSTR_COUNT][PTL_NIDSTR_SIZE];
static int libcfs_nidstring_idx = 0;
#ifdef __KERNEL__
__u32 netip = htonl(addr);
struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET);
- if (he != NULL &&
- strlen(he->h_name) < PTL_NALFMT_SIZE) {
- strcpy(str, he->h_name);
+ if (he != NULL) {
+ snprintf(str, PTL_NIDSTR_SIZE, "%s", he->h_name);
return;
}
#endif
- snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u",
+ snprintf(str, PTL_NIDSTR_SIZE, "%u.%u.%u.%u",
(addr >> 24) & 0xff, (addr >> 16) & 0xff,
(addr >> 8) & 0xff, addr & 0xff);
}
void
libcfs_num_addr2str(__u32 addr, char *str)
{
- snprintf(str, PTL_NALFMT_SIZE, "%u", addr);
+ snprintf(str, PTL_NIDSTR_SIZE, "%u", addr);
}
int
return nf->nf_name;
str = libcfs_next_nidstring();
- snprintf(str, PTL_NALFMT_SIZE, "?%u?", nal);
+ snprintf(str, PTL_NIDSTR_SIZE, "?%u?", nal);
return str;
}
char *str = libcfs_next_nidstring();
if (nf == NULL)
- snprintf(str, PTL_NALFMT_SIZE, "<%u:%u>", nal, num);
+ snprintf(str, PTL_NIDSTR_SIZE, "<%u:%u>", nal, num);
else if (num == 0)
- snprintf(str, PTL_NALFMT_SIZE, "%s", nf->nf_name);
+ snprintf(str, PTL_NIDSTR_SIZE, "%s", nf->nf_name);
else
- snprintf(str, PTL_NALFMT_SIZE, "%s%u", nf->nf_name, num);
+ snprintf(str, PTL_NIDSTR_SIZE, "%s%u", nf->nf_name, num);
return str;
}
str = libcfs_next_nidstring();
if (nf == NULL)
- snprintf(str, PTL_NALFMT_SIZE, "%x@<%u:%u>", addr, nal, nnum);
+ snprintf(str, PTL_NIDSTR_SIZE, "%x@<%u:%u>", addr, nal, nnum);
else {
nf->nf_addr2str(addr, str);
nob = strlen(str);
if (nnum == 0)
- snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s",
+ snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s",
nf->nf_name);
else
- snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s%u",
+ snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s%u",
nf->nf_name, nnum);
}
{
char *str = libcfs_next_nidstring();
- snprintf(str, PTL_NALFMT_SIZE, "%llx", (unsigned long long)nid);
+ snprintf(str, PTL_NIDSTR_SIZE, "%llx", (unsigned long long)nid);
}
__u32
char *str = libcfs_nid2str(id.nid);
int len = strlen(str);
- snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid);
+ snprintf(str + len, PTL_NIDSTR_SIZE - len, "-%u", id.pid);
return str;
}
#define DEBUG_SUBSYSTEM S_PORTALS
#include <portals/lib-p30.h>
-#define MIN_RESERVED_PORT 512
-#define MAX_RESERVED_PORT 1023
-
#ifdef __KERNEL__
static int acceptor_port = 988;
CFS_MODULE_PARM(acceptor_port, "i", int, 0444,
CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */
- for (port = MAX_RESERVED_PORT; port >= MIN_RESERVED_PORT; --port) {
+ for (port = PTL_ACCEPTOR_MAX_RESERVED_PORT;
+ port >= PTL_ACCEPTOR_MIN_RESERVED_PORT;
+ --port) {
/* Iterate through reserved ports. */
rc = libcfs_sock_connect(&sock, &fatal,
}
if (accept_secure_only &&
- peer_port > MAX_RESERVED_PORT) {
+ peer_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
CERROR("Refusing connection from %u.%u.%u.%u: "
- "insecure port %d\n", HIPQUAD(peer_ip), peer_port);
+ "insecure port %d\n",
+ HIPQUAD(peer_ip), peer_port);
goto failed;
}
LASSERT (ptl_apini.apini_refcount > 0);
/* pretty useless; just return the NID of the first local interface,
- * that isn't LONAL; it has the same NID on all nodes */
+ * that isn't LONAL (it has the same NID on all nodes) */
PTL_LOCK(flags);
PTL_UNLOCK(flags);
}
+
+/* Accessor for the portals PID: returns the apini_pid field of ptl_apini. */
+ptl_pid_t
+ptl_getpid(void)
+{
+        ptl_pid_t pid = ptl_apini.apini_pid;
+
+        return pid;
+}
EXPORT_SYMBOL(ptl_parse);
EXPORT_SYMBOL(ptl_create_reply_msg);
EXPORT_SYMBOL(ptl_net2ni);
+EXPORT_SYMBOL(ptl_getpid);
MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
MODULE_DESCRIPTION("Portals v3.1");
kpr_do_upcall (void *arg)
{
kpr_upcall_t *u = (kpr_upcall_t *)arg;
- char nidstr[PTL_NALFMT_SIZE];
+ char nidstr[36];
char whenstr[36];
char *argv[] = {
NULL,
whenstr,
NULL};
- strcpy(nidstr, libcfs_nid2str(u->kpru_nid));
+ snprintf (nidstr, sizeof(nidstr), "%s", libcfs_nid2str(u->kpru_nid));
snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
portals_run_upcall (argv);
}
}
- CERROR("Nid %s is not on a local network\n",
+ CERROR("Nid %s is not on a local network and "
+ "userspace portals does not support routing\n",
libcfs_nid2str(target_nid));
return PTL_NID_ANY;
endif
endif
-noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h \
+ connection.h bridge.h procbridge.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
+ dispatch.h table.h timer.h procapi.c proclib.c \
+ connection.c tcpnal.c connection.h
libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
libtcpnal_a_CFLAGS = $(LLCFLAGS)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2002 Cray Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* address.c:
- * this file provides functions to aquire the IP address of the node
- * and translate them into a NID/PID pair which supports a static
- * mapping of virtual nodes into the port range of an IP socket.
-*/
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <stdlib.h>
-#include <netdb.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <portals/p30.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Function: get_node_id
- * Returns: a 32 bit id for this node, actually a big-endian IP address
- *
- * get_node_id() determines the host name and uses the resolver to
- * find out its ip address. This is fairly fragile and inflexible, but
- * explicitly asking about interfaces and their addresses is very
- * complicated and nonportable.
- */
-static unsigned int get_node_id(void)
-{
- char buffer[255];
- unsigned int x;
- struct hostent *he;
- char * host_envp;
-
- if (!(host_envp = getenv("PTL_HOSTID")))
- {
- gethostname(buffer,sizeof(buffer));
- he=gethostbyname(buffer);
- if (he)
- x=*(unsigned int *)he->h_addr_list[0];
- else
- x = 0;
- return(ntohl(x));
- }
- else
- {
- if (host_envp[1] != 'x')
- {
- int a, b, c, d;
- sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d);
- return ((a<<24) | (b<<16) | (c<<8) | d);
- }
- else
- {
- long long hostid = strtoll(host_envp, 0, 0);
- return((unsigned int) hostid);
- }
- }
-}
-
-
-/* Function: set_address
- * Arugments: t: a procnal structure to populate with the request
- *
- * set_address performs the bit manipulations to set the nid, pid, and
- * iptop8 fields of the procnal structures.
- *
- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
- */
-
-#ifdef DIRECT_IP_MODE
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
- int port;
- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
- else port=pidrequest;
- t->b_ni->ni_nid=get_node_id();
- ptl_apini.apini_pid=port;
-}
-#else
-
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
- int virtnode, in_addr, port;
- ptl_pid_t pid;
-
- /* get and remember my node id*/
- if (!getenv("PTL_VIRTNODE"))
- virtnode = 0;
- else
- {
- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
- >> PNAL_VNODE_SHIFT);
- virtnode = atoi(getenv("PTL_VIRTNODE"));
- if (virtnode > maxvnode)
- {
- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
- virtnode, maxvnode);
- return;
- }
- }
-
- in_addr = get_node_id();
-
- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
- pid=pidrequest;
- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
-#ifdef notyet
- if (pid==(unsigned short)PTL_PID_ANY) port = 0;
-#endif
- if (pid==(unsigned short)PTL_PID_ANY)
- {
- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
- return;
- }
- else if (pid > PNAL_PID_MASK)
- {
- fprintf(stderr, "portal pid of %d is too large - max %d\n",
- pid, PNAL_PID_MASK);
- return;
- }
- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- ptl_apini->apini_pid=pid;
-}
-#endif
/* connection.c:
This file provides a simple stateful connection manager which
builds tcp connections on demand and leaves them open for
- future use. It also provides the machinery to allow peers
- to connect to it
+ future use.
*/
#include <stdlib.h>
#include <syscall.h>
#endif
-/* global variable: acceptor port */
-unsigned short tcpnal_acceptor_port = 988;
+/* tunables (via environment) */
+int tcpnal_acceptor_port = 988;
+int tcpnal_buffer_size = 2 * (PTL_MTU + sizeof(ptl_hdr_t));
+int tcpnal_nagle = 0;
+/* Override the tunable '*val' from environment variable 'name'.
+ *
+ * Returns 1 if the variable is unset ('*val' is left alone) or if its whole
+ * value parsed as an integer ("%i": decimal, 0x-hex or 0-octal), in which
+ * case '*val' is updated.  Returns 0 after logging an error if the value
+ * can't be parsed in full; '*val' is NOT modified in that case, so a value
+ * like "12abc" can't leave a half-parsed setting behind. */
+int
+tcpnal_env_param (char *name, int *val)
+{
+        char   *env = getenv(name);
+        size_t  len;
+        int     parsed;
+        int     n;
+
+        if (env == NULL)
+                return 1;
+
+        len = strlen(env);
+        n = (int)len;                   /* scanf may not assign on EOS */
+        if (sscanf(env, "%i%n", &parsed, &n) >= 1 &&
+            n >= 0 && (size_t)n == len) {
+                /* the whole string was consumed: commit the new value */
+                *val = parsed;
+                return 1;
+        }
+
+        CERROR("Can't parse environment variable '%s=%s'\n",
+               name, env);
+        return 0;
+}
+
+/* Pick up the tcpnal tunables (acceptor port, socket buffer size, nagle)
+ * from the environment.  Parsing stops at the first variable that fails;
+ * returns 1 if every variable was accepted, 0 otherwise. */
+int
+tcpnal_set_global_params (void)
+{
+        if (!tcpnal_env_param("TCPNAL_ACCEPTOR_PORT",
+                              &tcpnal_acceptor_port))
+                return 0;
+
+        if (!tcpnal_env_param("TCPNAL_BUFFER_SIZE",
+                              &tcpnal_buffer_size))
+                return 0;
+
+        return tcpnal_env_param("TCPNAL_NAGLE",
+                                &tcpnal_nagle) != 0;
+}
/* Function: compare_connection
* Arguments: connection c: a connection in the hash table
static int compare_connection(void *arg1, void *arg2)
{
connection c = arg1;
- unsigned int * id = arg2;
-#if 0
- return((c->ip==id[0]) && (c->port==id[1]));
-#else
- /* CFS specific hacking */
- return (c->ip == id[0]);
-#endif
-}
+ ptl_nid_t *nid = arg2;
+ return (c->peer_nid == *nid);
+}
/* Function: connection_key
* Arguments: ptl_process_id_t id: an id to hash
* Returns: a not-particularily-well-distributed hash
* of the id
*/
-static unsigned int connection_key(unsigned int *id)
+static unsigned int connection_key(void *arg)
{
-#if 0
- return(id[0]^id[1]);
-#else
- /* CFS specific hacking */
- return (unsigned int) id[0];
-#endif
+ ptl_nid_t *nid = arg;
+
+ return (unsigned int)(*nid);
}
void remove_connection(void *arg)
{
connection c = arg;
- unsigned int id[2];
- id[0]=c->ip;
- id[1]=c->port;
- hash_table_remove(c->m->connections,id);
+ hash_table_remove(c->m->connections,&c->peer_nid);
close(c->fd);
free(c);
}
}
-/* Function: allocate_connection
- * Arguments: t: tcpnal the allocation is occuring in the context of
- * dest: portal endpoint address for this connection
- * fd: open file descriptor for the socket
- * Returns: an allocated connection structure
- *
- * just encompasses the action common to active and passive
- * connections of allocation and placement in the global table
- */
-static connection allocate_connection(manager m,
- unsigned int ip,
- unsigned short port,
- int fd)
+/* Allocate a connection to peer 'nid' on the open socket 'fd', register its
+ * read handler and enter it in the manager's connection table.
+ * Returns the new connection, or NULL if memory could not be allocated (in
+ * which case 'fd' is left open for the caller to dispose of). */
+static connection
+allocate_connection(manager m,
+                    ptl_nid_t nid,
+                    int fd)
{
connection c=malloc(sizeof(struct connection));
- unsigned int id[2];
+
+        if (c == NULL)
+                return NULL;
+
c->m=m;
c->fd=fd;
- c->ip=ip;
- c->port=port;
- id[0]=ip;
- id[1]=port;
+        c->peer_nid = nid;
+
register_io_handler(fd,READ_HANDLER,connection_input,c);
- hash_table_insert(m->connections,c,id);
+        /* key on the NID stored in the connection (as remove_connection
+         * does) rather than on the by-value parameter, whose address is
+         * invalid once this function returns */
+        hash_table_insert(m->connections,c,&c->peer_nid);
return(c);
}
-
-/* Function: new_connection
- * Arguments: t: opaque argument holding the tcpname
- * Returns: 1 in order to reregister for new connection requests
- *
- * called when the bound service socket recieves
- * a new connection request, it always accepts and
- * installs a new connection
- */
-static int new_connection(void *z)
+/* Send 'nob' bytes from 'buffer' on 'sockfd' using a single write syscall.
+ * Returns 0 only if all 'nob' bytes were written; otherwise logs an error
+ * naming 'nid' and returns -1.  'nid' is used only for the log messages. */
+int
+tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob)
{
- manager m=z;
- struct sockaddr_in s;
- int len=sizeof(struct sockaddr_in);
- int fd=accept(m->bound,(struct sockaddr *)&s,&len);
- unsigned int nid=*((unsigned int *)&s.sin_addr);
- /* cfs specific hack */
- //unsigned short pid=s.sin_port;
- pthread_mutex_lock(&m->conn_lock);
- allocate_connection(m,htonl(nid),0/*pid*/,fd);
- pthread_mutex_unlock(&m->conn_lock);
- return(1);
+ int rc = syscall(SYS_write, sockfd, buffer, nob);
+
+ /* NB called on an 'empty' socket with huge buffering! */
+ if (rc == nob)
+ return 0;
+
+ if (rc < 0) {
+ CERROR("Failed to send to %s: %s\n",
+ libcfs_nid2str(nid), strerror(errno));
+ return -1;
+ }
+
+ /* a partial write is reported as a failure; it is not retried */
+ CERROR("Short send to %s: %d/%d\n",
+ libcfs_nid2str(nid), rc, nob);
+ return -1;
}
-extern ptl_nid_t tcpnal_mynid;
+/* Read exactly 'nob' bytes from 'sockfd' into 'buffer', looping over short
+ * reads.  Returns 0 once all bytes have arrived; logs an error naming 'nid'
+ * and returns -1 on EOF or on a read error.  'nid' is used only for the log
+ * messages. */
+int
+tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob)
+{
+        char *ptr = buffer;
+        int   rc;
+
+        while (nob > 0) {
+                rc = syscall(SYS_read, sockfd, ptr, nob);
+
+                if (rc == 0) {
+                        CERROR("Unexpected EOF from %s\n",
+                               libcfs_nid2str(nid));
+                        return -1;
+                }
+
+                if (rc < 0) {
+                        CERROR("Failed to receive from %s: %s\n",
+                               libcfs_nid2str(nid), strerror(errno));
+                        return -1;
+                }
+
+                /* step past what has just arrived, so the next read
+                 * appends to it instead of overwriting it */
+                ptr += rc;
+                nob -= rc;
+        }
+        return 0;
+}
int
-tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
+tcpnal_hello (int sockfd, ptl_nid_t nid)
{
- int rc;
- int nob;
- ptl_hdr_t hdr;
- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+ struct timeval tv;
+ __u64 incarnation;
+ int rc;
+ int nob;
+ ptl_acceptor_connreq_t cr;
+ ptl_hdr_t hdr;
+ ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+ gettimeofday(&tv, NULL);
+ incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.acr_magic = PTL_PROTO_ACCEPTOR_MAGIC;
+ cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION;
+ cr.acr_nid = nid;
+
+ CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
memset (&hdr, 0, sizeof (hdr));
hmv->magic = cpu_to_le32(PTL_PROTO_TCP_MAGIC);
hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR);
hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR);
- hdr.src_nid = cpu_to_le64(tcpnal_mynid);
- hdr.type = cpu_to_le32(PTL_MSG_HELLO);
+ /* hdr.src_nid/src_pid are ignored at dest */
- hdr.msg.hello.type = cpu_to_le32(type);
+ hdr.type = cpu_to_le32(PTL_MSG_HELLO);
+ hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY);
hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
/* I don't send any interface info */
- /* Assume sufficient socket buffering for this message */
- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
- if (rc <= 0) {
- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
- return (rc);
- }
+ /* Assume sufficient socket buffering for these messages... */
+ rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr));
+ if (rc != 0)
+ return -1;
- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
- if (rc <= 0) {
- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
- return (rc);
- }
+ rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr));
+ if (rc != 0)
+ return -1;
+
+ rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv));
+ if (rc != 0)
+ return -1;
if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
- cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid);
- return (-EPROTO);
+ CERROR ("Bad magic %#08x (%#08x expected) from %s\n",
+ cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC,
+ libcfs_nid2str(nid));
+ return -1;
}
if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) ||
hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) {
CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from "LPX64"\n",
+ " from %s\n",
le16_to_cpu (hmv->version_major),
le16_to_cpu (hmv->version_minor),
PTL_PROTO_TCP_VERSION_MAJOR,
PTL_PROTO_TCP_VERSION_MINOR,
- *nid);
- return (-EPROTO);
+ libcfs_nid2str(nid));
+ return -1;
}
#if (PTL_PROTO_TCP_VERSION_MAJOR != 1)
/* version 1 sends magic/version as the dest_nid of a 'hello' header,
* so read the rest of it in now... */
- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
- if (rc <= 0) {
- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
- rc, *nid);
- return (rc);
- }
+ rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
+ if (rc != 0)
+ return -1;
/* ...and check we got what we expected */
if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
CERROR ("Expecting a HELLO hdr "
- " but got type %d with %d payload from "LPX64"\n",
+ " but got type %d with %d payload from %s\n",
le32_to_cpu (hdr.type),
- le32_to_cpu (hdr.payload_length), *nid);
- return (-EPROTO);
+ le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid));
+ return -1;
}
if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
- return (-EPROTO);
+ return -1;
}
- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
- le64_to_cpu (hdr.src_nid), *nid);
- return (-EPROTO);
+ if (nid != le64_to_cpu (hdr.src_nid)) {
+ CERROR ("Connected to %s, but expecting %s\n",
+ libcfs_nid2str(le64_to_cpu (hdr.src_nid)),
+ libcfs_nid2str(nid));
+ return -1;
}
/* Ignore any interface info in the payload */
nob = le32_to_cpu(hdr.payload_length);
- if (nob > getpagesize()) {
- CERROR("Unexpected HELLO payload %d from "LPX64"\n",
- nob, *nid);
- return (-EPROTO);
- }
- if (nob > 0) {
- char *space = (char *)malloc(nob);
-
- if (space == NULL) {
- CERROR("Can't allocate scratch buffer %d\n", nob);
- return (-ENOMEM);
- }
-
- rc = syscall(SYS_read, sockfd, space, nob);
- if (rc <= 0) {
- CERROR("Error %d skipping HELLO payload from "
- LPX64"\n", rc, *nid);
- return (rc);
- }
+ if (nob != 0) {
+ CERROR("Unexpected HELLO payload %d from %s\n",
+ nob, libcfs_nid2str(nid));
+ return -1;
}
- return (0);
+ return 0;
}
/* Function: force_tcp_connection
* a pre-existing one, or a new connection
*/
connection force_tcp_connection(manager m,
- unsigned int ip,
- unsigned short port,
+ ptl_nid_t nid,
procbridge pb)
{
- connection conn;
+ unsigned int ip = PTL_NIDADDR(nid);
+ connection conn;
struct sockaddr_in addr;
struct sockaddr_in locaddr;
- unsigned int id[2];
- struct timeval tv;
- __u64 incarnation;
-
- int fd;
- int option;
- int rc;
- int rport;
- ptl_nid_t peernid = PTL_NID_ANY;
-
- port = tcpnal_acceptor_port;
-
- id[0] = ip;
- id[1] = port;
+ int fd;
+ int option;
+ int rc;
pthread_mutex_lock(&m->conn_lock);
- conn = hash_table_find(m->connections, id);
+ conn = hash_table_find(m->connections, &nid);
if (conn)
goto out;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
+ addr.sin_port = htons(tcpnal_acceptor_port);
memset(&locaddr, 0, sizeof(locaddr));
locaddr.sin_family = AF_INET;
locaddr.sin_addr.s_addr = INADDR_ANY;
+#if 1 /* tcpnal connects from a non-privileged port */
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("tcpnal socket failed");
+ goto out;
+ }
+
+ option = 1;
+ rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ &option, sizeof(option));
+ if (rc != 0) {
+ perror ("Can't set SO_REUSEADDR for socket");
+ close(fd);
+ goto out;
+ }
+
+ rc = connect(fd, (struct sockaddr *)&addr,
+ sizeof(struct sockaddr_in));
+ if (rc != 0) {
+ perror("Error connecting to remote host");
+ close(fd);
+ goto out;
+ }
+#else
for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
goto out;
}
+#endif
-#if 1
- option = 1;
+ option = tcpnal_nagle ? 0 : 1;
setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
+ option = tcpnal_buffer_size;
setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
+ option = tcpnal_buffer_size;
setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
-#endif
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
/* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
- exit(-1);
+ if (tcpnal_hello(fd, nid))
+ goto out;
- conn = allocate_connection(m, ip, port, fd);
+ conn = allocate_connection(m, nid, fd);
/* let nal thread know this event right away */
if (conn)
}
+#if 0 /* we don't accept connections */
+/* Function: new_connection
+ * Arguments: t: opaque argument holding the tcpname
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ * called when the bound service socket receives
+ * a new connection request, it always accepts and
+ * installs a new connection
+ */
+static int new_connection(void *z)
+{
+ manager m=z;
+ struct sockaddr_in s;
+ int len=sizeof(struct sockaddr_in);
+ int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+ unsigned int nid=*((unsigned int *)&s.sin_addr);
+ /* cfs specific hack */
+ //unsigned short pid=s.sin_port;
+ pthread_mutex_lock(&m->conn_lock);
+ allocate_connection(m,htonl(nid),0/*pid*/,fd);
+ pthread_mutex_unlock(&m->conn_lock);
+ return(1);
+}
+
/* Function: bind_socket
* Arguments: t: the nal state for this interface
* port: the port to attempt to bind to
m->port=addr.sin_port;
return(1);
}
+#endif
/* Function: shutdown_connections
*/
void shutdown_connections(manager m)
{
- close(m->bound);
- remove_io_handler(m->bound_handler);
- hash_destroy_table(m->connections,remove_connection);
- free(m);
+#if 0
+ /* we don't accept connections */
+ close(m->bound);
+ remove_io_handler(m->bound_handler);
+#endif
+ hash_destroy_table(m->connections,remove_connection);
+ free(m);
}
/* Function: init_connections
* Arguments: t: the nal state for this interface
- * port: the port to attempt to bind to
* Returns: a newly allocated manager structure, or
* zero if the fixed port could not be bound
*/
-manager init_connections(unsigned short pid,
- int (*input)(void *, void *),
- void *a)
+manager init_connections(int (*input)(void *, void *), void *a)
{
manager m = (manager)malloc(sizeof(struct manager));
+
+        /* out of memory: report failure with a zero return rather than
+         * dereferencing a NULL manager below */
+        if (m == NULL)
+                return NULL;
+
m->connections = hash_create_table(compare_connection,connection_key);
m->handler = input;
m->handler_arg = a;
pthread_mutex_init(&m->conn_lock, 0);
+        /* no listening socket is bound any more (the code below is
+         * compiled out), so the manager is ready as soon as it is set up */
+        return m;
+#if 0
if (bind_socket(m,pid))
return(m);
free(m);
return(0);
+#endif
}
#include <procbridge.h>
typedef struct manager {
- table connections;
+ table connections;
pthread_mutex_t conn_lock; /* protect connections table */
- int bound;
- io_handler bound_handler;
- int (*handler)(void *, void *);
- void *handler_arg;
- unsigned short port;
+#if 0 /* we don't accept connections */
+ int bound;
+ io_handler bound_handler;
+#endif
+ int (*handler)(void *, void *);
+ void *handler_arg;
+ unsigned short port;
} *manager;
typedef struct connection {
- unsigned int ip;
- unsigned short port;
- int fd;
- manager m;
+ ptl_nid_t peer_nid;
+ int fd;
+ manager m;
} *connection;
-connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
- procbridge pb);
-manager init_connections(unsigned short, int (*f)(void *, void *), void *);
+connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb);
+manager init_connections(int (*f)(void *, void *), void *);
void remove_connection(void *arg);
void shutdown_connections(manager m);
int read_connection(connection c, unsigned char *dest, int len);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2002 Cray Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#define DIRECT_IP_MODE
-#ifdef DIRECT_IP_MODE
-#define PNAL_NID(in_addr, port) (in_addr)
-#define PNAL_PID(pid) (pid)
-#define PNAL_IP(in_addr, port) (in_addr)
-#define PNAL_PORT(nid, pid) (pid)
-#else
-
-#define PNAL_BASE_PORT 4096
-#define PNAL_HOSTID_SHIFT 24
-#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
-#define PNAL_VNODE_SHIFT 8
-#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
-#define PNAL_PID_SHIFT 8
-#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
-
-#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
- << PNAL_VNODE_SHIFT) \
- | (((ntohs(port)-PNAL_BASE_PORT) >>\
- PNAL_PID_SHIFT)))
-#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
-
-#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
- >> PNAL_VNODE_SHIFT)\
- | (t->iptop8 << PNAL_HOSTID_SHIFT)))
-#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
- << PNAL_VNODE_SHIFT) \
- | ((pid) & PNAL_PID_MASK)) \
- + PNAL_BASE_PORT))
-#endif
#include <unistd.h>
#include <string.h>
#ifndef __CYGWIN__
-#include <syscall.h>
+# include <syscall.h>
#endif
+#include <netdb.h>
#include <sys/socket.h>
+#include <netinet/in.h>
#include <procbridge.h>
#include <pqtimer.h>
#include <dispatch.h>
#include <errno.h>
+#ifdef HAVE_GETHOSTBYNAME
+# include <sys/utsname.h>
+#endif
/* XXX CFS workaround, to give a chance to let nal thread wake up
* from waiting in select
.nal_recv = tcpnal_recv,
};
int tcpnal_running;
-ptl_nid_t tcpnal_mynid;
-
/* Function: shutdown
* Arguments: ni: the instance of me
procbridge_startup (ptl_ni_t *ni)
{
procbridge p;
- bridge b;
+ bridge b;
+
+ /* NB The local NID is not assigned. We only ever connect to the socknal,
+ * which assigns the src nid/pid on incoming non-privileged connections
+ * (i.e. us), and we don't accept connections. */
- LASSERT(ni->ni_nal == &tcpnal_nal);
- LASSERT (!tcpnal_running); /* only single instance supported */
+ LASSERT (ni->ni_nal == &tcpnal_nal);
+ LASSERT (!tcpnal_running); /* only single instance supported */
+ LASSERT (ni->ni_interfaces[0] == NULL); /* explicit interface(s) not supported */
init_unix_timer();
if (p->nal_flags & NAL_FLAG_STOPPED)
return PTL_FAIL;
- /* so what a load of bollocks set_address() is... */
- ni->ni_nid = tcpnal_mynid;
tcpnal_running = 1;
return PTL_OK;
#include <pthread.h>
#include <bridge.h>
-#include <ipmap.h>
#define NAL_FLAG_RUNNING 1
extern void *nal_thread(void *);
-extern void set_address(bridge t,ptl_pid_t pidrequest);
extern void procbridge_wakeup_nal(procbridge p);
extern ptl_err_t procbridge_startup (ptl_ni_t *);
procbridge p=b->local;
int rc;
- /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass
- * that to set_address... */
- set_address(b, ptl_apini.apini_pid);
-
rc = tcpnal_init(b);
/*
endif
endif
-noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h \
+ connection.h bridge.h procbridge.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
+ dispatch.h table.h timer.h procapi.c proclib.c \
+ connection.c tcpnal.c connection.h
libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
libtcpnal_a_CFLAGS = $(LLCFLAGS)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2002 Cray Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* address.c:
- * this file provides functions to aquire the IP address of the node
- * and translate them into a NID/PID pair which supports a static
- * mapping of virtual nodes into the port range of an IP socket.
-*/
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <stdlib.h>
-#include <netdb.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <portals/p30.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Function: get_node_id
- * Returns: a 32 bit id for this node, actually a big-endian IP address
- *
- * get_node_id() determines the host name and uses the resolver to
- * find out its ip address. This is fairly fragile and inflexible, but
- * explicitly asking about interfaces and their addresses is very
- * complicated and nonportable.
- */
-static unsigned int get_node_id(void)
-{
- char buffer[255];
- unsigned int x;
- struct hostent *he;
- char * host_envp;
-
- if (!(host_envp = getenv("PTL_HOSTID")))
- {
- gethostname(buffer,sizeof(buffer));
- he=gethostbyname(buffer);
- if (he)
- x=*(unsigned int *)he->h_addr_list[0];
- else
- x = 0;
- return(ntohl(x));
- }
- else
- {
- if (host_envp[1] != 'x')
- {
- int a, b, c, d;
- sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d);
- return ((a<<24) | (b<<16) | (c<<8) | d);
- }
- else
- {
- long long hostid = strtoll(host_envp, 0, 0);
- return((unsigned int) hostid);
- }
- }
-}
-
-
-/* Function: set_address
- * Arugments: t: a procnal structure to populate with the request
- *
- * set_address performs the bit manipulations to set the nid, pid, and
- * iptop8 fields of the procnal structures.
- *
- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
- */
-
-#ifdef DIRECT_IP_MODE
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
- int port;
- if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
- else port=pidrequest;
- t->b_ni->ni_nid=get_node_id();
- ptl_apini.apini_pid=port;
-}
-#else
-
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
- int virtnode, in_addr, port;
- ptl_pid_t pid;
-
- /* get and remember my node id*/
- if (!getenv("PTL_VIRTNODE"))
- virtnode = 0;
- else
- {
- int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
- >> PNAL_VNODE_SHIFT);
- virtnode = atoi(getenv("PTL_VIRTNODE"));
- if (virtnode > maxvnode)
- {
- fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
- virtnode, maxvnode);
- return;
- }
- }
-
- in_addr = get_node_id();
-
- t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
- t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK)
- << PNAL_VNODE_SHIFT)
- + virtnode;
- pid=pidrequest;
- /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
-#ifdef notyet
- if (pid==(unsigned short)PTL_PID_ANY) port = 0;
-#endif
- if (pid==(unsigned short)PTL_PID_ANY)
- {
- fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
- return;
- }
- else if (pid > PNAL_PID_MASK)
- {
- fprintf(stderr, "portal pid of %d is too large - max %d\n",
- pid, PNAL_PID_MASK);
- return;
- }
- else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
- ptl_apini->apini_pid=pid;
-}
-#endif
/* connection.c:
This file provides a simple stateful connection manager which
builds tcp connections on demand and leaves them open for
- future use. It also provides the machinery to allow peers
- to connect to it
+ future use.
*/
#include <stdlib.h>
#include <syscall.h>
#endif
-/* global variable: acceptor port */
-unsigned short tcpnal_acceptor_port = 988;
+/* tunables (via environment) */
+int tcpnal_acceptor_port = 988;
+int tcpnal_buffer_size = 2 * (PTL_MTU + sizeof(ptl_hdr_t));
+int tcpnal_nagle = 0;
+/* Override the tunable '*val' from environment variable 'name'.
+ *
+ * Returns 1 if the variable is unset ('*val' is left alone) or if its whole
+ * value parsed as an integer ("%i": decimal, 0x-hex or 0-octal), in which
+ * case '*val' is updated.  Returns 0 after logging an error if the value
+ * can't be parsed in full; '*val' is NOT modified in that case, so a value
+ * like "12abc" can't leave a half-parsed setting behind. */
+int
+tcpnal_env_param (char *name, int *val)
+{
+        char   *env = getenv(name);
+        size_t  len;
+        int     parsed;
+        int     n;
+
+        if (env == NULL)
+                return 1;
+
+        len = strlen(env);
+        n = (int)len;                   /* scanf may not assign on EOS */
+        if (sscanf(env, "%i%n", &parsed, &n) >= 1 &&
+            n >= 0 && (size_t)n == len) {
+                /* the whole string was consumed: commit the new value */
+                *val = parsed;
+                return 1;
+        }
+
+        CERROR("Can't parse environment variable '%s=%s'\n",
+               name, env);
+        return 0;
+}
+
+/* Pick up the tcpnal tunables (acceptor port, socket buffer size, nagle)
+ * from the environment.  Parsing stops at the first variable that fails;
+ * returns 1 if every variable was accepted, 0 otherwise. */
+int
+tcpnal_set_global_params (void)
+{
+        if (!tcpnal_env_param("TCPNAL_ACCEPTOR_PORT",
+                              &tcpnal_acceptor_port))
+                return 0;
+
+        if (!tcpnal_env_param("TCPNAL_BUFFER_SIZE",
+                              &tcpnal_buffer_size))
+                return 0;
+
+        return tcpnal_env_param("TCPNAL_NAGLE",
+                                &tcpnal_nagle) != 0;
+}
/* Function: compare_connection
* Arguments: connection c: a connection in the hash table
static int compare_connection(void *arg1, void *arg2)
{
connection c = arg1;
- unsigned int * id = arg2;
-#if 0
- return((c->ip==id[0]) && (c->port==id[1]));
-#else
- /* CFS specific hacking */
- return (c->ip == id[0]);
-#endif
-}
+ ptl_nid_t *nid = arg2;
+ return (c->peer_nid == *nid);
+}
/* Function: connection_key
* Arguments: ptl_process_id_t id: an id to hash
* Returns: a not-particularily-well-distributed hash
* of the id
*/
-static unsigned int connection_key(unsigned int *id)
+static unsigned int connection_key(void *arg)
{
-#if 0
- return(id[0]^id[1]);
-#else
- /* CFS specific hacking */
- return (unsigned int) id[0];
-#endif
+ ptl_nid_t *nid = arg;
+
+ return (unsigned int)(*nid);
}
void remove_connection(void *arg)
{
connection c = arg;
- unsigned int id[2];
- id[0]=c->ip;
- id[1]=c->port;
- hash_table_remove(c->m->connections,id);
+ hash_table_remove(c->m->connections,&c->peer_nid);
close(c->fd);
free(c);
}
}
-/* Function: allocate_connection
- * Arguments: t: tcpnal the allocation is occuring in the context of
- * dest: portal endpoint address for this connection
- * fd: open file descriptor for the socket
- * Returns: an allocated connection structure
- *
- * just encompasses the action common to active and passive
- * connections of allocation and placement in the global table
- */
-static connection allocate_connection(manager m,
- unsigned int ip,
- unsigned short port,
- int fd)
+static connection
+allocate_connection(manager m,
+ ptl_nid_t nid,
+ int fd)
{
connection c=malloc(sizeof(struct connection));
- unsigned int id[2];
+
c->m=m;
c->fd=fd;
- c->ip=ip;
- c->port=port;
- id[0]=ip;
- id[1]=port;
+ c->peer_nid = nid;
+
register_io_handler(fd,READ_HANDLER,connection_input,c);
- hash_table_insert(m->connections,c,id);
+ hash_table_insert(m->connections,c,&nid);
return(c);
}
-
-/* Function: new_connection
- * Arguments: t: opaque argument holding the tcpname
- * Returns: 1 in order to reregister for new connection requests
- *
- * called when the bound service socket recieves
- * a new connection request, it always accepts and
- * installs a new connection
- */
-static int new_connection(void *z)
+int
+tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob)
{
- manager m=z;
- struct sockaddr_in s;
- int len=sizeof(struct sockaddr_in);
- int fd=accept(m->bound,(struct sockaddr *)&s,&len);
- unsigned int nid=*((unsigned int *)&s.sin_addr);
- /* cfs specific hack */
- //unsigned short pid=s.sin_port;
- pthread_mutex_lock(&m->conn_lock);
- allocate_connection(m,htonl(nid),0/*pid*/,fd);
- pthread_mutex_unlock(&m->conn_lock);
- return(1);
+ int rc = syscall(SYS_write, sockfd, buffer, nob);
+
+ /* NB called on an 'empty' socket with huge buffering! */
+ if (rc == nob)
+ return 0;
+
+ if (rc < 0) {
+ CERROR("Failed to send to %s: %s\n",
+ libcfs_nid2str(nid), strerror(errno));
+ return -1;
+ }
+
+ CERROR("Short send to %s: %d/%d\n",
+ libcfs_nid2str(nid), rc, nob);
+ return -1;
}
-extern ptl_nid_t tcpnal_mynid;
+/* Function: tcpnal_read
+ * Arguments: nid: peer NID, used only in error messages
+ *            sockfd: socket to read from
+ *            buffer: destination for the received bytes
+ *            nob: number of bytes wanted
+ * Returns: 0 once 'nob' bytes have been stored in 'buffer',
+ *          -1 on EOF or on a read error (both are logged)
+ */
+int
+tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob)
+{
+        char *ptr = buffer;
+        int   rc;
+
+        while (nob > 0) {
+                rc = syscall(SYS_read, sockfd, ptr, nob);
+
+                if (rc == 0) {
+                        CERROR("Unexpected EOF from %s\n",
+                               libcfs_nid2str(nid));
+                        return -1;
+                }
+
+                if (rc < 0) {
+                        CERROR("Failed to receive from %s: %s\n",
+                               libcfs_nid2str(nid), strerror(errno));
+                        return -1;
+                }
+
+                /* a short read must resume after the bytes already
+                 * received instead of overwriting them */
+                ptr += rc;
+                nob -= rc;
+        }
+        return 0;
+}
int
-tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
+tcpnal_hello (int sockfd, ptl_nid_t nid)
{
- int rc;
- int nob;
- ptl_hdr_t hdr;
- ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+ struct timeval tv;
+ __u64 incarnation;
+ int rc;
+ int nob;
+ ptl_acceptor_connreq_t cr;
+ ptl_hdr_t hdr;
+ ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
- LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+ gettimeofday(&tv, NULL);
+ incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+ memset(&cr, 0, sizeof(cr));
+ cr.acr_magic = PTL_PROTO_ACCEPTOR_MAGIC;
+ cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION;
+ cr.acr_nid = nid;
+
+ CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
memset (&hdr, 0, sizeof (hdr));
hmv->magic = cpu_to_le32(PTL_PROTO_TCP_MAGIC);
hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR);
hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR);
- hdr.src_nid = cpu_to_le64(tcpnal_mynid);
- hdr.type = cpu_to_le32(PTL_MSG_HELLO);
+ /* hdr.src_nid/src_pid are ignored at dest */
- hdr.msg.hello.type = cpu_to_le32(type);
+ hdr.type = cpu_to_le32(PTL_MSG_HELLO);
+ hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY);
hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
/* I don't send any interface info */
- /* Assume sufficient socket buffering for this message */
- rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
- if (rc <= 0) {
- CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
- return (rc);
- }
+ /* Assume sufficient socket buffering for these messages... */
+ rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr));
+ if (rc != 0)
+ return -1;
- rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
- if (rc <= 0) {
- CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
- return (rc);
- }
+ rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr));
+ if (rc != 0)
+ return -1;
+
+ rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv));
+ if (rc != 0)
+ return -1;
if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) {
- CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
- cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid);
- return (-EPROTO);
+ CERROR ("Bad magic %#08x (%#08x expected) from %s\n",
+ cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC,
+ libcfs_nid2str(nid));
+ return -1;
}
if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) ||
hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) {
CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
- " from "LPX64"\n",
+ " from %s\n",
le16_to_cpu (hmv->version_major),
le16_to_cpu (hmv->version_minor),
PTL_PROTO_TCP_VERSION_MAJOR,
PTL_PROTO_TCP_VERSION_MINOR,
- *nid);
- return (-EPROTO);
+ libcfs_nid2str(nid));
+ return -1;
}
#if (PTL_PROTO_TCP_VERSION_MAJOR != 1)
/* version 1 sends magic/version as the dest_nid of a 'hello' header,
* so read the rest of it in now... */
- rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
- if (rc <= 0) {
- CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
- rc, *nid);
- return (rc);
- }
+ rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
+ if (rc != 0)
+ return -1;
/* ...and check we got what we expected */
if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
CERROR ("Expecting a HELLO hdr "
- " but got type %d with %d payload from "LPX64"\n",
+ " but got type %d with %d payload from %s\n",
le32_to_cpu (hdr.type),
- le32_to_cpu (hdr.payload_length), *nid);
- return (-EPROTO);
+ le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid));
+ return -1;
}
if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
- return (-EPROTO);
+ return -1;
}
- if (*nid == PTL_NID_ANY) { /* don't know peer's nid yet */
- *nid = le64_to_cpu(hdr.src_nid);
- } else if (*nid != le64_to_cpu (hdr.src_nid)) {
- CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
- le64_to_cpu (hdr.src_nid), *nid);
- return (-EPROTO);
+ if (nid != le64_to_cpu (hdr.src_nid)) {
+ CERROR ("Connected to %s, but expecting %s\n",
+ libcfs_nid2str(le64_to_cpu (hdr.src_nid)),
+ libcfs_nid2str(nid));
+ return -1;
}
/* Ignore any interface info in the payload */
nob = le32_to_cpu(hdr.payload_length);
- if (nob > getpagesize()) {
- CERROR("Unexpected HELLO payload %d from "LPX64"\n",
- nob, *nid);
- return (-EPROTO);
- }
- if (nob > 0) {
- char *space = (char *)malloc(nob);
-
- if (space == NULL) {
- CERROR("Can't allocate scratch buffer %d\n", nob);
- return (-ENOMEM);
- }
-
- rc = syscall(SYS_read, sockfd, space, nob);
- if (rc <= 0) {
- CERROR("Error %d skipping HELLO payload from "
- LPX64"\n", rc, *nid);
- return (rc);
- }
+ if (nob != 0) {
+ CERROR("Unexpected HELLO payload %d from %s\n",
+ nob, libcfs_nid2str(nid));
+ return -1;
}
- return (0);
+ return 0;
}
/* Function: force_tcp_connection
* a pre-existing one, or a new connection
*/
connection force_tcp_connection(manager m,
- unsigned int ip,
- unsigned short port,
+ ptl_nid_t nid,
procbridge pb)
{
- connection conn;
+ unsigned int ip = PTL_NIDADDR(nid);
+ connection conn;
struct sockaddr_in addr;
struct sockaddr_in locaddr;
- unsigned int id[2];
- struct timeval tv;
- __u64 incarnation;
-
- int fd;
- int option;
- int rc;
- int rport;
- ptl_nid_t peernid = PTL_NID_ANY;
-
- port = tcpnal_acceptor_port;
-
- id[0] = ip;
- id[1] = port;
+ int fd;
+ int option;
+ int rc;
pthread_mutex_lock(&m->conn_lock);
- conn = hash_table_find(m->connections, id);
+ conn = hash_table_find(m->connections, &nid);
if (conn)
goto out;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(ip);
- addr.sin_port = htons(port);
+ addr.sin_port = htons(tcpnal_acceptor_port);
memset(&locaddr, 0, sizeof(locaddr));
locaddr.sin_family = AF_INET;
locaddr.sin_addr.s_addr = INADDR_ANY;
+#if 1 /* tcpnal connects from a non-privileged port */
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("tcpnal socket failed");
+ goto out;
+ }
+
+ option = 1;
+ rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ &option, sizeof(option));
+ if (rc != 0) {
+ perror ("Can't set SO_REUSEADDR for socket");
+ close(fd);
+ goto out;
+ }
+
+ rc = connect(fd, (struct sockaddr *)&addr,
+ sizeof(struct sockaddr_in));
+ if (rc != 0) {
+ perror("Error connecting to remote host");
+ close(fd);
+ goto out;
+ }
+#else
for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) {
fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
goto out;
}
+#endif
-#if 1
- option = 1;
+ option = tcpnal_nagle ? 0 : 1;
setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
- option = 1<<20;
+ option = tcpnal_buffer_size;
setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
- option = 1<<20;
+ option = tcpnal_buffer_size;
setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
-#endif
- gettimeofday(&tv, NULL);
- incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
/* say hello */
- if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
- exit(-1);
+ if (tcpnal_hello(fd, nid))
+ goto out;
- conn = allocate_connection(m, ip, port, fd);
+ conn = allocate_connection(m, nid, fd);
/* let nal thread know this event right away */
if (conn)
}
+#if 0 /* we don't accept connections */
+/* Function: new_connection
+ * Arguments: t: opaque argument holding the tcpname
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ * called when the bound service socket receives
+ * a new connection request, it always accepts and
+ * installs a new connection
+ */
+static int new_connection(void *z)
+{
+ manager m=z;
+ struct sockaddr_in s;
+ int len=sizeof(struct sockaddr_in);
+ int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+ unsigned int nid=*((unsigned int *)&s.sin_addr);
+ /* cfs specific hack */
+ //unsigned short pid=s.sin_port;
+ pthread_mutex_lock(&m->conn_lock);
+ allocate_connection(m,htonl(nid),0/*pid*/,fd);
+ pthread_mutex_unlock(&m->conn_lock);
+ return(1);
+}
+
/* Function: bind_socket
* Arguments: t: the nal state for this interface
* port: the port to attempt to bind to
m->port=addr.sin_port;
return(1);
}
+#endif
/* Function: shutdown_connections
*/
void shutdown_connections(manager m)
{
- close(m->bound);
- remove_io_handler(m->bound_handler);
- hash_destroy_table(m->connections,remove_connection);
- free(m);
+#if 0
+ /* we don't accept connections */
+ close(m->bound);
+ remove_io_handler(m->bound_handler);
+#endif
+ hash_destroy_table(m->connections,remove_connection);
+ free(m);
}
/* Function: init_connections
* Arguments: t: the nal state for this interface
- * port: the port to attempt to bind to
* Returns: a newly allocated manager structure, or
* zero if the fixed port could not be bound
*/
-manager init_connections(unsigned short pid,
- int (*input)(void *, void *),
- void *a)
+manager init_connections(int (*input)(void *, void *), void *a)
{
manager m = (manager)malloc(sizeof(struct manager));
+
m->connections = hash_create_table(compare_connection,connection_key);
m->handler = input;
m->handler_arg = a;
pthread_mutex_init(&m->conn_lock, 0);
+ return m;
+#if 0
if (bind_socket(m,pid))
return(m);
free(m);
return(0);
+#endif
}
#include <procbridge.h>
typedef struct manager {
- table connections;
+ table connections;
pthread_mutex_t conn_lock; /* protect connections table */
- int bound;
- io_handler bound_handler;
- int (*handler)(void *, void *);
- void *handler_arg;
- unsigned short port;
+#if 0 /* we don't accept connections */
+ int bound;
+ io_handler bound_handler;
+#endif
+ int (*handler)(void *, void *);
+ void *handler_arg;
+ unsigned short port;
} *manager;
typedef struct connection {
- unsigned int ip;
- unsigned short port;
- int fd;
- manager m;
+ ptl_nid_t peer_nid;
+ int fd;
+ manager m;
} *connection;
-connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
- procbridge pb);
-manager init_connections(unsigned short, int (*f)(void *, void *), void *);
+connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb);
+manager init_connections(int (*f)(void *, void *), void *);
void remove_connection(void *arg);
void shutdown_connections(manager m);
int read_connection(connection c, unsigned char *dest, int len);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2002 Cray Inc.
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#define DIRECT_IP_MODE
-#ifdef DIRECT_IP_MODE
-#define PNAL_NID(in_addr, port) (in_addr)
-#define PNAL_PID(pid) (pid)
-#define PNAL_IP(in_addr, port) (in_addr)
-#define PNAL_PORT(nid, pid) (pid)
-#else
-
-#define PNAL_BASE_PORT 4096
-#define PNAL_HOSTID_SHIFT 24
-#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
-#define PNAL_VNODE_SHIFT 8
-#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
-#define PNAL_PID_SHIFT 8
-#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
-
-#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
- << PNAL_VNODE_SHIFT) \
- | (((ntohs(port)-PNAL_BASE_PORT) >>\
- PNAL_PID_SHIFT)))
-#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK)
-
-#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\
- >> PNAL_VNODE_SHIFT)\
- | (t->iptop8 << PNAL_HOSTID_SHIFT)))
-#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
- << PNAL_VNODE_SHIFT) \
- | ((pid) & PNAL_PID_MASK)) \
- + PNAL_BASE_PORT))
-#endif
#include <unistd.h>
#include <string.h>
#ifndef __CYGWIN__
-#include <syscall.h>
+# include <syscall.h>
#endif
+#include <netdb.h>
#include <sys/socket.h>
+#include <netinet/in.h>
#include <procbridge.h>
#include <pqtimer.h>
#include <dispatch.h>
#include <errno.h>
+#ifdef HAVE_GETHOSTBYNAME
+# include <sys/utsname.h>
+#endif
/* XXX CFS workaround, to give a chance to let nal thread wake up
* from waiting in select
.nal_recv = tcpnal_recv,
};
int tcpnal_running;
-ptl_nid_t tcpnal_mynid;
-
/* Function: shutdown
* Arguments: ni: the instance of me
procbridge_startup (ptl_ni_t *ni)
{
procbridge p;
- bridge b;
+ bridge b;
+
+ /* NB The local NID is not assigned. We only ever connect to the socknal,
+ * which assigns the src nid/pid on incoming non-privileged connections
+ * (i.e. us), and we don't accept connections. */
- LASSERT(ni->ni_nal == &tcpnal_nal);
- LASSERT (!tcpnal_running); /* only single instance supported */
+ LASSERT (ni->ni_nal == &tcpnal_nal);
+ LASSERT (!tcpnal_running); /* only single instance supported */
+ LASSERT (ni->ni_interfaces[0] == NULL); /* explicit interface(s) not supported */
init_unix_timer();
if (p->nal_flags & NAL_FLAG_STOPPED)
return PTL_FAIL;
- /* so what a load of bollocks set_address() is... */
- ni->ni_nid = tcpnal_mynid;
tcpnal_running = 1;
return PTL_OK;
#include <pthread.h>
#include <bridge.h>
-#include <ipmap.h>
#define NAL_FLAG_RUNNING 1
extern void *nal_thread(void *);
-extern void set_address(bridge t,ptl_pid_t pidrequest);
extern void procbridge_wakeup_nal(procbridge p);
extern ptl_err_t procbridge_startup (ptl_ni_t *);
procbridge p=b->local;
int rc;
- /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass
- * that to set_address... */
- set_address(b, ptl_apini.apini_pid);
-
rc = tcpnal_init(b);
/*
* Returns: a pointer to the new table
*/
table hash_create_table (int (*compare_function)(void *, void *),
- unsigned int (*key_function)(unsigned int *))
+ unsigned int (*key_function)(void *))
{
table new=(table)malloc(sizeof(struct table));
memset(new, 0, sizeof(struct table));
int number_of_entries;
table_entry *entries;
int (*compare_function)(void *, void *);
- unsigned int (*key_function)(unsigned int *);
+ unsigned int (*key_function)(void *);
} *table;
/* table.c */
unsigned int key_from_int(int i);
unsigned int key_from_string(char *s);
-table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+table hash_create_table(int (*compare_function)(void *, void *),
+ unsigned int (*key_function)(void *));
void *hash_table_find(table t, void *comparator);
void hash_table_insert(table t, void *value, void *comparator);
void hash_table_remove(table t, void *comparator);
return PTL_FAIL;
}
- if (!(c=force_tcp_connection((manager)b->lower,
- PNAL_IP(target.nid,b),
- PNAL_PORT(target.nid,target.pid),
+ if (!(c=force_tcp_connection((manager)b->lower, target.nid,
b->local)))
return(PTL_FAIL);
{
manager m;
- if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid,
- ptl_apini.apini_pid),
- from_connection,b))){
+ if (!(m=init_connections(from_connection,b))){
/* TODO: this needs to shut down the
newly created junk */
return(PTL_NAL_FAILED);
* Returns: a pointer to the new table
*/
table hash_create_table (int (*compare_function)(void *, void *),
- unsigned int (*key_function)(unsigned int *))
+ unsigned int (*key_function)(void *))
{
table new=(table)malloc(sizeof(struct table));
memset(new, 0, sizeof(struct table));
int number_of_entries;
table_entry *entries;
int (*compare_function)(void *, void *);
- unsigned int (*key_function)(unsigned int *);
+ unsigned int (*key_function)(void *);
} *table;
/* table.c */
unsigned int key_from_int(int i);
unsigned int key_from_string(char *s);
-table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+table hash_create_table(int (*compare_function)(void *, void *),
+ unsigned int (*key_function)(void *));
void *hash_table_find(table t, void *comparator);
void hash_table_insert(table t, void *value, void *comparator);
void hash_table_remove(table t, void *comparator);
return PTL_FAIL;
}
- if (!(c=force_tcp_connection((manager)b->lower,
- PNAL_IP(target.nid,b),
- PNAL_PORT(target.nid,target.pid),
+ if (!(c=force_tcp_connection((manager)b->lower, target.nid,
b->local)))
return(PTL_FAIL);
{
manager m;
- if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid,
- ptl_apini.apini_pid),
- from_connection,b))){
+ if (!(m=init_connections(from_connection,b))){
/* TODO: this needs to shut down the
newly created junk */
return(PTL_NAL_FAILED);
jt_ptl_print_peers (int argc, char **argv)
{
struct portal_ioctl_data data;
+ ptl_process_id_t id;
char buffer[2][64];
int index;
int rc;
if (rc != 0)
break;
- if (g_net_is_compatible(NULL, SOCKNAL, 0))
+ if (g_net_is_compatible(NULL, SOCKNAL, 0)) {
+ id.nid = data.ioc_nid;
+ id.pid = data.ioc_u32[4];
printf ("%-20s [%d]%s->%s:%d #%d\n",
- libcfs_nid2str(data.ioc_nid),
+ libcfs_id2str(id),
data.ioc_count, /* persistence */
ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */
ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
data.ioc_u32[1], /* peer port */
data.ioc_u32[3]); /* conn_count */
- else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0))
+ } else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0)) {
printf ("%-20s [%d]@%s:%d\n",
libcfs_nid2str(data.ioc_nid),
data.ioc_count,
ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
data.ioc_u32[1]); /* peer port */
- else
+ } else {
printf ("%-20s [%d]\n",
libcfs_nid2str(data.ioc_nid), data.ioc_count);
+ }
}
if (index == 0) {
jt_ptl_print_connections (int argc, char **argv)
{
struct portal_ioctl_data data;
+ ptl_process_id_t id;
char buffer[2][64];
int index;
int rc;
if (rc != 0)
break;
- if (g_net_is_compatible (NULL, SOCKNAL, 0))
+ if (g_net_is_compatible (NULL, SOCKNAL, 0)) {
+ id.nid = data.ioc_nid;
+ id.pid = data.ioc_u32[6];
printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n",
- libcfs_nid2str(data.ioc_nid),
+ libcfs_id2str(id),
(data.ioc_u32[3] == SOCKNAL_CONN_ANY) ? "A" :
(data.ioc_u32[3] == SOCKNAL_CONN_CONTROL) ? "C" :
(data.ioc_u32[3] == SOCKNAL_CONN_BULK_IN) ? "I" :
data.ioc_count, /* tx buffer size */
data.ioc_u32[5], /* rx buffer size */
data.ioc_flags ? "nagle" : "nonagle");
- else if (g_net_is_compatible (NULL, RANAL, 0))
+ } else if (g_net_is_compatible (NULL, RANAL, 0)) {
printf ("%-20s [%d]\n",
libcfs_nid2str(data.ioc_nid),
data.ioc_u32[0] /* device id */);
- else
+ } else {
printf ("%s\n", libcfs_nid2str(data.ioc_nid));
+ }
}
if (index == 0) {