Whamcloud - gitweb
* Compiles after merging b1_4
author eeb <eeb>
Tue, 5 Jul 2005 18:35:31 +0000 (18:35 +0000)
committer eeb <eeb>
Tue, 5 Jul 2005 18:35:31 +0000 (18:35 +0000)
*   Changed socknal...
    - use PID as well as NID to match connections so userspace (tcpnal) clients
      can be distinguished without changing the NID format.
    - unprivileged port == userspace client
    - don't create new connections to userspace clients
    - derive the NID/PID of a userspace client from the remote IP/port

*   Changed tcpnal...
    - use non-privileged ports
    - no concept of own NID (peer assigns)
    - don't accept connections

47 files changed:
lnet/include/libcfs/darwin/kp30.h
lnet/include/libcfs/darwin/libcfs.h
lnet/include/libcfs/kp30.h
lnet/include/libcfs/libcfs.h
lnet/include/libcfs/linux/kp30.h
lnet/include/libcfs/linux/libcfs.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-p30.h
lnet/klnds/openiblnd/openiblnd_cb.c
lnet/klnds/qswlnd/qswlnd.h
lnet/klnds/qswlnd/qswlnd_cb.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/klnds/socklnd/socklnd_lib-darwin.c
lnet/libcfs/darwin/darwin-proc.c
lnet/libcfs/debug.c
lnet/libcfs/linux/linux-proc.c
lnet/libcfs/nidstrings.c
lnet/lnet/acceptor.c
lnet/lnet/api-ni.c
lnet/lnet/lib-msg.c
lnet/lnet/module.c
lnet/lnet/router.c
lnet/ulnds/Makefile.am
lnet/ulnds/address.c [deleted file]
lnet/ulnds/connection.c
lnet/ulnds/connection.h
lnet/ulnds/ipmap.h [deleted file]
lnet/ulnds/procapi.c
lnet/ulnds/procbridge.h
lnet/ulnds/proclib.c
lnet/ulnds/socklnd/Makefile.am
lnet/ulnds/socklnd/address.c [deleted file]
lnet/ulnds/socklnd/connection.c
lnet/ulnds/socklnd/connection.h
lnet/ulnds/socklnd/ipmap.h [deleted file]
lnet/ulnds/socklnd/procapi.c
lnet/ulnds/socklnd/procbridge.h
lnet/ulnds/socklnd/proclib.c
lnet/ulnds/socklnd/table.c
lnet/ulnds/socklnd/table.h
lnet/ulnds/socklnd/tcplnd.c
lnet/ulnds/table.c
lnet/ulnds/table.h
lnet/ulnds/tcplnd.c
lnet/utils/portals.c

index a95d769..fa1e10e 100644 (file)
@@ -32,7 +32,7 @@
 #define LASSERT_SPIN_LOCKED(lock) do {} while(0)
 #endif
 
-#define LBUG_WITH_LOC(file, func, line)         do {} while(0)
+#define LBUG_WITH_LOC(file, func, line)    do {portals_catastrophe = 1;} while(0)
 
 /* --------------------------------------------------------------------- */
 
index 8e4eb89..6ec2486 100644 (file)
@@ -155,7 +155,7 @@ __entry_nesting(&__cdd);
 /* ENTRY_NESTING_SUPPORT */
 #endif
 
-#define LUSTRE_PTL_PID          12345
+#define LUSTRE_PTL_PID          456             /* <= 1023 (TCP reserved port) */
 
 #define _XNU_LIBCFS_H
 
index d99a977..0a7d73c 100644 (file)
@@ -252,7 +252,7 @@ struct portals_device_userstate
  * USER LEVEL STUFF BELOW
  */
 
-#define PORTAL_IOCTL_VERSION 0x00010009
+#define PORTAL_IOCTL_VERSION 0x0001000a
 
 struct portal_ioctl_data {
         __u32 ioc_len;
@@ -264,7 +264,7 @@ struct portal_ioctl_data {
         __u32 ioc_flags;
         __u32 ioc_count;
         __u32 ioc_net;
-        __u32 ioc_u32[6];
+        __u32 ioc_u32[7];
 
         __u32 ioc_inllen1;
         char *ioc_inlbuf1;
@@ -452,8 +452,6 @@ enum {
         RANAL     = 8,
 };
 
-#define PTL_NALFMT_SIZE             32 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+5+1) */
-
 enum {
         DEBUG_DAEMON_START       =  1,
         DEBUG_DAEMON_STOP        =  2,
index e75e31e..bbd77f3 100644 (file)
@@ -38,6 +38,9 @@ extern unsigned int portal_stack;
 extern unsigned int portal_debug;
 extern unsigned int portal_printk;
 
+/* Has there been an LBUG? */
+extern unsigned int portals_catastrophe;
+
 /*
  * struct ptldebug_header is defined in libcfs/<os>/libcfs.h
  */
@@ -221,6 +224,7 @@ do {                                                                    \
 #endif /* !CDEBUG_ENTRY_EXIT */
 
 #else /* !1 */
+#define CDEBUG_LIMIT(mask, format, a...) do { } while (0)
 #define CDEBUG(mask, format, a...)      do { } while (0)
 #define CWARN(format, a...)             printk(KERN_WARNING format, ## a)
 #define CERROR(format, a...)            printk(KERN_ERR format, ## a)
@@ -236,6 +240,7 @@ do {                                                                    \
 #define EXIT                            do { } while (0)
 #endif /* !1 */
 #else /* !__KERNEL__ */
+#define CDEBUG_LIMIT(mask, format, a...) do { } while (0)
 #define CDEBUG(mask, format, a...)      do { } while (0)
 #define LCONSOLE(mask, format, a...)    fprintf(stderr, format, ## a)
 #define CWARN(format, a...)             fprintf(stderr, format, ## a)
@@ -274,6 +279,9 @@ int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
 int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
 
 /* libcfs tcpip */
+#define PTL_ACCEPTOR_MIN_RESERVED_PORT    512
+#define PTL_ACCEPTOR_MAX_RESERVED_PORT    1023
+
 int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
 int libcfs_ipif_enumerate(char ***names);
 void libcfs_ipif_free_enumeration(char **names, int n);
index e495cf7..01052fc 100644 (file)
@@ -93,6 +93,7 @@ static inline void our_cond_resched(void)
 #define LBUG_WITH_LOC(file, func, line)                                 \
 do {                                                                    \
         CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n");       \
+        portals_catastrophe = 1;                                        \
         portals_debug_dumplog();                                        \
         portals_run_lbug_upcall(file, func, line);                      \
         panic("LBUG");                                                  \
@@ -101,6 +102,7 @@ do {                                                                    \
 #define LBUG_WITH_LOC(file, func, line)                                 \
 do {                                                                    \
         CEMERG("LBUG\n");                                               \
+        portals_catastrophe = 1;                                        \
         portals_debug_dumpstack(NULL);                                  \
         portals_debug_dumplog();                                        \
         portals_run_lbug_upcall(file, func, line);                      \
index e62ac48..12d6204 100644 (file)
@@ -127,7 +127,7 @@ struct ptldebug_header {
  * */
 #define LUSTRE_PTL_PID          9
 # else
-#define LUSTRE_PTL_PID          12345
+#define LUSTRE_PTL_PID          456             /* <= 1023 (TCP reserved port) */
 # endif
 
 #define ENTRY_NESTING_SUPPORT (0)
index 7f5a6dc..7781d22 100644 (file)
@@ -467,6 +467,8 @@ extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst,
                              int src_niov, ptl_kiov_t *src,
                              ptl_size_t offset, ptl_size_t len);
 
+extern ptl_pid_t ptl_getpid(void);
+
 extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md,
                            ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
 extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
index 7f5a6dc..7781d22 100644 (file)
@@ -467,6 +467,8 @@ extern int ptl_extract_kiov (int dst_niov, ptl_kiov_t *dst,
                              int src_niov, ptl_kiov_t *src,
                              ptl_size_t offset, ptl_size_t len);
 
+extern ptl_pid_t ptl_getpid(void);
+
 extern ptl_err_t ptl_recv (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_libmd_t *md,
                            ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
 extern ptl_err_t ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
index d05ba14..e629c53 100644 (file)
@@ -964,7 +964,7 @@ kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid)
                 write_unlock_irqrestore (g_lock, flags);
 
                 if (retry) {
-                        CERROR("Can't find per %s\n", libcfs_nid2str(nid));
+                        CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
                         tx->tx_status = -EHOSTUNREACH;
                         kibnal_tx_done (tx);
                         return;
index 77ae7f6..36ff15e 100644 (file)
@@ -290,7 +290,7 @@ kqswnal_elanid2nid (int elanid)
 static inline int
 kqswnal_nid2elanid (ptl_nid_t nid)
 {
-        int elanid = PTL_NIDADDR(nid);
+        __u32 elanid = PTL_NIDADDR(nid);
 
         /* not in this cluster? */
         return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid;
index 6001e06..67a52d6 100644 (file)
@@ -1513,8 +1513,7 @@ kqswnal_parse (kqswnal_rx_t *krx)
 
         dest_nid   = le64_to_cpu(hdr->dest_nid); /* final dest */
         src_nid    = le64_to_cpu(hdr->src_nid); /* original source */
-        sender_nid = PTL_MKNID(PTL_NIDNET(kqswnal_data.kqn_ni->ni_nid),
-                               ep_rxd_node(krx->krx_rxd)); /* who sent it to me */
+        sender_nid = kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd)); /* who sent it to me */
 #if KQSW_CHECKSUM
         LASSERTF (0, "checksums for forwarded packets not implemented\n");
 #endif
index a552472..09138c6 100644 (file)
@@ -94,13 +94,14 @@ ksocknal_destroy_route (ksock_route_t *route)
 }
 
 int
-ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_process_id_t id)
 {
         ksock_net_t   *net = ni->ni_data;
         ksock_peer_t  *peer;
         unsigned long  flags;
 
-        LASSERT (nid != PTL_NID_ANY);
+        LASSERT (id.nid != PTL_NID_ANY);
+        LASSERT (id.pid != PTL_PID_ANY);
         LASSERT (!in_interrupt());
 
         PORTAL_ALLOC (peer, sizeof (*peer));
@@ -110,7 +111,7 @@ ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid)
         memset (peer, 0, sizeof (*peer));       /* NULL pointers/clear flags etc */
 
         peer->ksnp_ni = ni;
-        peer->ksnp_nid = nid;
+        peer->ksnp_id = id;
         atomic_set (&peer->ksnp_refcount, 1);   /* 1 ref for caller */
         peer->ksnp_closing = 0;
         CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
@@ -142,7 +143,7 @@ ksocknal_destroy_peer (ksock_peer_t *peer)
         unsigned long   flags;
 
         CDEBUG (D_NET, "peer %s %p deleted\n", 
-                libcfs_nid2str(peer->ksnp_nid), peer);
+                libcfs_id2str(peer->ksnp_id), peer);
 
         LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
         LASSERT (list_empty (&peer->ksnp_conns));
@@ -161,9 +162,9 @@ ksocknal_destroy_peer (ksock_peer_t *peer)
 }
 
 ksock_peer_t *
-ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id)
 {
-        struct list_head *peer_list = ksocknal_nid2peerlist (nid);
+        struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
         struct list_head *tmp;
         ksock_peer_t     *peer;
 
@@ -176,11 +177,12 @@ ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid)
                 if (peer->ksnp_ni != ni)
                         continue;
 
-                if (peer->ksnp_nid != nid)
+                if (peer->ksnp_id.nid != id.nid ||
+                    peer->ksnp_id.pid != id.pid)
                         continue;
 
                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
-                       peer, libcfs_nid2str(nid), 
+                       peer, libcfs_id2str(id), 
                        atomic_read(&peer->ksnp_refcount));
                 return (peer);
         }
@@ -188,12 +190,12 @@ ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid)
 }
 
 ksock_peer_t *
-ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id)
 {
         ksock_peer_t     *peer;
 
         read_lock (&ksocknal_data.ksnd_global_lock);
-        peer = ksocknal_find_peer_locked (ni, nid);
+        peer = ksocknal_find_peer_locked (ni, id);
         if (peer != NULL)                       /* +1 ref for caller? */
                 ksocknal_peer_addref(peer);
         read_unlock (&ksocknal_data.ksnd_global_lock);
@@ -225,7 +227,7 @@ ksocknal_unlink_peer_locked (ksock_peer_t *peer)
 
 int
 ksocknal_get_peer_info (ptl_ni_t *ni, int index, 
-                        ptl_nid_t *nid, __u32 *myip, __u32 *peer_ip, int *port,
+                        ptl_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
                         int *conn_count, int *share_count)
 {
         ksock_peer_t      *peer;
@@ -251,7 +253,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index,
                                 if (index-- > 0)
                                         continue;
 
-                                *nid = peer->ksnp_nid;
+                                *id = peer->ksnp_id;
                                 *myip = 0;
                                 *peer_ip = 0;
                                 *port = 0;
@@ -265,7 +267,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index,
                                 if (index-- > 0)
                                         continue;
 
-                                *nid = peer->ksnp_nid;
+                                *id = peer->ksnp_id;
                                 *myip = peer->ksnp_passive_ips[j];
                                 *peer_ip = 0;
                                 *port = 0;
@@ -282,7 +284,7 @@ ksocknal_get_peer_info (ptl_ni_t *ni, int index,
                                 route = list_entry(rtmp, ksock_route_t,
                                                    ksnr_list);
 
-                                *nid = peer->ksnp_nid;
+                                *id = peer->ksnp_id;
                                 *myip = route->ksnr_myipaddr;
                                 *peer_ip = route->ksnr_ipaddr;
                                 *port = route->ksnr_port;
@@ -312,13 +314,13 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
                 if (route->ksnr_myipaddr == 0) {
                         /* route wasn't bound locally yet (the initial route) */
                         CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
-                               libcfs_nid2str(peer->ksnp_nid),
+                               libcfs_id2str(peer->ksnp_id),
                                HIPQUAD(route->ksnr_ipaddr),
                                HIPQUAD(conn->ksnc_myipaddr));
                 } else {
                         CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
                                "%u.%u.%u.%u to %u.%u.%u.%u\n",
-                               libcfs_nid2str(peer->ksnp_nid),
+                               libcfs_id2str(peer->ksnp_id),
                                HIPQUAD(route->ksnr_ipaddr),
                                HIPQUAD(route->ksnr_myipaddr),
                                HIPQUAD(conn->ksnc_myipaddr));
@@ -361,7 +363,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
 
                 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
                         CERROR ("Duplicate route %s %u.%u.%u.%u\n",
-                                libcfs_nid2str(peer->ksnp_nid), 
+                                libcfs_id2str(peer->ksnp_id), 
                                 HIPQUAD(route->ksnr_ipaddr));
                         LBUG();
                 }
@@ -425,7 +427,7 @@ ksocknal_del_route_locked (ksock_route_t *route)
 }
 
 int
-ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port)
+ksocknal_add_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ipaddr, int port)
 {
         unsigned long      flags;
         struct list_head  *tmp;
@@ -435,11 +437,12 @@ ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port)
         ksock_route_t     *route2;
         int                rc;
 
-        if (nid == PTL_NID_ANY)
+        if (id.nid == PTL_NID_ANY ||
+            id.pid == PTL_PID_ANY)
                 return (-EINVAL);
 
         /* Have a brand new peer ready... */
-        rc = ksocknal_create_peer(&peer, ni, nid);
+        rc = ksocknal_create_peer(&peer, ni, id);
         if (rc != 0)
                 return rc;
 
@@ -451,14 +454,14 @@ ksocknal_add_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ipaddr, int port)
 
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
 
-        peer2 = ksocknal_find_peer_locked (ni, nid);
+        peer2 = ksocknal_find_peer_locked (ni, id);
         if (peer2 != NULL) {
                 ksocknal_peer_decref(peer);
                 peer = peer2;
         } else {
                 /* peer table takes my ref on peer */
                 list_add_tail (&peer->ksnp_list,
-                               ksocknal_nid2peerlist (nid));
+                               ksocknal_nid2peerlist (id.nid));
         }
 
         route2 = NULL;
@@ -539,7 +542,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
 }
 
 int
-ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip)
+ksocknal_del_peer (ptl_ni_t *ni, ptl_process_id_t id, __u32 ip)
 {
         unsigned long      flags;
         struct list_head  *ptmp;
@@ -552,8 +555,8 @@ ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip)
 
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
-                lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
+        if (id.nid != PTL_NID_ANY)
+                lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
         else {
                 lo = 0;
                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
@@ -566,7 +569,8 @@ ksocknal_del_peer (ptl_ni_t *ni, ptl_nid_t nid, __u32 ip)
                         if (peer->ksnp_ni != ni)
                                 continue;
 
-                        if (!(nid == PTL_NID_ANY || peer->ksnp_nid == nid))
+                        if (!((id.nid == PTL_NID_ANY || peer->ksnp_id.nid == id.nid) &&
+                              (id.pid == PTL_PID_ANY || peer->ksnp_id.pid == id.pid)))
                                 continue;
 
                         ksocknal_del_peer_locked (peer, ip);
@@ -953,7 +957,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
         ksock_net_t       *net = (ksock_net_t *)ni->ni_data;
         __u32              ipaddrs[PTL_MAX_INTERFACES];
         int                nipaddrs;
-        ptl_nid_t          nid;
+        ptl_process_id_t   peerid;
         struct list_head  *tmp;
         __u64              incarnation;
         unsigned long      flags;
@@ -1017,12 +1021,18 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
          * vector of interfaces she's willing to let me connect to.
          * Passive connections use the listener timeout since the peer sends
          * eagerly */
-        nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid;
-        rc = ksocknal_recv_hello (ni, conn, &nid, &incarnation, ipaddrs);
+        if (route == NULL) {
+                peerid.nid = PTL_NID_ANY;
+                peerid.pid = PTL_PID_ANY;
+        } else {
+                peerid = route->ksnr_peer->ksnp_id;
+        }
+
+        rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs);
         if (rc < 0)
                 goto failed_1;
         nipaddrs = rc;
-        LASSERT (nid != PTL_NID_ANY);
+        LASSERT (peerid.nid != PTL_NID_ANY);
 
         if (route != NULL) {
                 peer = route->ksnr_peer;
@@ -1033,18 +1043,18 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
                                        ipaddrs, nipaddrs);
                 rc = 0;
         } else {
-                rc = ksocknal_create_peer(&peer, ni, nid);
+                rc = ksocknal_create_peer(&peer, ni, peerid);
                 if (rc != 0)
                         goto failed_1;
 
                 write_lock_irqsave(global_lock, flags);
 
-                peer2 = ksocknal_find_peer_locked(ni, nid);
+                peer2 = ksocknal_find_peer_locked(ni, peerid);
                 if (peer2 == NULL) {
                         /* NB this puts an "empty" peer in the peer
                          * table (which takes my ref) */
                         list_add_tail(&peer->ksnp_list,
-                                      ksocknal_nid2peerlist(nid));
+                                      ksocknal_nid2peerlist(peerid.nid));
                 } else  {
                         ksocknal_peer_decref(peer);
                         peer = peer2;
@@ -1095,7 +1105,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
         if (route != NULL &&
             route->ksnr_ipaddr != conn->ksnc_ipaddr) {
                 CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
-                       libcfs_nid2str(peer->ksnp_nid),
+                       libcfs_id2str(peer->ksnp_id),
                        HIPQUAD(route->ksnr_ipaddr),
                        HIPQUAD(conn->ksnc_ipaddr));
         }
@@ -1147,8 +1157,8 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
 
         rc = ksocknal_close_stale_conns_locked(peer, incarnation);
         if (rc != 0)
-                CERROR ("Closed %d stale conns to nid %s ip %d.%d.%d.%d\n",
-                        rc, libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                CERROR ("Closed %d stale conns to %s ip %d.%d.%d.%d\n",
+                        rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
                         HIPQUAD(conn->ksnc_ipaddr));
 
         write_unlock_irqrestore (global_lock, flags);
@@ -1163,7 +1173,7 @@ ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
 
         CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d"
                " incarnation:"LPD64" sched[%d]/%d\n",
-               libcfs_nid2str(nid), HIPQUAD(conn->ksnc_myipaddr),
+               libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr),
                HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
                (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
 
@@ -1320,7 +1330,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn)
         ksocknal_connsock_decref(conn);
 
         if (notify)
-                kpr_notify (peer->ksnp_ni, peer->ksnp_nid, 0, then);
+                kpr_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, then);
 }
 
 void
@@ -1357,7 +1367,7 @@ ksocknal_destroy_conn (ksock_conn_t *conn)
         case SOCKNAL_RX_BODY:
                 CERROR("Completing partial receive from %s"
                        ", ip %d.%d.%d.%d:%d, with error\n",
-                       libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                       libcfs_id2str(conn->ksnc_peer->ksnp_id),
                        HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
                 ptl_finalize (conn->ksnc_peer->ksnp_ni, NULL, 
                               conn->ksnc_cookie, PTL_FAIL);
@@ -1416,7 +1426,7 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
 
                 CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d "
                        "incarnation:"LPD64"("LPD64")\n",
-                       libcfs_nid2str(peer->ksnp_nid), 
+                       libcfs_id2str(peer->ksnp_id), 
                        conn->ksnc_ipaddr, conn->ksnc_port,
                        conn->ksnc_incarnation, incarnation);
 
@@ -1445,7 +1455,7 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
 }
 
 int
-ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
+ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr)
 {
         unsigned long       flags;
         ksock_peer_t       *peer;
@@ -1458,8 +1468,8 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
 
         write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
 
-        if (nid != PTL_NID_ANY)
-                lo = hi = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
+        if (id.nid != PTL_NID_ANY)
+                lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
         else {
                 lo = 0;
                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
@@ -1470,7 +1480,8 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
 
                         peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
 
-                        if (!(nid == PTL_NID_ANY || nid == peer->ksnp_nid))
+                        if (!((id.nid == PTL_NID_ANY || id.nid == peer->ksnp_id.nid) &&
+                              (id.pid == PTL_PID_ANY || id.pid == peer->ksnp_id.pid)))
                                 continue;
 
                         count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
@@ -1480,7 +1491,7 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
         write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
 
         /* wildcards always succeed */
-        if (nid == PTL_NID_ANY || ipaddr == 0)
+        if (id.nid == PTL_NID_ANY || id.pid == PTL_PID_ANY || ipaddr == 0)
                 return (0);
 
         return (count == 0 ? -ENOENT : 0);
@@ -1491,13 +1502,14 @@ ksocknal_notify (ptl_ni_t *ni, ptl_nid_t gw_nid, int alive)
 {
         /* The router is telling me she's been notified of a change in
          * gateway state.... */
+        ptl_process_id_t  id = {.nid = gw_nid, .pid = PTL_PID_ANY};
 
         CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), 
                 alive ? "up" : "down");
 
         if (!alive) {
                 /* If the gateway crashed, close all open connections... */
-                ksocknal_close_matching_conns (gw_nid, 0);
+                ksocknal_close_matching_conns (id, 0);
                 return;
         }
 
@@ -1538,7 +1550,7 @@ ksocknal_push_peer (ksock_peer_t *peer)
 }
 
 int
-ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid)
+ksocknal_push (ptl_ni_t *ni, ptl_process_id_t id)
 {
         ksock_peer_t      *peer;
         struct list_head  *tmp;
@@ -1547,17 +1559,6 @@ ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid)
         int                j;
         int                rc = -ENOENT;
 
-        if (nid != PTL_NID_ANY) {
-                peer = ksocknal_find_peer (ni, nid);
-
-                if (peer != NULL) {
-                        rc = 0;
-                        ksocknal_push_peer (peer);
-                        ksocknal_peer_decref(peer);
-                }
-                return (rc);
-        }
-
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
                 for (j = 0; ; j++) {
                         read_lock (&ksocknal_data.ksnd_global_lock);
@@ -1566,9 +1567,18 @@ ksocknal_push (ptl_ni_t *ni, ptl_nid_t nid)
                         peer = NULL;
 
                         list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+                                peer = list_entry(tmp, ksock_peer_t,
+                                                  ksnp_list);
+
+                                if (!((id.nid == PTL_NID_ANY ||
+                                       id.nid == peer->ksnp_id.nid) &&
+                                      (id.pid == PTL_PID_ANY ||
+                                       id.pid == peer->ksnp_id.pid))) {
+                                        peer = NULL;
+                                        continue;
+                                }
+
                                 if (index++ == j) {
-                                        peer = list_entry(tmp, ksock_peer_t,
-                                                          ksnp_list);
                                         ksocknal_peer_addref(peer);
                                         break;
                                 }
@@ -1776,39 +1786,42 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                                               data->ioc_u32[0]); /* IP address */
 
         case IOC_PORTAL_GET_PEER: {
-                ptl_nid_t    nid = 0;
-                __u32        myip = 0;
-                __u32        ip = 0;
-                int          port = 0;
-                int          conn_count = 0;
-                int          share_count = 0;
+                ptl_process_id_t id = {0,};
+                __u32            myip = 0;
+                __u32            ip = 0;
+                int              port = 0;
+                int              conn_count = 0;
+                int              share_count = 0;
 
                 rc = ksocknal_get_peer_info(ni, data->ioc_count,
-                                            &nid, &myip, &ip, &port,
+                                            &id, &myip, &ip, &port,
                                             &conn_count,  &share_count);
                 if (rc != 0)
                         return rc;
                         
-                data->ioc_nid    = nid;
+                data->ioc_nid    = id.nid;
                 data->ioc_count  = share_count;
                 data->ioc_u32[0] = ip;
                 data->ioc_u32[1] = port;
                 data->ioc_u32[2] = myip;
                 data->ioc_u32[3] = conn_count;
+                data->ioc_u32[4] = id.pid;
                 return 0;
         }
 
-        case IOC_PORTAL_ADD_PEER:
-                return ksocknal_add_peer (ni,
-                                          data->ioc_nid,
+        case IOC_PORTAL_ADD_PEER: {
+                ptl_process_id_t  id = {.nid = data->ioc_nid,
+                                        .pid = LUSTRE_SRV_PTL_PID};
+                return ksocknal_add_peer (ni, id,
                                           data->ioc_u32[0], /* IP */
                                           data->ioc_u32[1]); /* port */
-
-        case IOC_PORTAL_DEL_PEER:
-                return ksocknal_del_peer (ni,
-                                          data->ioc_nid,
+        }
+        case IOC_PORTAL_DEL_PEER: {
+                ptl_process_id_t  id = {.nid = data->ioc_nid,
+                                        .pid = PTL_PID_ANY};
+                return ksocknal_del_peer (ni, id,
                                           data->ioc_u32[0]); /* IP */
-
+        }
         case IOC_PORTAL_GET_CONN: {
                 int           txmem;
                 int           rxmem;
@@ -1821,7 +1834,7 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
 
                 data->ioc_count  = txmem;
-                data->ioc_nid    = conn->ksnc_peer->ksnp_nid;
+                data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
                 data->ioc_flags  = nagle;
                 data->ioc_u32[0] = conn->ksnc_ipaddr;
                 data->ioc_u32[1] = conn->ksnc_port;
@@ -1830,14 +1843,18 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                 data->ioc_u32[4] = conn->ksnc_scheduler -
                                    ksocknal_data.ksnd_schedulers;
                 data->ioc_u32[5] = rxmem;
+                data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
                 ksocknal_conn_decref(conn);
                 return 0;
         }
 
-        case IOC_PORTAL_CLOSE_CONNECTION:
-                return ksocknal_close_matching_conns (data->ioc_nid,
-                                                      data->ioc_u32[0]);
+        case IOC_PORTAL_CLOSE_CONNECTION: {
+                ptl_process_id_t  id = {.nid = data->ioc_nid,
+                                        .pid = PTL_PID_ANY};
 
+                return ksocknal_close_matching_conns (id,
+                                                      data->ioc_u32[0]);
+        }
         case IOC_PORTAL_REGISTER_MYNID:
                 /* Ignore if this is a noop */
                if (data->ioc_nid == ni->ni_nid)
@@ -1848,9 +1865,12 @@ ksocknal_ctl(ptl_ni_t *ni, unsigned int cmd, void *arg)
                       libcfs_nid2str(ni->ni_nid));
                return -EINVAL;
 
-        case IOC_PORTAL_PUSH_CONNECTION:
-                return ksocknal_push (ni, data->ioc_nid);
-
+        case IOC_PORTAL_PUSH_CONNECTION: {
+                ptl_process_id_t  id = {.nid = data->ioc_nid,
+                                        .pid = PTL_PID_ANY};
+                
+                return ksocknal_push(ni, id);
+        }
         default:
                 return -EINVAL;
         }
@@ -2146,9 +2166,11 @@ ksocknal_base_startup (void)
 void
 ksocknal_shutdown (ptl_ni_t *ni)
 {
-        ksock_net_t    *net = ni->ni_data;
-        int             i;
-        unsigned long   flags;
+        ksock_net_t      *net = ni->ni_data;
+        int               i;
+        unsigned long     flags;
+        ptl_process_id_t  anyid = {.nid = PTL_NID_ANY,
+                                   .pid = PTL_PID_ANY};
 
         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
         LASSERT(ksocknal_data.ksnd_nnets > 0);
@@ -2158,7 +2180,7 @@ ksocknal_shutdown (ptl_ni_t *ni)
         spin_unlock_irqrestore(&net->ksnn_lock, flags);
 
         /* Delete all peers */
-        ksocknal_del_peer(ni, PTL_NID_ANY, 0);
+        ksocknal_del_peer(ni, anyid, 0);
 
         /* Wait for all peer state to clean up */
         i = 2;
@@ -2333,7 +2355,9 @@ ksocknal_module_init (void)
         CLASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
         /* check ksnr_connected/connecting field large enough */
         CLASSERT(SOCKNAL_CONN_NTYPES <= 4);
-
+        /* kernel PID should be in the "secure" TCP port range */
+        CLASSERT(LUSTRE_SRV_PTL_PID <= PTL_ACCEPTOR_MAX_RESERVED_PORT);
+        
         rc = ksocknal_lib_tunables_init();
         if (rc != 0)
                 return rc;
index d2789de..539c6de 100644 (file)
@@ -365,7 +365,7 @@ typedef struct ksock_route
 typedef struct ksock_peer
 {
         struct list_head    ksnp_list;          /* stash on global peer list */
-        ptl_nid_t           ksnp_nid;           /* who's on the other end(s) */
+        ptl_process_id_t    ksnp_id;            /* who's on the other end(s) */
         atomic_t            ksnp_refcount;      /* # users */
         int                 ksnp_sharecount;    /* lconf usage counter */
         int                 ksnp_closing;       /* being closed */
@@ -499,9 +499,9 @@ ptl_err_t ksocknal_recv_pages(ptl_ni_t *ni, void *private,
                               size_t mlen, size_t rlen);
 ptl_err_t ksocknal_accept(ptl_ni_t *ni, struct socket *sock);
 
-extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_nid_t nid, __u32 ip, int port);
-extern ksock_peer_t *ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_nid_t nid);
-extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_nid_t nid);
+extern int ksocknal_add_peer(ptl_ni_t *ni, ptl_process_id_t id, __u32 ip, int port);
+extern ksock_peer_t *ksocknal_find_peer_locked (ptl_ni_t *ni, ptl_process_id_t id);
+extern ksock_peer_t *ksocknal_find_peer (ptl_ni_t *ni, ptl_process_id_t id);
 extern int ksocknal_create_conn (ptl_ni_t *ni, ksock_route_t *route,
                                  struct socket *sock, int type);
 extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
@@ -509,7 +509,7 @@ extern void ksocknal_terminate_conn (ksock_conn_t *conn);
 extern void ksocknal_destroy_conn (ksock_conn_t *conn);
 extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
 extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
-extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr);
+extern int ksocknal_close_matching_conns (ptl_process_id_t id, __u32 ipaddr);
 
 extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
 extern void ksocknal_tx_done (ksock_peer_t *peer, ksock_tx_t *tx, int asynch);
@@ -525,8 +525,8 @@ extern int ksocknal_reaper (void *arg);
 extern int ksocknal_send_hello (ptl_ni_t *ni, ksock_conn_t *conn,
                                 __u32 *ipaddrs, int nipaddrs);
 extern int ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, 
-                                ptl_nid_t *nid, __u64 *incarnation,
-                                __u32 *ipaddrs);
+                                ptl_process_id_t *id,
+                                __u64 *incarnation, __u32 *ipaddrs);
 
 extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
 extern void ksocknal_lib_set_callback(struct socket *sock,  ksock_conn_t *conn);
index 3105543..2d12fe4 100644 (file)
@@ -480,7 +480,7 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
                 }
                 CERROR("[%p] Error %d on write to %s"
                        " ip %d.%d.%d.%d:%d\n", conn, rc,
-                       libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                       libcfs_id2str(conn->ksnc_peer->ksnp_id),
                        HIPQUAD(conn->ksnc_ipaddr),
                        conn->ksnc_port);
         }
@@ -590,7 +590,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         LASSERT(tx->tx_resid == tx->tx_nob);
         
         CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n", 
-                libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                libcfs_id2str(conn->ksnc_peer->ksnp_id),
                 HIPQUAD(conn->ksnc_ipaddr),
                 conn->ksnc_port);
 
@@ -682,7 +682,7 @@ ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
 }
 
 int
-ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid)
+ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_process_id_t id)
 {
         unsigned long     flags;
         ksock_peer_t     *peer;
@@ -716,7 +716,7 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid)
         for (retry = 0;; retry = 1) {
 #if !SOCKNAL_ROUND_ROBIN
                 read_lock (g_lock);
-                peer = ksocknal_find_peer_locked(ni, nid);
+                peer = ksocknal_find_peer_locked(ni, id);
                 if (peer != NULL) {
                         if (ksocknal_find_connectable_route_locked(peer) == NULL) {
                                 conn = ksocknal_find_conn_locked (tx, peer);
@@ -736,22 +736,29 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid)
 #endif
                 write_lock_irqsave(g_lock, flags);
 
-                peer = ksocknal_find_peer_locked(ni, nid);
+                peer = ksocknal_find_peer_locked(ni, id);
                 if (peer != NULL) 
                         break;
                 
                 write_unlock_irqrestore(g_lock, flags);
 
+                if (id.pid > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
+                        CERROR("Refusing to create a connection to "
+                               "userspace process %s\n", libcfs_id2str(id));
+                        return -EHOSTUNREACH;
+                }
+                
                 if (retry) {
-                        CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
+                        CERROR("Can't find peer %s\n", libcfs_id2str(id));
                         return -EHOSTUNREACH;
                 }
                 
-                rc = ksocknal_add_peer(ni, nid, 
-                                       PTL_NIDADDR(nid), ptl_acceptor_port());
+                rc = ksocknal_add_peer(ni, id, 
+                                       PTL_NIDADDR(id.nid),
+                                       ptl_acceptor_port());
                 if (rc != 0) {
                         CERROR("Can't add peer %s: %d\n",
-                               libcfs_nid2str(nid), rc);
+                               libcfs_id2str(id), rc);
                         return rc;
                 }
         }
@@ -784,7 +791,7 @@ ksocknal_launch_packet (ptl_ni_t *ni, ksock_tx_t *tx, ptl_nid_t nid)
         
         write_unlock_irqrestore (g_lock, flags);
 
-        CERROR("Peer entry with no routes: %s\n", libcfs_nid2str(nid));
+        CERROR("Peer entry with no routes: %s\n", libcfs_id2str(id));
         return (-EHOSTUNREACH);
 }
 
@@ -876,7 +883,7 @@ ksocknal_sendmsg(ptl_ni_t        *ni,
                                          payload_offset, payload_nob);
         }
 
-        rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target.nid);
+        rc = ksocknal_launch_packet(ni, &ltx->ltx_tx, target);
         if (rc == 0)
                 return (PTL_OK);
         
@@ -911,13 +918,18 @@ ksocknal_send_pages (ptl_ni_t *ni, void *private, ptl_msg_t *cookie,
 void
 ksocknal_fwd_packet (ptl_ni_t *ni, kpr_fwd_desc_t *fwd)
 {
-        ptl_nid_t     nid = fwd->kprfd_gateway_nid;
+        ptl_process_id_t id = {.nid = fwd->kprfd_gateway_nid,
+                               .pid = LUSTRE_SRV_PTL_PID};
+        /* CAVEAT EMPTOR:
+         * LUSTRE_SRV_PTL_PID assumes my target is another socknal instance and
+         * not a tcpnal (userspace/liblustre) instance.  These can't route in
+         * any case until we sort out how to make the RPC replies use the same
+         * connections as RPC requests. */
         ksock_ftx_t  *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
         int           rc;
         
         CDEBUG (D_NET, "Forwarding [%p] -> %s (%s))\n", fwd,
-                libcfs_nid2str(fwd->kprfd_gateway_nid), 
-                libcfs_nid2str(fwd->kprfd_target_nid));
+                libcfs_id2str(id), libcfs_nid2str(fwd->kprfd_target_nid));
 
         /* setup iov for hdr */
         ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
@@ -930,7 +942,7 @@ ksocknal_fwd_packet (ptl_ni_t *ni, kpr_fwd_desc_t *fwd)
         ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
         ftx->ftx_tx.tx_kiov  = fwd->kprfd_kiov;
 
-        rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, nid);
+        rc = ksocknal_launch_packet (ni, &ftx->ftx_tx, id);
         if (rc != 0)
                 kpr_fwd_done (ni, fwd, rc);
 }
@@ -1059,7 +1071,7 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
         int       payload_nob = conn->ksnc_rx_nob_left;
         ptl_nid_t src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid);
         ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
-        ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_nid;
+        ptl_nid_t sender_nid = conn->ksnc_peer->ksnp_id.nid;
         int       niov = 0;
         int       nob = payload_nob;
 
@@ -1260,14 +1272,14 @@ ksocknal_process_receive (ksock_conn_t *conn)
                 if (rc == 0)
                         CDEBUG (D_NET, "[%p] EOF from %s ip %d.%d.%d.%d:%d\n",
                                 conn, 
-                                libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
                                 HIPQUAD(conn->ksnc_ipaddr),
                                 conn->ksnc_port);
                 else if (!conn->ksnc_closing)
                         CERROR ("[%p] Error %d on read from %s"
                                 " ip %d.%d.%d.%d:%d\n",
                                 conn, rc, 
-                                libcfs_nid2str(conn->ksnc_peer->ksnp_nid),
+                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
                                 HIPQUAD(conn->ksnc_ipaddr),
                                 conn->ksnc_port);
 
@@ -1282,6 +1294,14 @@ ksocknal_process_receive (ksock_conn_t *conn)
         
         switch (conn->ksnc_rx_state) {
         case SOCKNAL_RX_HEADER:
+                if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) { 
+                        /* Userspace NAL */
+                        ptl_process_id_t *id = &conn->ksnc_peer->ksnp_id;
+                        
+                        /* Substitute process ID assigned at connection time */
+                        conn->ksnc_hdr.src_pid = cpu_to_le32(id->pid);
+                        conn->ksnc_hdr.src_nid = cpu_to_le64(id->nid);
+                }
                 rc = ptl_parse(conn->ksnc_peer->ksnp_ni, &conn->ksnc_hdr, conn);
 
                 switch (rc) {
@@ -1681,6 +1701,7 @@ ksocknal_send_hello (ptl_ni_t *ni, ksock_conn_t *conn,
         hmv->version_minor = cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR);
 
         hdr.src_nid        = cpu_to_le64 (ni->ni_nid);
+        hdr.src_pid        = cpu_to_le32 (ptl_getpid());
         hdr.type           = cpu_to_le32 (PTL_MSG_HELLO);
         hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
 
@@ -1730,7 +1751,8 @@ ksocknal_invert_type(int type)
 
 int
 ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn, 
-                     ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs)
+                     ptl_process_id_t *peerid, 
+                     __u64 *incarnation, __u32 *ipaddrs)
 {
         struct socket      *sock = conn->ksnc_sock;
         int                 active;
@@ -1740,9 +1762,10 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
         int                 i;
         int                 type;
         ptl_hdr_t           hdr;
+        ptl_process_id_t    recv_id;
         ptl_magicversion_t *hmv;
 
-        active = (*nid != PTL_NID_ANY);
+        active = (peerid->nid != PTL_NID_ANY);
         timeout = active ? *ksocknal_tunables.ksnd_timeout :
                             ptl_acceptor_timeout();
 
@@ -1831,21 +1854,32 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
                 return (-EPROTO);
         }
 
+        if (conn->ksnc_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) {          
+                /* Userspace NAL assigns peer process ID from socket */
+                recv_id.pid = conn->ksnc_port;
+                recv_id.nid = PTL_MKNID(PTL_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+        } else {
+                recv_id.pid = le32_to_cpu(hdr.src_pid);
+                recv_id.nid = le64_to_cpu (hdr.src_nid);
+        }
+        
         if (!active) {                          /* don't know peer's nid yet */
-                *nid = le64_to_cpu(hdr.src_nid);
-        } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-                LCONSOLE_ERROR("Connected successfully to nid %s on host "
+                *peerid = recv_id;
+        } else if (peerid->pid != recv_id.pid ||
+                   peerid->nid != recv_id.nid) { /* pid AND nid must match */
+                LCONSOLE_ERROR("Connected successfully to %s on host "
                                "%u.%u.%u.%u, but they claimed they were "
-                               "nid %s; please check your Lustre "
+                               "%s; please check your Lustre "
                                "configuration.\n",
-                               libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr),
-                               libcfs_nid2str(le64_to_cpu(hdr.src_nid)));
+                               libcfs_id2str(*peerid),
+                               HIPQUAD(conn->ksnc_ipaddr),
+                               libcfs_id2str(recv_id));
                                
-                CERROR ("Connected to nid %s ip %u.%u.%u.%u "
+                CERROR ("Connected to %s ip %u.%u.%u.%u "
                         "but expecting %s\n",
-                        libcfs_nid2str(le64_to_cpu (hdr.src_nid)),
+                        libcfs_id2str(recv_id),
                         HIPQUAD(conn->ksnc_ipaddr),
-                        libcfs_nid2str(*nid));
+                        libcfs_id2str(*peerid));
                 return (-EPROTO);
         }
 
@@ -1856,13 +1890,13 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
                 conn->ksnc_type = ksocknal_invert_type(type);
                 if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
                         CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
-                                type, libcfs_nid2str(*nid), 
+                                type, libcfs_id2str(*peerid), 
                                 HIPQUAD(conn->ksnc_ipaddr));
                         return (-EPROTO);
                 }
         } else if (ksocknal_invert_type(type) != conn->ksnc_type) {
                 CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
-                        conn->ksnc_type, libcfs_nid2str(*nid), 
+                        conn->ksnc_type, libcfs_id2str(*peerid), 
                         HIPQUAD(conn->ksnc_ipaddr),
                         le32_to_cpu(hdr.msg.hello.type));
                 return (-EPROTO);
@@ -1876,7 +1910,7 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
             nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) {
                 CERROR("Bad payload length %d from %s ip %u.%u.%u.%u\n",
                        __le32_to_cpu (hdr.payload_length),
-                       libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+                       libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
         }
 
         if (nips == 0)
@@ -1885,7 +1919,7 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
         rc = libcfs_sock_read(sock, ipaddrs, nips * sizeof(*ipaddrs), timeout);
         if (rc != 0) {
                 CERROR ("Error %d reading IPs from %s ip %u.%u.%u.%u\n",
-                        rc, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+                        rc, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
                 return (rc);
         }
 
@@ -1894,7 +1928,8 @@ ksocknal_recv_hello (ptl_ni_t *ni, ksock_conn_t *conn,
                 
                 if (ipaddrs[i] == 0) {
                         CERROR("Zero IP[%d] from %s ip %u.%u.%u.%u\n",
-                               i, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
+                               i, libcfs_id2str(*peerid),
+                               HIPQUAD(conn->ksnc_ipaddr));
                         return (-EPROTO);
                 }
         }
@@ -1934,7 +1969,7 @@ ksocknal_connect (ksock_route_t *route)
 
                 write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
 
-                rc = ptl_connect(&sock, peer->ksnp_nid,
+                rc = ptl_connect(&sock, peer->ksnp_id.nid,
                                  route->ksnr_myipaddr, 
                                  route->ksnr_ipaddr, route->ksnr_port);
                 if (rc != PTL_OK)
@@ -1942,7 +1977,7 @@ ksocknal_connect (ksock_route_t *route)
 
                 rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
                 if (rc != 0) {
-                        ptl_connect_console_error(rc, peer->ksnp_nid,
+                        ptl_connect_console_error(rc, peer->ksnp_id.nid,
                                                   route->ksnr_ipaddr, 
                                                   route->ksnr_port);
                         goto failed;
@@ -2129,7 +2164,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
                         /* Something (e.g. failed keepalive) set the socket error */
                         CERROR ("Socket error %d: %s %p %d.%d.%d.%d\n",
                                 SOCK_ERROR(conn->ksnc_sock), 
-                                libcfs_nid2str(peer->ksnp_nid),
+                                libcfs_id2str(peer->ksnp_id),
                                 conn, HIPQUAD(conn->ksnc_ipaddr));
 
                         return (conn);
@@ -2145,7 +2180,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
                                        "may be down.\n",
                                        HIPQUAD(conn->ksnc_ipaddr));
                         CERROR ("Timed out RX from %s %p %d.%d.%d.%d\n",
-                                libcfs_nid2str(peer->ksnp_nid),
+                                libcfs_id2str(peer->ksnp_id),
                                 conn, HIPQUAD(conn->ksnc_ipaddr));
                         return (conn);
                 }
@@ -2162,7 +2197,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
                                        "may be down.\n",
                                        HIPQUAD(conn->ksnc_ipaddr));
                         CERROR ("Timed out TX to %s %s%d %p %d.%d.%d.%d\n",
-                                libcfs_nid2str(peer->ksnp_nid),
+                                libcfs_id2str(peer->ksnp_id),
                                 list_empty (&conn->ksnc_tx_queue) ? "" : "Q ",
                                 SOCK_WMEM_QUEUED(conn->ksnc_sock), conn,
                                 HIPQUAD(conn->ksnc_ipaddr));
@@ -2195,7 +2230,7 @@ ksocknal_check_peer_timeouts (int idx)
                         read_unlock (&ksocknal_data.ksnd_global_lock);
 
                         CERROR ("Timeout out conn->%s ip %d.%d.%d.%d:%d\n",
-                                libcfs_nid2str(peer->ksnp_nid),
+                                libcfs_id2str(peer->ksnp_id),
                                 HIPQUAD(conn->ksnc_ipaddr),
                                 conn->ksnc_port);
                         ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
index 08c88dc..9660780 100644 (file)
@@ -821,10 +821,10 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *fatal,
         if (rc != 0) { 
                 CFS_NET_EX;
                 if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
-                        CERROR ("Can't connect to nid "LPX64 
+                        CERROR ("Can't connect to %s"
                                 " local IP: %u.%u.%u.%u," 
                                 " remote IP: %u.%u.%u.%u/%d: %d\n", 
-                                route->ksnr_peer->ksnp_nid, 
+                                libcfs_id2str(route->ksnr_peer->ksnp_id), 
                                 HIPQUAD(route->ksnr_myipaddr), 
                                 HIPQUAD(route->ksnr_ipaddr), 
                                 route->ksnr_port, rc); 
@@ -842,10 +842,10 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *fatal,
 
         rc = so->so_error; 
         if (rc != 0) { 
-                CERROR ("Error %d waiting for connection to nid "LPX64 
+                CERROR ("Error %d waiting for connection to %s" 
                         " local IP: %u.%u.%u.%u," 
                         " remote IP: %u.%u.%u.%u/%d: %d\n", rc,
-                        route->ksnr_peer->ksnp_nid, 
+                        libcfs_id2str(route->ksnr_peer->ksnp_id), 
                         HIPQUAD(route->ksnr_myipaddr), 
                         HIPQUAD(route->ksnr_ipaddr), 
                         route->ksnr_port, rc); 
index efa51aa..01251cf 100644 (file)
@@ -36,6 +36,7 @@ extern unsigned int portal_debug;
 extern char debug_file_path[1024];
 extern unsigned int portal_subsystem_debug;
 extern unsigned int portal_printk;
+extern unsigned int portals_catastrophe;
 extern atomic_t portal_kmemory;
 
 extern long max_debug_mb;
index 5f02708..59d7997 100644 (file)
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(portal_printk);
 unsigned int portal_stack;
 EXPORT_SYMBOL(portal_stack);
 
+unsigned int portals_catastrophe;
+EXPORT_SYMBOL(portals_catastrophe);
+
 #ifdef __KERNEL__
 atomic_t portal_kmemory = ATOMIC_INIT(0);
 EXPORT_SYMBOL(portal_kmemory);
index 9d43be3..deb79e3 100644 (file)
@@ -71,6 +71,7 @@ enum {
         PSDEV_DEBUG_DUMP_PATH,    /* crashdump tracelog location */
         PSDEV_PORTALS_UPCALL,     /* User mode upcall script  */
         PSDEV_PORTALS_MEMUSED,    /* bytes currently PORTAL_ALLOCated */
+        PSDEV_PORTALS_CATASTROPHE,/* if we have LBUGged or panic'd */
 };
 
 static struct ctl_table portals_table[] = {
@@ -87,6 +88,8 @@ static struct ctl_table portals_table[] = {
          &sysctl_string},
         {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter,
          sizeof(int), 0644, NULL, &proc_dointvec},
+        {PSDEV_PORTALS_CATASTROPHE, "catastrophe", &portals_catastrophe,
+         sizeof(int), 0444, NULL, &proc_dointvec},
         {0}
 };
 
index db73ccd..b0ed502 100644 (file)
  * between getting its string and using it.
  */
 
-static char      libcfs_nidstrings[128][PTL_NALFMT_SIZE];
+#define PTL_NIDSTR_COUNT  128     /* # of nidstrings */
+#define PTL_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
+
+static char      libcfs_nidstrings[PTL_NIDSTR_COUNT][PTL_NIDSTR_SIZE];
 static int       libcfs_nidstring_idx = 0;
 
 #ifdef __KERNEL__
@@ -153,13 +156,12 @@ libcfs_ip_addr2str(__u32 addr, char *str)
         __u32           netip = htonl(addr);
         struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET);
         
-        if (he != NULL && 
-            strlen(he->h_name) < PTL_NALFMT_SIZE) {
-                strcpy(str, he->h_name);
+        if (he != NULL) {
+                snprintf(str, PTL_NIDSTR_SIZE, "%s", he->h_name);
                 return;
         }
 #endif
-        snprintf(str, PTL_NALFMT_SIZE, "%u.%u.%u.%u",
+        snprintf(str, PTL_NIDSTR_SIZE, "%u.%u.%u.%u",
                  (addr >> 24) & 0xff, (addr >> 16) & 0xff,
                  (addr >> 8) & 0xff, addr & 0xff);
 }
@@ -221,7 +223,7 @@ libcfs_ip_str2addr(char *str, int nob, __u32 *addr)
 void
 libcfs_num_addr2str(__u32 addr, char *str)
 {
-        snprintf(str, PTL_NALFMT_SIZE, "%u", addr);
+        snprintf(str, PTL_NIDSTR_SIZE, "%u", addr);
 }
 
 int
@@ -286,7 +288,7 @@ libcfs_nal2str(int nal)
                 return nf->nf_name;
         
         str = libcfs_next_nidstring();
-        snprintf(str, PTL_NALFMT_SIZE, "?%u?", nal);
+        snprintf(str, PTL_NIDSTR_SIZE, "?%u?", nal);
         return str;
 }
 
@@ -310,11 +312,11 @@ libcfs_net2str(__u32 net)
        char           *str = libcfs_next_nidstring();
 
         if (nf == NULL) 
-                snprintf(str, PTL_NALFMT_SIZE, "<%u:%u>", nal, num);
+                snprintf(str, PTL_NIDSTR_SIZE, "<%u:%u>", nal, num);
         else if (num == 0)
-                snprintf(str, PTL_NALFMT_SIZE, "%s", nf->nf_name);
+                snprintf(str, PTL_NIDSTR_SIZE, "%s", nf->nf_name);
         else
-                snprintf(str, PTL_NALFMT_SIZE, "%s%u", nf->nf_name, num);
+                snprintf(str, PTL_NIDSTR_SIZE, "%s%u", nf->nf_name, num);
 
         return str;
 }
@@ -337,15 +339,15 @@ libcfs_nid2str(ptl_nid_t nid)
        str = libcfs_next_nidstring();
 
         if (nf == NULL)
-                snprintf(str, PTL_NALFMT_SIZE, "%x@<%u:%u>", addr, nal, nnum);
+                snprintf(str, PTL_NIDSTR_SIZE, "%x@<%u:%u>", addr, nal, nnum);
         else {
                 nf->nf_addr2str(addr, str);
                 nob = strlen(str);
                 if (nnum == 0)
-                        snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s",
+                        snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s",
                                  nf->nf_name);
                 else
-                        snprintf(str + nob, PTL_NALFMT_SIZE - nob, "@%s%u",
+                        snprintf(str + nob, PTL_NIDSTR_SIZE - nob, "@%s%u",
                                  nf->nf_name, nnum);
         }
 
@@ -460,7 +462,8 @@ libcfs_nid2str(ptl_nid_t nid)
 {
         char    *str = libcfs_next_nidstring();
         
-       snprintf(str, PTL_NALFMT_SIZE, "%llx", (unsigned long long)nid);
+       snprintf(str, PTL_NIDSTR_SIZE, "%llx", (unsigned long long)nid);
+        return str;     /* callers (e.g. libcfs_id2str) use the result */
 }
 
 __u32
@@ -500,7 +503,7 @@ libcfs_id2str(ptl_process_id_t id)
         char *str = libcfs_nid2str(id.nid);
        int   len = strlen(str);
 
-        snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid);
+        snprintf(str + len, PTL_NIDSTR_SIZE - len, "-%u", id.pid);
         return str;
 }
 
index b65ea5c..c311164 100644 (file)
@@ -23,9 +23,6 @@
 #define DEBUG_SUBSYSTEM S_PORTALS
 #include <portals/lib-p30.h>
 
-#define MIN_RESERVED_PORT    512
-#define MAX_RESERVED_PORT    1023
-
 #ifdef __KERNEL__
 static int acceptor_port = 988;
 CFS_MODULE_PARM(acceptor_port, "i", int, 0444,
@@ -139,7 +136,9 @@ ptl_connect(struct socket **sockp, ptl_nid_t peer_nid,
 
         CLASSERT (sizeof(cr) <= 16);            /* not too big to be on the stack */
 
-        for (port = MAX_RESERVED_PORT; port >= MIN_RESERVED_PORT; --port) {
+        for (port = PTL_ACCEPTOR_MAX_RESERVED_PORT; 
+             port >= PTL_ACCEPTOR_MIN_RESERVED_PORT; 
+             --port) {
                 /* Iterate through reserved ports. */
 
                 rc = libcfs_sock_connect(&sock, &fatal, 
@@ -375,9 +374,10 @@ ptl_acceptor(void *arg)
                }
 
                 if (accept_secure_only &&
-                    peer_port > MAX_RESERVED_PORT) {
+                    peer_port > PTL_ACCEPTOR_MAX_RESERVED_PORT) {
                         CERROR("Refusing connection from %u.%u.%u.%u: "
-                               "insecure port %d\n", HIPQUAD(peer_ip), peer_port);
+                               "insecure port %d\n",
+                               HIPQUAD(peer_ip), peer_port);
                         goto failed;
                 }
 
index c51f882..2314c9e 100644 (file)
@@ -995,7 +995,7 @@ PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
         LASSERT (ptl_apini.apini_refcount > 0);
 
         /* pretty useless; just return the NID of the first local interface,
-         * that isn't LONAL; it has the same NID on all nodes */
+         * that isn't LONAL (it has the same NID on all nodes) */
 
         PTL_LOCK(flags);
 
index edd0ce4..d93f92f 100644 (file)
@@ -130,3 +130,8 @@ ptl_finalize (ptl_ni_t *ni, void *private, ptl_msg_t *msg, ptl_err_t status)
 
         PTL_UNLOCK(flags);
 }
+
+ptl_pid_t  ptl_getpid(void) 
+{
+        return ptl_apini.apini_pid;
+}
index 0a96659..a8310d6 100644 (file)
@@ -151,6 +151,7 @@ EXPORT_SYMBOL(ptl_finalize);
 EXPORT_SYMBOL(ptl_parse);
 EXPORT_SYMBOL(ptl_create_reply_msg);
 EXPORT_SYMBOL(ptl_net2ni);
+EXPORT_SYMBOL(ptl_getpid);
 
 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
 MODULE_DESCRIPTION("Portals v3.1");
index aebf507..62a7fe4 100644 (file)
@@ -45,7 +45,7 @@ void
 kpr_do_upcall (void *arg)
 {
         kpr_upcall_t *u = (kpr_upcall_t *)arg;
-        char          nidstr[PTL_NALFMT_SIZE];
+        char          nidstr[36];
         char          whenstr[36];
         char         *argv[] = {
                 NULL,
@@ -55,7 +55,7 @@ kpr_do_upcall (void *arg)
                 whenstr,
                 NULL};
 
-        strcpy(nidstr, libcfs_nid2str(u->kpru_nid));
+        snprintf (nidstr, sizeof(nidstr), "%s", libcfs_nid2str(u->kpru_nid));
         snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
 
         portals_run_upcall (argv);
@@ -902,7 +902,8 @@ kpr_lookup (ptl_ni_t **nip, ptl_nid_t target_nid, int nob)
                 }
         }
 
-        CERROR("Nid %s is not on a local network\n", 
+        CERROR("Nid %s is not on a local network and "
+               "userspace portals does not support routing\n",
                libcfs_nid2str(target_nid));
 
         return PTL_NID_ANY;
index 3437d39..5b84424 100644 (file)
@@ -4,7 +4,10 @@ noinst_LIBRARIES = libtcpnal.a
 endif
 endif
 
-noinst_HEADERS =  pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS =  pqtimer.h dispatch.h table.h timer.h \
+                 connection.h bridge.h procbridge.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
+                     dispatch.h table.h timer.h procapi.c proclib.c \
+                     connection.c tcpnal.c connection.h
 libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
 libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lnet/ulnds/address.c b/lnet/ulnds/address.c
deleted file mode 100644 (file)
index f47964c..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* address.c:
- * this file provides functions to aquire the IP address of the node
- * and translate them into a NID/PID pair which supports a static
- * mapping of virtual nodes into the port range of an IP socket.
-*/
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <stdlib.h>
-#include <netdb.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <portals/p30.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Function:  get_node_id
- * Returns: a 32 bit id for this node, actually a big-endian IP address
- *
- * get_node_id() determines the host name and uses the resolver to
- *  find out its ip address. This is fairly fragile and inflexible, but
- *  explicitly asking about interfaces and their addresses is very
- *  complicated and nonportable.
- */
-static unsigned int get_node_id(void)
-{
-    char buffer[255];
-    unsigned int x;
-    struct hostent *he;
-    char * host_envp;
-
-    if (!(host_envp = getenv("PTL_HOSTID")))
-        {
-            gethostname(buffer,sizeof(buffer));
-            he=gethostbyname(buffer);
-            if (he)
-                    x=*(unsigned int *)he->h_addr_list[0];
-            else
-                    x = 0;
-            return(ntohl(x));
-        }
-    else
-        {
-            if (host_envp[1] != 'x')
-                {
-                    int a, b, c, d;
-                    sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d);
-                    return ((a<<24) | (b<<16) | (c<<8) | d);
-                }
-            else
-                {
-                    long long hostid = strtoll(host_envp, 0, 0);
-                    return((unsigned int) hostid);
-                }
-        }
-}
-
-
-/* Function:  set_address
- * Arugments: t: a procnal structure to populate with the request
- *
- * set_address performs the bit manipulations to set the nid, pid, and
- *    iptop8 fields of the procnal structures.
- *
- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
- */
-
-#ifdef DIRECT_IP_MODE
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int port;
-    if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
-    else port=pidrequest;
-    t->b_ni->ni_nid=get_node_id();
-    ptl_apini.apini_pid=port;
-}
-#else
-
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int virtnode, in_addr, port;
-    ptl_pid_t pid;
-
-    /* get and remember my node id*/
-    if (!getenv("PTL_VIRTNODE"))
-        virtnode = 0;
-    else
-        {
-            int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
-                                              >> PNAL_VNODE_SHIFT);
-            virtnode = atoi(getenv("PTL_VIRTNODE"));
-            if (virtnode > maxvnode)
-                {
-                    fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
-                            virtnode, maxvnode);
-                    return;
-                }
-        }
-
-    in_addr = get_node_id();
-
-    t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK)
-                       << PNAL_VNODE_SHIFT)
-                      + virtnode;
-    pid=pidrequest;
-    /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
-#ifdef notyet
-    if (pid==(unsigned short)PTL_PID_ANY) port = 0;
-#endif
-    if (pid==(unsigned short)PTL_PID_ANY)
-        {
-            fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
-            return;
-        }
-    else if (pid > PNAL_PID_MASK)
-        {
-            fprintf(stderr, "portal pid of %d is too large - max %d\n",
-                    pid, PNAL_PID_MASK);
-            return;
-        }
-    else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    ptl_apini->apini_pid=pid;
-}
-#endif
index 0828fd0..67abfe3 100644 (file)
@@ -22,8 +22,7 @@
 /* connection.c:
    This file provides a simple stateful connection manager which
    builds tcp connections on demand and leaves them open for
-   future use. It also provides the machinery to allow peers
-   to connect to it
+   future use. 
 */
 
 #include <stdlib.h>
 #include <syscall.h>
 #endif
 
-/* global variable: acceptor port */
-unsigned short tcpnal_acceptor_port = 988;
+/* tunables (via environment) */
+int tcpnal_acceptor_port = 988;
+int tcpnal_buffer_size   = 2 * (PTL_MTU + sizeof(ptl_hdr_t));
+int tcpnal_nagle         = 0;
 
+/* Function:  tcpnal_env_param
+ * Arguments: name: name of the environment variable to read
+ *            val:  tunable to update; written only when the variable
+ *                  is set and parses completely as an integer
+ * Returns: 1 if the variable is unset (*val untouched) or was parsed
+ *          successfully, 0 if it is set but malformed
+ *
+ * the value is scanned with "%i", so decimal, 0x-prefixed hex and
+ *  0-prefixed octal are all accepted; trailing junk is rejected
+ */
+int
+tcpnal_env_param (char *name, int *val) 
+{
+        char   *env = getenv(name);
+        int     len;
+        int     parsed;
+        int     n;
+        
+        if (env == NULL)
+                return 1;
+
+        /* Scan into a scratch variable so that a rejected value such as
+         * "12abc" can't leave a partial result in the caller's tunable. */
+        len = strlen(env);
+        n = len;                                /* scanf may not assign on EOS */
+        if (sscanf(env, "%i%n", &parsed, &n) >= 1 &&
+            n == len) {
+                *val = parsed;
+                return 1;
+        }
+        
+        CERROR("Can't parse environment variable '%s=%s'\n",
+               name, env);
+        return 0;
+}
+
+/* Function:  tcpnal_set_global_params
+ * Returns: 1 if every tunable was either unset or parsed cleanly,
+ *          0 as soon as one of them fails to parse
+ *
+ * reads the tcpnal tunables from the environment in the order
+ *  acceptor port, buffer size, nagle; parsing stops at the first
+ *  failure, so later variables are not examined
+ */
+int
+tcpnal_set_global_params (void)
+{
+        if (!tcpnal_env_param("TCPNAL_ACCEPTOR_PORT", &tcpnal_acceptor_port))
+                return 0;
+
+        if (!tcpnal_env_param("TCPNAL_BUFFER_SIZE", &tcpnal_buffer_size))
+                return 0;
+
+        if (!tcpnal_env_param("TCPNAL_NAGLE", &tcpnal_nagle))
+                return 0;
+
+        return 1;
+}
 
 /* Function:  compare_connection
  * Arguments: connection c:      a connection in the hash table
@@ -63,29 +93,21 @@ unsigned short tcpnal_acceptor_port = 988;
 static int compare_connection(void *arg1, void *arg2)
 {
     connection c = arg1;
-    unsigned int * id = arg2;
-#if 0
-    return((c->ip==id[0]) && (c->port==id[1]));
-#else
-    /* CFS specific hacking */
-    return (c->ip == id[0]);
-#endif
-}
+    ptl_nid_t *nid = arg2;
 
+    return (c->peer_nid == *nid);
+}
 
 /* Function:  connection_key
  * Arguments: ptl_process_id_t id:  an id to hash
  * Returns: a not-particularily-well-distributed hash
  *          of the id
  */
-static unsigned int connection_key(unsigned int *id)
+static unsigned int connection_key(void *arg)
 {
-#if 0
-    return(id[0]^id[1]);
-#else
-    /* CFS specific hacking */
-    return (unsigned int) id[0];
-#endif
+        ptl_nid_t *nid = arg;
+        
+        return (unsigned int)(*nid);
 }
 
 
@@ -95,11 +117,8 @@ static unsigned int connection_key(unsigned int *id)
 void remove_connection(void *arg)
 {
         connection c = arg;
-        unsigned int id[2];
         
-        id[0]=c->ip;
-        id[1]=c->port;
-        hash_table_remove(c->m->connections,id);
+        hash_table_remove(c->m->connections,&c->peer_nid);
         close(c->fd);
         free(c);
 }
@@ -149,111 +168,131 @@ static int connection_input(void *d)
 }
 
 
-/* Function:  allocate_connection
- * Arguments: t:    tcpnal the allocation is occuring in the context of
- *            dest: portal endpoint address for this connection
- *            fd:   open file descriptor for the socket
- * Returns: an allocated connection structure
- *
- * just encompasses the action common to active and passive
- *  connections of allocation and placement in the global table
- */
-static connection allocate_connection(manager m,
-                               unsigned int ip,
-                               unsigned short port,
-                               int fd)
+/* Function:  allocate_connection
+ * Arguments: m:   manager whose connection table receives the entry
+ *            nid: NID of the peer at the other end of the socket
+ *            fd:  open file descriptor for the socket
+ * Returns: the new connection, or NULL if it could not be allocated
+ *          (fd is left open for the caller in that case)
+ *
+ * registers a read handler for fd and files the connection in the
+ *  manager's table under the peer NID
+ */
+static connection 
+allocate_connection(manager        m,
+                    ptl_nid_t      nid,
+                    int            fd)
 {
     connection c=malloc(sizeof(struct connection));
-    unsigned int id[2];
+
+    /* don't fault on OOM; force_tcp_connection tests the result */
+    if (c == NULL)
+            return NULL;
+
     c->m=m;
     c->fd=fd;
-    c->ip=ip;
-    c->port=port;
-    id[0]=ip;
-    id[1]=port;
+    c->peer_nid = nid;
+
     register_io_handler(fd,READ_HANDLER,connection_input,c);
-    hash_table_insert(m->connections,c,id);
+    /* key on the copy held in the connection (as remove_connection does)
+     * rather than on the by-value parameter, whose storage ends on return */
+    hash_table_insert(m->connections,c,&c->peer_nid);
     return(c);
 }
 
-
-/* Function:  new_connection
- * Arguments: t: opaque argument holding the tcpname
- * Returns: 1 in order to reregister for new connection requests
- *
- *  called when the bound service socket recieves
- *     a new connection request, it always accepts and
- *     installs a new connection
- */
-static int new_connection(void *z)
+/* Function:  tcpnal_write
+ * Arguments: nid:    peer NID, used only to label error messages
+ *            sockfd: socket to write to
+ *            buffer: data to send
+ *            nob:    number of bytes to send
+ * Returns: 0 if all nob bytes went out in a single write, -1 otherwise
+ *
+ * makes exactly one write attempt; a short write is logged and
+ *  reported as a failure rather than retried
+ */
+int
+tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob)
 {
-    manager m=z;
-    struct sockaddr_in s;
-    int len=sizeof(struct sockaddr_in);
-    int fd=accept(m->bound,(struct sockaddr *)&s,&len);
-    unsigned int nid=*((unsigned int *)&s.sin_addr);
-    /* cfs specific hack */
-    //unsigned short pid=s.sin_port;
-    pthread_mutex_lock(&m->conn_lock);
-    allocate_connection(m,htonl(nid),0/*pid*/,fd);
-    pthread_mutex_unlock(&m->conn_lock);
-    return(1);
+        int rc = syscall(SYS_write, sockfd, buffer, nob);
+        
+        /* NB called on an 'empty' socket with huge buffering! */
+        if (rc == nob)
+                return 0;
+
+        if (rc < 0) {
+                CERROR("Failed to send to %s: %s\n",
+                       libcfs_nid2str(nid), strerror(errno));
+                return -1;
+        }
+        
+        CERROR("Short send to %s: %d/%d\n",
+               libcfs_nid2str(nid), rc, nob);
+        return -1;
 }
 
-extern ptl_nid_t tcpnal_mynid;
+/* Function:  tcpnal_read
+ * Arguments: nid:    peer NID, used only to label error messages
+ *            sockfd: socket to read from
+ *            buffer: where to put the data
+ *            nob:    number of bytes to read
+ * Returns: 0 once exactly nob bytes have been stored in buffer,
+ *          -1 on EOF or read error
+ *
+ * loops until the whole message has arrived, since a single read
+ *  may return fewer bytes than requested
+ */
+int
+tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob) 
+{
+        char     *dest = buffer;
+        int       rc;
+
+        while (nob > 0) {
+                rc = syscall(SYS_read, sockfd, dest, nob);
+                
+                if (rc == 0) {
+                        CERROR("Unexpected EOF from %s\n",
+                               libcfs_nid2str(nid));
+                        return -1;
+                }
+
+                if (rc < 0) {
+                        CERROR("Failed to receive from %s: %s\n",
+                               libcfs_nid2str(nid), strerror(errno));
+                        return -1;
+                }
+
+                /* step past what has arrived so the next read appends to
+                 * it instead of overwriting the start of the buffer */
+                dest += rc;
+                nob -= rc;
+        }
+        return 0;
+}
 
 int
-tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
+tcpnal_hello (int sockfd, ptl_nid_t nid)
 {
-        int                 rc;
-        int                 nob;
-        ptl_hdr_t           hdr;
-        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+        struct timeval         tv;
+        __u64                  incarnation;
+        int                    rc;
+        int                    nob;
+        ptl_acceptor_connreq_t cr;
+        ptl_hdr_t              hdr;
+        ptl_magicversion_t    *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
 
-        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+        gettimeofday(&tv, NULL);
+        incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+        memset(&cr, 0, sizeof(cr));
+        cr.acr_magic   = PTL_PROTO_ACCEPTOR_MAGIC;
+        cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION;
+        cr.acr_nid     = nid;
+
+        CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
 
         memset (&hdr, 0, sizeof (hdr));
         hmv->magic         = cpu_to_le32(PTL_PROTO_TCP_MAGIC);
         hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR);
         hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR);
         
-        hdr.src_nid = cpu_to_le64(tcpnal_mynid);
-        hdr.type    = cpu_to_le32(PTL_MSG_HELLO);
+        /* hdr.src_nid/src_pid are ignored at dest */
 
-        hdr.msg.hello.type = cpu_to_le32(type);
+        hdr.type    = cpu_to_le32(PTL_MSG_HELLO);
+        hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY);
         hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
 
         /* I don't send any interface info */
 
-        /* Assume sufficient socket buffering for this message */
-        rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
-        if (rc <= 0) {
-                CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
-                return (rc);
-        }
+        /* Assume sufficient socket buffering for these messages... */
+        rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr));
+        if (rc != 0)
+                return -1;
 
-        rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
-                return (rc);
-        }
+        rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr));
+        if (rc != 0)
+                return -1;
+
+        rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv));
+        if (rc != 0)
+                return -1;
         
         if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) {
-                CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
-                        cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid);
-                return (-EPROTO);
+                CERROR ("Bad magic %#08x (%#08x expected) from %s\n",
+                        cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, 
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
         if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) ||
             hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) {
                 CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-                        " from "LPX64"\n",
+                        " from %s\n",
                         le16_to_cpu (hmv->version_major),
                         le16_to_cpu (hmv->version_minor),
                         PTL_PROTO_TCP_VERSION_MAJOR,
                         PTL_PROTO_TCP_VERSION_MINOR,
-                        *nid);
-                return (-EPROTO);
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
 #if (PTL_PROTO_TCP_VERSION_MAJOR != 1)
@@ -262,59 +301,40 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
         /* version 1 sends magic/version as the dest_nid of a 'hello' header,
          * so read the rest of it in now... */
 
-        rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
-                        rc, *nid);
-                return (rc);
-        }
+        rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
+        if (rc != 0)
+                return -1;
 
         /* ...and check we got what we expected */
         if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
                 CERROR ("Expecting a HELLO hdr "
-                        " but got type %d with %d payload from "LPX64"\n",
+                        " but got type %d with %d payload from %s\n",
                         le32_to_cpu (hdr.type),
-                        le32_to_cpu (hdr.payload_length), *nid);
-                return (-EPROTO);
+                        le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid));
+                return -1;
         }
 
         if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
                 CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
-                return (-EPROTO);
+                return -1;
         }
 
-        if (*nid == PTL_NID_ANY) {              /* don't know peer's nid yet */
-                *nid = le64_to_cpu(hdr.src_nid);
-        } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-                CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
-                        le64_to_cpu (hdr.src_nid), *nid);
-                return (-EPROTO);
+        if (nid != le64_to_cpu (hdr.src_nid)) {
+                CERROR ("Connected to %s, but expecting %s\n",
+                        libcfs_nid2str(le64_to_cpu (hdr.src_nid)), 
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
         /* Ignore any interface info in the payload */
         nob = le32_to_cpu(hdr.payload_length);
-        if (nob > getpagesize()) {
-                CERROR("Unexpected HELLO payload %d from "LPX64"\n",
-                       nob, *nid);
-                return (-EPROTO);
-        }
-        if (nob > 0) {
-                char *space = (char *)malloc(nob);
-                
-                if (space == NULL) {
-                        CERROR("Can't allocate scratch buffer %d\n", nob);
-                        return (-ENOMEM);
-                }
-                
-                rc = syscall(SYS_read, sockfd, space, nob);
-                if (rc <= 0) {
-                        CERROR("Error %d skipping HELLO payload from "
-                               LPX64"\n", rc, *nid);
-                        return (rc);
-                }
+        if (nob != 0) {
+                CERROR("Unexpected HELLO payload %d from %s\n",
+                       nob, libcfs_nid2str(nid));
+                return -1;
         }
 
-        return (0);
+        return 0;
 }
 
 /* Function:  force_tcp_connection
@@ -324,43 +344,56 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
  *          a pre-existing one, or a new connection
  */
 connection force_tcp_connection(manager m,
-                                unsigned int ip,
-                                unsigned short port,
+                                ptl_nid_t nid,
                                 procbridge pb)
 {
-    connection conn;
+    unsigned int       ip = PTL_NIDADDR(nid);
+    connection         conn;
     struct sockaddr_in addr;
     struct sockaddr_in locaddr; 
-    unsigned int id[2];
-    struct timeval tv;
-    __u64 incarnation;
-
-    int fd;
-    int option;
-    int rc;
-    int rport;
-    ptl_nid_t peernid = PTL_NID_ANY;
-
-    port = tcpnal_acceptor_port;
-
-    id[0] = ip;
-    id[1] = port;
+    int                fd;
+    int                option;
+    int                rc;
 
     pthread_mutex_lock(&m->conn_lock);
 
-    conn = hash_table_find(m->connections, id);
+    conn = hash_table_find(m->connections, &nid);
     if (conn)
             goto out;
 
     memset(&addr, 0, sizeof(addr));
     addr.sin_family      = AF_INET;
     addr.sin_addr.s_addr = htonl(ip);
-    addr.sin_port        = htons(port);
+    addr.sin_port        = htons(tcpnal_acceptor_port);
 
     memset(&locaddr, 0, sizeof(locaddr)); 
     locaddr.sin_family = AF_INET; 
     locaddr.sin_addr.s_addr = INADDR_ANY;
 
+#if 1 /* tcpnal connects from a non-privileged port */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+            perror("tcpnal socket failed");
+            goto out;
+    } 
+
+    option = 1;
+    rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 
+                    &option, sizeof(option));
+    if (rc != 0) {
+            perror ("Can't set SO_REUSEADDR for socket"); 
+            close(fd);
+            goto out;
+    } 
+
+    rc = connect(fd, (struct sockaddr *)&addr,
+                 sizeof(struct sockaddr_in));
+    if (rc != 0) {
+            perror("Error connecting to remote host");
+            close(fd);
+            goto out;
+    }
+#else
     for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
             fd = socket(AF_INET, SOCK_STREAM, 0);
             if (fd < 0) {
@@ -401,24 +434,20 @@ connection force_tcp_connection(manager m,
             fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
             goto out;
     }
+#endif
     
-#if 1
-    option = 1;
+    option = tcpnal_nagle ? 0 : 1;
     setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
-    option = 1<<20;
+    option = tcpnal_buffer_size;
     setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
-    option = 1<<20;
+    option = tcpnal_buffer_size;
     setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
-#endif
    
-    gettimeofday(&tv, NULL);
-    incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
     /* say hello */
-    if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
-            exit(-1);
+    if (tcpnal_hello(fd, nid))
+            goto out;
     
-    conn = allocate_connection(m, ip, port, fd);
+    conn = allocate_connection(m, nid, fd);
     
     /* let nal thread know this event right away */
     if (conn)
@@ -430,6 +459,30 @@ out:
 }
 
 
+#if 0                                           /* we don't accept connections */
+/* Function:  new_connection
+ * Arguments: t: opaque argument holding the tcpname
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ *  called when the bound service socket receives
+ *     a new connection request, it always accepts and
+ *     installs a new connection
+ */
+static int new_connection(void *z)
+{
+    manager m=z;
+    struct sockaddr_in s;
+    int len=sizeof(struct sockaddr_in);
+    int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+    unsigned int nid=*((unsigned int *)&s.sin_addr);
+    /* cfs specific hack */
+    //unsigned short pid=s.sin_port;
+    pthread_mutex_lock(&m->conn_lock);
+    allocate_connection(m,htonl(nid),0/*pid*/,fd);
+    pthread_mutex_unlock(&m->conn_lock);
+    return(1);
+}
+
 /* Function:  bind_socket
  * Arguments: t: the nal state for this interface
  *            port: the port to attempt to bind to
@@ -467,6 +520,7 @@ static int bind_socket(manager m,unsigned short port)
     m->port=addr.sin_port;
     return(1);
 }
+#endif
 
 
 /* Function:  shutdown_connections
@@ -476,32 +530,36 @@ static int bind_socket(manager m,unsigned short port)
  */
 void shutdown_connections(manager m)
 {
-    close(m->bound);
-    remove_io_handler(m->bound_handler);
-    hash_destroy_table(m->connections,remove_connection);
-    free(m);
+#if 0
+        /* we don't accept connections */
+        close(m->bound);
+        remove_io_handler(m->bound_handler);
+#endif
+        hash_destroy_table(m->connections,remove_connection);
+        free(m);
 }
 
 
 /* Function:  init_connections
  * Arguments: t: the nal state for this interface
- *            port: the port to attempt to bind to
  * Returns: a newly allocated manager structure, or
  *          zero if the fixed port could not be bound
  */
-manager init_connections(unsigned short pid,
-                         int (*input)(void *, void *),
-                         void *a)
+manager init_connections(int (*input)(void *, void *), void *a)
 {
     manager m = (manager)malloc(sizeof(struct manager));
+
+    /* report allocation failure to the caller instead of faulting below */
+    if (m == NULL)
+        return NULL;
+
+    /* bind_socket() used to fill this in; it is compiled out now that
+     * we don't accept connections, so don't leave the field as garbage */
+    m->port = 0;
+
     m->connections = hash_create_table(compare_connection,connection_key);
     m->handler = input;
     m->handler_arg = a;
     pthread_mutex_init(&m->conn_lock, 0);
 
+    return m;
+#if 0
     if (bind_socket(m,pid))
         return(m);
 
     free(m);
     return(0);
+#endif
 }
index 343ffa6..f4e8544 100644 (file)
 #include <procbridge.h>
 
 typedef struct manager {
-    table connections;
+    table           connections;
     pthread_mutex_t conn_lock; /* protect connections table */
-    int bound;
-    io_handler bound_handler;
-    int (*handler)(void *, void *);
-    void *handler_arg;
-    unsigned short port;
+#if 0                                           /* we don't accept connections */
+    int             bound;
+    io_handler      bound_handler;
+#endif
+    int           (*handler)(void *, void *);
+    void           *handler_arg;
+    unsigned short  port;
 } *manager;
 
 
 typedef struct connection {
-    unsigned int ip;
-    unsigned short port;
-    int fd;
-    manager m;
+        ptl_nid_t      peer_nid;
+        int            fd;
+        manager        m;
 } *connection;
 
-connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
-                                procbridge pb);
-manager init_connections(unsigned short, int (*f)(void *, void *), void *);
+connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb);
+manager init_connections(int (*f)(void *, void *), void *);
 void remove_connection(void *arg);
 void shutdown_connections(manager m);
 int read_connection(connection c, unsigned char *dest, int len);
diff --git a/lnet/ulnds/ipmap.h b/lnet/ulnds/ipmap.h
deleted file mode 100644 (file)
index 85b1e18..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#define DIRECT_IP_MODE
-#ifdef DIRECT_IP_MODE
-#define PNAL_NID(in_addr, port) (in_addr)
-#define PNAL_PID(pid) (pid)
-#define PNAL_IP(in_addr, port) (in_addr)
-#define PNAL_PORT(nid, pid) (pid)
-#else
-
-#define PNAL_BASE_PORT 4096
-#define PNAL_HOSTID_SHIFT 24
-#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
-#define PNAL_VNODE_SHIFT 8
-#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
-#define PNAL_PID_SHIFT 8
-#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
-
-#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
-                                    << PNAL_VNODE_SHIFT) \
-                                   | (((ntohs(port)-PNAL_BASE_PORT) >>\
-                                       PNAL_PID_SHIFT)))
-#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT)  & PNAL_PID_MASK)
-
-#define PNAL_IP(nid,t)  (htonl((((unsigned)(nid))\
-                                >> PNAL_VNODE_SHIFT)\
-                               | (t->iptop8 << PNAL_HOSTID_SHIFT)))
-#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
-                                 << PNAL_VNODE_SHIFT) \
-                                | ((pid) & PNAL_PID_MASK)) \
-                               + PNAL_BASE_PORT))
-#endif
index 0538920..a13b6b7 100644 (file)
 #include <unistd.h>
 #include <string.h>
 #ifndef __CYGWIN__
-#include <syscall.h>
+# include <syscall.h>
 #endif
+#include <netdb.h>
 #include <sys/socket.h>
+#include <netinet/in.h>
 #include <procbridge.h>
 #include <pqtimer.h>
 #include <dispatch.h>
 #include <errno.h>
+#ifdef HAVE_GETHOSTBYNAME
+# include <sys/utsname.h>
+#endif
 
 /* XXX CFS workaround, to give a chance to let nal thread wake up
  * from waiting in select
@@ -67,8 +72,6 @@ ptl_nal_t tcpnal_nal = {
         .nal_recv      = tcpnal_recv,
 };
 int       tcpnal_running;
-ptl_nid_t tcpnal_mynid;
-
 
 /* Function: shutdown
  * Arguments: ni: the instance of me
@@ -117,10 +120,15 @@ ptl_err_t
 procbridge_startup (ptl_ni_t *ni)
 {
     procbridge p;
-    bridge b;
+    bridge     b;
+
+    /* NB The local NID is not assigned.  We only ever connect to the socknal,
+     * which assigns the src nid/pid on incoming non-privileged connections
+     * (i.e. us), and we don't accept connections. */
 
-    LASSERT(ni->ni_nal == &tcpnal_nal);
-    LASSERT (!tcpnal_running);           /* only single instance supported */
+    LASSERT (ni->ni_nal == &tcpnal_nal);
+    LASSERT (!tcpnal_running);                  /* only single instance supported */
+    LASSERT (ni->ni_interfaces[0] == NULL);     /* explicit interface(s) not supported */
 
     init_unix_timer();
 
@@ -170,8 +178,6 @@ procbridge_startup (ptl_ni_t *ni)
     if (p->nal_flags & NAL_FLAG_STOPPED)
         return PTL_FAIL;
 
-    /* so what a load of bollocks set_address() is... */
-    ni->ni_nid = tcpnal_mynid;
     tcpnal_running = 1;
 
     return PTL_OK;
index f2de984..017db57 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <pthread.h>
 #include <bridge.h>
-#include <ipmap.h>
 
 
 #define NAL_FLAG_RUNNING        1
@@ -39,7 +38,6 @@ typedef struct nal_init_args {
 
 extern void *nal_thread(void *);
 
-extern void set_address(bridge t,ptl_pid_t pidrequest);
 extern void procbridge_wakeup_nal(procbridge p);
 
 extern ptl_err_t procbridge_startup (ptl_ni_t *);
index a93004b..c2dda30 100644 (file)
@@ -82,10 +82,6 @@ void *nal_thread(void *z)
     procbridge p=b->local;
     int rc;
     
-    /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass
-     * that to set_address... */
-    set_address(b, ptl_apini.apini_pid);
-    
     rc = tcpnal_init(b);
 
     /*
index 3437d39..5b84424 100644 (file)
@@ -4,7 +4,10 @@ noinst_LIBRARIES = libtcpnal.a
 endif
 endif
 
-noinst_HEADERS =  pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h
-libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h
+noinst_HEADERS =  pqtimer.h dispatch.h table.h timer.h \
+                 connection.h bridge.h procbridge.h
+libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h \
+                     dispatch.h table.h timer.h procapi.c proclib.c \
+                     connection.c tcpnal.c connection.h
 libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
 libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lnet/ulnds/socklnd/address.c b/lnet/ulnds/socklnd/address.c
deleted file mode 100644 (file)
index f47964c..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* address.c:
- * this file provides functions to aquire the IP address of the node
- * and translate them into a NID/PID pair which supports a static
- * mapping of virtual nodes into the port range of an IP socket.
-*/
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <stdlib.h>
-#include <netdb.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <portals/p30.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Function:  get_node_id
- * Returns: a 32 bit id for this node, actually a big-endian IP address
- *
- * get_node_id() determines the host name and uses the resolver to
- *  find out its ip address. This is fairly fragile and inflexible, but
- *  explicitly asking about interfaces and their addresses is very
- *  complicated and nonportable.
- */
-static unsigned int get_node_id(void)
-{
-    char buffer[255];
-    unsigned int x;
-    struct hostent *he;
-    char * host_envp;
-
-    if (!(host_envp = getenv("PTL_HOSTID")))
-        {
-            gethostname(buffer,sizeof(buffer));
-            he=gethostbyname(buffer);
-            if (he)
-                    x=*(unsigned int *)he->h_addr_list[0];
-            else
-                    x = 0;
-            return(ntohl(x));
-        }
-    else
-        {
-            if (host_envp[1] != 'x')
-                {
-                    int a, b, c, d;
-                    sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d);
-                    return ((a<<24) | (b<<16) | (c<<8) | d);
-                }
-            else
-                {
-                    long long hostid = strtoll(host_envp, 0, 0);
-                    return((unsigned int) hostid);
-                }
-        }
-}
-
-
-/* Function:  set_address
- * Arugments: t: a procnal structure to populate with the request
- *
- * set_address performs the bit manipulations to set the nid, pid, and
- *    iptop8 fields of the procnal structures.
- *
- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
- */
-
-#ifdef DIRECT_IP_MODE
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int port;
-    if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
-    else port=pidrequest;
-    t->b_ni->ni_nid=get_node_id();
-    ptl_apini.apini_pid=port;
-}
-#else
-
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int virtnode, in_addr, port;
-    ptl_pid_t pid;
-
-    /* get and remember my node id*/
-    if (!getenv("PTL_VIRTNODE"))
-        virtnode = 0;
-    else
-        {
-            int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
-                                              >> PNAL_VNODE_SHIFT);
-            virtnode = atoi(getenv("PTL_VIRTNODE"));
-            if (virtnode > maxvnode)
-                {
-                    fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
-                            virtnode, maxvnode);
-                    return;
-                }
-        }
-
-    in_addr = get_node_id();
-
-    t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->b_ni->ni_nid = ((in_addr & PNAL_HOSTID_MASK)
-                       << PNAL_VNODE_SHIFT)
-                      + virtnode;
-    pid=pidrequest;
-    /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
-#ifdef notyet
-    if (pid==(unsigned short)PTL_PID_ANY) port = 0;
-#endif
-    if (pid==(unsigned short)PTL_PID_ANY)
-        {
-            fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
-            return;
-        }
-    else if (pid > PNAL_PID_MASK)
-        {
-            fprintf(stderr, "portal pid of %d is too large - max %d\n",
-                    pid, PNAL_PID_MASK);
-            return;
-        }
-    else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    ptl_apini->apini_pid=pid;
-}
-#endif
index 0828fd0..67abfe3 100644 (file)
@@ -22,8 +22,7 @@
 /* connection.c:
    This file provides a simple stateful connection manager which
    builds tcp connections on demand and leaves them open for
-   future use. It also provides the machinery to allow peers
-   to connect to it
+   future use. 
 */
 
 #include <stdlib.h>
 #include <syscall.h>
 #endif
 
-/* global variable: acceptor port */
-unsigned short tcpnal_acceptor_port = 988;
+/* tunables (via environment) */
+int tcpnal_acceptor_port = 988;
+int tcpnal_buffer_size   = 2 * (PTL_MTU + sizeof(ptl_hdr_t));
+int tcpnal_nagle         = 0;
 
+int
+tcpnal_env_param (char *name, int *val) 
+{
+        char   *env = getenv(name);
+        int     n;
+        
+        if (env == NULL)
+                return 1;
+
+        n = strlen(env);                        /* scanf may not assign on EOS */
+        if (sscanf(env, "%i%n", val, &n) >= 1 &&
+            n == strlen(env))
+                return 1;
+        
+        CERROR("Can't parse environment variable '%s=%s'\n",
+               name, env);
+        return 0;
+}
+
+int
+tcpnal_set_global_params (void)
+{
+        return  tcpnal_env_param("TCPNAL_ACCEPTOR_PORT", 
+                                &tcpnal_acceptor_port) &&
+                tcpnal_env_param("TCPNAL_BUFFER_SIZE",   
+                                 &tcpnal_buffer_size) &&
+                tcpnal_env_param("TCPNAL_NAGLE",
+                                 &tcpnal_nagle);
+}
 
 /* Function:  compare_connection
  * Arguments: connection c:      a connection in the hash table
@@ -63,29 +93,21 @@ unsigned short tcpnal_acceptor_port = 988;
 static int compare_connection(void *arg1, void *arg2)
 {
     connection c = arg1;
-    unsigned int * id = arg2;
-#if 0
-    return((c->ip==id[0]) && (c->port==id[1]));
-#else
-    /* CFS specific hacking */
-    return (c->ip == id[0]);
-#endif
-}
+    ptl_nid_t *nid = arg2;
 
+    return (c->peer_nid == *nid);
+}
 
 /* Function:  connection_key
  * Arguments: ptl_process_id_t id:  an id to hash
  * Returns: a not-particularily-well-distributed hash
  *          of the id
  */
-static unsigned int connection_key(unsigned int *id)
+static unsigned int connection_key(void *arg)
 {
-#if 0
-    return(id[0]^id[1]);
-#else
-    /* CFS specific hacking */
-    return (unsigned int) id[0];
-#endif
+        ptl_nid_t *nid = arg;
+        
+        return (unsigned int)(*nid);
 }
 
 
@@ -95,11 +117,8 @@ static unsigned int connection_key(unsigned int *id)
 void remove_connection(void *arg)
 {
         connection c = arg;
-        unsigned int id[2];
         
-        id[0]=c->ip;
-        id[1]=c->port;
-        hash_table_remove(c->m->connections,id);
+        hash_table_remove(c->m->connections,&c->peer_nid);
         close(c->fd);
         free(c);
 }
@@ -149,111 +168,131 @@ static int connection_input(void *d)
 }
 
 
-/* Function:  allocate_connection
- * Arguments: t:    tcpnal the allocation is occuring in the context of
- *            dest: portal endpoint address for this connection
- *            fd:   open file descriptor for the socket
- * Returns: an allocated connection structure
- *
- * just encompasses the action common to active and passive
- *  connections of allocation and placement in the global table
- */
-static connection allocate_connection(manager m,
-                               unsigned int ip,
-                               unsigned short port,
-                               int fd)
+static connection 
+allocate_connection(manager        m,
+                    ptl_nid_t      nid,
+                    int            fd)
 {
     connection c=malloc(sizeof(struct connection));
-    unsigned int id[2];
+
     c->m=m;
     c->fd=fd;
-    c->ip=ip;
-    c->port=port;
-    id[0]=ip;
-    id[1]=port;
+    c->peer_nid = nid;
+
     register_io_handler(fd,READ_HANDLER,connection_input,c);
-    hash_table_insert(m->connections,c,id);
+    hash_table_insert(m->connections,c,&nid);
     return(c);
 }
 
-
-/* Function:  new_connection
- * Arguments: t: opaque argument holding the tcpname
- * Returns: 1 in order to reregister for new connection requests
- *
- *  called when the bound service socket recieves
- *     a new connection request, it always accepts and
- *     installs a new connection
- */
-static int new_connection(void *z)
+int
+tcpnal_write(ptl_nid_t nid, int sockfd, void *buffer, int nob)
 {
-    manager m=z;
-    struct sockaddr_in s;
-    int len=sizeof(struct sockaddr_in);
-    int fd=accept(m->bound,(struct sockaddr *)&s,&len);
-    unsigned int nid=*((unsigned int *)&s.sin_addr);
-    /* cfs specific hack */
-    //unsigned short pid=s.sin_port;
-    pthread_mutex_lock(&m->conn_lock);
-    allocate_connection(m,htonl(nid),0/*pid*/,fd);
-    pthread_mutex_unlock(&m->conn_lock);
-    return(1);
+        int rc = syscall(SYS_write, sockfd, buffer, nob);
+        
+        /* NB called on an 'empty' socket with huge buffering! */
+        if (rc == nob)
+                return 0;
+
+        if (rc < 0) {
+                CERROR("Failed to send to %s: %s\n",
+                       libcfs_nid2str(nid), strerror(errno));
+                return -1;
+        }
+        
+        CERROR("Short send to %s: %d/%d\n",
+               libcfs_nid2str(nid), rc, nob);
+        return -1;
 }
 
-extern ptl_nid_t tcpnal_mynid;
+int
+tcpnal_read(ptl_nid_t nid, int sockfd, void *buffer, int nob) 
+{
+        int       rc;
+
+        while (nob > 0) {
+                rc = syscall(SYS_read, sockfd, buffer, nob);
+                
+                if (rc == 0) {
+                        CERROR("Unexpected EOF from %s\n",
+                               libcfs_nid2str(nid));
+                        return -1;
+                }
+
+                if (rc < 0) {
+                        CERROR("Failed to receive from %s: %s\n",
+                               libcfs_nid2str(nid), strerror(errno));
+                        return -1;
+                }
+
+                nob -= rc;
+        }
+        return 0;
+}
 
 int
-tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
+tcpnal_hello (int sockfd, ptl_nid_t nid)
 {
-        int                 rc;
-        int                 nob;
-        ptl_hdr_t           hdr;
-        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
+        struct timeval         tv;
+        __u64                  incarnation;
+        int                    rc;
+        int                    nob;
+        ptl_acceptor_connreq_t cr;
+        ptl_hdr_t              hdr;
+        ptl_magicversion_t    *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
 
-        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+        gettimeofday(&tv, NULL);
+        incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+
+        memset(&cr, 0, sizeof(cr));
+        cr.acr_magic   = PTL_PROTO_ACCEPTOR_MAGIC;
+        cr.acr_version = PTL_PROTO_ACCEPTOR_VERSION;
+        cr.acr_nid     = nid;
+
+        CLASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
 
         memset (&hdr, 0, sizeof (hdr));
         hmv->magic         = cpu_to_le32(PTL_PROTO_TCP_MAGIC);
         hmv->version_major = cpu_to_le32(PTL_PROTO_TCP_VERSION_MAJOR);
         hmv->version_minor = cpu_to_le32(PTL_PROTO_TCP_VERSION_MINOR);
         
-        hdr.src_nid = cpu_to_le64(tcpnal_mynid);
-        hdr.type    = cpu_to_le32(PTL_MSG_HELLO);
+        /* hdr.src_nid/src_pid are ignored at dest */
 
-        hdr.msg.hello.type = cpu_to_le32(type);
+        hdr.type    = cpu_to_le32(PTL_MSG_HELLO);
+        hdr.msg.hello.type = cpu_to_le32(SOCKNAL_CONN_ANY);
         hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
 
         /* I don't send any interface info */
 
-        /* Assume sufficient socket buffering for this message */
-        rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
-        if (rc <= 0) {
-                CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
-                return (rc);
-        }
+        /* Assume sufficient socket buffering for these messages... */
+        rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr));
+        if (rc != 0)
+                return -1;
 
-        rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
-                return (rc);
-        }
+        rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr));
+        if (rc != 0)
+                return -1;
+
+        rc = tcpnal_read(nid, sockfd, hmv, sizeof(*hmv));
+        if (rc != 0)
+                return -1;
         
         if (hmv->magic != le32_to_cpu(PTL_PROTO_TCP_MAGIC)) {
-                CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
-                        cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, *nid);
-                return (-EPROTO);
+                CERROR ("Bad magic %#08x (%#08x expected) from %s\n",
+                        cpu_to_le32(hmv->magic), PTL_PROTO_TCP_MAGIC, 
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
         if (hmv->version_major != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MAJOR) ||
             hmv->version_minor != cpu_to_le16 (PTL_PROTO_TCP_VERSION_MINOR)) {
                 CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-                        " from "LPX64"\n",
+                        " from %s\n",
                         le16_to_cpu (hmv->version_major),
                         le16_to_cpu (hmv->version_minor),
                         PTL_PROTO_TCP_VERSION_MAJOR,
                         PTL_PROTO_TCP_VERSION_MINOR,
-                        *nid);
-                return (-EPROTO);
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
 #if (PTL_PROTO_TCP_VERSION_MAJOR != 1)
@@ -262,59 +301,40 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
         /* version 1 sends magic/version as the dest_nid of a 'hello' header,
          * so read the rest of it in now... */
 
-        rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
-                        rc, *nid);
-                return (rc);
-        }
+        rc = tcpnal_read(nid, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
+        if (rc != 0)
+                return -1;
 
         /* ...and check we got what we expected */
         if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
                 CERROR ("Expecting a HELLO hdr "
-                        " but got type %d with %d payload from "LPX64"\n",
+                        " but got type %d with %d payload from %s\n",
                         le32_to_cpu (hdr.type),
-                        le32_to_cpu (hdr.payload_length), *nid);
-                return (-EPROTO);
+                        le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid));
+                return -1;
         }
 
         if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
                 CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
-                return (-EPROTO);
+                return -1;
         }
 
-        if (*nid == PTL_NID_ANY) {              /* don't know peer's nid yet */
-                *nid = le64_to_cpu(hdr.src_nid);
-        } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-                CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
-                        le64_to_cpu (hdr.src_nid), *nid);
-                return (-EPROTO);
+        if (nid != le64_to_cpu (hdr.src_nid)) {
+                CERROR ("Connected to %s, but expecting %s\n",
+                        libcfs_nid2str(le64_to_cpu (hdr.src_nid)), 
+                        libcfs_nid2str(nid));
+                return -1;
         }
 
         /* Ignore any interface info in the payload */
         nob = le32_to_cpu(hdr.payload_length);
-        if (nob > getpagesize()) {
-                CERROR("Unexpected HELLO payload %d from "LPX64"\n",
-                       nob, *nid);
-                return (-EPROTO);
-        }
-        if (nob > 0) {
-                char *space = (char *)malloc(nob);
-                
-                if (space == NULL) {
-                        CERROR("Can't allocate scratch buffer %d\n", nob);
-                        return (-ENOMEM);
-                }
-                
-                rc = syscall(SYS_read, sockfd, space, nob);
-                if (rc <= 0) {
-                        CERROR("Error %d skipping HELLO payload from "
-                               LPX64"\n", rc, *nid);
-                        return (rc);
-                }
+        if (nob != 0) {
+                CERROR("Unexpected HELLO payload %d from %s\n",
+                       nob, libcfs_nid2str(nid));
+                return -1;
         }
 
-        return (0);
+        return 0;
 }
 
 /* Function:  force_tcp_connection
@@ -324,43 +344,56 @@ tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
  *          a pre-existing one, or a new connection
  */
 connection force_tcp_connection(manager m,
-                                unsigned int ip,
-                                unsigned short port,
+                                ptl_nid_t nid,
                                 procbridge pb)
 {
-    connection conn;
+    unsigned int       ip = PTL_NIDADDR(nid);
+    connection         conn;
     struct sockaddr_in addr;
     struct sockaddr_in locaddr; 
-    unsigned int id[2];
-    struct timeval tv;
-    __u64 incarnation;
-
-    int fd;
-    int option;
-    int rc;
-    int rport;
-    ptl_nid_t peernid = PTL_NID_ANY;
-
-    port = tcpnal_acceptor_port;
-
-    id[0] = ip;
-    id[1] = port;
+    int                fd;
+    int                option;
+    int                rc;
 
     pthread_mutex_lock(&m->conn_lock);
 
-    conn = hash_table_find(m->connections, id);
+    conn = hash_table_find(m->connections, &nid);
     if (conn)
             goto out;
 
     memset(&addr, 0, sizeof(addr));
     addr.sin_family      = AF_INET;
     addr.sin_addr.s_addr = htonl(ip);
-    addr.sin_port        = htons(port);
+    addr.sin_port        = htons(tcpnal_acceptor_port);
 
     memset(&locaddr, 0, sizeof(locaddr)); 
     locaddr.sin_family = AF_INET; 
     locaddr.sin_addr.s_addr = INADDR_ANY;
 
+#if 1 /* tcpnal connects from a non-privileged port */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+            perror("tcpnal socket failed");
+            goto out;
+    } 
+
+    option = 1;
+    rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 
+                    &option, sizeof(option));
+    if (rc != 0) {
+            perror ("Can't set SO_REUSEADDR for socket"); 
+            close(fd);
+            goto out;
+    } 
+
+    rc = connect(fd, (struct sockaddr *)&addr,
+                 sizeof(struct sockaddr_in));
+    if (rc != 0) {
+            perror("Error connecting to remote host");
+            close(fd);
+            goto out;
+    }
+#else
     for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
             fd = socket(AF_INET, SOCK_STREAM, 0);
             if (fd < 0) {
@@ -401,24 +434,20 @@ connection force_tcp_connection(manager m,
             fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
             goto out;
     }
+#endif
     
-#if 1
-    option = 1;
+    option = tcpnal_nagle ? 0 : 1;
     setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
-    option = 1<<20;
+    option = tcpnal_buffer_size;
     setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
-    option = 1<<20;
+    option = tcpnal_buffer_size;
     setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
-#endif
    
-    gettimeofday(&tv, NULL);
-    incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
     /* say hello */
-    if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
-            exit(-1);
+    if (tcpnal_hello(fd, nid))
+            goto out;
     
-    conn = allocate_connection(m, ip, port, fd);
+    conn = allocate_connection(m, nid, fd);
     
     /* let nal thread know this event right away */
     if (conn)
@@ -430,6 +459,30 @@ out:
 }
 
 
+#if 0                                           /* we don't accept connections */
+/* Function:  new_connection
+ * Arguments: t: opaque argument holding the tcpname
+ * Returns: 1 in order to reregister for new connection requests
+ *
+ *  called when the bound service socket receives
+ *     a new connection request, it always accepts and
+ *     installs a new connection
+ */
+static int new_connection(void *z)
+{
+    manager m=z;
+    struct sockaddr_in s;
+    int len=sizeof(struct sockaddr_in);
+    int fd=accept(m->bound,(struct sockaddr *)&s,&len);
+    unsigned int nid=*((unsigned int *)&s.sin_addr);
+    /* cfs specific hack */
+    //unsigned short pid=s.sin_port;
+    pthread_mutex_lock(&m->conn_lock);
+    allocate_connection(m,htonl(nid),0/*pid*/,fd);
+    pthread_mutex_unlock(&m->conn_lock);
+    return(1);
+}
+
 /* Function:  bind_socket
  * Arguments: t: the nal state for this interface
  *            port: the port to attempt to bind to
@@ -467,6 +520,7 @@ static int bind_socket(manager m,unsigned short port)
     m->port=addr.sin_port;
     return(1);
 }
+#endif
 
 
 /* Function:  shutdown_connections
@@ -476,32 +530,36 @@ static int bind_socket(manager m,unsigned short port)
  */
 void shutdown_connections(manager m)
 {
-    close(m->bound);
-    remove_io_handler(m->bound_handler);
-    hash_destroy_table(m->connections,remove_connection);
-    free(m);
+#if 0
+        /* we don't accept connections */
+        close(m->bound);
+        remove_io_handler(m->bound_handler);
+#endif
+        hash_destroy_table(m->connections,remove_connection);
+        free(m);
 }
 
 
 /* Function:  init_connections
  * Arguments: t: the nal state for this interface
- *            port: the port to attempt to bind to
  * Returns: a newly allocated manager structure, or
  *          zero if the fixed port could not be bound
  */
-manager init_connections(unsigned short pid,
-                         int (*input)(void *, void *),
-                         void *a)
+manager init_connections(int (*input)(void *, void *), void *a)
 {
     manager m = (manager)malloc(sizeof(struct manager));
+
     m->connections = hash_create_table(compare_connection,connection_key);
     m->handler = input;
     m->handler_arg = a;
     pthread_mutex_init(&m->conn_lock, 0);
 
+    return m;
+#if 0
     if (bind_socket(m,pid))
         return(m);
 
     free(m);
     return(0);
+#endif
 }
index 343ffa6..f4e8544 100644 (file)
 #include <procbridge.h>
 
 typedef struct manager {
-    table connections;
+    table           connections;
     pthread_mutex_t conn_lock; /* protect connections table */
-    int bound;
-    io_handler bound_handler;
-    int (*handler)(void *, void *);
-    void *handler_arg;
-    unsigned short port;
+#if 0                                           /* we don't accept connections */
+    int             bound;
+    io_handler      bound_handler;
+#endif
+    int           (*handler)(void *, void *);
+    void           *handler_arg;
+    unsigned short  port;
 } *manager;
 
 
 typedef struct connection {
-    unsigned int ip;
-    unsigned short port;
-    int fd;
-    manager m;
+        ptl_nid_t      peer_nid;
+        int            fd;
+        manager        m;
 } *connection;
 
-connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
-                                procbridge pb);
-manager init_connections(unsigned short, int (*f)(void *, void *), void *);
+connection force_tcp_connection(manager m, ptl_nid_t nid, procbridge pb);
+manager init_connections(int (*f)(void *, void *), void *);
 void remove_connection(void *arg);
 void shutdown_connections(manager m);
 int read_connection(connection c, unsigned char *dest, int len);
diff --git a/lnet/ulnds/socklnd/ipmap.h b/lnet/ulnds/socklnd/ipmap.h
deleted file mode 100644 (file)
index 85b1e18..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#define DIRECT_IP_MODE
-#ifdef DIRECT_IP_MODE
-#define PNAL_NID(in_addr, port) (in_addr)
-#define PNAL_PID(pid) (pid)
-#define PNAL_IP(in_addr, port) (in_addr)
-#define PNAL_PORT(nid, pid) (pid)
-#else
-
-#define PNAL_BASE_PORT 4096
-#define PNAL_HOSTID_SHIFT 24
-#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
-#define PNAL_VNODE_SHIFT 8
-#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
-#define PNAL_PID_SHIFT 8
-#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
-
-#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
-                                    << PNAL_VNODE_SHIFT) \
-                                   | (((ntohs(port)-PNAL_BASE_PORT) >>\
-                                       PNAL_PID_SHIFT)))
-#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT)  & PNAL_PID_MASK)
-
-#define PNAL_IP(nid,t)  (htonl((((unsigned)(nid))\
-                                >> PNAL_VNODE_SHIFT)\
-                               | (t->iptop8 << PNAL_HOSTID_SHIFT)))
-#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
-                                 << PNAL_VNODE_SHIFT) \
-                                | ((pid) & PNAL_PID_MASK)) \
-                               + PNAL_BASE_PORT))
-#endif
index 0538920..a13b6b7 100644 (file)
 #include <unistd.h>
 #include <string.h>
 #ifndef __CYGWIN__
-#include <syscall.h>
+# include <syscall.h>
 #endif
+#include <netdb.h>
 #include <sys/socket.h>
+#include <netinet/in.h>
 #include <procbridge.h>
 #include <pqtimer.h>
 #include <dispatch.h>
 #include <errno.h>
+#ifdef HAVE_GETHOSTBYNAME
+# include <sys/utsname.h>
+#endif
 
 /* XXX CFS workaround, to give a chance to let nal thread wake up
  * from waiting in select
@@ -67,8 +72,6 @@ ptl_nal_t tcpnal_nal = {
         .nal_recv      = tcpnal_recv,
 };
 int       tcpnal_running;
-ptl_nid_t tcpnal_mynid;
-
 
 /* Function: shutdown
  * Arguments: ni: the instance of me
@@ -117,10 +120,15 @@ ptl_err_t
 procbridge_startup (ptl_ni_t *ni)
 {
     procbridge p;
-    bridge b;
+    bridge     b;
+
+    /* NB The local NID is not assigned.  We only ever connect to the socknal,
+     * which assigns the src nid/pid on incoming non-privileged connections
+     * (i.e. us), and we don't accept connections. */
 
-    LASSERT(ni->ni_nal == &tcpnal_nal);
-    LASSERT (!tcpnal_running);           /* only single instance supported */
+    LASSERT (ni->ni_nal == &tcpnal_nal);
+    LASSERT (!tcpnal_running);                  /* only single instance supported */
+    LASSERT (ni->ni_interfaces[0] == NULL);     /* explicit interface(s) not supported */
 
     init_unix_timer();
 
@@ -170,8 +178,6 @@ procbridge_startup (ptl_ni_t *ni)
     if (p->nal_flags & NAL_FLAG_STOPPED)
         return PTL_FAIL;
 
-    /* so what a load of bollocks set_address() is... */
-    ni->ni_nid = tcpnal_mynid;
     tcpnal_running = 1;
 
     return PTL_OK;
index f2de984..017db57 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <pthread.h>
 #include <bridge.h>
-#include <ipmap.h>
 
 
 #define NAL_FLAG_RUNNING        1
@@ -39,7 +38,6 @@ typedef struct nal_init_args {
 
 extern void *nal_thread(void *);
 
-extern void set_address(bridge t,ptl_pid_t pidrequest);
 extern void procbridge_wakeup_nal(procbridge p);
 
 extern ptl_err_t procbridge_startup (ptl_ni_t *);
index a93004b..c2dda30 100644 (file)
@@ -82,10 +82,6 @@ void *nal_thread(void *z)
     procbridge p=b->local;
     int rc;
     
-    /* _the_ NI (ptl_apini) has already been set up with a requested pid; pass
-     * that to set_address... */
-    set_address(b, ptl_apini.apini_pid);
-    
     rc = tcpnal_init(b);
 
     /*
index 662775a..eb390c4 100644 (file)
@@ -110,7 +110,7 @@ unsigned int key_from_string(char *s)
  * Returns: a pointer to the new table
  */
 table hash_create_table (int (*compare_function)(void *, void *),
-                    unsigned int (*key_function)(unsigned int *))
+                    unsigned int (*key_function)(void *))
 {
     table new=(table)malloc(sizeof(struct table));
     memset(new, 0, sizeof(struct table));
index 7fab586..0cb9669 100644 (file)
@@ -22,13 +22,14 @@ typedef struct table {
   int number_of_entries;
   table_entry *entries;
   int (*compare_function)(void *, void *);
-  unsigned int (*key_function)(unsigned int *);
+  unsigned int (*key_function)(void *);
 } *table;
 
 /* table.c */
 unsigned int key_from_int(int i);
 unsigned int key_from_string(char *s);
-table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+table hash_create_table(int (*compare_function)(void *, void *), 
+                        unsigned int (*key_function)(void *));
 void *hash_table_find(table t, void *comparator);
 void hash_table_insert(table t, void *value, void *comparator);
 void hash_table_remove(table t, void *comparator);
index 8573a3d..3bbc1ec 100644 (file)
@@ -80,9 +80,7 @@ ptl_err_t tcpnal_send(ptl_ni_t *ni,
             return PTL_FAIL;
     }
     
-    if (!(c=force_tcp_connection((manager)b->lower,
-                                 PNAL_IP(target.nid,b),
-                                 PNAL_PORT(target.nid,target.pid),
+    if (!(c=force_tcp_connection((manager)b->lower, target.nid,
                                  b->local)))
         return(PTL_FAIL);
 
@@ -242,9 +240,7 @@ int tcpnal_init(bridge b)
 {
     manager m;
         
-    if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid,
-                                       ptl_apini.apini_pid),
-                             from_connection,b))){
+    if (!(m=init_connections(from_connection,b))){
         /* TODO: this needs to shut down the
            newly created junk */
         return(PTL_NAL_FAILED);
index 662775a..eb390c4 100644 (file)
@@ -110,7 +110,7 @@ unsigned int key_from_string(char *s)
  * Returns: a pointer to the new table
  */
 table hash_create_table (int (*compare_function)(void *, void *),
-                    unsigned int (*key_function)(unsigned int *))
+                    unsigned int (*key_function)(void *))
 {
     table new=(table)malloc(sizeof(struct table));
     memset(new, 0, sizeof(struct table));
index 7fab586..0cb9669 100644 (file)
@@ -22,13 +22,14 @@ typedef struct table {
   int number_of_entries;
   table_entry *entries;
   int (*compare_function)(void *, void *);
-  unsigned int (*key_function)(unsigned int *);
+  unsigned int (*key_function)(void *);
 } *table;
 
 /* table.c */
 unsigned int key_from_int(int i);
 unsigned int key_from_string(char *s);
-table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
+table hash_create_table(int (*compare_function)(void *, void *), 
+                        unsigned int (*key_function)(void *));
 void *hash_table_find(table t, void *comparator);
 void hash_table_insert(table t, void *value, void *comparator);
 void hash_table_remove(table t, void *comparator);
index 8573a3d..3bbc1ec 100644 (file)
@@ -80,9 +80,7 @@ ptl_err_t tcpnal_send(ptl_ni_t *ni,
             return PTL_FAIL;
     }
     
-    if (!(c=force_tcp_connection((manager)b->lower,
-                                 PNAL_IP(target.nid,b),
-                                 PNAL_PORT(target.nid,target.pid),
+    if (!(c=force_tcp_connection((manager)b->lower, target.nid,
                                  b->local)))
         return(PTL_FAIL);
 
@@ -242,9 +240,7 @@ int tcpnal_init(bridge b)
 {
     manager m;
         
-    if (!(m=init_connections(PNAL_PORT(b->b_ni->ni_nid,
-                                       ptl_apini.apini_pid),
-                             from_connection,b))){
+    if (!(m=init_connections(from_connection,b))){
         /* TODO: this needs to shut down the
            newly created junk */
         return(PTL_NAL_FAILED);
index 057b760..886e5f7 100644 (file)
@@ -512,6 +512,7 @@ int
 jt_ptl_print_peers (int argc, char **argv)
 {
         struct portal_ioctl_data data;
+        ptl_process_id_t         id;
         char                     buffer[2][64];
         int                      index;
         int                      rc;
@@ -529,23 +530,26 @@ jt_ptl_print_peers (int argc, char **argv)
                 if (rc != 0)
                         break;
 
-                if (g_net_is_compatible(NULL, SOCKNAL, 0))
+                if (g_net_is_compatible(NULL, SOCKNAL, 0)) {
+                        id.nid = data.ioc_nid;
+                        id.pid = data.ioc_u32[4];
                         printf ("%-20s [%d]%s->%s:%d #%d\n",
-                                libcfs_nid2str(data.ioc_nid), 
+                                libcfs_id2str(id), 
                                 data.ioc_count, /* persistence */
                                 ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */
                                 ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
                                 data.ioc_u32[1], /* peer port */
                                 data.ioc_u32[3]); /* conn_count */
-                else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0))
+                } else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0)) {
                         printf ("%-20s [%d]@%s:%d\n",
                                 libcfs_nid2str(data.ioc_nid), 
                                 data.ioc_count,
                                 ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
                                 data.ioc_u32[1]); /* peer port */
-                else
+                } else {
                         printf ("%-20s [%d]\n",
                                 libcfs_nid2str(data.ioc_nid), data.ioc_count);
+                }
         }
 
         if (index == 0) {
@@ -681,6 +685,7 @@ int
 jt_ptl_print_connections (int argc, char **argv)
 {
         struct portal_ioctl_data data;
+        ptl_process_id_t         id;
         char                     buffer[2][64];
         int                      index;
         int                      rc;
@@ -698,9 +703,11 @@ jt_ptl_print_connections (int argc, char **argv)
                 if (rc != 0)
                         break;
 
-                if (g_net_is_compatible (NULL, SOCKNAL, 0))
+                if (g_net_is_compatible (NULL, SOCKNAL, 0)) {
+                        id.nid = data.ioc_nid;
+                        id.pid = data.ioc_u32[6];
                         printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n",
-                                libcfs_nid2str(data.ioc_nid),
+                                libcfs_id2str(id),
                                 (data.ioc_u32[3] == SOCKNAL_CONN_ANY) ? "A" :
                                 (data.ioc_u32[3] == SOCKNAL_CONN_CONTROL) ? "C" :
                                 (data.ioc_u32[3] == SOCKNAL_CONN_BULK_IN) ? "I" :
@@ -712,12 +719,13 @@ jt_ptl_print_connections (int argc, char **argv)
                                 data.ioc_count, /* tx buffer size */
                                 data.ioc_u32[5], /* rx buffer size */
                                 data.ioc_flags ? "nagle" : "nonagle");
-                else if (g_net_is_compatible (NULL, RANAL, 0))
+                } else if (g_net_is_compatible (NULL, RANAL, 0)) {
                         printf ("%-20s [%d]\n",
                                 libcfs_nid2str(data.ioc_nid),
                                 data.ioc_u32[0] /* device id */);
-                else
+                } else {
                         printf ("%s\n", libcfs_nid2str(data.ioc_nid));
+                }
         }
 
         if (index == 0) {