Whamcloud - gitweb
* multiple TCP networks pass initial tests
author eeb <eeb>
Wed, 11 May 2005 20:12:29 +0000 (20:12 +0000)
committer eeb <eeb>
Wed, 11 May 2005 20:12:29 +0000 (20:12 +0000)
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-p30.h
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c
lnet/libcfs/nidstrings.c
lnet/lnet/api-ni.c
lnet/lnet/config.c
lnet/lnet/lib-move.c
lnet/lnet/lo.c
lnet/lnet/module.c

index f81e488..e6a8e72 100644 (file)
@@ -412,7 +412,7 @@ ptl_ni_decref(ptl_ni_t *ni)
 }
 
 extern ptl_nal_t ptl_lonal;
-extern ptl_ni_t  ptl_loni;
+extern ptl_ni_t *ptl_loni;
 
 extern ptl_err_t ptl_get_apinih (ptl_handle_ni_t *nih);
 
index f81e488..e6a8e72 100644 (file)
@@ -412,7 +412,7 @@ ptl_ni_decref(ptl_ni_t *ni)
 }
 
 extern ptl_nal_t ptl_lonal;
-extern ptl_ni_t  ptl_loni;
+extern ptl_ni_t *ptl_loni;
 
 extern ptl_err_t ptl_get_apinih (ptl_handle_ni_t *nih);
 
index 5bccc6f..f60c567 100644 (file)
@@ -636,9 +636,9 @@ ksocknal_local_ipvec (ptl_ni_t *ni, __u32 *ipaddrs)
         read_lock (&ksocknal_data.ksnd_global_lock);
 
         nip = net->ksnn_ninterfaces;
-        for (i = 0; i < nip; i++) {
-                LASSERT (i < PTL_MAX_INTERFACES);
+        LASSERT (nip < PTL_MAX_INTERFACES);
 
+        for (i = 0; i < nip; i++) {
                 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
                 LASSERT (ipaddrs[i] != 0);
         }
@@ -990,7 +990,6 @@ ksocknal_stop_listener(void)
 int
 ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
 {
-        int                passive = (type == SOCKNAL_CONN_NONE);
         rwlock_t          *global_lock = &ksocknal_data.ksnd_global_lock;
         __u32              ipaddrs[PTL_MAX_INTERFACES];
         int                nipaddrs;
@@ -1007,7 +1006,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
         ksock_tx_t        *tx;
         int                rc;
 
-        LASSERT (route == NULL || !passive);
+        LASSERT (route == NULL == (type == SOCKNAL_CONN_NONE));
 
         rc = ksocknal_lib_setup_sock (sock);
         if (rc != 0)
@@ -1043,14 +1042,16 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
         if (rc != 0)
                 goto failed_1;
 
-        if (!passive) {
+        if (route != NULL) {
+                ptl_ni_t     *ni = route->ksnr_peer->ksnp_ni;
+                ksock_net_t  *net = ni->ni_data;
+
                 /* Active connection sends HELLO eagerly */
-                rc = ksocknal_local_ipvec(route->ksnr_peer->ksnp_ni, ipaddrs);
-                if (rc < 0)
-                        goto failed_1;
-                nipaddrs = rc;
+                nipaddrs = ksocknal_local_ipvec(ni, ipaddrs);
 
-                rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs);
+                rc = ksocknal_send_hello (conn, ni->ni_nid, 
+                                          net->ksnn_incarnation,
+                                          ipaddrs, nipaddrs);
                 if (rc != 0)
                         goto failed_1;
         }
@@ -1108,14 +1109,20 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
                 write_unlock_irqrestore(global_lock, flags);
         }
 
-        if (!passive) {
+        if (route != NULL) {
+                /* additional routes after interface exchange? */
                 ksocknal_create_routes(peer, conn->ksnc_port,
                                        ipaddrs, nipaddrs);
                 rc = 0;
         } else {
-                rc = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
-                LASSERT (rc >= 0);
-                rc = ksocknal_send_hello (conn, ipaddrs, rc);
+                ptl_ni_t    *ni = peer->ksnp_ni;
+                ksock_net_t *net = ni->ni_data;
+
+                nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
+
+                rc = ksocknal_send_hello (conn, ni->ni_nid,
+                                          net->ksnn_incarnation,
+                                          ipaddrs, nipaddrs);
         }
         if (rc < 0)
                 goto failed_2;
index de3cb63..5936445 100644 (file)
@@ -528,9 +528,10 @@ extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
 extern int ksocknal_scheduler (void *arg);
 extern int ksocknal_connd (void *arg);
 extern int ksocknal_reaper (void *arg);
-extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs);
-extern int ksocknal_recv_hello (ksock_conn_t *conn,
-                                ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs);
+extern int ksocknal_send_hello (ksock_conn_t *conn, ptl_nid_t nid,
+                                __u64 incarnation, __u32 *ipaddrs, int nipaddrs);
+extern int ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, 
+                                __u64 *incarnation, __u32 *ipaddrs);
 
 extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
 extern void ksocknal_lib_set_callback(struct socket *sock,  ksock_conn_t *conn);
index 1172e72..1ff7bd4 100644 (file)
@@ -1683,11 +1683,11 @@ void ksocknal_write_callback (ksock_conn_t *conn)
 }
 
 int
-ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
+ksocknal_send_hello (ksock_conn_t *conn, 
+                     ptl_nid_t srcnid, __u64 incarnation,
+                     __u32 *ipaddrs, int nipaddrs)
 {
         /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
-        ptl_ni_t           *ni = conn->ksnc_peer->ksnp_ni;
-        ksock_net_t        *net = ni->ni_data;
         struct socket      *sock = conn->ksnc_sock;
         ptl_hdr_t           hdr;
         ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
@@ -1695,7 +1695,7 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
         int                 rc;
 
         LASSERT (conn->ksnc_type != SOCKNAL_CONN_NONE);
-        LASSERT (nipaddrs <= PTL_MAX_INTERFACES);
+        LASSERT (0 <= nipaddrs && nipaddrs <= PTL_MAX_INTERFACES);
 
         /* No need for getconnsock/putconnsock */
         LASSERT (!conn->ksnc_closing);
@@ -1705,14 +1705,18 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
         hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
         hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
 
-        hdr.src_nid        = cpu_to_le64 (ni->ni_nid);
+        hdr.src_nid        = cpu_to_le64 (srcnid);
         hdr.type           = cpu_to_le32 (PTL_MSG_HELLO);
         hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
 
         hdr.msg.hello.type = cpu_to_le32 (conn->ksnc_type);
-        hdr.msg.hello.incarnation = cpu_to_le64 (net->ksnn_incarnation);
+        hdr.msg.hello.incarnation = cpu_to_le64 (incarnation);
 
-        /* Receiver is eager */
+        for (i = 0; i < nipaddrs; i++) {
+                ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
+        }
+
+        /* Receiver should be eager */
         rc = ksocknal_lib_sock_write (sock, &hdr, sizeof(hdr));
         if (rc != 0) {
                 CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
@@ -1723,10 +1727,6 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
         if (nipaddrs == 0)
                 return (0);
         
-        for (i = 0; i < nipaddrs; i++) {
-                ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
-        }
-
         rc = ksocknal_lib_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs));
         if (rc != 0)
                 CERROR ("Error %d sending HELLO payload (%d)"
index 1d2be09..afbe53d 100644 (file)
@@ -80,7 +80,6 @@ libcfs_next_nidstring (void)
 }
 
 #if !CRAY_PORTALS
-static void libcfs_lo_addr2str(__u32 addr, char *str);
 static int  libcfs_lo_str2addr(char *str, int nob, __u32 *addr);
 static void libcfs_ip_addr2str(__u32 addr, char *str);
 static int  libcfs_ip_str2addr(char *str, int nob, __u32 *addr);
@@ -99,7 +98,7 @@ static struct nalstrfns  libcfs_nalstrfns[] = {
         {.nf_nal      = LONAL,     
          .nf_name     = "lo",         
          .nf_modname  = "klonal",  
-         .nf_addr2str = libcfs_lo_addr2str,  
+         .nf_addr2str = libcfs_num_addr2str,  
          .nf_str2addr = libcfs_lo_str2addr},
         {.nf_nal      = SOCKNAL,    
          .nf_name     = "tcp",     
@@ -140,18 +139,9 @@ static struct nalstrfns  libcfs_nalstrfns[] = {
 
 const int libcfs_nnalstrfns = sizeof(libcfs_nalstrfns)/sizeof(libcfs_nalstrfns[0]);
 
-void
-libcfs_lo_addr2str(__u32 addr, char *str)
-{
-        /* don't print anything */
-}
-
 int
 libcfs_lo_str2addr(char *str, int nob, __u32 *addr)
 {
-        if (nob != 0)                           /* expecting the empty string */
-                return 0;
-        
         *addr = 0;
         return 1;
 }
index 79b47cc..89e075d 100644 (file)
@@ -644,6 +644,7 @@ ptl_shutdown_nalnis (void)
                 LASSERT (!in_interrupt());
                 atomic_dec(&ni->ni_nal->nal_refcount);
                 (ni->ni_nal->nal_shutdown)(ni);
+
                 PORTAL_FREE(ni, sizeof(*ni));
 
                 PTL_LOCK(flags);
@@ -763,6 +764,11 @@ PtlInit(int *max_interfaces)
         pthread_mutex_init(&ptl_apini.apini_api_mutex);
 #endif
 
+        ptl_apini.apini_init = 1;
+
+        if (max_interfaces != NULL)
+                *max_interfaces = 1;
+
         /* NALs in separate modules register themselves when their module
          * loads, and unregister themselves when their module is unloaded.
          * Otherwise they are plugged in explicitly here... */
@@ -771,10 +777,6 @@ PtlInit(int *max_interfaces)
 #ifndef __KERNEL__
         ptl_register_nal (&tcpnal_nal);
 #endif
-        ptl_apini.apini_init = 1;
-
-        if (max_interfaces != NULL)
-                *max_interfaces = 1;
 
         return PTL_OK;
 }
@@ -785,10 +787,12 @@ PtlFini(void)
         LASSERT (ptl_apini.apini_init);
         LASSERT (ptl_apini.apini_refcount == 0);
 
-#ifndef __KERNEL__
         /* See comment where tcpnal_nal registers itself */
+#ifndef __KERNEL__
         ptl_unregister_nal(&tcpnal_nal);
 #endif
+        ptl_unregister_nal(&ptl_lonal);
+
         LASSERT (list_empty(&ptl_apini.apini_nals));
 
         ptl_apini.apini_init = 0;
index 1f8d105..3834c16 100644 (file)
@@ -208,11 +208,16 @@ ptl_parse_networks(struct list_head *nilist, char *networks)
         memcpy (tokens, networks, tokensize);
        str = tokens;
 
+        PORTAL_ALLOC(ptl_loni, sizeof(*ptl_loni));
+        if (ptl_loni == NULL) {
+                CERROR("Can't allocate LO NI\n");
+                goto failed;
+        }
         /* Add in the loopback network */
         /* zero counters/flags, NULL pointers... */
-        memset(&ptl_loni, 0, sizeof(ptl_loni));
-        ptl_loni.ni_nid = PTL_MKNID(PTL_MKNET(LONAL, 0), 0);
-        list_add_tail(&ptl_loni.ni_list, nilist);
+        memset(ptl_loni, 0, sizeof(*ptl_loni));
+        ptl_loni->ni_nid = PTL_MKNID(PTL_MKNET(LONAL, 0), 0);
+        list_add_tail(&ptl_loni->ni_list, nilist);
         
         while (str != NULL && *str != 0) {
                 char      *comma = strchr(str, ',');
index 408bded..6dd787d 100644 (file)
@@ -576,7 +576,7 @@ ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
           ptl_libmd_t *md, ptl_size_t offset, ptl_size_t len)
 {
         ptl_nid_t gw_nid;
-        int       routing;
+        int       routing = 0;
         ptl_err_t rc;
 
         /* CAVEAT EMPTOR! ni != NULL == interface pre-determined (ACK) */
@@ -587,17 +587,23 @@ ptl_send (ptl_ni_t *ni, void *private, ptl_msg_t *msg,
                 return PTL_FAIL;
         }
 
-        if (target.nid != ni->ni_nid) {
-                /* will gateway have to forward? */
-                routing = (gw_nid != ni->ni_nid);
-        } else {
-                /* it's for me! */
-                ptl_ni_addref(&ptl_loni);
-                ptl_ni_decref(ni);
-                ni = &ptl_loni;
-                routing = 0;
+        if (PTL_NETNAL(PTL_NIDNET(ni->ni_nid)) == LONAL) {
+                if (target.nid != ni->ni_nid) {
+                        /* will gateway have to forward? */
+                        routing = (gw_nid != ni->ni_nid);
+                } else if (allow_destination_aliases) {
+                        /* it's for me (force lonal) */
+                        ptl_ni_addref(ptl_loni);
+                        ptl_ni_decref(ni);
+                        ni = ptl_loni;
+                } else {
+                        ptl_ni_decref(ni);
+                        CERROR("Attempt to send to self via %s, not LONAL\n",
+                               libcfs_nid2str(target.nid));
+                        return PTL_FAIL;
+                }
         }
-
+        
         hdr->type           = cpu_to_le32(type);
         hdr->dest_nid       = cpu_to_le64(target.nid);
         hdr->dest_pid       = cpu_to_le32(target.pid);
@@ -1047,7 +1053,8 @@ ptl_parse(ptl_ni_t *ni, ptl_hdr_t *hdr, void *private)
                 if (!allow_destination_aliases) {
                         /* dest is another local NI; sender should have used
                          * this node's NID on its own network */
-                        CERROR ("%s: Dropping message from %s: nid %s is a local alias\n",
+                        CERROR ("%s: Dropping message from %s: nid %s "
+                                "is a local alias\n",
                                 libcfs_nid2str(ni->ni_nid),
                                 libcfs_nid2str(le64_to_cpu(hdr->src_nid)),
                                 libcfs_nid2str(dest_nid));
index b58df75..4727e54 100644 (file)
@@ -255,7 +255,7 @@ void
 lonal_shutdown(ptl_ni_t *ni)
 {
        CDEBUG (D_NET, "shutdown\n");
-       LASSERT (ni == &ptl_loni);
+       LASSERT (ni == ptl_loni);
         LASSERT (lonal_instanced);
         
         lonal_instanced = 0;
@@ -265,7 +265,7 @@ ptl_err_t
 lonal_startup (ptl_ni_t *ni)
 {
        LASSERT (ni->ni_nal == &ptl_lonal);
-       LASSERT (ni == &ptl_loni);
+       LASSERT (ni == ptl_loni);
        LASSERT (!lonal_instanced);
         lonal_instanced = 1;
 
@@ -284,4 +284,4 @@ ptl_nal_t ptl_lonal = {
 #endif
 };
 
-ptl_ni_t ptl_loni;
+ptl_ni_t *ptl_loni;
index 7b9bd0c..a435fa2 100644 (file)
@@ -78,7 +78,7 @@ static int init_kportals_module(void)
         ENTRY;
 
         rc = PtlInit(NULL);
-        if (rc) {
+        if (rc != PTL_OK) {
                 CERROR("PtlInit: error %d\n", rc);
                 RETURN(rc);
         }
@@ -93,8 +93,10 @@ static int init_kportals_module(void)
                 rc = PtlNIInit(PTL_IFACE_DEFAULT, LUSTRE_SRV_PTL_PID,
                                NULL, NULL, &nih);
                 if (rc != PTL_OK) {
-                        PtlFini();
-                        return -ENETDOWN;
+                        /* Can't PtlFini or fail now if I loaded NALs */
+                        PTL_MUTEX_DOWN(&ptl_apini.apini_api_mutex);
+                        ptl_apini.apini_niinit_self = 0;
+                        PTL_MUTEX_UP(&ptl_apini.apini_api_mutex);
                 }
         }