Whamcloud - gitweb
* debugged single acceptor
author eeb <eeb>
Tue, 21 Jun 2005 11:48:33 +0000 (11:48 +0000)
committer eeb <eeb>
Tue, 21 Jun 2005 11:48:33 +0000 (11:48 +0000)
*   fixed PtlGetId() to skip the loopback NI

lnet/libcfs/linux/linux-tcpip.c
lnet/lnet/acceptor.c
lnet/lnet/api-ni.c

index 969e44f..02cfd34 100644 (file)
@@ -649,7 +649,6 @@ libcfs_sock_connect (struct socket **sockp, int *fatal,
                "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
                HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
 
- failed:
        sock_release(*sockp);
         return rc;
 }
index e9e5bbe..bb4709d 100644 (file)
@@ -45,7 +45,7 @@ CFS_MODULE_PARM(accept_secure_only, "i", int, 0644,
                 "Accept connection requests only from secure ports?");
 
 static int acceptor_proto_version = PTL_PROTO_ACCEPTOR_VERSION;
-CFS_MODULE_PARM(acceptor_version, "i", int, 0444,
+CFS_MODULE_PARM(acceptor_proto_version, "i", int, 0444,
                 "Acceptor protocol version (outgoing connection requests)");
 
 struct {
@@ -95,7 +95,13 @@ ptl_connect_console_error (int rc, ptl_nid_t peer_nid,
                                libcfs_nid2str(peer_nid),
                                HIPQUAD(peer_ip), peer_port);
                 break;
-        /* errors that should be rare */
+        case -ECONNRESET:
+                LCONSOLE_ERROR("Connection to %s at host %u.%u.%u.%u on "
+                               "port %d was reset; "
+                               "Is it running a compatible version of Lustre?\n",
+                               libcfs_nid2str(peer_nid),
+                               HIPQUAD(peer_ip), peer_port);
+                break;
         case -EPROTO:
                 LCONSOLE_ERROR("Protocol error connecting to %s at host "
                                "%u.%u.%u.%u on port %d: "
@@ -303,7 +309,6 @@ ptl_acceptor(void *arg)
        __u32          magic;
        __u32          peer_ip;
        int            peer_port;
-       int            nal;
         ptl_ni_t      *blind_ni;
 
         /* If there is only a single NI that needs me, I'll pass her
@@ -314,9 +319,6 @@ ptl_acceptor(void *arg)
         if (rc > 1) {
                 ptl_ni_decref(blind_ni);
                 blind_ni = NULL;
-        } else {
-                CWARN("Passing all incoming connections to NI %s\n",
-                      libcfs_nid2str(blind_ni->ni_nid));
         }
         
        LASSERT (ptl_acceptor_state.pta_sock == NULL);
@@ -328,7 +330,6 @@ ptl_acceptor(void *arg)
        LASSERT (acceptor_backlog > 0);
        rc = libcfs_sock_listen(&ptl_acceptor_state.pta_sock,
                                0, acceptor_port, acceptor_backlog);
-
        if (rc != 0)
                ptl_acceptor_state.pta_sock = NULL;
 
@@ -339,7 +340,7 @@ ptl_acceptor(void *arg)
        if (rc != 0)
                return rc;
        
-       while (ptl_acceptor_state.pta_shutdown != 0) {
+       while (ptl_acceptor_state.pta_shutdown == 0) {
                
                rc = libcfs_sock_accept(&newsock, ptl_acceptor_state.pta_sock);
                if (rc != 0) {
@@ -416,7 +417,6 @@ ptl_err_t
 ptl_acceptor_start(void)
 {
        long   pid;
-       int    rc;
 
        LASSERT (ptl_acceptor_state.pta_sock == NULL);
        init_mutex_locked(&ptl_acceptor_state.pta_signal);
@@ -433,10 +433,14 @@ ptl_acceptor_start(void)
 
        mutex_down(&ptl_acceptor_state.pta_signal); /* wait for acceptor to startup */
 
-       if (ptl_acceptor_state.pta_shutdown == 0) /* started OK */
+       if (ptl_acceptor_state.pta_shutdown == 0) {
+                /* started OK */
+                LASSERT (ptl_acceptor_state.pta_sock != NULL);
                return PTL_OK;
+        }
 
        CERROR ("Can't start acceptor: %d\n", ptl_acceptor_state.pta_shutdown);
+        LASSERT (ptl_acceptor_state.pta_sock == NULL);
        return PTL_FAIL;
 }
 
index 5ca2c50..8b1f294 100644 (file)
@@ -580,7 +580,6 @@ ptl_count_acceptor_nis (ptl_ni_t **first_ni)
                                 *first_ni = ni;
                         }
                         count++;
-                        break;
                 }
         }
         
@@ -978,30 +977,33 @@ PtlNICtl(ptl_handle_ni_t nih, unsigned int cmd, void *arg)
 ptl_err_t
 PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
 {
-        ptl_ni_t      *ni;
-        unsigned long  flags;
+        ptl_ni_t         *ni;
+        unsigned long     flags;
+        struct list_head *tmp;
+        ptl_err_t         rc = PTL_FAIL;
 
         LASSERT (ptl_apini.apini_init);
         LASSERT (ptl_apini.apini_refcount > 0);
 
-        /* pretty useless; just return the NID of the first local interface */
+        /* pretty useless; just return the NID of the first local interface,
+         * that isn't LONAL; it has the same NID on all nodes */
 
         PTL_LOCK(flags);
-        
-        if (list_empty(&ptl_apini.apini_nis)) {
-                PTL_UNLOCK(flags);
-                return PTL_FAIL;
-        }
-        
-        id->pid = ptl_apini.apini_pid;
 
-        ni = list_entry(ptl_apini.apini_nis.next,
-                        ptl_ni_t, ni_list);
-        id->nid = ni->ni_nid;
+        list_for_each(tmp, &ptl_apini.apini_nis) {
+                ni = list_entry(tmp, ptl_ni_t, ni_list);
+                if (ni->ni_nal->nal_type == LONAL)
+                        continue;
+
+                id->nid = ni->ni_nid;
+                id->pid = ptl_apini.apini_pid;
+                rc = PTL_OK;
+                break;
+        }
 
         PTL_UNLOCK(flags);
 
-        return PTL_OK;
+        return rc;
 }
 
 ptl_err_t