From fa31d63ac38f74a2f065f39a73804a1fd534f25e Mon Sep 17 00:00:00 2001 From: eeb Date: Tue, 21 Jun 2005 11:48:33 +0000 Subject: [PATCH] * debugged single acceptor * fixed PtlGetId() to skip the loopback NI --- lnet/libcfs/linux/linux-tcpip.c | 1 - lnet/lnet/acceptor.c | 24 ++++++++++++++---------- lnet/lnet/api-ni.c | 32 +++++++++++++++++--------------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/lnet/libcfs/linux/linux-tcpip.c b/lnet/libcfs/linux/linux-tcpip.c index 969e44f..02cfd34 100644 --- a/lnet/libcfs/linux/linux-tcpip.c +++ b/lnet/libcfs/linux/linux-tcpip.c @@ -649,7 +649,6 @@ libcfs_sock_connect (struct socket **sockp, int *fatal, "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - failed: sock_release(*sockp); return rc; } diff --git a/lnet/lnet/acceptor.c b/lnet/lnet/acceptor.c index e9e5bbe..bb4709d 100644 --- a/lnet/lnet/acceptor.c +++ b/lnet/lnet/acceptor.c @@ -45,7 +45,7 @@ CFS_MODULE_PARM(accept_secure_only, "i", int, 0644, "Accept connection requests only from secure ports?"); static int acceptor_proto_version = PTL_PROTO_ACCEPTOR_VERSION; -CFS_MODULE_PARM(acceptor_version, "i", int, 0444, +CFS_MODULE_PARM(acceptor_proto_version, "i", int, 0444, "Acceptor protocol version (outgoing connection requests)"); struct { @@ -95,7 +95,13 @@ ptl_connect_console_error (int rc, ptl_nid_t peer_nid, libcfs_nid2str(peer_nid), HIPQUAD(peer_ip), peer_port); break; - /* errors that should be rare */ + case -ECONNRESET: + LCONSOLE_ERROR("Connection to %s at host %u.%u.%u.%u on " + "port %d was reset; " + "Is it running a compatible version of Lustre?\n", + libcfs_nid2str(peer_nid), + HIPQUAD(peer_ip), peer_port); + break; case -EPROTO: LCONSOLE_ERROR("Protocol error connecting to %s at host " "%u.%u.%u.%u on port %d: " @@ -303,7 +309,6 @@ ptl_acceptor(void *arg) __u32 magic; __u32 peer_ip; int peer_port; - int nal; ptl_ni_t *blind_ni; /* If there is only a single NI that needs me, I'll pass her @@ -314,9 +319,6 @@ ptl_acceptor(void *arg) if (rc > 1) { ptl_ni_decref(blind_ni); blind_ni = NULL; - } else { - CWARN("Passing all incoming connections to NI %s\n", - libcfs_nid2str(blind_ni->ni_nid)); } LASSERT (ptl_acceptor_state.pta_sock == NULL); @@ -328,7 +330,6 @@ ptl_acceptor(void *arg) LASSERT (acceptor_backlog > 0); rc = libcfs_sock_listen(&ptl_acceptor_state.pta_sock, 0, acceptor_port, acceptor_backlog); - if (rc != 0) ptl_acceptor_state.pta_sock = NULL; @@ -339,7 +340,7 @@ ptl_acceptor(void *arg) if (rc != 0) return rc; - while (ptl_acceptor_state.pta_shutdown != 0) { + while (ptl_acceptor_state.pta_shutdown == 0) { rc = libcfs_sock_accept(&newsock, ptl_acceptor_state.pta_sock); if (rc != 0) { @@ -416,7 +417,6 @@ ptl_err_t ptl_acceptor_start(void) { long pid; - int rc; LASSERT (ptl_acceptor_state.pta_sock == NULL); init_mutex_locked(&ptl_acceptor_state.pta_signal); @@ -433,10 +433,14 @@ ptl_acceptor_start(void) mutex_down(&ptl_acceptor_state.pta_signal); /* wait for acceptor to startup */ - if (ptl_acceptor_state.pta_shutdown == 0) /* started OK */ + if (ptl_acceptor_state.pta_shutdown == 0) { + /* started OK */ + LASSERT (ptl_acceptor_state.pta_sock != NULL); return PTL_OK; + } CERROR ("Can't start acceptor: %d\n", ptl_acceptor_state.pta_shutdown); + LASSERT (ptl_acceptor_state.pta_sock == NULL); return PTL_FAIL; } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 5ca2c50..8b1f294 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -580,7 +580,6 @@ ptl_count_acceptor_nis (ptl_ni_t **first_ni) *first_ni = ni; } count++; - break; } } @@ -978,30 +977,33 @@ PtlNICtl(ptl_handle_ni_t nih, unsigned int cmd, void *arg) ptl_err_t PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) { - ptl_ni_t *ni; - unsigned long flags; + ptl_ni_t *ni; + unsigned long flags; + struct list_head *tmp; + ptl_err_t rc = PTL_FAIL; LASSERT (ptl_apini.apini_init); LASSERT (ptl_apini.apini_refcount > 0); - /* pretty useless; just return the NID of the first local interface */ + /* pretty useless; just return the NID of the first local interface, + * that isn't LONAL; it has the same NID on all nodes */ PTL_LOCK(flags); - - if (list_empty(&ptl_apini.apini_nis)) { - PTL_UNLOCK(flags); - return PTL_FAIL; - } - - id->pid = ptl_apini.apini_pid; - ni = list_entry(ptl_apini.apini_nis.next, - ptl_ni_t, ni_list); - id->nid = ni->ni_nid; + list_for_each(tmp, &ptl_apini.apini_nis) { + ni = list_entry(tmp, ptl_ni_t, ni_list); + if (ni->ni_nal->nal_type == LONAL) + continue; + + id->nid = ni->ni_nid; + id->pid = ptl_apini.apini_pid; + rc = PTL_OK; + break; + } PTL_UNLOCK(flags); - return PTL_OK; + return rc; } ptl_err_t -- 1.8.3.1