From 0e464299d38515fc9b69776c4417f24afb3452f1 Mon Sep 17 00:00:00 2001 From: eeb Date: Tue, 26 Apr 2005 18:28:47 +0000 Subject: [PATCH] * kernel socknal acceptor; commented out lconf acceptor start/stop * removed socknal's use of sock->file; replaced with atomic refcount in conn * moved more socknal tunables into module params/sysfs/sysctl * consistent use/naming of socknal refcounting fns --- lnet/include/libcfs/kp30.h | 2 +- lnet/include/libcfs/linux/linux-lock.h | 2 +- lnet/include/lnet/lnetctl.h | 1 - lnet/include/lnet/ptlctl.h | 1 - lnet/klnds/socklnd/socklnd.c | 359 +++++++++++++++++++------------- lnet/klnds/socklnd/socklnd.h | 127 ++++++++--- lnet/klnds/socklnd/socklnd_cb.c | 168 ++++++++------- lnet/klnds/socklnd/socklnd_lib-darwin.c | 90 +------- lnet/klnds/socklnd/socklnd_lib-darwin.h | 4 - lnet/klnds/socklnd/socklnd_lib-linux.c | 256 ++++++++++++++++------- lnet/klnds/socklnd/socklnd_lib-linux.h | 10 +- lnet/klnds/socklnd/socklnd_modparams.c | 32 ++- lnet/utils/Makefile.am | 5 +- lnet/utils/portals.c | 144 ------------- lnet/utils/ptlctl.c | 1 - 15 files changed, 640 insertions(+), 562 deletions(-) diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h index 9a1ece4..66cf0e1 100644 --- a/lnet/include/libcfs/kp30.h +++ b/lnet/include/libcfs/kp30.h @@ -399,7 +399,7 @@ enum { #define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) -#define NAL_CMD_REGISTER_PEER_FD 100 +/* unused 100 */ #define NAL_CMD_CLOSE_CONNECTION 101 #define NAL_CMD_REGISTER_MYNID 102 #define NAL_CMD_PUSH_CONNECTION 103 diff --git a/lnet/include/libcfs/linux/linux-lock.h b/lnet/include/libcfs/linux/linux-lock.h index ce097e9..cb92b3a 100644 --- a/lnet/include/libcfs/linux/linux-lock.h +++ b/lnet/include/libcfs/linux/linux-lock.h @@ -74,7 +74,7 @@ */ /* - * mutex_t: + * mutex: * * - init_mutex(x) * - init_mutex_locked(x) diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index 99da747..c55e11e 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -47,7 +47,6 @@ int jt_ptl_print_peers (int argc, char **argv); int jt_ptl_add_peer (int argc, char **argv); int jt_ptl_del_peer (int argc, char **argv); int jt_ptl_print_connections (int argc, char **argv); -int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); int jt_ptl_print_active_txs(int argc, char **argv); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index 99da747..c55e11e 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -47,7 +47,6 @@ int jt_ptl_print_peers (int argc, char **argv); int jt_ptl_add_peer (int argc, char **argv); int jt_ptl_del_peer (int argc, char **argv); int jt_ptl_print_connections (int argc, char **argv); -int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); int jt_ptl_print_active_txs(int argc, char **argv); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index b4a27bb..5d42135 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -90,7 +90,8 @@ ksocknal_create_route (__u32 ipaddr, int port) atomic_set (&route->ksnr_refcount, 1); route->ksnr_peer = NULL; route->ksnr_timeout = cfs_time_current(); - route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; + route->ksnr_retry_interval = + cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000; route->ksnr_ipaddr = ipaddr; route->ksnr_port = port; route->ksnr_connecting = 0; @@ -105,25 +106,14 @@ ksocknal_create_route (__u32 ipaddr, int port) void ksocknal_destroy_route (ksock_route_t *route) { + LASSERT (atomic_read(&route->ksnr_refcount) == 0); + if (route->ksnr_peer != NULL) - ksocknal_put_peer (route->ksnr_peer); + ksocknal_peer_decref(route->ksnr_peer); PORTAL_FREE (route, sizeof (*route)); } -void -ksocknal_put_route (ksock_route_t *route) -{ - CDEBUG (D_OTHER, "putting route[%p] (%d)\n", - route, atomic_read (&route->ksnr_refcount)); - - LASSERT (atomic_read (&route->ksnr_refcount) > 0); - if (!atomic_dec_and_test (&route->ksnr_refcount)) - return; - - ksocknal_destroy_route (route); -} - ksock_peer_t * ksocknal_create_peer (ptl_nid_t nid) { @@ -160,27 +150,13 @@ ksocknal_destroy_peer (ksock_peer_t *peer) PORTAL_FREE (peer, sizeof (*peer)); - /* NB a peer's connections and autoconnect routes keep a reference - * on their peer until they are destroyed, so we can be assured - * that _all_ state to do with this peer has been cleaned up when - * its refcount drops to zero. */ + /* NB a peer's connections and routes keep a reference on their peer + * until they are destroyed, so we can be assured that _all_ state to + * do with this peer has been cleaned up when its refcount drops to + * zero. */ atomic_dec (&ksocknal_data.ksnd_npeers); } -void -ksocknal_put_peer (ksock_peer_t *peer) -{ - CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n", - peer, peer->ksnp_nid, - atomic_read (&peer->ksnp_refcount)); - - LASSERT (atomic_read (&peer->ksnp_refcount) > 0); - if (!atomic_dec_and_test (&peer->ksnp_refcount)) - return; - - ksocknal_destroy_peer (peer); -} - ksock_peer_t * ksocknal_find_peer_locked (ptl_nid_t nid) { @@ -212,7 +188,7 @@ ksocknal_get_peer (ptl_nid_t nid) read_lock (&ksocknal_data.ksnd_global_lock); peer = ksocknal_find_peer_locked (nid); if (peer != NULL) /* +1 ref for caller? */ - atomic_inc (&peer->ksnp_refcount); + ksocknal_peer_addref(peer); read_unlock (&ksocknal_data.ksnd_global_lock); return (peer); @@ -237,7 +213,7 @@ ksocknal_unlink_peer_locked (ksock_peer_t *peer) peer->ksnp_closing = 1; list_del (&peer->ksnp_list); /* lose peerlist's ref */ - ksocknal_put_peer (peer); + ksocknal_peer_decref(peer); } int @@ -320,7 +296,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) ksock_interface_t *iface; conn->ksnc_route = route; - atomic_inc (&route->ksnr_refcount); + ksocknal_route_addref(route); if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { if (route->ksnr_myipaddr == 0) { @@ -353,7 +329,8 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) /* Successful connection => further attempts can * proceed immediately */ route->ksnr_timeout = cfs_time_current(); - route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; + route->ksnr_retry_interval = + cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000; } void @@ -380,7 +357,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) } route->ksnr_peer = peer; - atomic_inc (&peer->ksnp_refcount); + ksocknal_peer_addref(peer); /* peer's routelist takes over my ref on 'route' */ list_add_tail(&route->ksnr_list, &peer->ksnp_routes); @@ -425,12 +402,12 @@ ksocknal_del_route_locked (ksock_route_t *route) route->ksnr_deleted = 1; list_del (&route->ksnr_list); - ksocknal_put_route (route); /* drop peer's ref */ + ksocknal_route_decref(route); /* drop peer's ref */ if (list_empty (&peer->ksnp_routes) && list_empty (&peer->ksnp_conns)) { - /* I've just removed the last autoconnect route of a peer - * with no active connections */ + /* I've just removed the last route to a peer with no active + * connections */ ksocknal_unlink_peer_locked (peer); } } @@ -455,7 +432,7 @@ ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port) route = ksocknal_create_route (ipaddr, port); if (route == NULL) { - ksocknal_put_peer (peer); + ksocknal_peer_decref(peer); return (-ENOMEM); } @@ -463,7 +440,7 @@ ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port) peer2 = ksocknal_find_peer_locked (nid); if (peer2 != NULL) { - ksocknal_put_peer (peer); + ksocknal_peer_decref(peer); peer = peer2; } else { /* peer table takes my ref on peer */ @@ -484,7 +461,7 @@ ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port) ksocknal_add_route_locked(peer, route); route->ksnr_share_count++; } else { - ksocknal_put_route(route); + ksocknal_route_decref(route); route2->ksnr_share_count++; } @@ -505,7 +482,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) LASSERT (!peer->ksnp_closing); /* Extra ref prevents peer disappearing until I'm done with it */ - atomic_inc(&peer->ksnp_refcount); + ksocknal_peer_addref(peer); list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { route = list_entry(tmp, ksock_route_t, ksnr_list); @@ -556,7 +533,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) } } - ksocknal_put_peer(peer); + ksocknal_peer_decref(peer); /* NB peer unlinks itself when last conn/route is removed */ } @@ -623,7 +600,7 @@ ksocknal_get_conn_by_idx (int index) continue; conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); read_unlock (&ksocknal_data.ksnd_global_lock); return (conn); } @@ -924,10 +901,108 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, write_unlock_irqrestore(global_lock, flags); if (newroute != NULL) - ksocknal_put_route(newroute); + ksocknal_route_decref(newroute); +} + +void +ksocknal_pause(int ticks) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(ticks); +} + +int +ksocknal_listener (void *arg) +{ + char name[16]; + ksock_connreq_t *cr; + int rc; + unsigned long flags; + + /* Called with parent blocked on ksnd_listener_signal */ + + snprintf(name, sizeof(name), "socknal_ld%03d", + *ksocknal_tunables.ksnd_port); + kportal_daemonize(name); + kportal_blockallsigs(); + + rc = ksocknal_lib_listen(&ksocknal_data.ksnd_listener_sock, + *ksocknal_tunables.ksnd_port, + *ksocknal_tunables.ksnd_backlog); + + /* set init status and unblock parent */ + ksocknal_data.ksnd_listener_shutdown = rc; + mutex_up(&ksocknal_data.ksnd_listener_signal); + + if (rc != 0) + return rc; + + while (ksocknal_data.ksnd_listener_shutdown == 0) { + + rc = ksocknal_lib_accept(ksocknal_data.ksnd_listener_sock, &cr); + if (rc != 0) { + if (rc != -EAGAIN) { + CWARN("Accept error: %d\n", rc); + ksocknal_pause(cfs_time_seconds(1)); + } + continue; + } + + spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags); + + list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); + wake_up(&ksocknal_data.ksnd_connd_waitq); + + spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, flags); + } + + ksocknal_lib_release_sock(ksocknal_data.ksnd_listener_sock); + + /* unblock executioner */ + mutex_up(&ksocknal_data.ksnd_listener_signal); + ksocknal_thread_fini(); + return 0; } int +ksocknal_start_listener(void) +{ + int rc; + + /* listener disabled? */ + if (*ksocknal_tunables.ksnd_backlog <= 0) + return 0; + + ksocknal_data.ksnd_listener_shutdown = 0; + + rc = ksocknal_thread_start (ksocknal_listener, NULL); + if (rc != 0) { + CERROR("Can't spawn listener: %d\n", rc); + return rc; + } + + /* until listener starts or fails */ + mutex_down(&ksocknal_data.ksnd_listener_signal); + return ksocknal_data.ksnd_listener_shutdown; +} + +void +ksocknal_stop_listener(void) +{ + /* listener disabled? */ + if (*ksocknal_tunables.ksnd_backlog <= 0) + return; + + /* make the listener exit its loop... */ + ksocknal_data.ksnd_listener_shutdown = 1; + ksocknal_lib_abort_accept(ksocknal_data.ksnd_listener_sock); + + /* block until listener exits */ + mutex_down(&ksocknal_data.ksnd_listener_signal); +} + + +int ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) { int passive = (type == SOCKNAL_CONN_NONE); @@ -947,12 +1022,6 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) ksock_tx_t *tx; int rc; - /* NB, sock has an associated file since (a) this connection might - * have been created in userland and (b) we need to refcount the - * socket so that we don't close it while I/O is being done on - * it, and sock->file has that pre-cooked... */ - LASSERT (KSN_SOCK2FILE(sock) != NULL); - LASSERT (cfs_file_count(KSN_SOCK2FILE(sock)) > 0); LASSERT (route == NULL || !passive); rc = ksocknal_lib_setup_sock (sock); @@ -961,17 +1030,19 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) irq = ksocknal_lib_sock_irq (sock); + rc = -ENOMEM; PORTAL_ALLOC(conn, sizeof(*conn)); if (conn == NULL) - return (-ENOMEM); + goto failed_0; memset (conn, 0, sizeof (*conn)); conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; + atomic_set (&conn->ksnc_sock_refcount, 1); /* 1 ref for conn */ conn->ksnc_type = type; ksocknal_lib_save_callback(sock, conn); - atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ + atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ conn->ksnc_rx_ready = 0; conn->ksnc_rx_scheduled = 0; @@ -985,18 +1056,18 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) /* stash conn's local and remote addrs */ rc = ksocknal_lib_get_conn_addrs (conn); if (rc != 0) - goto failed_0; + goto failed_1; if (!passive) { /* Active connection sends HELLO eagerly */ rc = ksocknal_local_ipvec(ipaddrs); if (rc < 0) - goto failed_0; + goto failed_1; nipaddrs = rc; rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs); if (rc != 0) - goto failed_0; + goto failed_1; } /* Find out/confirm peer's NID and connection type and get the @@ -1004,18 +1075,18 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid; rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs); if (rc < 0) - goto failed_0; + goto failed_1; nipaddrs = rc; LASSERT (nid != PTL_NID_ANY); if (route != NULL) { peer = route->ksnr_peer; - atomic_inc(&peer->ksnp_refcount); + ksocknal_peer_addref(peer); } else { peer = ksocknal_create_peer(nid); if (peer == NULL) { rc = -ENOMEM; - goto failed_0; + goto failed_1; } write_lock_irqsave(global_lock, flags); @@ -1027,11 +1098,11 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) list_add_tail(&peer->ksnp_list, ksocknal_nid2peerlist(nid)); } else { - ksocknal_put_peer(peer); + ksocknal_peer_decref(peer); peer = peer2; } /* +1 ref for me */ - atomic_inc(&peer->ksnp_refcount); + ksocknal_peer_addref(peer); write_unlock_irqrestore(global_lock, flags); } @@ -1046,7 +1117,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) rc = ksocknal_send_hello (conn, ipaddrs, rc); } if (rc < 0) - goto failed_1; + goto failed_2; write_lock_irqsave (global_lock, flags); @@ -1054,11 +1125,11 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) (route != NULL && route->ksnr_deleted)) { /* route/peer got closed under me */ rc = -ESTALE; - goto failed_2; + goto failed_3; } /* Refuse to duplicate an existing connection (both sides might - * autoconnect at once), unless this is a loopback connection */ + * connect at once), unless this is a loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { list_for_each(tmp, &peer->ksnp_conns) { conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); @@ -1073,7 +1144,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) "%u.%u.%u.%u type %d\n", HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type); rc = -EALREADY; - goto failed_2; + goto failed_3; } } @@ -1102,9 +1173,6 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) break; } - /* Give conn a ref on sock->file since we're going to return success */ - cfs_get_file(KSN_SOCK2FILE(sock)); - conn->ksnc_peer = peer; /* conn takes my ref on peer */ conn->ksnc_incarnation = incarnation; peer->ksnp_last_alive = cfs_time_current(); @@ -1120,7 +1188,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) mb(); /* order with adding to peer's conn list */ list_add (&conn->ksnc_list, &peer->ksnp_conns); - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); /* NB my callbacks block while I hold ksnd_global_lock */ ksocknal_lib_set_callback(sock, conn); @@ -1147,9 +1215,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) ksocknal_lib_bind_irq (irq); /* Call the callbacks right now to get things going. */ - if (ksocknal_getconnsock(conn) == 0) { + if (ksocknal_connsock_addref(conn) == 0) { ksocknal_lib_act_callback(sock, conn); - ksocknal_putconnsock(conn); + ksocknal_connsock_decref(conn); } CWARN("New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d" @@ -1158,21 +1226,24 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); - ksocknal_put_conn (conn); + ksocknal_conn_decref(conn); return (0); - failed_2: + failed_3: if (!peer->ksnp_closing && list_empty (&peer->ksnp_conns) && list_empty (&peer->ksnp_routes)) ksocknal_unlink_peer_locked(peer); write_unlock_irqrestore(global_lock, flags); + failed_2: + ksocknal_peer_decref(peer); + failed_1: - ksocknal_put_peer (peer); + PORTAL_FREE (conn, sizeof(*conn)); failed_0: - PORTAL_FREE (conn, sizeof(*conn)); + ksocknal_lib_release_sock(sock); LASSERT (rc != 0); return (rc); @@ -1222,7 +1293,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) list_del (&route->ksnr_list); /* make route least favourite */ list_add_tail (&route->ksnr_list, &peer->ksnp_routes); #endif - ksocknal_put_route (route); /* drop conn's ref on route */ + ksocknal_route_decref(route); /* drop conn's ref on route */ } if (list_empty (&peer->ksnp_conns)) { @@ -1232,7 +1303,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) if (list_empty (&peer->ksnp_routes)) { /* I've just closed last conn belonging to a - * non-autoconnecting peer */ + * peer with no routes to it */ ksocknal_unlink_peer_locked (peer); } } @@ -1272,7 +1343,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) conn->ksnc_tx_ready = 1; conn->ksnc_tx_scheduled = 1; /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); cfs_waitq_signal (&sched->kss_waitq); } @@ -1306,7 +1377,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) * when the connection was established, this will close the socket * immediately, aborting anything buffered in it. Any hung * zero-copy transmits will therefore complete in finite time. */ - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); if (notify) kpr_notify (&ksocknal_data.ksnd_router, peer->ksnp_nid, @@ -1314,12 +1385,29 @@ ksocknal_terminate_conn (ksock_conn_t *conn) } void +ksocknal_queue_zombie_conn (ksock_conn_t *conn) +{ + /* Queue the conn for the reaper to destroy */ + unsigned long flags; + + LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0); + spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags); + + list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); + cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq); + + spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags); +} + +void ksocknal_destroy_conn (ksock_conn_t *conn) { /* Final coup-de-grace of the reaper */ CDEBUG (D_NET, "connection %p\n", conn); - LASSERT (atomic_read (&conn->ksnc_refcount) == 0); + LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); + LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); + LASSERT (conn->ksnc_sock == NULL); LASSERT (conn->ksnc_route == NULL); LASSERT (!conn->ksnc_tx_scheduled); LASSERT (!conn->ksnc_rx_scheduled); @@ -1346,33 +1434,12 @@ ksocknal_destroy_conn (ksock_conn_t *conn) break; } - ksocknal_put_peer (conn->ksnc_peer); + ksocknal_peer_decref(conn->ksnc_peer); PORTAL_FREE (conn, sizeof (*conn)); atomic_dec (&ksocknal_data.ksnd_nclosing_conns); } -void -ksocknal_put_conn (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", - conn, conn->ksnc_peer->ksnp_nid, - atomic_read (&conn->ksnc_refcount)); - - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); - if (!atomic_dec_and_test (&conn->ksnc_refcount)) - return; - - spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); - - list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -} - int ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) { @@ -1514,7 +1581,7 @@ ksocknal_push_peer (ksock_peer_t *peer) list_for_each (tmp, &peer->ksnp_conns) { if (i++ == index) { conn = list_entry (tmp, ksock_conn_t, ksnc_list); - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); break; } } @@ -1525,7 +1592,7 @@ ksocknal_push_peer (ksock_peer_t *peer) break; ksocknal_lib_push_conn (conn); - ksocknal_put_conn (conn); + ksocknal_conn_decref(conn); } } @@ -1545,7 +1612,7 @@ ksocknal_push (ptl_nid_t nid) if (peer != NULL) { rc = 0; ksocknal_push_peer (peer); - ksocknal_put_peer (peer); + ksocknal_peer_decref(peer); } return (rc); } @@ -1561,7 +1628,7 @@ ksocknal_push (ptl_nid_t nid) if (index++ == j) { peer = list_entry(tmp, ksock_peer_t, ksnp_list); - atomic_inc (&peer->ksnp_refcount); + ksocknal_peer_addref(peer); break; } } @@ -1571,7 +1638,7 @@ ksocknal_push (ptl_nid_t nid) if (peer != NULL) { rc = 0; ksocknal_push_peer (peer); - ksocknal_put_peer (peer); + ksocknal_peer_decref(peer); } } @@ -1830,30 +1897,8 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_count = txmem; pcfg->pcfg_size = rxmem; pcfg->pcfg_wait = nagle; - ksocknal_put_conn (conn); - } - break; - } - case NAL_CMD_REGISTER_PEER_FD: { - struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc); - int type = pcfg->pcfg_misc; - - if (sock == NULL) - break; - - switch (type) { - case SOCKNAL_CONN_NONE: - case SOCKNAL_CONN_ANY: - case SOCKNAL_CONN_CONTROL: - case SOCKNAL_CONN_BULK_IN: - case SOCKNAL_CONN_BULK_OUT: - rc = ksocknal_create_conn(NULL, sock, type); - break; - default: - rc = -EINVAL; - break; + ksocknal_conn_decref(conn); } - cfs_put_file (KSN_SOCK2FILE(sock)); break; } case NAL_CMD_CLOSE_CONNECTION: { @@ -1937,6 +1982,19 @@ ksocknal_shutdown (ptl_ni_t *ni) libcfs_nal_cmd_unregister(SOCKNAL); /* fall through */ + case SOCKNAL_INIT_LISTENER: + ksocknal_stop_listener(); + /* Wait for queued connreqs to clean up */ + i = 2; + while (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { + i++; + CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ + "waiting for connreqs to clean up\n"); + ksocknal_pause(cfs_time_seconds(1)); + } + + /* fall through */ + case SOCKNAL_INIT_DATA: /* No more calls to ksocknal_cmd() to create new * peers/connections since we're being unloaded. */ @@ -1951,8 +2009,7 @@ ksocknal_shutdown (ptl_ni_t *ni) CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "waiting for %d peers to disconnect\n", atomic_read (&ksocknal_data.ksnd_npeers)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (cfs_time_seconds(1)); + ksocknal_pause(cfs_time_seconds(1)); } LASSERT (atomic_read (&ksocknal_data.ksnd_npeers) == 0); @@ -1962,7 +2019,8 @@ ksocknal_shutdown (ptl_ni_t *ni) } LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns)); LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_autoconnectd_routes)); + LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs)); + LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes)); LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns)); LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns)); @@ -1981,7 +2039,7 @@ ksocknal_shutdown (ptl_ni_t *ni) /* flag threads to terminate; wake and wait for them to die */ ksocknal_data.ksnd_shuttingdown = 1; - cfs_waitq_broadcast (&ksocknal_data.ksnd_autoconnectd_waitq); + cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq); cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq); if (ksocknal_data.ksnd_schedulers != NULL) @@ -1998,8 +2056,7 @@ ksocknal_shutdown (ptl_ni_t *ni) "waiting for %d threads to terminate\n", ksocknal_data.ksnd_nthreads); read_unlock(&ksocknal_data.ksnd_global_lock); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (cfs_time_seconds(1)); + ksocknal_pause(cfs_time_seconds(1)); read_lock(&ksocknal_data.ksnd_global_lock); } read_unlock(&ksocknal_data.ksnd_global_lock); @@ -2074,6 +2131,8 @@ ksocknal_startup (ptl_ni_t *ni, char **interfaces) rwlock_init(&ksocknal_data.ksnd_global_lock); + init_mutex_locked(&ksocknal_data.ksnd_listener_signal); + spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); @@ -2090,9 +2149,10 @@ ksocknal_startup (ptl_ni_t *ni, char **interfaces) CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq); - spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes); - cfs_waitq_init(&ksocknal_data.ksnd_autoconnectd_waitq); + spin_lock_init (&ksocknal_data.ksnd_connd_lock); + CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs); + CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes); + cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq); /* NB memset above zeros whole of ksocknal_data, including * ksocknal_data.ksnd_irqinfo[all].ksni_valid */ @@ -2132,10 +2192,10 @@ ksocknal_startup (ptl_ni_t *ni, char **interfaces) } } - for (i = 0; i < SOCKNAL_N_AUTOCONNECTD; i++) { - rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); + for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { + rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i)); if (rc != 0) { - CERROR("Can't spawn socknal autoconnectd: %d\n", rc); + CERROR("Can't spawn socknal connd: %d\n", rc); ksocknal_shutdown (ni); return (rc); } @@ -2191,6 +2251,15 @@ ksocknal_startup (ptl_ni_t *ni, char **interfaces) } } + rc = ksocknal_start_listener(); + if (rc != 0) { + CERROR("Can't start listener: %d\n", rc); + ksocknal_shutdown(ni); + return rc; + } + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LISTENER; + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 4c3006e..0111087 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -44,13 +44,14 @@ #include #include -#define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */ - -#define SOCKNAL_MIN_RECONNECT_INTERVAL cfs_time_seconds(1) /* first failed connection retry... */ -#define SOCKNAL_MAX_RECONNECT_INTERVAL cfs_time_seconds(60) /* ...exponentially increasing to this */ - -/* default vals for runtime tunables */ -#define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ +/* default vals for tunables/modparams */ +#define SOCKNAL_PORT 988 /* the socknal service # */ +#define SOCKNAL_BACKLOG 127 /* listen backlog */ +#define SOCKNAL_TIMEOUT 50 /* default comms timeout (seconds) */ +#define SOCKNAL_LISTEN_TIMEOUT 10 /* default listener timeout (seconds) */ +#define SOCKNAL_NCONND 4 /* # socknal connection daemons */ +#define SOCKNAL_MIN_RECONNECTMS 1000 /* first connection retry after (mS)... */ +#define SOCKNAL_MAX_RECONNECTMS 60000 /* ...exponentially increasing to this */ #define SOCKNAL_EAGER_ACK SOCKNAL_ARCH_EAGER_ACK /* default eager ack (boolean) */ #define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */ #define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */ @@ -129,7 +130,13 @@ typedef struct typedef struct { + int *ksnd_port; /* the socknal service # */ + int *ksnd_backlog; /* listen backlog */ int *ksnd_timeout; /* "stuck" socket timeout (seconds) */ + int *ksnd_listen_timeout; /* passive connection timeout */ + int *ksnd_nconnds; /* # connection daemons */ + int *ksnd_min_reconnectms; /* first connection retry after (ms)... */ + int *ksnd_max_reconnectms; /* ...exponentially increasing to this */ int *ksnd_eager_ack; /* make TCP ack eagerly? */ int *ksnd_typed_conns; /* drive sockets by type? */ int *ksnd_min_bulk; /* smallest "large" message */ @@ -153,6 +160,10 @@ typedef struct { int ksnd_init; /* initialisation state */ __u64 ksnd_incarnation; /* my epoch */ + + int ksnd_listener_shutdown; /* listener start/stop/rc */ + struct socket *ksnd_listener_sock; /* listener's socket */ + struct semaphore ksnd_listener_signal; /* parent waits here */ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ struct list_head *ksnd_peers; /* hash table of all my known peers */ @@ -184,9 +195,10 @@ typedef struct int ksnd_stall_tx; /* test sluggish sender */ int ksnd_stall_rx; /* test sluggish receiver */ - struct list_head ksnd_autoconnectd_routes; /* routes waiting to be connected */ - cfs_waitq_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */ - spinlock_t ksnd_autoconnectd_lock; /* serialise */ + struct list_head ksnd_connd_connreqs; /* incoming connection requests */ + struct list_head ksnd_connd_routes; /* routes waiting to be connected */ + cfs_waitq_t ksnd_connd_waitq; /* connds sleep here */ + spinlock_t ksnd_connd_lock; /* serialise */ ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ @@ -198,7 +210,8 @@ typedef struct #define SOCKNAL_INIT_NOTHING 0 #define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_ALL 2 +#define SOCKNAL_INIT_LISTENER 2 +#define SOCKNAL_INIT_ALL 3 /* A packet just assembled for transmission is represented by 1 or more * struct iovec fragments (the first frag contains the portals header), @@ -291,7 +304,8 @@ typedef struct ksock_conn struct socket *ksnc_sock; /* actual socket */ void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ void *ksnc_saved_write_space; /* socket's original write_space() callback */ - atomic_t ksnc_refcount; /* # users */ + atomic_t ksnc_conn_refcount; /* conn refcount */ + atomic_t ksnc_sock_refcount; /* sock refcount */ ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ __u32 ksnc_myipaddr; /* my IP */ __u32 ksnc_ipaddr; /* peer's IP */ @@ -341,7 +355,7 @@ typedef struct ksock_conn typedef struct ksock_route { struct list_head ksnr_list; /* chain on peer route list */ - struct list_head ksnr_connect_list; /* chain on autoconnect list */ + struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ struct ksock_peer *ksnr_peer; /* owning peer */ atomic_t ksnr_refcount; /* # users */ cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */ @@ -372,6 +386,11 @@ typedef struct ksock_peer __u32 ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */ } ksock_peer_t; +typedef struct ksock_connreq +{ + struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ + struct socket *ksncr_sock; /* accepted socket */ +} ksock_connreq_t; extern ksock_nal_data_t ksocknal_data; extern ksock_tunables_t ksocknal_tunables; @@ -384,27 +403,87 @@ ksocknal_nid2peerlist (ptl_nid_t nid) return (&ksocknal_data.ksnd_peers [hash]); } +static inline void +ksocknal_conn_addref (ksock_conn_t *conn) +{ + LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); + atomic_inc(&conn->ksnc_conn_refcount); +} + +extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn); + +static inline void +ksocknal_conn_decref (ksock_conn_t *conn) +{ + LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); + if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) + ksocknal_queue_zombie_conn(conn); +} + static inline int -ksocknal_getconnsock (ksock_conn_t *conn) +ksocknal_connsock_addref (ksock_conn_t *conn) { int rc = -ESHUTDOWN; read_lock (&ksocknal_data.ksnd_global_lock); if (!conn->ksnc_closing) { + LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); + atomic_inc(&conn->ksnc_sock_refcount); rc = 0; - cfs_get_file (KSN_CONN2FILE(conn)); } read_unlock (&ksocknal_data.ksnd_global_lock); return (rc); } +extern void ksocknal_lib_release_sock(struct socket *sock); + +static inline void +ksocknal_connsock_decref (ksock_conn_t *conn) +{ + LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); + if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { + LASSERT (conn->ksnc_closing); + ksocknal_lib_release_sock(conn->ksnc_sock); + conn->ksnc_sock = NULL; + } +} + static inline void -ksocknal_putconnsock (ksock_conn_t *conn) +ksocknal_route_addref (ksock_route_t *route) { - cfs_put_file (KSN_CONN2FILE(conn)); + LASSERT (atomic_read(&route->ksnr_refcount) > 0); + atomic_inc(&route->ksnr_refcount); } +extern void ksocknal_destroy_route (ksock_route_t *route); + +static inline void +ksocknal_route_decref (ksock_route_t *route) +{ + LASSERT (atomic_read (&route->ksnr_refcount) > 0); + if (atomic_dec_and_test(&route->ksnr_refcount)) + ksocknal_destroy_route (route); +} + +static inline void +ksocknal_peer_addref (ksock_peer_t *peer) +{ + LASSERT (atomic_read (&peer->ksnp_refcount) > 0); + atomic_inc(&peer->ksnp_refcount); +} + +extern void ksocknal_destroy_peer (ksock_peer_t *peer); + +static inline void +ksocknal_peer_decref (ksock_peer_t *peer) +{ + LASSERT (atomic_read (&peer->ksnp_refcount) > 0); + if (atomic_dec_and_test(&peer->ksnp_refcount)) + ksocknal_destroy_peer (peer); +} + + ptl_err_t ksocknal_startup (ptl_ni_t *ni, char **interfaces); void ksocknal_shutdown (ptl_ni_t *ni); ptl_err_t ksocknal_send (ptl_ni_t *ni, void *private, @@ -429,19 +508,14 @@ ptl_err_t ksocknal_recv_pages(ptl_ni_t *ni, void *private, size_t mlen, size_t rlen); -extern void ksocknal_put_route (ksock_route_t *route); -extern void ksocknal_put_peer (ksock_peer_t *peer); extern int ksocknal_add_peer(ptl_nid_t nid, __u32 ip, int port); extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid); -extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr, - int single, int keep_conn); extern int ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type); extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); extern void ksocknal_terminate_conn (ksock_conn_t *conn); extern void ksocknal_destroy_conn (ksock_conn_t *conn); -extern void ksocknal_put_conn (ksock_conn_t *conn); extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr); @@ -452,11 +526,11 @@ extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); extern void ksocknal_fmb_callback (void *arg, int error); extern void ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive); extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); +extern void ksocknal_thread_fini (void); extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); extern int ksocknal_scheduler (void *arg); -extern int ksocknal_autoconnectd (void *arg); +extern int ksocknal_connd (void *arg); extern int ksocknal_reaper (void *arg); -extern int ksocknal_setup_sock (struct socket *sock); extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs); extern int ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs); @@ -481,8 +555,11 @@ extern int ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob); extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle); -extern int ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, +extern int ksocknal_lib_connect_sock(struct socket **sockp, int *fatal, ksock_route_t *route, int local_port); +extern int ksocknal_lib_listen(struct socket **sockp, int port, int backlog); +extern int ksocknal_lib_accept(struct socket *sock, ksock_connreq_t **crp); +extern void ksocknal_lib_abort_accept(struct socket *sock); extern int ksocknal_lib_tunables_init(void); extern void ksocknal_lib_tunables_fini(void); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index c9bf19f..58c6002 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -120,7 +120,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) LASSERT (tx->tx_resid != 0); - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); return (-ESHUTDOWN); @@ -189,7 +189,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) } while (tx->tx_resid != 0); - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); return (rc); } @@ -296,7 +296,7 @@ ksocknal_receive (ksock_conn_t *conn) schedule_timeout(cfs_time_seconds (ksocknal_data.ksnd_stall_rx)); } - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); return (-ESHUTDOWN); @@ -334,7 +334,7 @@ ksocknal_receive (ksock_conn_t *conn) } } - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); RETURN (rc); } @@ -371,7 +371,7 @@ ksocknal_tx_done (ksock_tx_t *tx, int asynch) * process_transmit() so it needs to keep a ref on * tx_conn... */ if (asynch) - ksocknal_put_conn (tx->tx_conn); + ksocknal_conn_decref(tx->tx_conn); #else LASSERT (!asynch); #endif @@ -405,7 +405,7 @@ ksocknal_tx_launched (ksock_tx_t *tx) /* zccd skbufs are still in-flight. First take a ref on * conn, so it hangs about for ksocknal_tx_done... */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); /* ...then drop the initial ref on zccd, so the zero copy * callback can occur */ @@ -494,23 +494,23 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) } void -ksocknal_launch_autoconnect_locked (ksock_route_t *route) +ksocknal_launch_connection_locked (ksock_route_t *route) { unsigned long flags; /* called holding write lock on ksnd_global_lock */ LASSERT (!route->ksnr_connecting); - route->ksnr_connecting = 1; /* scheduling conn for autoconnectd */ - atomic_inc (&route->ksnr_refcount); /* extra ref for autoconnectd */ + route->ksnr_connecting = 1; /* scheduling conn for connd */ + ksocknal_route_addref(route); /* extra ref for connd */ - spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); + spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags); - list_add_tail (&route->ksnr_connect_list, - &ksocknal_data.ksnd_autoconnectd_routes); - cfs_waitq_signal (&ksocknal_data.ksnd_autoconnectd_waitq); + list_add_tail (&route->ksnr_connd_list, + &ksocknal_data.ksnd_connd_routes); + cfs_waitq_signal (&ksocknal_data.ksnd_connd_waitq); - spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); + spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags); } #if 0 @@ -656,7 +656,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) if (conn->ksnc_tx_ready && /* able to send */ !conn->ksnc_tx_scheduled) { /* not scheduled to send */ /* +1 ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; @@ -751,7 +751,8 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) again: if (create_peer) { - rc = ksocknal_add_peer(nid, (__u32)nid, 988); + rc = ksocknal_add_peer(nid, (__u32)nid, + *ksocknal_tunables.ksnd_port); if (rc != 0) { CERROR("Can't add peer "LPX64": %d\n", nid, rc); @@ -767,8 +768,9 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) if (ksocknal_find_connectable_route_locked(peer) == NULL) { conn = ksocknal_find_conn_locked (tx, peer); if (conn != NULL) { - /* I've got no autoconnect routes that need to be - * connecting and I do have an actual connection... */ + /* I've got no routes that need to be + * connecting and I do have an actual + * connection... */ ksocknal_queue_tx_locked (tx, conn); read_unlock (g_lock); return (0); @@ -796,12 +798,12 @@ ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid) } for (;;) { - /* launch any/all autoconnections that need it */ + /* launch any/all connections that need it */ route = ksocknal_find_connectable_route_locked (peer); if (route == NULL) break; - ksocknal_launch_autoconnect_locked (route); + ksocknal_launch_connection_locked (route); } conn = ksocknal_find_conn_locked (tx, peer); @@ -1026,7 +1028,7 @@ ksocknal_fmb_callback (void *arg, int error) le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid)); /* drop peer ref taken on init */ - ksocknal_put_peer (fmb->fmb_peer); + ksocknal_peer_decref(fmb->fmb_peer); spin_lock_irqsave (&fmp->fmp_lock, flags); @@ -1118,7 +1120,7 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) /* Take a ref on the conn's peer to prevent module unload before * forwarding completes. */ fmb->fmb_peer = conn->ksnc_peer; - atomic_inc (&conn->ksnc_peer->ksnp_refcount); + ksocknal_peer_addref(conn->ksnc_peer); /* Copy the header we just read into the forwarding buffer. If * there's payload, start reading reading it into the buffer, @@ -1218,7 +1220,7 @@ ksocknal_fwd_parse (ksock_conn_t *conn) "(%s): target is a peer\n", src_nid, libcfs_nid2str(src_nid), dest_nid, libcfs_nid2str(dest_nid)); - ksocknal_put_peer (peer); /* drop ref from get above */ + ksocknal_peer_decref(peer); /* drop ref from get above */ /* on to next packet (skip this one's body) */ ksocknal_new_packet (conn, body_len); @@ -1291,7 +1293,7 @@ ksocknal_process_receive (ksock_conn_t *conn) ksock_fmb_t *fmb; int rc; - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); + LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); /* doesn't need a forwarding buffer */ if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) @@ -1503,7 +1505,7 @@ int ksocknal_scheduler (void *arg) int id = sched - ksocknal_data.ksnd_schedulers; char name[16]; - snprintf (name, sizeof (name),"ksocknald_%02d", id); + snprintf (name, sizeof (name),"socknal_sd%02d", id); kportal_daemonize (name); kportal_blockallsigs (); @@ -1565,7 +1567,7 @@ int ksocknal_scheduler (void *arg) } else { conn->ksnc_rx_scheduled = 0; /* drop my ref */ - ksocknal_put_conn(conn); + ksocknal_conn_decref(conn); } did_something = 1; @@ -1615,7 +1617,7 @@ int ksocknal_scheduler (void *arg) } else { conn->ksnc_tx_scheduled = 0; /* drop my ref */ - ksocknal_put_conn (conn); + ksocknal_conn_decref(conn); } did_something = 1; @@ -1678,7 +1680,7 @@ void ksocknal_read_callback (ksock_conn_t *conn) &sched->kss_rx_conns); conn->ksnc_rx_scheduled = 1; /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); cfs_waitq_signal (&sched->kss_waitq); } @@ -1709,7 +1711,7 @@ void ksocknal_write_callback (ksock_conn_t *conn) &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); cfs_waitq_signal (&sched->kss_waitq); } @@ -1720,18 +1722,6 @@ void ksocknal_write_callback (ksock_conn_t *conn) } int -ksocknal_sock_write (struct socket *sock, void *buffer, int nob) -{ - return ksocknal_lib_sock_write(sock, buffer, nob); -} - -int -ksocknal_sock_read (struct socket *sock, void *buffer, int nob) -{ - return ksocknal_lib_sock_read(sock, buffer, nob); -} - -int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) { /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ @@ -1761,7 +1751,7 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) cpu_to_le64 (ksocknal_data.ksnd_incarnation); /* Receiver is eager */ - rc = ksocknal_sock_write (sock, &hdr, sizeof(hdr)); + rc = ksocknal_lib_sock_write (sock, &hdr, sizeof(hdr)); if (rc != 0) { CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); @@ -1775,7 +1765,7 @@ ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs) ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]); } - rc = ksocknal_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs)); + rc = ksocknal_lib_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs)); if (rc != 0) CERROR ("Error %d sending HELLO payload (%d)" " to %u.%u.%u.%u/%d\n", rc, nipaddrs, @@ -1815,7 +1805,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, hmv = (ptl_magicversion_t *)&hdr.dest_nid; LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv)); + rc = ksocknal_lib_sock_read (sock, hmv, sizeof (*hmv)); if (rc != 0) { CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); @@ -1848,7 +1838,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, * header, followed by payload full of interface IP addresses. * Read the rest of it in now... */ - rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv)); + rc = ksocknal_lib_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv)); if (rc != 0) { CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", rc, HIPQUAD(conn->ksnc_ipaddr)); @@ -1919,7 +1909,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, if (nips == 0) return (0); - rc = ksocknal_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs)); + rc = ksocknal_lib_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs)); if (rc != 0) { CERROR ("Error %d reading IPs from "LPX64"@%u.%u.%u.%u\n", rc, *nid, HIPQUAD(conn->ksnc_ipaddr)); @@ -1945,7 +1935,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) struct socket *sock; int rc; int port; - int may_retry; + int fatal; /* Iterate through reserved ports. When typed connections are * used, we will need to bind to multiple ports, but we only know @@ -1954,15 +1944,14 @@ ksocknal_connect_peer (ksock_route_t *route, int type) for (port = 1023; port > 512; --port) { - rc = ksocknal_lib_connect_sock(&sock, &may_retry, route, port); + rc = ksocknal_lib_connect_sock(&sock, &fatal, route, port); if (rc == 0) { rc = ksocknal_create_conn(route, sock, type); - cfs_put_file(KSN_SOCK2FILE(sock)); return rc; } - if (!may_retry) + if (fatal) return rc; } @@ -1971,7 +1960,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) } void -ksocknal_autoconnect (ksock_route_t *route) +ksocknal_connect (ksock_route_t *route) { CFS_LIST_HEAD (zombies); ksock_tx_t *tx; @@ -2064,8 +2053,9 @@ ksocknal_autoconnect (ksock_route_t *route) LASSERT (route->ksnr_retry_interval != 0); route->ksnr_timeout = cfs_time_add(cfs_time_current(), route->ksnr_retry_interval); - route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2, - SOCKNAL_MAX_RECONNECT_INTERVAL); + route->ksnr_retry_interval = + MIN (route->ksnr_retry_interval * 2, + cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000); if (!list_empty (&peer->ksnp_tx_queue) && ksocknal_find_connecting_route_locked (peer) == NULL) { @@ -2108,48 +2098,74 @@ ksocknal_autoconnect (ksock_route_t *route) } int -ksocknal_autoconnectd (void *arg) +ksocknal_connd (void *arg) { long id = (long)arg; char name[16]; unsigned long flags; + ksock_connreq_t *cr; ksock_route_t *route; int rc; + int did_something; - snprintf (name, sizeof (name), "ksocknal_ad%02ld", id); + snprintf (name, sizeof (name), "socknal_cd%02ld", id); kportal_daemonize (name); kportal_blockallsigs (); - spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags); + spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags); while (!ksocknal_data.ksnd_shuttingdown) { - if (!list_empty (&ksocknal_data.ksnd_autoconnectd_routes)) { - route = list_entry (ksocknal_data.ksnd_autoconnectd_routes.next, - ksock_route_t, ksnr_connect_list); + did_something = 0; + + if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { + /* Connection accepted by the listener */ + cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next, + ksock_connreq_t, ksncr_list); + + list_del(&cr->ksncr_list); + spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, + flags); + + ksocknal_create_conn(NULL, cr->ksncr_sock, SOCKNAL_CONN_NONE); + PORTAL_FREE(cr, sizeof(*cr)); + + spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, + flags); + did_something = 1; + } + + if (!list_empty (&ksocknal_data.ksnd_connd_routes)) { + /* Connection request */ + route = list_entry (ksocknal_data.ksnd_connd_routes.next, + ksock_route_t, ksnr_connd_list); - list_del (&route->ksnr_connect_list); - spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); + list_del (&route->ksnr_connd_list); + spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags); - ksocknal_autoconnect (route); - ksocknal_put_route (route); + ksocknal_connect (route); + ksocknal_route_decref(route); - spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, + spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags); - continue; + did_something = 1; } - spin_unlock_irqrestore(&ksocknal_data.ksnd_autoconnectd_lock, + if (did_something) + continue; + + spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, flags); - rc = wait_event_interruptible(ksocknal_data.ksnd_autoconnectd_waitq, + rc = wait_event_interruptible(ksocknal_data.ksnd_connd_waitq, ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_autoconnectd_routes)); + !list_empty(&ksocknal_data.ksnd_connd_connreqs) || + !list_empty(&ksocknal_data.ksnd_connd_routes)); - spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, flags); + spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags); } - spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); + spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags); ksocknal_thread_fini (); return (0); @@ -2169,7 +2185,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) LASSERT (!conn->ksnc_closing); if (SOCK_ERROR(conn->ksnc_sock) != 0) { - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); switch (SOCK_ERROR(conn->ksnc_sock)) { case ECONNRESET: @@ -2204,7 +2220,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) cfs_time_aftereq (cfs_time_current(), conn->ksnc_rx_deadline)) { /* Timed out incomplete incoming message */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); LCONSOLE_ERROR("A timeout occurred receiving data from " "%u.%u.%u.%u; the network or that node " "may be down.\n", @@ -2220,7 +2236,7 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) conn->ksnc_tx_deadline)) { /* Timed out messages queued for sending or * buffered in the socket's send buffer */ - atomic_inc (&conn->ksnc_refcount); + ksocknal_conn_addref(conn); LCONSOLE_ERROR("A timeout occurred sending data to " "%u.%u.%u.%u; the network or that node " "may be down.\n", @@ -2267,7 +2283,7 @@ ksocknal_check_peer_timeouts (int idx) /* NB we won't find this one again, but we can't * just proceed with the next peer, since we dropped * ksnd_global_lock and it might be dead already! */ - ksocknal_put_conn (conn); + ksocknal_conn_decref(conn); goto again; } } @@ -2289,7 +2305,7 @@ ksocknal_reaper (void *arg) int peer_index = 0; cfs_time_t deadline = cfs_time_current(); - kportal_daemonize ("ksocknal_reaper"); + kportal_daemonize ("socknal_reaper"); kportal_blockallsigs (); CFS_INIT_LIST_HEAD(&enomem_conns); @@ -2307,7 +2323,7 @@ ksocknal_reaper (void *arg) spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); ksocknal_terminate_conn (conn); - ksocknal_put_conn (conn); + ksocknal_conn_decref(conn); spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); continue; diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c index 7c1bc93..08c88dc 100644 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.c @@ -108,73 +108,18 @@ ksocknal_lib_tunables_fini () static unsigned long ksocknal_mbuf_size = (u_quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); -struct socket * -sockfd_lookup(int fd, void *foo) -{ - struct socket *so; - struct file *fp; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - getsock(current_proc()->p_fd, fd, &fp); - CFS_NET_EX; - so = (struct socket *)fp->f_data; - so->reserved4 = fp; - CFS_CONE_IN; - fref(fp); - CFS_CONE_EX; - return so; -} - extern struct fileops socketops; -static int -sock_map_fd (struct socket *so) -{ - struct file *fp; - int fd; - CFS_DECL_FUNNEL_DATA; - - CFS_CONE_IN; - falloc(current_proc(), &fp, &fd); - fp->f_flag = FREAD|FWRITE; - fp->f_type = DTYPE_SOCKET; - fp->f_ops = &socketops; - fp->f_data = (caddr_t)so; - so->reserved4 = fp; - *fdflags(current_proc(), fd) &= ~UF_RESERVED; - CFS_CONE_EX; - - return fd; -} - -static void -sock_release(struct socket *so) +void +ksocknal_lib_release_sock(struct socket *so) { - struct file *fp; CFS_DECL_FUNNEL_DATA; - fp = (struct file *)so->reserved4; - so->reserved4 = NULL; - fp->f_data = NULL; - CFS_CONE_IN; - frele(fp); - CFS_CONE_EX; CFS_NET_IN; soshutdown(so, 0); CFS_NET_EX; } -static void -sock_fdrelse(int fd) -{ - CFS_DECL_FUNNEL_DATA; - - CFS_CONE_IN; - fdrelse(current_proc(), fd); - CFS_CONE_EX; -} - void ksocknal_lib_bind_irq (unsigned int irq) { @@ -623,7 +568,7 @@ ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int int rc; CFS_DECL_NET_DATA; - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; @@ -655,7 +600,7 @@ ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int } CFS_NET_EX; - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); if (rc == 0) *nagle = !*nagle; @@ -780,7 +725,7 @@ out: } int -ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, +ksocknal_lib_connect_sock (struct socket **sockp, int *fatal, ksock_route_t *route, int local_port) { struct sockaddr_in locaddr; @@ -808,7 +753,7 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, srvaddr.sin_port = htons (route->ksnr_port); srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); - *may_retry = 0; + *fatal = 1; CFS_NET_IN; rc = socreate(PF_INET, &so, SOCK_STREAM, 0); @@ -819,18 +764,6 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, return (-rc); } - /* - * XXX - * Liang: what do we need here? - */ - fd = sock_map_fd (so); - if (fd < 0) { - sock_release (so); - CERROR ("sock_map_fd error %d\n", fd); - return (fd); - } - sock_fdrelse(fd); - /* Set the socket timeouts, so our connection attempt completes in * finite time */ tv.tv_sec = *ksocknal_tunables.ksnd_timeout; @@ -874,7 +807,7 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, if (rc == EADDRINUSE) { CFS_NET_EX; CDEBUG(D_NET, "Port %d already in use\n", local_port); - *may_retry = 1; + *fatal = 0; goto out; } if (rc != 0) { @@ -884,7 +817,7 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, goto out; } rc = soconnect(so, (struct sockaddr *)&srvaddr); - *may_retry = (rc == EADDRNOTAVAIL || rc == EADDRINUSE); + *fatal = !(rc == EADDRNOTAVAIL || rc == EADDRINUSE); if (rc != 0) { CFS_NET_EX; if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) @@ -921,8 +854,7 @@ ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, return (-rc); out: - rele_file(KSN_SOCK2FILE(so)); - + ksocknal_lib_release_sock(so); return (-rc); } @@ -935,7 +867,7 @@ ksocknal_lib_push_conn(ksock_conn_t *conn) int rc; CFS_DECL_NET_DATA; - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) /* being shut down */ return; sock = conn->ksnc_sock; @@ -950,7 +882,7 @@ ksocknal_lib_push_conn(ksock_conn_t *conn) sosetopt(sock, &sopt); CFS_NET_EX; - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); return; } diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.h b/lnet/klnds/socklnd/socklnd_lib-darwin.h index e3b286bc..69a20dd 100644 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.h +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.h @@ -32,14 +32,10 @@ #define SOCKNAL_ARCH_EAGER_ACK 1 -#define KSN_SOCK2FILE(so) ((struct file *)(so)->reserved4) -#define KSN_CONN2FILE(conn) ((struct file *)(conn)->ksnc_sock->reserved4) - #define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc) #define SOCK_ERROR(so) ((so)->so_error) #define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat) -extern struct socket * sockfd_lookup(int fd, void *foo); static inline int ksocknal_nsched(void) diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c index 3bfcf03..14a3427e 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -1,7 +1,7 @@ #include "socknal.h" # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table ksocknal_ctl_table[12]; +static ctl_table ksocknal_ctl_table[18]; ctl_table ksocknal_top_ctl_table[] = { {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, @@ -15,9 +15,27 @@ ksocknal_lib_tunables_init () int j = 1; ksocknal_ctl_table[i++] = (ctl_table) + {j++, "port", ksocknal_tunables.ksnd_port, + sizeof (int), 0444, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) + {j++, "backlog", ksocknal_tunables.ksnd_backlog, + sizeof (int), 0444, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) {j++, "timeout", ksocknal_tunables.ksnd_timeout, sizeof (int), 0644, NULL, &proc_dointvec}; ksocknal_ctl_table[i++] = (ctl_table) + {j++, "listen_timeout", ksocknal_tunables.ksnd_listen_timeout, + sizeof (int), 0644, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) + {j++, "nconnds", ksocknal_tunables.ksnd_nconnds, + sizeof (int), 0444, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) + {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms, + sizeof (int), 0444, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) + {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms, + sizeof (int), 0444, NULL, &proc_dointvec}; + ksocknal_ctl_table[i++] = (ctl_table) {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}; #if SOCKNAL_ZC @@ -554,7 +572,7 @@ ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int int len; int rc; - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) { LASSERT (conn->ksnc_closing); *txmem = *rxmem = *nagle = 0; @@ -578,7 +596,7 @@ ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int } set_fs (oldmm); - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); if (rc == 0) *nagle = !*nagle; @@ -715,62 +733,15 @@ ksocknal_lib_setup_sock (struct socket *sock) } int -ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, - ksock_route_t *route, int local_port) +ksocknal_lib_set_sock_timeout (struct socket *sock, int timeout) { - struct sockaddr_in locaddr; - struct sockaddr_in srvaddr; - struct socket *sock; - int rc; - int option; - mm_segment_t oldmm = get_fs(); - struct timeval tv; - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = - (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr) - : INADDR_ANY; - - memset (&srvaddr, 0, sizeof (srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons (route->ksnr_port); - srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); - - *may_retry = 0; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc != 0) { - CERROR ("Can't create autoconnect socket: %d\n", rc); - return (rc); - } - - /* Ugh; have to map_fd for compatibility with sockets passed in - * from userspace. And we actually need the sock->file refcounting - * that this gives you :) */ - - rc = sock_map_fd (sock); - if (rc < 0) { - sock_release (sock); - CERROR ("sock_map_fd error %d\n", rc); - return (rc); - } - - /* NB the file descriptor (rc) now owns the ref on sock->file */ - LASSERT (sock->file != NULL); - LASSERT (file_count(sock->file) == 1); - - get_file(sock->file); /* extra ref makes sock->file */ - sys_close(rc); /* survive this close */ - - /* Still got a single ref on sock->file */ - LASSERT (file_count(sock->file) == 1); + struct timeval tv; + int rc; + mm_segment_t oldmm = get_fs(); - /* Set the socket timeouts, so our connection attempt completes in + /* Set the socket timeouts, so our connection handshake completes in * finite time */ - tv.tv_sec = *ksocknal_tunables.ksnd_timeout; + tv.tv_sec = timeout; tv.tv_usec = 0; set_fs (KERNEL_DS); @@ -778,9 +749,8 @@ ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, (char *)&tv, sizeof (tv)); set_fs (oldmm); if (rc != 0) { - CERROR ("Can't set send timeout %d: %d\n", - *ksocknal_tunables.ksnd_timeout, rc); - goto failed; + CERROR ("Can't set send timeout %d: %d\n", timeout, rc); + return rc; } set_fs (KERNEL_DS); @@ -788,11 +758,48 @@ ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, (char *)&tv, sizeof (tv)); set_fs (oldmm); if (rc != 0) { - CERROR ("Can't set receive timeout %d: %d\n", - *ksocknal_tunables.ksnd_timeout, rc); - goto failed; + CERROR ("Can't set receive timeout %d: %d\n", timeout, rc); + return rc; + } + + return 0; +} + +void +ksocknal_lib_release_sock(struct socket *sock) +{ + sock_release(sock); +} + +int +ksocknal_lib_create_sock(struct socket **sockp, int *fatal, + int timeout, __u32 local_ip, int local_port) +{ + struct sockaddr_in locaddr; + struct socket *sock; + int rc; + int option; + mm_segment_t oldmm = get_fs(); + + *fatal = 1; /* assume errors are fatal */ + + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(local_port); + locaddr.sin_addr.s_addr = (local_ip == 0) ? + INADDR_ANY : htonl(local_ip); + + rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); + *sockp = sock; + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (rc); } + rc = ksocknal_lib_set_sock_timeout(sock, timeout); + if (rc != 0) + goto failed; + set_fs (KERNEL_DS); option = 1; rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, @@ -806,8 +813,8 @@ ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr, sizeof(locaddr)); if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *may_retry = 1; + CDEBUG(D_WARNING, "Port %d already in use\n", local_port); + *fatal = 0; goto failed; } if (rc != 0) { @@ -816,9 +823,113 @@ ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, goto failed; } - rc = sock->ops->connect(sock, - (struct sockaddr *)&srvaddr, sizeof(srvaddr), - sock->file->f_flags); + return 0; + + failed: + sock_release(sock); + return rc; +} + +int +ksocknal_lib_listen(struct socket **sockp, int port, int backlog) +{ + int fatal; + int rc; + + rc = ksocknal_lib_create_sock(sockp, &fatal, 1, 0, port); + if (rc != 0) + return rc; + + rc = (*sockp)->ops->listen(*sockp, backlog); + if (rc == 0) + return 0; + + CERROR("Can't set listen backlog %d: %d\n", backlog, rc); + sock_release(*sockp); + return rc; +} + +int +ksocknal_lib_accept(struct socket *sock, ksock_connreq_t **crp) +{ + wait_queue_t wait; + struct socket *newsock; + int rc; + + init_waitqueue_entry(&wait, current); + + newsock = sock_alloc(); + if (newsock == NULL) { + CERROR("Can't allocate socket\n"); + return -ENOMEM; + } + + /* XXX this should add a ref to sock->ops->owner, if + * TCP could be a module */ + newsock->type = sock->type; + newsock->ops = sock->ops; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(sock->sk->sk_sleep, &wait); + + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + if (rc == -EAGAIN) { + /* Nothing ready, so wait for activity */ + schedule(); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + } + + remove_wait_queue(sock->sk->sk_sleep, &wait); + set_current_state(TASK_RUNNING); + + if (rc != 0) + goto failed; + + rc = ksocknal_lib_set_sock_timeout(newsock, + *ksocknal_tunables.ksnd_listen_timeout); + if (rc != 0) + goto failed; + + rc = -ENOMEM; + PORTAL_ALLOC(*crp, sizeof(**crp)); + if (*crp == NULL) + goto failed; + + (*crp)->ksncr_sock = newsock; + return 0; + + failed: + sock_release(newsock); + return rc; +} + +void +ksocknal_lib_abort_accept(struct socket *sock) +{ + wake_up_all(sock->sk->sk_sleep); +} + +int +ksocknal_lib_connect_sock(struct socket **sockp, int *fatal, + ksock_route_t *route, int local_port) +{ + struct sockaddr_in srvaddr; + int rc; + + memset (&srvaddr, 0, sizeof (srvaddr)); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons (route->ksnr_port); + srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); + + rc = ksocknal_lib_create_sock(sockp, fatal, + *ksocknal_tunables.ksnd_timeout, + route->ksnr_myipaddr, local_port); + if (rc != 0) + return rc; + + rc = (*sockp)->ops->connect(*sockp, + (struct sockaddr *)&srvaddr, sizeof(srvaddr), + 0); if (rc == 0) return 0; @@ -826,15 +937,14 @@ ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, * peer/port on the same local port on a differently typed * connection. Let our caller retry with a different local * port... */ - *may_retry = (rc == -EADDRNOTAVAIL); + *fatal = !(rc == -EADDRNOTAVAIL); - CDEBUG(*may_retry ? D_NET : D_ERROR, + CDEBUG(*fatal ? D_ERROR : D_NET, "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, HIPQUAD(route->ksnr_myipaddr), local_port, HIPQUAD(route->ksnr_ipaddr), route->ksnr_port); - failed: - fput(sock->file); + sock_release(*sockp); return rc; } @@ -861,7 +971,7 @@ ksocknal_lib_push_conn (ksock_conn_t *conn) int rc; mm_segment_t oldmm; - rc = ksocknal_getconnsock (conn); + rc = ksocknal_connsock_addref(conn); if (rc != 0) /* being shut down */ return; @@ -886,7 +996,7 @@ ksocknal_lib_push_conn (ksock_conn_t *conn) tp->nonagle = nonagle; release_sock (sk); - ksocknal_putconnsock (conn); + ksocknal_connsock_decref(conn); } extern void ksocknal_read_callback (ksock_conn_t *conn); diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.h b/lnet/klnds/socklnd/socklnd_lib-linux.h index 6129fdc..982bcdb 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.h +++ b/lnet/klnds/socklnd/socklnd_lib-linux.h @@ -19,11 +19,11 @@ #include #include #include - + #include #include #include - + #include #include #include @@ -38,7 +38,7 @@ #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) # include #endif - + #include #include @@ -52,6 +52,7 @@ # define sk_prot prot # define sk_sndbuf sndbuf # define sk_socket socket +# define sk_sleep sleep #endif #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) @@ -64,9 +65,6 @@ #define SOCK_ERROR(so) ((so)->sk->sk_err) #define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) -#define KSN_SOCK2FILE(so) ((so)->file) -#define KSN_CONN2FILE(conn) ((conn)->ksnc_sock->file) - #ifndef CONFIG_SMP static inline int ksocknal_nsched(void) diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index b8ade09..c60d21c 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -20,10 +20,34 @@ #include "socknal.h" -static int timeout = SOCKNAL_IO_TIMEOUT; +static int port = SOCKNAL_PORT; +CFS_MODULE_PARM(port, "i", int, 0444, + "the socknal service number (site wide)"); + +static int backlog = SOCKNAL_BACKLOG; +CFS_MODULE_PARM(backlog, "i", int, 0644, + "service listen backlog (0 == disable service)"); + +static int timeout = SOCKNAL_TIMEOUT; CFS_MODULE_PARM(timeout, "i", int, 0644, "dead socket timeout (seconds)"); +static int listen_timeout = SOCKNAL_LISTEN_TIMEOUT; +CFS_MODULE_PARM(listen_timeout, "i", int, 0644, + "socket listen timeout (seconds)"); + +static int nconnds = SOCKNAL_NCONND; +CFS_MODULE_PARM(nconnds, "i", int, 0644, + "# connection daemons"); + +static int min_reconnectms = SOCKNAL_MIN_RECONNECTMS; +CFS_MODULE_PARM(min_reconnectms, "i", int, 0644, + "min connection retry interval (mS)"); + +static int max_reconnectms = SOCKNAL_MAX_RECONNECTMS; +CFS_MODULE_PARM(max_reconnectms, "i", int, 0644, + "max connection retry interval (mS)"); + static int eager_ack = SOCKNAL_EAGER_ACK; CFS_MODULE_PARM(eager_ack, "i", int, 0644, "send tcp ack packets eagerly"); @@ -69,7 +93,13 @@ CFS_MODULE_PARM(zc_min_frag, "i", int, 0644, #endif ksock_tunables_t ksocknal_tunables = { + .ksnd_port = &port, + .ksnd_backlog = &backlog, .ksnd_timeout = &timeout, + .ksnd_listen_timeout = &listen_timeout, + .ksnd_nconnds = &nconnds, + .ksnd_min_reconnectms = &min_reconnectms, + .ksnd_max_reconnectms = &max_reconnectms, .ksnd_eager_ack = &eager_ack, .ksnd_typed_conns = &typed_conns, .ksnd_min_bulk = &min_bulk, diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index 609a430..b8dbf2c 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -23,16 +23,13 @@ libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h if UTILS if !CRAY_PORTALS -sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck +sbin_PROGRAMS += ptlctl routerstat wirecheck endif if BUILD_GMNAL sbin_PROGRAMS += gmnalnid endif endif -acceptor_SOURCES = acceptor.c -acceptor_LDADD = $(LIBWRAP) - wirecheck_SOURCES = wirecheck.c gmnalnid_SOURCES = gmnalnid.c diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 2a5166b..54c716b 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -916,150 +916,6 @@ jt_ptl_print_connections (int argc, char **argv) return 0; } -int jt_ptl_connect(int argc, char **argv) -{ -#ifndef HAVE_CONNECT - /* no connect() support */ - return -1; -#else /* HAVE_CONNECT */ - struct portals_cfg pcfg; - struct sockaddr_in srvaddr; - struct sockaddr_in locaddr; - __u32 ipaddr; - char *flag; - int fd, rc; - int type = SOCKNAL_CONN_ANY; - int port, rport; - int o; - - if (argc < 3) { - fprintf(stderr, "usage: %s ip port [type]\n", argv[0]); - return 0; - } - - if (!g_nal_is_compatible (argv[0], SOCKNAL, 0)) - return -1; - - rc = ptl_parse_ipaddr (&ipaddr, argv[1]); - if (rc != 0) { - fprintf(stderr, "Can't parse hostname: %s\n", argv[1]); - return -1; - } - - if (ptl_parse_port (&port, argv[2]) != 0) { - fprintf (stderr, "Can't parse port: %s\n", argv[2]); - return -1; - } - - if (argc > 3) - for (flag = argv[3]; *flag != 0; flag++) - switch (*flag) - { - case 'I': - if (type != SOCKNAL_CONN_ANY) { - fprintf(stderr, "Can't flag type twice\n"); - return -1; - } - type = SOCKNAL_CONN_BULK_IN; - break; - - case 'O': - if (type != SOCKNAL_CONN_ANY) { - fprintf(stderr, "Can't flag type twice\n"); - return -1; - } - type = SOCKNAL_CONN_BULK_OUT; - break; - - case 'C': - if (type != SOCKNAL_CONN_ANY) { - fprintf(stderr, "Can't flag type twice\n"); - return -1; - } - type = SOCKNAL_CONN_CONTROL; - break; - - default: - fprintf (stderr, "unrecognised flag '%c'\n", - *flag); - return (-1); - } - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(port); - srvaddr.sin_addr.s_addr = htonl(ipaddr); - - - for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) { - fd = socket(PF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - fprintf(stderr, "socket() failed: %s\n", strerror(errno)); - return -1; - } - - o = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &o, sizeof(o)); - - locaddr.sin_port = htons(rport); - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == 0 || errno == EACCES) { - rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if (rc == 0) { - break; - } else if (errno != EADDRINUSE) { - fprintf(stderr, "Error connecting to host: %s\n", strerror(errno)); - close(fd); - return -1; - } - } else if (errno != EADDRINUSE) { - fprintf(stderr, "Error binding to port %d: %d: %s\n", port, errno, strerror(errno)); - close(fd); - return -1; - } - } - - if (rport == IPPORT_RESERVED / 2) { - fprintf(stderr, - "Warning: all privileged ports are in use.\n"); - return -1; - } - - printf("Connected host: %s type: %s\n", - argv[1], - (type == SOCKNAL_CONN_ANY) ? "A" : - (type == SOCKNAL_CONN_CONTROL) ? "C" : - (type == SOCKNAL_CONN_BULK_IN) ? "I" : - (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?"); - - PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); - pcfg.pcfg_nal = g_nal; - pcfg.pcfg_fd = fd; - pcfg.pcfg_misc = type; - - rc = pcfg_ioctl(&pcfg); - if (rc) { - fprintf(stderr, "failed to register fd with portals: %s\n", - strerror(errno)); - close (fd); - return -1; - } - - printf("Connection to %s registered with socknal\n", argv[1]); - - rc = close(fd); - if (rc) - fprintf(stderr, "close failed: %d\n", rc); - - return 0; -#endif /* HAVE_CONNECT */ -} - int jt_ptl_disconnect(int argc, char **argv) { struct portals_cfg pcfg; diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index 03cfe77..4287c55 100644 --- a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -37,7 +37,6 @@ command_t list[] = { {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"}, {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"}, {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [iIOC])"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, -- 1.8.3.1