From f92b526fc0456f973a10a96879b663700fe0990d Mon Sep 17 00:00:00 2001 From: liangzhen Date: Mon, 5 May 2008 14:54:30 +0000 Subject: [PATCH] Branch HEAD Backout patch of b13983, checking and updating ksnp_proto and adding conn to ksnp_conns should always happen atomically, keep extra reference count of socket during establishing of connection. b=15712 i=adilger i=liangzhen --- lnet/klnds/socklnd/socklnd.c | 110 ++++++++++++++++++++++------------------ lnet/klnds/socklnd/socklnd_cb.c | 3 +- 2 files changed, 62 insertions(+), 51 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index a04bb48..710ebd2 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1045,7 +1045,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; - atomic_set (&conn->ksnc_sock_refcount, 1); /* 1 ref for conn */ + /* 2 ref, 1 for conn, another extra ref prevents socket + * being closed before establishment of connection */ + atomic_set (&conn->ksnc_sock_refcount, 2); conn->ksnc_type = type; ksocknal_lib_save_callback(sock, conn); atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ @@ -1154,6 +1156,14 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } } + if (peer->ksnp_closing || + (active && route->ksnr_deleted)) { + /* peer/route got closed under me */ + rc = -ESTALE; + warn = "peer/route removed"; + goto failed_2; + } + if (peer->ksnp_proto == NULL) { /* Never connected before. * NB recv_hello may have returned EPROTO to signal my peer @@ -1191,40 +1201,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, goto failed_2; } - write_unlock_bh(global_lock); - - /* No more race (or won the race), has compatible version with peer */ - if (active) { - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } - - /* setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. */ - if (rc == 0) - rc = ksocknal_lib_setup_sock(sock); - - write_lock_bh(global_lock); - - if (rc != 0) - goto failed_2; - - if (peer->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer/route got closed under me */ - rc = -ESTALE; - warn = "peer/route removed"; - goto failed_2; - } - /* Refuse to duplicate an existing connection, unless this is a * loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { @@ -1301,21 +1277,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_queue_tx_locked (tx, conn); } - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - - if (!active) - peer->ksnp_accepting--; + write_unlock_bh (global_lock); - write_unlock_bh(global_lock); + /* We've now got a new connection. Any errors from here on are just + * like "normal" comms errors and we close the connection normally. + * NB (a) we still have to send the reply HELLO for passive + * connections, + * (b) normal I/O on the conn is blocked until I setup and call the + * socket callbacks. + */ - if (ksocknal_connsock_addref(conn) == 0) { - ksocknal_lib_bind_irq (irq); - /* Allow I/O to proceed. */ - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } + ksocknal_lib_bind_irq (irq); CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d" " incarnation:"LPD64" sched[%d]/%d\n", @@ -1324,11 +1296,51 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, conn->ksnc_port, incarnation, (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); + if (active) { + /* additional routes after interface exchange? */ + ksocknal_create_routes(peer, conn->ksnc_port, + hello->kshm_ips, hello->kshm_nips); + } else { + hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, + hello->kshm_nips); + rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + } + LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, kshm_ips[LNET_MAX_INTERFACES])); + /* setup the socket AFTER I've received hello (it disables + * SO_LINGER). I might call back to the acceptor who may want + * to send a protocol version response and then close the + * socket; this ensures the socket only tears down after the + * response has been sent. */ + if (rc == 0) + rc = ksocknal_lib_setup_sock(sock); + + write_lock_bh(global_lock); + + /* NB my callbacks block while I hold ksnd_global_lock */ + ksocknal_lib_set_callback(sock, conn); + + if (!active) + peer->ksnp_accepting--; + + write_unlock_bh(global_lock); + + if (rc != 0) { + write_lock_bh(global_lock); + ksocknal_close_conn_locked(conn, rc); + write_unlock_bh(global_lock); + } else if (ksocknal_connsock_addref(conn) == 0) { + /* Allow I/O to proceed. */ + ksocknal_read_callback(conn); + ksocknal_write_callback(conn); + ksocknal_connsock_decref(conn); + } + + ksocknal_connsock_decref(conn); ksocknal_conn_decref(conn); - return 0; + return rc; failed_2: if (!peer->ksnp_closing && diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 8217d6b..bf4cd08 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -2104,8 +2104,7 @@ ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, LASSERT (0 <= hello->kshm_nips && hello->kshm_nips <= LNET_MAX_INTERFACES); - /* No need for getconnsock/putconnsock */ - LASSERT (!conn->ksnc_closing); + /* rely on caller to hold a ref on socket so it wouldn't disappear */ LASSERT (conn->ksnc_proto != NULL); srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid); -- 1.8.3.1