From: Serguei Smirnov Date: Sat, 30 Oct 2021 18:39:26 +0000 (-0700) Subject: LU-15137 socklnd: decrement connection counters on close X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=2d4ba09e46fd2f49807664c5146e3364af549c82;p=fs%2Flustre-release.git LU-15137 socklnd: decrement connection counters on close To gracefully handle potential race with delayed connection create, decrement connection counters per type as connections are being closed. Lustre-change: https://review.whamcloud.com/45422 Lustre-commit: 7e26413aa85fdc931721cde36bae3bf2bb97e63f Test-Parameters: trivial testlist=sanity-lnet Fixes: e8842e86 ("LU-12815 socklnd: add conns_per_peer parameter") Signed-off-by: Serguei Smirnov Change-Id: Ieb3b44701e4999ea1fe63234162dd5878d65958a Reviewed-by: Amir Shehata Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/46035 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 53b8367..26380aa 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -358,7 +358,9 @@ ksocknal_incr_conn_count(struct ksock_route *route, switch (type) { case SOCKLND_CONN_CONTROL: route->ksnr_ctrl_conn_count++; - /* there's a single control connection per peer */ + /* there's a single control connection per peer, + * two in case of loopback + */ route->ksnr_connected |= BIT(type); break; case SOCKLND_CONN_BULK_IN: @@ -390,6 +392,48 @@ ksocknal_incr_conn_count(struct ksock_route *route, *ksocknal_tunables.ksnd_conns_per_peer); } + +static void +ksocknal_decr_conn_count(struct ksock_route *route, + int type) +{ + int max_conns = *ksocknal_tunables.ksnd_conns_per_peer; + + route->ksnr_conn_count--; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + route->ksnr_ctrl_conn_count--; + /* there's a single control connection per peer, + * two in case of loopback + */ + if (route->ksnr_ctrl_conn_count == 0) + route->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + route->ksnr_blki_conn_count--; + if (route->ksnr_blki_conn_count < max_conns) + route->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + route->ksnr_blko_conn_count--; + if (route->ksnr_blko_conn_count < max_conns) + route->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_ANY: + if (route->ksnr_conn_count < max_conns) + route->ksnr_connected &= ~BIT(type); + break; + default: + LBUG(); + break; + } + + CDEBUG(D_NET, "Del conn type %d, ksnr_connected %x\n", + type, route->ksnr_connected); +} + static void ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn) { @@ -1500,7 +1544,8 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) struct ksock_peer_ni *peer_ni = conn->ksnc_peer; struct ksock_route *route; struct ksock_conn *conn2; - struct list_head *tmp; + int conn_count; + int duplicate_count = 0; LASSERT(peer_ni->ksnp_error == 0); LASSERT(!conn->ksnc_closing); @@ -1514,25 +1559,28 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) /* dissociate conn from route... */ LASSERT(!route->ksnr_deleted); + conn_count = ksocknal_get_conn_count_by_type(route, + conn->ksnc_type); /* connected bit is set only if all connections * of the given type got created */ - if (ksocknal_get_conn_count_by_type(route, conn->ksnc_type) == - *ksocknal_tunables.ksnd_conns_per_peer) + if (conn_count == *ksocknal_tunables.ksnd_conns_per_peer) LASSERT((route->ksnr_connected & BIT(conn->ksnc_type)) != 0); - conn2 = NULL; - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - conn2 = NULL; + if (conn_count == 1) { + list_for_each_entry(conn2, &peer_ni->ksnp_conns, + ksnc_list) { + if (conn2->ksnc_route == route && + conn2->ksnc_type == conn->ksnc_type) + duplicate_count += 1; + } + if (duplicate_count > 0) + CERROR("Found %d duplicate conns type %d\n", + duplicate_count, + conn->ksnc_type); } - if (conn2 == NULL) - route->ksnr_connected &= ~(1 << conn->ksnc_type); + ksocknal_decr_conn_count(route, conn->ksnc_type); conn->ksnc_route = NULL;