From d209519c6d28a7fd3e2fb339b709165de4322624 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Sat, 30 Oct 2021 11:39:26 -0700 Subject: [PATCH] LU-15137 socklnd: decrement connection counters on close To gracefully handle potential race with delayed connection create, decrement connection counters per type as connections are being closed. Lustre-change: https://review.whamcloud.com/45422 Lustre-commit: 7e26413aa85fdc931721cde36bae3bf2bb97e63f Test-Parameters: trivial testlist=sanity-lnet Fixes: cbf740d0 ("LU-12815 socklnd: add conns_per_peer parameter") Signed-off-by: Serguei Smirnov Change-Id: Ieb3b44701e4999ea1fe63234162dd5878d65958a Reviewed-by: Amir Shehata Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/46051 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lnet/klnds/socklnd/socklnd.c | 75 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index ab4f8cf..ab36a4e 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -400,7 +400,9 @@ ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, switch (type) { case SOCKLND_CONN_CONTROL: conn_cb->ksnr_ctrl_conn_count++; - /* there's a single control connection per peer */ + /* there's a single control connection per peer, + * two in case of loopback + */ conn_cb->ksnr_connected |= BIT(type); break; case SOCKLND_CONN_BULK_IN: @@ -426,6 +428,46 @@ ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); } + +static void +ksocknal_decr_conn_count(struct ksock_conn_cb *conn_cb, + int type) +{ + conn_cb->ksnr_conn_count--; + + /* clear the connected bit if too few conns of this type remain */ + switch (type) { + case SOCKLND_CONN_CONTROL: + conn_cb->ksnr_ctrl_conn_count--; + /* there's a single control connection per peer, + * two in case of loopback + */ + if 
(conn_cb->ksnr_ctrl_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + conn_cb->ksnr_blki_conn_count--; + if (conn_cb->ksnr_blki_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + conn_cb->ksnr_blko_conn_count--; + if (conn_cb->ksnr_blko_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_ANY: + if (conn_cb->ksnr_conn_count < conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected &= ~BIT(type); + break; + default: + LBUG(); + break; + } + + CDEBUG(D_NET, "Del conn type %d, ksnr_connected %x ksnr_max_conns %d\n", + type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); +} + static void ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, struct ksock_conn *conn) @@ -1195,7 +1237,8 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) struct ksock_peer_ni *peer_ni = conn->ksnc_peer; struct ksock_conn_cb *conn_cb; struct ksock_conn *conn2; - struct list_head *tmp; + int conn_count; + int duplicate_count = 0; LASSERT(peer_ni->ksnp_error == 0); LASSERT(!conn->ksnc_closing); @@ -1209,26 +1252,28 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) /* dissociate conn from cb... 
*/ LASSERT(!conn_cb->ksnr_deleted); + conn_count = ksocknal_get_conn_count_by_type(conn_cb, + conn->ksnc_type); /* connected bit is set only if all connections * of the given type got created */ - if (ksocknal_get_conn_count_by_type(conn_cb, conn->ksnc_type) == - conn_cb->ksnr_max_conns) + if (conn_count == conn_cb->ksnr_max_conns) LASSERT((conn_cb->ksnr_connected & BIT(conn->ksnc_type)) != 0); - conn2 = NULL; - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_conn_cb == conn_cb && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; + if (conn_count == 1) { + list_for_each_entry(conn2, &peer_ni->ksnp_conns, + ksnc_list) { + if (conn2->ksnc_conn_cb == conn_cb && + conn2->ksnc_type == conn->ksnc_type) + duplicate_count += 1; + } + if (duplicate_count > 0) + CERROR("Found %d duplicate conns type %d\n", + duplicate_count, + conn->ksnc_type); } - if (conn2 == NULL) - conn_cb->ksnr_connected &= ~BIT(conn->ksnc_type); + ksocknal_decr_conn_count(conn_cb, conn->ksnc_type); conn->ksnc_conn_cb = NULL; -- 1.8.3.1