From f93676bebb458e791acce210345ff87f9fd3ecac Mon Sep 17 00:00:00 2001 From: Cyril Bordage Date: Tue, 8 Aug 2023 14:34:17 +0200 Subject: [PATCH] LU-17021 socklnd: fix late ksnr_max_conns set ksnr_max_conns was set to the correct value after it was used. Set it before calling ksocknal_add_conn_cb_locked() in ksocknal_add_peer(). Also add the diagnostic fields ksnr_t_last_conn, ksnr_max_retries and ksnp_t_created, and dump the peer_ni and conn_cb state with CERROR in ksocknal_connect() when the BULK_OUT bit is not wanted, just before the LASSERT on that condition. Lustre-change: https://review.whamcloud.com/51890 Lustre-commit: TBD (from cc894288304fb8e6caa44543e6b44b8ec18deb9b) Test-Parameters: trivial Signed-off-by: Cyril Bordage Change-Id: I9f2454d915ee1ab27db96f5247028db94965a11f Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51891 Tested-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Andreas Dilger Reviewed-by: Serguei Smirnov --- lnet/klnds/socklnd/socklnd.c | 5 ++++- lnet/klnds/socklnd/socklnd.h | 3 +++ lnet/klnds/socklnd/socklnd_cb.c | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index b0bf7b1..bfa943e 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -115,6 +115,8 @@ ksocknal_create_conn_cb(__u32 ipaddr, int port) conn_cb->ksnr_blko_conn_count = 0; conn_cb->ksnr_max_conns = 0; conn_cb->ksnr_busy_retry_count = 0; + conn_cb->ksnr_t_last_conn = 0; + conn_cb->ksnr_max_retries = 0; return conn_cb; } @@ -161,6 +163,7 @@ ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) peer_ni->ksnp_last_alive = 0; peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; peer_ni->ksnp_conn_cb = NULL; + peer_ni->ksnp_t_created = ktime_get_seconds(); INIT_LIST_HEAD(&peer_ni->ksnp_conns); INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue); @@ -581,13 +584,13 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, if (peer_ni->ksnp_conn_cb) { ksocknal_conn_cb_decref(conn_cb); } else { - ksocknal_add_conn_cb_locked(peer_ni, conn_cb); /* Remember conns_per_peer setting at the time * of connection initiation. It will define the * max number of conns per type for this conn_cb * while it's in use.
*/ conn_cb->ksnr_max_conns = ksocknal_get_conns_per_peer(peer_ni); + ksocknal_add_conn_cb_locked(peer_ni, conn_cb); } write_unlock_bh(&ksocknal_data.ksnd_global_lock); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index c7fbfb4..cf209fd 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -391,6 +391,8 @@ struct ksock_conn_cb { unsigned int ksnr_busy_retry_count;/* counts retry attempts * due to EALREADY rc */ + time64_t ksnr_t_last_conn; + unsigned int ksnr_max_retries:4;/* max_retries by type */ }; #define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ @@ -414,6 +416,7 @@ struct ksock_peer_ni { struct list_head ksnp_zc_req_list; time64_t ksnp_send_keepalive; /* time to send keepalive */ struct lnet_ni *ksnp_ni; /* which network */ + time64_t ksnp_t_created; }; struct ksock_connreq { diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 4180f89..c50f25b 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1946,6 +1946,25 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) conn_cb->ksnr_blki_conn_count <= conn_cb->ksnr_blko_conn_count) { type = SOCKLND_CONN_BULK_IN; } else { + if ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) == 0) { + CERROR("Time %lld Peer %s (c:a:la:ct)(%d:%d:%lld:%lld)\n", + ktime_get_seconds(), + libcfs_nid2str(peer_ni->ksnp_id.nid), + peer_ni->ksnp_closing, + peer_ni->ksnp_accepting, + peer_ni->ksnp_last_alive, + peer_ni->ksnp_t_created); + CERROR("Conn_cb %p IP %pI4h (sh:cg:cd:de:cc:ct:mr)(%u,%u,%u:%u:%d:%lld:%u)\n", + conn_cb, + &conn_cb->ksnr_ipaddr, + conn_cb->ksnr_scheduled, + conn_cb->ksnr_connecting, + conn_cb->ksnr_connected, + conn_cb->ksnr_deleted, + conn_cb->ksnr_conn_count, + conn_cb->ksnr_t_last_conn, + conn_cb->ksnr_max_retries); + } LASSERT ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) != 0); type = SOCKLND_CONN_BULK_OUT; } @@ -1991,6 +2010,7 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) if (retry_later) 
CDEBUG(D_NET, "peer_ni %s: conn race, retry later. rc %d\n", libcfs_nid2str(peer_ni->ksnp_id.nid), rc); + conn_cb->ksnr_t_last_conn = ktime_get_seconds(); write_lock_bh(&ksocknal_data.ksnd_global_lock); } @@ -2004,6 +2024,7 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) * the peer doesn't support as many connections as we want */ conn_cb->ksnr_connected |= BIT(type); + conn_cb->ksnr_max_retries |= BIT(type); retry_later = false; } -- 1.8.3.1