From 71b2476e4ddb95aa42f4a0ea3f23b1826017bfa5 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Tue, 30 Mar 2021 12:58:57 -0400 Subject: [PATCH] LU-12815 socklnd: add conns_per_peer parameter Introduce conns_per_peer ksocklnd module parameter. In typed mode, this parameter shall control the number of BULK_IN and BULK_OUT tcp connections, while the number of CONTROL connections shall stay at 1. In untyped mode, this parameter shall control the number of untyped connections. The default conns_per_peer is 1. Max is 127. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Serguei Smirnov Change-Id: I70bbaf7899ae1fbc41de34553c8c4ad1c7d55f7e Reviewed-on: https://review.whamcloud.com/41056 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Chris Horn Reviewed-by: Oleg Drokin --- lnet/klnds/socklnd/socklnd.c | 161 +++++++++++++++++++++++++-------- lnet/klnds/socklnd/socklnd.h | 22 ++++- lnet/klnds/socklnd/socklnd_cb.c | 3 +- lnet/klnds/socklnd/socklnd_modparams.c | 9 ++ 4 files changed, 150 insertions(+), 45 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index a0aef2d..cdc15e9 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -131,6 +131,9 @@ ksocknal_create_conn_cb(struct sockaddr *addr) conn_cb->ksnr_connected = 0; conn_cb->ksnr_deleted = 0; conn_cb->ksnr_conn_count = 0; + conn_cb->ksnr_ctrl_conn_count = 0; + conn_cb->ksnr_blki_conn_count = 0; + conn_cb->ksnr_blko_conn_count = 0; return conn_cb; } @@ -363,6 +366,73 @@ out: return rc; } +static unsigned int +ksocknal_get_conn_count_by_type(struct ksock_conn_cb *conn_cb, + int type) +{ + unsigned int count = 0; + + switch (type) { + case SOCKLND_CONN_CONTROL: + count = conn_cb->ksnr_ctrl_conn_count; + break; + case SOCKLND_CONN_BULK_IN: + count = conn_cb->ksnr_blki_conn_count; + break; + case SOCKLND_CONN_BULK_OUT: + count = conn_cb->ksnr_blko_conn_count; + break; + case SOCKLND_CONN_ANY: + 
count = conn_cb->ksnr_conn_count; + break; + default: + LBUG(); + break; + } + + return count; +} + +static void +ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, + int type) +{ + conn_cb->ksnr_conn_count++; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + conn_cb->ksnr_ctrl_conn_count++; + /* there's a single control connection per peer */ + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + conn_cb->ksnr_blki_conn_count++; + if (conn_cb->ksnr_blki_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + conn_cb->ksnr_blko_conn_count++; + if (conn_cb->ksnr_blko_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_ANY: + if (conn_cb->ksnr_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + conn_cb->ksnr_connected |= BIT(type); + break; + default: + LBUG(); + break; + + } + + CDEBUG(D_NET, "Add conn type %d, ksnr_connected %x conns_per_peer %d\n", + type, conn_cb->ksnr_connected, *ksocknal_tunables.ksnd_conns_per_peer); +} + static void ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, struct ksock_conn *conn) @@ -404,8 +474,7 @@ ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, iface->ksni_nroutes++; } - conn_cb->ksnr_connected |= (1<<type); - conn_cb->ksnr_conn_count++; + ksocknal_incr_conn_count(conn_cb, type); /* Successful connection => further attempts can * proceed immediately @@ -727,6 +796,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, int rc; int rc2; int active; + int num_dup = 0; char *warn = NULL; active = (conn_cb != NULL); @@ -845,9 +915,9 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, peer_ni = peer2; } - /* +1 ref for me */ - ksocknal_peer_addref(peer_ni); - peer_ni->ksnp_accepting++; + /* +1 ref for me */ +
ksocknal_peer_addref(peer_ni); + peer_ni->ksnp_accepting++; /* Am I already connecting to this guy? Resolve in * favour of higher NID... @@ -859,14 +929,14 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, warn = "connection race resolution"; goto failed_2; } - } + } - if (peer_ni->ksnp_closing || + if (peer_ni->ksnp_closing || (active && conn_cb->ksnr_deleted)) { /* peer_ni/conn_cb got closed under me */ - rc = -ESTALE; + rc = -ESTALE; warn = "peer_ni/conn_cb removed"; - goto failed_2; + goto failed_2; } if (peer_ni->ksnp_proto == NULL) { @@ -893,18 +963,18 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, goto failed_2; } - switch (rc) { - default: - LBUG(); - case 0: - break; - case EALREADY: - warn = "lost conn race"; - goto failed_2; - case EPROTO: - warn = "retry with different protocol version"; - goto failed_2; - } + switch (rc) { + default: + LBUG(); + case 0: + break; + case EALREADY: + warn = "lost conn race"; + goto failed_2; + case EPROTO: + warn = "retry with different protocol version"; + goto failed_2; + } /* Refuse to duplicate an existing connection, unless this is a * loopback connection */ @@ -922,29 +992,33 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, conn2->ksnc_type != conn->ksnc_type) continue; - /* Reply on a passive connection attempt so the peer_ni - * realises we're connected. */ - LASSERT (rc == 0); - if (!active) - rc = EALREADY; + num_dup++; + if (num_dup < *ksocknal_tunables.ksnd_conns_per_peer) + continue; - warn = "duplicate"; - goto failed_2; - } - } + /* Reply on a passive connection attempt so the peer_ni + * realises we're connected. 
+ */ + LASSERT(rc == 0); + if (!active) + rc = EALREADY; - /* If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching + warn = "duplicate"; + goto failed_2; + } + } + /* If the connection created by this route didn't bind to the IP + * address the route connected to, the connection/route matching * code below probably isn't going to work. */ - if (active && + if (active && !rpc_cmp_addr((struct sockaddr *)&conn_cb->ksnr_addr, (struct sockaddr *)&conn->ksnc_peeraddr)) { CERROR("Route %s %pIS connected to %pIS\n", libcfs_id2str(peer_ni->ksnp_id), &conn_cb->ksnr_addr, &conn->ksnc_peeraddr); - } + } /* Search for a conn_cb corresponding to the new connection and * create an association. This allows incoming connections created @@ -1017,8 +1091,8 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, if (!active) { hello->kshm_nips = 0; - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } + rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); + } LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, kshm_ips[LNET_INTERFACES_NUM])); @@ -1139,7 +1213,14 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) if (conn_cb != NULL) { /* dissociate conn from cb... 
*/ LASSERT(!conn_cb->ksnr_deleted); - LASSERT((conn_cb->ksnr_connected & BIT(conn->ksnc_type)) != 0); + + /* connected bit is set only if all connections + * of the given type got created + */ + if (ksocknal_get_conn_count_by_type(conn_cb, conn->ksnc_type) == + *ksocknal_tunables.ksnd_conns_per_peer) + LASSERT((conn_cb->ksnr_connected & + BIT(conn->ksnc_type)) != 0); conn2 = NULL; list_for_each(tmp, &peer_ni->ksnp_conns) { @@ -1606,9 +1687,9 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) read_lock(&ksocknal_data.ksnd_global_lock); if (data->ioc_count >= 1) { - rc = -ENOENT; - } else { - rc = 0; + rc = -ENOENT; + } else { + rc = 0; iface = &net->ksnn_interface; sa = (void *)&iface->ksni_addr; diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 49055d0..08bbab9 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -156,6 +156,11 @@ struct ksock_tunables { #if SOCKNAL_VERSION_DEBUG int *ksnd_protocol; /* protocol version */ #endif + int *ksnd_conns_per_peer; /* for typed mode, yields: + * 1 + 2*conns_per_peer total + * for untyped: + * conns_per_peer total + */ }; struct ksock_net { @@ -357,6 +362,8 @@ struct ksock_conn { time64_t ksnc_tx_last_post; }; +#define SOCKNAL_CONN_COUNT_MAX_BITS 8 /* max conn count bits */ + struct ksock_conn_cb { struct list_head ksnr_connd_list;/* chain on ksnr_connd_routes */ struct ksock_peer_ni *ksnr_peer; /* owning peer_ni */ @@ -371,7 +378,11 @@ struct ksock_conn_cb { unsigned int ksnr_connecting:1;/* connection in progress */ unsigned int ksnr_connected:4;/* connections by type */ unsigned int ksnr_deleted:1; /* been removed from peer_ni? 
*/ - int ksnr_conn_count;/* # conns for this route */ + unsigned int ksnr_ctrl_conn_count:1; /* # conns by type */ + unsigned int ksnr_blki_conn_count:8; + unsigned int ksnr_blko_conn_count:8; + int ksnr_conn_count;/* total # conns for this cb */ + }; #define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ @@ -557,9 +568,12 @@ ksocknal_peer_decref(struct ksock_peer_ni *peer_ni) static inline int ksocknal_timeout(void) { - return *ksocknal_tunables.ksnd_timeout ? - *ksocknal_tunables.ksnd_timeout : - lnet_get_lnd_timeout(); + return *ksocknal_tunables.ksnd_timeout ?: lnet_get_lnd_timeout(); +} + +static inline int ksocknal_conns_per_peer(void) +{ + return *ksocknal_tunables.ksnd_conns_per_peer ?: 1; } int ksocknal_startup(struct lnet_ni *ni); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 502d127..a0c79d2 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1943,7 +1943,8 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb) type = SOCKLND_CONN_ANY; } else if ((wanted & BIT(SOCKLND_CONN_CONTROL)) != 0) { type = SOCKLND_CONN_CONTROL; - } else if ((wanted & BIT(SOCKLND_CONN_BULK_IN)) != 0) { + } else if ((wanted & BIT(SOCKLND_CONN_BULK_IN)) != 0 && + conn_cb->ksnr_blki_conn_count <= conn_cb->ksnr_blko_conn_count) { type = SOCKLND_CONN_BULK_IN; } else { LASSERT ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) != 0); diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 2204280..6d274a0 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -142,6 +142,10 @@ static unsigned int zc_recv_min_nfrags = 16; module_param(zc_recv_min_nfrags, int, 0644); MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv"); +static unsigned int conns_per_peer = 1; +module_param(conns_per_peer, uint, 0444); +MODULE_PARM_DESC(conns_per_peer, "number of connections per peer"); + #ifdef SOCKNAL_BACKOFF static int 
backoff_init = 3; module_param(backoff_init, int, 0644); @@ -201,6 +205,11 @@ int ksocknal_tunables_init(void) ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; ksocknal_tunables.ksnd_zc_recv = &zc_recv; ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; + if (conns_per_peer > ((1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1)) { + CWARN("socklnd conns_per_peer is capped at %u.\n", + (1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1); + } + ksocknal_tunables.ksnd_conns_per_peer = &conns_per_peer; if (enable_irq_affinity) { CWARN("irq_affinity is removed from socklnd because modern " -- 1.8.3.1