From 6d68c06ac7971659342c4469ee68adeda81caef5 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Wed, 3 Feb 2021 20:35:00 -0500 Subject: [PATCH] LU-12815 socklnd: add conns_per_peer parameter Introduce conns_per_peer ksocklnd module parameter. In typed mode, this parameter shall control the number of BULK_IN and BULK_OUT tcp connections, while the number of CONTROL connections shall stay at 1. In untyped mode, this parameter shall control the number of untyped connections. The default conns_per_peer is 1. Max is 127. Performance scaling on 100GbE: conns_per_peer speed 1 1.7GiB/s 2 3.3GiB/s 4 6.4GiB/s 8 11.5GiB/s Lustre-change: https://review.whamcloud.com/41056 Lustre-commit: 71b2476e4ddb95aa42f4a0ea3f23b1826017bfa5 Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Serguei Smirnov Change-Id: I1f4ef22141882224e14e18c2526554dcfa69c871 Reviewed-on: https://review.whamcloud.com/41411 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo --- lnet/klnds/socklnd/socklnd.c | 88 ++++++++++++++++++++++++++++++++-- lnet/klnds/socklnd/socklnd.h | 15 ++++++ lnet/klnds/socklnd/socklnd_cb.c | 9 ++-- lnet/klnds/socklnd/socklnd_modparams.c | 9 ++++ 4 files changed, 114 insertions(+), 7 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index b713747..53b8367 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -81,6 +81,9 @@ ksocknal_create_route(__u32 ipaddr, int port) route->ksnr_deleted = 0; route->ksnr_conn_count = 0; route->ksnr_share_count = 0; + route->ksnr_ctrl_conn_count = 0; + route->ksnr_blki_conn_count = 0; + route->ksnr_blko_conn_count = 0; return (route); } @@ -318,6 +321,75 @@ out: return rc; } +static unsigned int +ksocknal_get_conn_count_by_type(struct ksock_route *route, + int type) +{ + unsigned int count = 0; + + switch (type) { + case SOCKLND_CONN_CONTROL: + count = route->ksnr_ctrl_conn_count; + break; + case SOCKLND_CONN_BULK_IN: + count = route->ksnr_blki_conn_count; + 
break; + case SOCKLND_CONN_BULK_OUT: + count = route->ksnr_blko_conn_count; + break; + case SOCKLND_CONN_ANY: + count = route->ksnr_conn_count; + break; + default: + LBUG(); + break; + } + + return count; +} + +static void +ksocknal_incr_conn_count(struct ksock_route *route, + int type) +{ + route->ksnr_conn_count++; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + route->ksnr_ctrl_conn_count++; + /* there's a single control connection per peer */ + route->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + route->ksnr_blki_conn_count++; + if (route->ksnr_blki_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + route->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + route->ksnr_blko_conn_count++; + if (route->ksnr_blko_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + route->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_ANY: + if (route->ksnr_conn_count >= + *ksocknal_tunables.ksnd_conns_per_peer) + route->ksnr_connected |= BIT(type); + break; + default: + LBUG(); + break; + + } + + CDEBUG(D_NET, "Add conn type %d, ksnr_connected %x conns_per_peer %d\n", + type, + route->ksnr_connected, + *ksocknal_tunables.ksnd_conns_per_peer); +} + static void ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn) { @@ -354,8 +426,7 @@ ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_con iface->ksni_nroutes++; } - route->ksnr_connected |= (1<<type); - route->ksnr_conn_count++; + ksocknal_incr_conn_count(route, type); /* Successful connection => further attempts can * proceed immediately */ @@ -1032,6 +1103,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, int rc; int rc2; int active; + int num_dup = 0; char *warn = NULL; active = (route != NULL); @@ -1217,6 +1289,10 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, conn2->ksnc_type != conn->ksnc_type) continue; +
num_dup++; + if (num_dup < *ksocknal_tunables.ksnd_conns_per_peer) + continue; + /* Reply on a passive connection attempt so the peer_ni * realises we're connected. */ LASSERT (rc == 0); @@ -1437,8 +1513,14 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) if (route != NULL) { /* dissociate conn from route... */ LASSERT(!route->ksnr_deleted); - LASSERT((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); + /* connected bit is set only if all connections + * of the given type got created + */ + if (ksocknal_get_conn_count_by_type(route, conn->ksnc_type) == + *ksocknal_tunables.ksnd_conns_per_peer) + LASSERT((route->ksnr_connected & + BIT(conn->ksnc_type)) != 0); conn2 = NULL; list_for_each(tmp, &peer_ni->ksnp_conns) { conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 9f198dd..80d9952 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -162,6 +162,11 @@ struct ksock_tunables { #if SOCKNAL_VERSION_DEBUG int *ksnd_protocol; /* protocol version */ #endif + int *ksnd_conns_per_peer; /* for typed mode, yields: + * 1 + 2*conns_per_peer total + * for untyped: + * conns_per_peer total + */ }; struct ksock_net { @@ -371,6 +376,8 @@ struct ksock_conn { time64_t ksnc_tx_last_post; }; +#define SOCKNAL_CONN_COUNT_MAX_BITS 8 /* max conn count bits */ + struct ksock_route { struct list_head ksnr_list; /* chain on peer_ni route list */ struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ @@ -385,6 +392,9 @@ struct ksock_route { unsigned int ksnr_connecting:1;/* connection establishment in progress */ unsigned int ksnr_connected:4; /* connections established by type */ unsigned int ksnr_deleted:1; /* been removed from peer_ni? */ + unsigned int ksnr_ctrl_conn_count:1; /* # conns by type */ + unsigned int ksnr_blki_conn_count:8; + unsigned int ksnr_blko_conn_count:8; unsigned int ksnr_share_count; /* created explicitly? 
*/ int ksnr_conn_count; /* # conns established by this route */ }; @@ -589,6 +599,11 @@ ksocknal_peer_decref(struct ksock_peer_ni *peer_ni) ksocknal_destroy_peer(peer_ni); } +static inline int ksocknal_conns_per_peer(void) +{ + return *ksocknal_tunables.ksnd_conns_per_peer ?: 1; +} + int ksocknal_startup(struct lnet_ni *ni); void ksocknal_shutdown(struct lnet_ni *ni); int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 1da3fe5..cae7aec 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1973,14 +1973,15 @@ ksocknal_connect(struct ksock_route *route) if (retry_later) /* needs reschedule */ break; - if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) { + if ((wanted & BIT(SOCKLND_CONN_ANY)) != 0) { type = SOCKLND_CONN_ANY; - } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) { + } else if ((wanted & BIT(SOCKLND_CONN_CONTROL)) != 0) { type = SOCKLND_CONN_CONTROL; - } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) { + } else if ((wanted & BIT(SOCKLND_CONN_BULK_IN)) != 0 && + route->ksnr_blki_conn_count <= route->ksnr_blko_conn_count) { type = SOCKLND_CONN_BULK_IN; } else { - LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0); + LASSERT ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) != 0); type = SOCKLND_CONN_BULK_OUT; } diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index df9d96e..98cedb8 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -139,6 +139,10 @@ static unsigned int zc_recv_min_nfrags = 16; module_param(zc_recv_min_nfrags, int, 0644); MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv"); +static unsigned int conns_per_peer = 1; +module_param(conns_per_peer, uint, 0444); +MODULE_PARM_DESC(conns_per_peer, "number of connections per peer"); + #ifdef SOCKNAL_BACKOFF static int backoff_init = 3; 
module_param(backoff_init, int, 0644); @@ -188,6 +192,13 @@ int ksocknal_tunables_init(void) ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; ksocknal_tunables.ksnd_zc_recv = &zc_recv; ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; + /* route conn counters are 8-bit bitfields: enforce the cap */ + if (conns_per_peer > ((1 << SOCKNAL_CONN_COUNT_MAX_BITS) - 1)) { + CWARN("socklnd conns_per_peer is capped at %u.\n", + (1 << SOCKNAL_CONN_COUNT_MAX_BITS) - 1); + conns_per_peer = (1 << SOCKNAL_CONN_COUNT_MAX_BITS) - 1; + } + ksocknal_tunables.ksnd_conns_per_peer = &conns_per_peer; #ifdef CPU_AFFINITY if (enable_irq_affinity) { -- 1.8.3.1