From 5c2a1267f94715ca5e1e80bcb4d8fdbc51d4b0dd Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Sat, 2 May 2020 10:18:42 -0500 Subject: [PATCH] LU-13510 lnd: Allow independent socklnd timeout Allow the socklnd timeout to be set independent of lnet_transaction_timeout and retry_count. Signed-off-by: Chris Horn Change-Id: Iaa76e77990c8c5ce79193ae8d1f7b3a7db6b433f Reviewed-on: https://review.whamcloud.com/38460 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Serguei Smirnov Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin --- lnet/klnds/socklnd/socklnd.c | 4 ++-- lnet/klnds/socklnd/socklnd.h | 7 +++++++ lnet/klnds/socklnd/socklnd_cb.c | 18 +++++++++--------- lnet/klnds/socklnd/socklnd_modparams.c | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index b50b8b3..d750eaa 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1311,7 +1311,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; conn->ksnc_tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); smp_mb(); /* order with adding to peer_ni's conn list */ list_add(&conn->ksnc_list, &peer_ni->ksnp_conns); @@ -1688,7 +1688,7 @@ ksocknal_destroy_conn(struct ksock_conn *conn) switch (conn->ksnc_rx_state) { case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - - lnet_get_lnd_timeout(); + ksocknal_timeout(); CERROR("Completing partial receive from %s[%d], " "ip %pI4h:%d, with error, wanted: %d, left: %d, " "last alive is %lld secs ago\n", diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 59127c4..950c756 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -567,6 +567,13 @@ ksocknal_peer_decref(struct ksock_peer_ni *peer_ni) ksocknal_destroy_peer(peer_ni); } +static inline int ksocknal_timeout(void) +{ + return *ksocknal_tunables.ksnd_timeout ? + *ksocknal_tunables.ksnd_timeout : + lnet_get_lnd_timeout(); +} + int ksocknal_startup(struct lnet_ni *ni); void ksocknal_shutdown(struct lnet_ni *ni); int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 9641431..aa9f747 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -222,7 +222,7 @@ ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx, /* allocated send buffer bytes < computed; infer * something got ACKed */ conn->ksnc_tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); conn->ksnc_peer->ksnp_last_alive = ktime_get_seconds(); conn->ksnc_tx_bufnob = bufnob; smp_mb(); @@ -271,7 +271,7 @@ ksocknal_recv_iov(struct ksock_conn *conn, struct kvec *scratchiov) conn->ksnc_peer->ksnp_last_alive = ktime_get_seconds(); conn->ksnc_rx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); smp_mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -316,7 +316,7 @@ ksocknal_recv_kiov(struct ksock_conn *conn, struct page **rx_scratch_pgs, conn->ksnc_peer->ksnp_last_alive = ktime_get_seconds(); conn->ksnc_rx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); smp_mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; @@ -490,7 +490,7 @@ ksocknal_check_zc_req(struct ksock_tx *tx) /* ZC_REQ is going to be pinned to the peer_ni */ tx->tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0); @@ -784,7 +784,7 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn) if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) { /* First packet starts the timeout */ conn->ksnc_tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */ conn->ksnc_peer->ksnp_last_alive = ktime_get_seconds(); conn->ksnc_tx_bufnob = 0; @@ -961,7 +961,7 @@ ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx, ksocknal_find_connecting_route_locked (peer_ni) != NULL) { /* the message is going to be pinned to the peer_ni */ tx->tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); /* Queue the message until a connection is established */ list_add_tail(&tx->tx_list, &peer_ni->ksnp_tx_queue); @@ -1782,7 +1782,7 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn, /* socket type set on active connections - not set on passive */ LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE)); - timeout = active ? lnet_get_lnd_timeout() : + timeout = active ? ksocknal_timeout() : lnet_acceptor_timeout(); rc = lnet_sock_read(sock, &hello->kshm_magic, @@ -1917,7 +1917,7 @@ ksocknal_connect(struct ksock_route *route) int retry_later = 0; int rc = 0; - deadline = ktime_get_seconds() + lnet_get_lnd_timeout(); + deadline = ktime_get_seconds() + ksocknal_timeout(); write_lock_bh(&ksocknal_data.ksnd_global_lock); @@ -2662,7 +2662,7 @@ int ksocknal_reaper(void *arg) * times the timeout interval. */ - lnd_timeout = lnet_get_lnd_timeout(); + lnd_timeout = ksocknal_timeout(); if (lnd_timeout > n * p) chunk = (chunk * n * p) / lnd_timeout; if (chunk == 0) diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index 737cb88..fb8b95c 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -26,7 +26,7 @@ #include #endif -static int sock_timeout = 50; +static int sock_timeout; module_param(sock_timeout, int, 0644); MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)"); -- 1.8.3.1