From 2d3036857edade4b73ccaaf20d5eb8cfbc91da93 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Wed, 27 Nov 2024 12:11:13 +0300 Subject: [PATCH] LU-17906 ptlrpc: reduce time for connection switch If a connection's peer is not ready, reduce the request timeout for it to 1s, so that all of the import's connections are cycled through faster until one is ready. If a connection's status becomes ready after being not ready, try it first when connecting. Signed-off-by: Mikhail Pershin Change-Id: I87f2359f3a767ea9e52ce9da4cd5cf9b42b56320 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57234 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/ptlrpc/import.c | 12 +++++++++++- lustre/ptlrpc/niobuf.c | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 0f8ae7d..c8a5206 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -517,12 +517,23 @@ static int import_select_connection(struct obd_import *imp) } list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { + int old_status = conn->oic_uptodate; + CDEBUG(D_HA, "%s: connect to NID %s last attempt %lld\n", imp->imp_obd->obd_name, libcfs_nidstr(&conn->oic_conn->c_peer.nid), conn->oic_last_attempt); conn->oic_uptodate = LNetPeerDiscovered(&conn->oic_conn->c_peer.nid); + /* connection status is changed to good state, try it like + * this is first attempt + */ + if (old_status <= 0 && conn->oic_uptodate > 0) { + lru_conn = imp_conn = conn; + tried_all = false; + break; + } + /* LNET ping failed, skip peer completely */ if (conn->oic_uptodate == -EHOSTUNREACH) { CDEBUG(D_HA, "%s: skip NID %s as unreachable\n", @@ -530,7 +541,6 @@ static int import_select_connection(struct obd_import *imp) libcfs_nidstr(&conn->oic_conn->c_peer.nid)); continue; } - /* track least recently used conn for fallback */ if (!lru_conn || lru_conn->oic_last_attempt > conn->oic_last_attempt) diff --git a/lustre/ptlrpc/niobuf.c 
b/lustre/ptlrpc/niobuf.c index 13f57e9..ce338cc 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -769,6 +769,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) imp->imp_state == LUSTRE_IMP_CONNECTING) { spin_unlock(&imp->imp_lock); request->rq_sent = ktime_get_real_seconds(); + request->rq_timeout = 1; + request->rq_deadline = request->rq_sent + 1; RETURN(0); } spin_unlock(&imp->imp_lock); -- 1.8.3.1