From 34b2246e4a6c8ce827c404cb4e52f7c6a0a1b90b Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Fri, 28 Aug 2020 22:17:58 +0300 Subject: [PATCH] LU-16062 ldlm: improve bl_timeout for prolong If there is a client's RPC in hand, we can do a better job of calculating the lock callback timeout, as the RPC carries the client's own view of this RPC's timeout. Let's use it. HPE-bug-id: LUS-8866, LUS-11074 Signed-off-by: Vitaly Fertman Change-Id: Ibd67d37c1073d0d3cb2e08b532c801af0de116fe Reviewed-on: https://es-gerrit.dev.cray.com/157782 Reviewed-by: Andriy Skulysh Reviewed-by: Alexey Lyashkov Tested-by: Jenkins Build User Reviewed-on: https://review.whamcloud.com/48094 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 3 ++- lustre/include/lustre_net.h | 24 ----------------------- lustre/ldlm/ldlm_extent.c | 7 ++----- lustre/ldlm/ldlm_lockd.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ lustre/mdt/mdt_io.c | 2 +- lustre/ofd/ofd_dev.c | 2 +- 6 files changed, 54 insertions(+), 32 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 8fc8127..0b0fd66 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1395,10 +1395,10 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); struct ldlm_prolong_args { struct obd_export *lpa_export; + struct ptlrpc_request *lpa_req; struct ldlm_res_id lpa_resid; struct ldlm_extent lpa_extent; enum ldlm_mode lpa_mode; - timeout_t lpa_timeout; int lpa_locks_cnt; int lpa_blocks_cnt; }; @@ -1445,6 +1445,7 @@ int ldlm_request_cancel(struct ptlrpc_request *req, void ldlm_revoke_export_locks(struct obd_export *exp); timeout_t ldlm_bl_timeout(struct ldlm_lock *lock); +timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req); #endif int ldlm_del_waiting_lock(struct ldlm_lock *lock); int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout); diff --git
a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 044caa6..897302c 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2548,30 +2548,6 @@ ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt) max_t(int, at, obd_timeout); } -/** - * Calculate the amount of time for lock prolongation. - * - * This is helper function to get the timeout extra time. - * - * @req current request - * - * Return: amount of time to extend the timeout with - */ -static inline timeout_t prolong_timeout(struct ptlrpc_request *req) -{ - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - timeout_t req_timeout = 0; - - if (AT_OFF) - return obd_timeout / 2; - - if (req->rq_deadline > req->rq_arrival_time.tv_sec) - req_timeout = req->rq_deadline - req->rq_arrival_time.tv_sec; - - return max(req_timeout, - at_est2timeout(at_get(&svcpt->scp_at_estimate))); -} - static inline struct ptlrpc_service * ptlrpc_req2svc(struct ptlrpc_request *req) { diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index ff40ece..4209310 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -651,16 +651,13 @@ void ldlm_lock_prolong_one(struct ldlm_lock *lock, /* ignore locks not being cancelled */ return; - /* We are in the middle of the process - BL AST is sent, CANCEL - * is ahead. Take half of BL AT + IO AT process time. - */ - timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1); - arg->lpa_blocks_cnt++; /* OK. this is a possible lock the user holds doing I/O * let's refresh eviction timer for it. 
*/ + timeout = ldlm_bl_timeout_by_rpc(arg->lpa_req); + LDLM_DEBUG(lock, "refreshed to %ds.\n", timeout); ldlm_refresh_waiting_lock(lock, timeout); } EXPORT_SYMBOL(ldlm_lock_prolong_one); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f54b788..a5a49f9 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -648,6 +648,54 @@ timeout_t ldlm_bl_timeout(struct ldlm_lock *lock) EXPORT_SYMBOL(ldlm_bl_timeout); /** + * Calculate the per-export Blocking timeout by the given RPC (covering the + * reply to this RPC and the next RPC). The next RPC may still not be CANCEL, + * but with the lock refresh mechanism that is enough. + * + * Used for lock refresh timeout when we are in the middle of the process - + * BL AST is sent, CANCEL is ahead - it is still 1 reply for the current RPC + * and at least 1 RPC (which will trigger another refresh if it is not + * CANCEL) - but more accurate than ldlm_bl_timeout as the timeout taken + * from the RPC (i.e. the view of the client on the current AT) is taken into + * account. + * + * \param[in] req request whose export needs the timeout calculation + * + * \retval timeout in seconds to wait for the next client's RPC + */ +timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + timeout_t timeout, req_timeout, at_timeout, netl; + + if (AT_OFF) + return obd_timeout / 2; + + /* A blocked lock means somebody in the cluster is waiting, and we + * should not consider the worst ever case, consisting of a chain of + * failures on each step, however this timeout should survive a + * recovery of at least 1 failure, let this one be the worst one: + * in case a server NID is dead first re-connect is done through the + * same router and also times out. + * + * Either this or the next RPC times out, take the max. + * Considering the current RPC, take just the time left.
+ */ + netl = at_get(&req->rq_export->exp_imp_reverse->imp_at.iat_net_latency); + req_timeout = req->rq_deadline - ktime_get_real_seconds() + netl; + at_timeout = at_est2timeout(at_get(&svcpt->scp_at_estimate)) + netl; + req_timeout = max(req_timeout, at_timeout); + + /* Take 1 re-connect failure and 1 re-connect success into account. */ + timeout = at_timeout + INITIAL_CONNECT_TIMEOUT + netl + req_timeout; + + /* Client's timeout is calculated as at_est2timeout(), let's be a bit + * more conservative than the client */ + return max(timeout + (timeout >> 4), (timeout_t)ldlm_enqueue_min); +} +EXPORT_SYMBOL(ldlm_bl_timeout_by_rpc); + +/** * Perform lock cleanup if AST sending failed. */ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 8858135..3a9ec7e 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -100,7 +100,7 @@ static void mdt_prolong_dom_lock(struct tgt_session_info *tsi, ENTRY; - data->lpa_timeout = prolong_timeout(tgt_ses_req(tsi)); + data->lpa_req = tgt_ses_req(tsi); data->lpa_export = tsi->tsi_exp; data->lpa_resid = tsi->tsi_resid; diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index b230132..811a1e7 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -2482,7 +2482,7 @@ static void ofd_prolong_extent_locks(struct tgt_session_info *tsi, ENTRY; - data->lpa_timeout = prolong_timeout(tgt_ses_req(tsi)); + data->lpa_req = tgt_ses_req(tsi); data->lpa_export = tsi->tsi_exp; data->lpa_resid = tsi->tsi_resid; -- 1.8.3.1