From: Vitaly Fertman Date: Tue, 4 Oct 2022 17:30:08 +0000 (-0700) Subject: LU-16062 ldlm: improve bl_timeout for prolong X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=532fa8a41bdb611e6338750888dcddfff901fc4e;p=fs%2Flustre-release.git LU-16062 ldlm: improve bl_timeout for prolong If there is a client's RPC in hand, we can do a better job of calculating the lock callback timeout, as the RPC carries the client's own view of this RPC's timeout. Let's use it. Lustre-change: https://review.whamcloud.com/48094 Lustre-commit: 34b2246e4a6c8ce827c404cb4e52f7c6a0a1b90b HPE-bug-id: LUS-8866, LUS-11074 Signed-off-by: Vitaly Fertman Change-Id: Ibd67d37c1073d0d3cb2e08b532c801af0de116fe Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48762 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 795c5ff..4bd273e 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1397,10 +1397,10 @@ __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); struct ldlm_prolong_args { struct obd_export *lpa_export; + struct ptlrpc_request *lpa_req; struct ldlm_res_id lpa_resid; struct ldlm_extent lpa_extent; enum ldlm_mode lpa_mode; - timeout_t lpa_timeout; int lpa_locks_cnt; int lpa_blocks_cnt; }; @@ -1447,6 +1447,7 @@ int ldlm_request_cancel(struct ptlrpc_request *req, void ldlm_revoke_export_locks(struct obd_export *exp); timeout_t ldlm_bl_timeout(struct ldlm_lock *lock); +timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req); #endif int ldlm_del_waiting_lock(struct ldlm_lock *lock); int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, timeout_t timeout); diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 1d94c3e..e71b936 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2545,30 +2545,6 @@ ptlrpc_server_get_timeout(struct ptlrpc_service_part *svcpt) max_t(int, at, obd_timeout); } -/** 
- * Calculate the amount of time for lock prolongation. - * - * This is helper function to get the timeout extra time. - * - * @req current request - * - * Return: amount of time to extend the timeout with - */ -static inline timeout_t prolong_timeout(struct ptlrpc_request *req) -{ - struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; - timeout_t req_timeout = 0; - - if (AT_OFF) - return obd_timeout / 2; - - if (req->rq_deadline > req->rq_arrival_time.tv_sec) - req_timeout = req->rq_deadline - req->rq_arrival_time.tv_sec; - - return max(req_timeout, - at_est2timeout(at_get(&svcpt->scp_at_estimate))); -} - static inline struct ptlrpc_service * ptlrpc_req2svc(struct ptlrpc_request *req) { diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index b350708..993193a 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -652,16 +652,13 @@ void ldlm_lock_prolong_one(struct ldlm_lock *lock, /* ignore locks not being cancelled */ return; - /* We are in the middle of the process - BL AST is sent, CANCEL - * is ahead. Take half of BL AT + IO AT process time. - */ - timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1); - arg->lpa_blocks_cnt++; /* OK. this is a possible lock the user holds doing I/O * let's refresh eviction timer for it. */ + timeout = ldlm_bl_timeout_by_rpc(arg->lpa_req); + LDLM_DEBUG(lock, "refreshed to %ds.\n", timeout); ldlm_refresh_waiting_lock(lock, timeout); } EXPORT_SYMBOL(ldlm_lock_prolong_one); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 52c69e0..7ecb280 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -648,6 +648,54 @@ timeout_t ldlm_bl_timeout(struct ldlm_lock *lock) EXPORT_SYMBOL(ldlm_bl_timeout); /** + * Calculate the per-export Blocking timeout by the given RPC (covering the + * reply to this RPC and the next RPC). The next RPC could be still not CANCEL, + * but having the lock refresh mechanism it is enough. 
+ * + * Used for lock refresh timeout when we are in the middle of the process - + * BL AST is sent, CANCEL is ahead - it is still 1 reply for the current RPC + * and at least 1 RPC (which will trigger another refresh if it is not a + * CANCEL) - but more accurate than ldlm_bl_timeout as the timeout + * from the RPC (i.e. the view of the client on the current AT) is taken into + * account. + * + * \param[in] req req whose export needs the timeout calculation + * + * \retval timeout in seconds to wait for the next client's RPC + */ +timeout_t ldlm_bl_timeout_by_rpc(struct ptlrpc_request *req) +{ + struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt; + timeout_t timeout, req_timeout, at_timeout, netl; + + if (AT_OFF) + return obd_timeout / 2; + + /* A blocked lock means somebody in the cluster is waiting, and we + * should not consider the worst ever case, consisting of a chain of + * failures on each step, however this timeout should survive a + * recovery of at least 1 failure, let this one be the worst one: + * in case a server NID is dead first re-connect is done through the + * same router and also times out. + * + * Either this or the next RPC times out, take the max. + * Considering the current RPC, take just the remaining time. + */ + netl = at_get(&req->rq_export->exp_imp_reverse->imp_at.iat_net_latency); + req_timeout = req->rq_deadline - ktime_get_real_seconds() + netl; + at_timeout = at_est2timeout(at_get(&svcpt->scp_at_estimate)) + netl; + req_timeout = max(req_timeout, at_timeout); + + /* Take 1 re-connect failure and 1 re-connect success into account. */ + timeout = at_timeout + INITIAL_CONNECT_TIMEOUT + netl + req_timeout; + + /* Client's timeout is calculated as at_est2timeout(), let's be a bit + * more conservative than client */ + return max(timeout + (timeout >> 4), (timeout_t)ldlm_enqueue_min); +} +EXPORT_SYMBOL(ldlm_bl_timeout_by_rpc); + +/** + * Perform lock cleanup if AST sending failed. 
*/ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 799d117..d09b555 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -98,7 +98,7 @@ static void mdt_prolong_dom_lock(struct tgt_session_info *tsi, ENTRY; - data->lpa_timeout = prolong_timeout(tgt_ses_req(tsi)); + data->lpa_req = tgt_ses_req(tsi); data->lpa_export = tsi->tsi_exp; data->lpa_resid = tsi->tsi_resid; diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 38292a7f..ae59348 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -2461,7 +2461,7 @@ static void ofd_prolong_extent_locks(struct tgt_session_info *tsi, ENTRY; - data->lpa_timeout = prolong_timeout(tgt_ses_req(tsi)); + data->lpa_req = tgt_ses_req(tsi); data->lpa_export = tsi->tsi_exp; data->lpa_resid = tsi->tsi_resid;