Originally, exp_bl_lock_at accounted for the whole period from the
moment the BL AST was sent until the cancel RPC arrived at the server.
LU-6032 started to update l_blast_sent for expired locks which are still
busy, i.e. locks that were prolonged when the timeout expired. In fact,
it is a good idea to cover not the whole period but only the time until
any involved RPC arrives: this avoids excessively large lock callback
timeouts, and the IO which prolongs the lock is also able to restart
the AT cycle by updating l_blast_sent.
Unfortunately, that change seems to have been made accidentally, as the
main prolong code was not adjusted accordingly.
Fixes: 292aa42e08 ("LU-6032 ldlm: don't disable softirq for exp_rpc_lock")
HPE-bug-id: LUS-9278
Signed-off-by: Vitaly Fertman <c17818@cray.com>
Change-Id: Idc598508fc13aa33ac9fce56f13310ca6fc819d4
Tested-by: Jenkins Build User <nssreleng@cray.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Reviewed-on: https://review.whamcloud.com/40868
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Andriy Skulysh <askulysh@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
*/
timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1);
*/
timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1);
- LDLM_DEBUG(lock, "refreshed to %ds.\n", timeout);
-
arg->lpa_blocks_cnt++;
/* OK. this is a possible lock the user holds doing I/O
arg->lpa_blocks_cnt++;
/* OK. this is a possible lock the user holds doing I/O
LDLM_ERROR(lock,
"lock callback timer expired after %llds: evicting client at %s ",
LDLM_ERROR(lock,
"lock callback timer expired after %llds: evicting client at %s ",
- ktime_get_real_seconds() -
lock->l_blast_sent,
obd_export_nid2str(export));
ldlm_lock_to_ns(lock)->ns_timeouts++;
lock->l_blast_sent,
obd_export_nid2str(export));
ldlm_lock_to_ns(lock)->ns_timeouts++;
static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t delay)
{
unsigned long timeout_jiffies = jiffies;
static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, timeout_t delay)
{
unsigned long timeout_jiffies = jiffies;
- time64_t now = ktime_get_seconds();
time64_t deadline;
timeout_t timeout;
time64_t deadline;
timeout_t timeout;
+ lock->l_blast_sent = ktime_get_seconds();
if (!list_empty(&lock->l_pending_chain))
return 0;
if (!list_empty(&lock->l_pending_chain))
return 0;
OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
delay = 1;
OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT))
delay = 1;
- deadline = now + delay;
+ deadline = lock->l_blast_sent + delay;
if (likely(deadline > lock->l_callback_timestamp))
lock->l_callback_timestamp = deadline;
if (likely(deadline > lock->l_callback_timestamp))
lock->l_callback_timestamp = deadline;
- timeout = clamp_t(timeout_t, lock->l_callback_timestamp - now,
+ timeout = clamp_t(timeout_t,
+ lock->l_callback_timestamp - lock->l_blast_sent,
0, delay);
timeout_jiffies += cfs_time_seconds(timeout);
0, delay);
timeout_jiffies += cfs_time_seconds(timeout);
- lock->l_blast_sent = ktime_get_real_seconds();
ret = __ldlm_add_waiting_lock(lock, timeout);
if (ret) {
/*
ret = __ldlm_add_waiting_lock(lock, timeout);
if (ret) {
/*
__ldlm_add_waiting_lock(lock, timeout);
spin_unlock_bh(&waiting_locks_spinlock);
__ldlm_add_waiting_lock(lock, timeout);
spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "refreshed");
+ LDLM_DEBUG(lock, "refreshed to %ds", timeout);
return 1;
}
EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
return 1;
}
EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
lock->l_blast_sent != 0) {
timeout_t delay = 0;
lock->l_blast_sent != 0) {
timeout_t delay = 0;
- if (ktime_get_real_seconds() > lock->l_blast_sent)
- delay = ktime_get_real_seconds() -
+ if (ktime_get_seconds() > lock->l_blast_sent)
+ delay = ktime_get_seconds() -
lock->l_blast_sent;
LDLM_DEBUG(lock,
"server cancels blocked lock after %ds",
lock->l_blast_sent;
LDLM_DEBUG(lock,
"server cancels blocked lock after %ds",