When race happen between ldlm_server_blocking_ast() and
ldlm_request_cancel(), the at_measured() is called with wrong
value equal to current time. And even worse, ldlm_bl_timeout() can
return current_time*1.5.
Before a time functions was fixed by LU-9019(
e920be681) for 64bit,
this race leads to ETIMEDOUT at ptlrpc_import_delay_req() and
client eviction during bl ast sending. The wrong type conversion
take a place at pltrpc_send_limit_expired() at cfs_time_seconds().
We should not take cancels into accoount if the BLAST is not send,
just because the last_activity is not properly initialised - it
destroys the AT completely.
The patch devides l_last_activity to the client l_activity and
server l_blast_sent for better understanding. The l_blast_sent is
used for blocking ast only to measure time between BLAST and
cancel request.
For example:
server cancels blocked lock after 1518731697s
waiting_locks_callback()) ### lock callback timer expired after 0s:
evicting client
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I44962d2b3675b77e09182bbe062bdd78d6cb0af5
Cray-bug-id: LUS-5736
Reviewed-on: https://review.whamcloud.com/32133
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Vitaly Fertman <c17818@cray.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
wait_queue_head_t l_waitq;
/**
- * Seconds. It will be updated if there is any activity related to
- * the lock, e.g. enqueue the lock or send blocking AST.
- */
- time64_t l_last_activity;
-
- /**
* Time, in nanoseconds, last used by e.g. being matched by lock match.
*/
ktime_t l_last_used;
/** Private storage for lock user. Opaque to LDLM. */
void *l_ast_data;
+
+ union {
+ /**
+ * Seconds. It will be updated if there is any activity related to
+ * the lock at client, e.g. enqueue the lock. For server it is the
+ * time when blocking ast was sent.
+ */
+ time64_t l_activity;
+ time64_t l_blast_sent;
+ };
+
/* separate ost_lvb used mostly by Data-on-MDT for now.
* It is introduced to don't mix with layout lock data. */
struct ost_lvb l_ost_lvb;
lu_ref_init(&lock->l_reference);
lu_ref_add(&lock->l_reference, "hash", lock);
lock->l_callback_timeout = 0;
+ lock->l_activity = 0;
#if LUSTRE_TRACKS_LOCK_EXP_REFS
INIT_LIST_HEAD(&lock->l_exp_refs_link);
LDLM_ERROR(lock,
"lock callback timer expired after %llds: evicting client at %s ",
ktime_get_real_seconds() -
- lock->l_last_activity,
+ lock->l_blast_sent,
obd_export_nid2str(export));
ldlm_lock_to_ns(lock)->ns_timeouts++;
do_dump++;
}
ldlm_set_waited(lock);
- lock->l_last_activity = ktime_get_real_seconds();
+ lock->l_blast_sent = ktime_get_real_seconds();
ret = __ldlm_add_waiting_lock(lock, timeout);
if (ret) {
/* grab ref on the lock if it has been added to the
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
- lock->l_last_activity = ktime_get_real_seconds();
-
if (lock->l_export && lock->l_export->exp_nid_stats &&
lock->l_export->exp_nid_stats->nid_ldlm_stats)
lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
}
}
- lock->l_last_activity = ktime_get_real_seconds();
-
LDLM_DEBUG(lock, "server preparing completion AST");
ptlrpc_request_set_replen(req);
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
- lock->l_last_activity = ktime_get_real_seconds();
-
req->rq_interpret_reply = ldlm_cb_interpret;
if (lock->l_export && lock->l_export->exp_nid_stats &&
pres = res;
}
- if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock)) {
+ if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock) &&
+ lock->l_blast_sent != 0) {
time64_t delay = ktime_get_real_seconds() -
- lock->l_last_activity;
+ lock->l_blast_sent;
LDLM_DEBUG(lock, "server cancels blocked lock after %llds",
(s64)delay);
at_measured(&lock->l_export->exp_bl_lock_at, delay);
LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago); "
"not entering recovery in server code, just going back to sleep",
- (s64)lock->l_last_activity,
+ (s64)lock->l_activity,
(s64)(ktime_get_real_seconds() -
- lock->l_last_activity));
+ lock->l_activity));
if (ktime_get_seconds() > next_dump) {
last_dump = next_dump;
next_dump = ktime_get_seconds() + 300;
imp = obd->u.cli.cl_import;
ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s",
- (s64)lock->l_last_activity,
- (s64)(ktime_get_real_seconds() - lock->l_last_activity),
+ (s64)lock->l_activity,
+ (s64)(ktime_get_real_seconds() - lock->l_activity),
obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
RETURN(0);
LDLM_DEBUG(lock, "client-side enqueue: granted");
} else {
/* Take into AT only CP RPC, not immediately granted locks */
- delay = ktime_get_real_seconds() - lock->l_last_activity;
+ delay = ktime_get_real_seconds() - lock->l_activity;
LDLM_DEBUG(lock, "client-side enqueue: granted after %llds",
(s64)delay);
timeout = ldlm_cp_timeout(lock);
lwd.lwd_lock = lock;
- lock->l_last_activity = ktime_get_real_seconds();
+ lock->l_activity = ktime_get_real_seconds();
if (ldlm_is_no_timeout(lock)) {
LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
lock->l_export = NULL;
lock->l_blocking_ast = einfo->ei_cb_bl;
lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
- lock->l_last_activity = ktime_get_real_seconds();
+ lock->l_activity = ktime_get_real_seconds();
/* lock not sent to server yet */
if (reqp == NULL || *reqp == NULL) {