From e09d273cb5f218a857d22011ab3035f7b178b023 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Tue, 24 Apr 2018 03:06:42 -0400 Subject: [PATCH] LU-10945 ldlm: fix l_last_activity usage When race happen between ldlm_server_blocking_ast() and ldlm_request_cancel(), the at_measured() is called with wrong value equal to current time. And even worse, ldlm_bl_timeout() can return current_time*1.5. Before a time functions was fixed by LU-9019(e920be681) for 64bit, this race leads to ETIMEDOUT at ptlrpc_import_delay_req() and client eviction during bl ast sending. The wrong type conversion take a place at pltrpc_send_limit_expired() at cfs_time_seconds(). We should not take cancels into accoount if the BLAST is not send, just because the last_activity is not properly initialised - it destroys the AT completely. The patch devides l_last_activity to the client l_activity and server l_blast_sent for better understanding. The l_blast_sent is used for blocking ast only to measure time between BLAST and cancel request. For example: server cancels blocked lock after 1518731697s waiting_locks_callback()) ### lock callback timer expired after 0s: evicting client Signed-off-by: Alexander Boyko Change-Id: I44962d2b3675b77e09182bbe062bdd78d6cb0af5 Cray-bug-id: LUS-5736 Reviewed-on: https://review.whamcloud.com/32133 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Vitaly Fertman Reviewed-by: James Simmons Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 17 +++++++++++------ lustre/ldlm/ldlm_lock.c | 1 + lustre/ldlm/ldlm_lockd.c | 15 +++++---------- lustre/ldlm/ldlm_request.c | 14 +++++++------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index ae5a599..ac3ec39 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -825,12 +825,6 @@ struct ldlm_lock { wait_queue_head_t l_waitq; /** - * Seconds. It will be updated if there is any activity related to - * the lock, e.g. enqueue the lock or send blocking AST. - */ - time64_t l_last_activity; - - /** * Time, in nanoseconds, last used by e.g. being matched by lock match. */ ktime_t l_last_used; @@ -853,6 +847,17 @@ struct ldlm_lock { /** Private storage for lock user. Opaque to LDLM. */ void *l_ast_data; + + union { + /** + * Seconds. It will be updated if there is any activity related to + * the lock at client, e.g. enqueue the lock. For server it is the + * time when blocking ast was sent. + */ + time64_t l_activity; + time64_t l_blast_sent; + }; + /* separate ost_lvb used mostly by Data-on-MDT for now. * It is introduced to don't mix with layout lock data. */ struct ost_lvb l_ost_lvb; diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 41f4907..6b00671 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -486,6 +486,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource) lu_ref_init(&lock->l_reference); lu_ref_add(&lock->l_reference, "hash", lock); lock->l_callback_timeout = 0; + lock->l_activity = 0; #if LUSTRE_TRACKS_LOCK_EXP_REFS INIT_LIST_HEAD(&lock->l_exp_refs_link); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 22b1a18..45c3b67 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -247,7 +247,7 @@ static int expired_lock_main(void *arg) LDLM_ERROR(lock, "lock callback timer expired after %llds: evicting client at %s ", ktime_get_real_seconds() - - lock->l_last_activity, + lock->l_blast_sent, obd_export_nid2str(export)); ldlm_lock_to_ns(lock)->ns_timeouts++; do_dump++; @@ -443,7 +443,7 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, time64_t timeout) } ldlm_set_waited(lock); - lock->l_last_activity = ktime_get_real_seconds(); + lock->l_blast_sent = ktime_get_real_seconds(); ret = __ldlm_add_waiting_lock(lock, timeout); if (ret) { /* grab ref on the lock if it has been added to the @@ -918,8 +918,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, if (AT_OFF) req->rq_timeout = ldlm_get_rq_timeout(); - lock->l_last_activity = ktime_get_real_seconds(); - if (lock->l_export && lock->l_export->exp_nid_stats && lock->l_export->exp_nid_stats->nid_ldlm_stats) lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats, @@ -1008,8 +1006,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) } } - lock->l_last_activity = ktime_get_real_seconds(); - LDLM_DEBUG(lock, "server preparing completion AST"); ptlrpc_request_set_replen(req); @@ -1118,8 +1114,6 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) if (AT_OFF) req->rq_timeout = ldlm_get_rq_timeout(); - lock->l_last_activity = ktime_get_real_seconds(); - req->rq_interpret_reply = ldlm_cb_interpret; if (lock->l_export && lock->l_export->exp_nid_stats && @@ -1600,9 +1594,10 @@ int ldlm_request_cancel(struct ptlrpc_request *req, pres = res; } - if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock)) { + if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock) && + lock->l_blast_sent != 0) { time64_t delay = ktime_get_real_seconds() - - lock->l_last_activity; + lock->l_blast_sent; LDLM_DEBUG(lock, "server cancels blocked lock after %llds", (s64)delay); at_measured(&lock->l_export->exp_bl_lock_at, delay); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 014b67a..1ab84a5 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -124,9 +124,9 @@ int ldlm_expired_completion_wait(void *data) LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago); " "not entering recovery in server code, just going back to sleep", - (s64)lock->l_last_activity, + (s64)lock->l_activity, (s64)(ktime_get_real_seconds() - - lock->l_last_activity)); + lock->l_activity)); if (ktime_get_seconds() > next_dump) { last_dump = next_dump; next_dump = ktime_get_seconds() + 300; @@ -142,8 +142,8 @@ int ldlm_expired_completion_wait(void *data) imp = obd->u.cli.cl_import; ptlrpc_fail_import(imp, lwd->lwd_conn_cnt); LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s", - (s64)lock->l_last_activity, - (s64)(ktime_get_real_seconds() - lock->l_last_activity), + (s64)lock->l_activity, + (s64)(ktime_get_real_seconds() - lock->l_activity), obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid); RETURN(0); @@ -191,7 +191,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data) LDLM_DEBUG(lock, "client-side enqueue: granted"); } else { /* Take into AT only CP RPC, not immediately granted locks */ - delay = ktime_get_real_seconds() - lock->l_last_activity; + delay = ktime_get_real_seconds() - lock->l_activity; LDLM_DEBUG(lock, "client-side enqueue: granted after %llds", (s64)delay); @@ -282,7 +282,7 @@ noreproc: timeout = ldlm_cp_timeout(lock); lwd.lwd_lock = lock; - lock->l_last_activity = ktime_get_real_seconds(); + lock->l_activity = ktime_get_real_seconds(); if (ldlm_is_no_timeout(lock)) { LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); @@ -946,7 +946,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_export = NULL; lock->l_blocking_ast = einfo->ei_cb_bl; lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); - lock->l_last_activity = ktime_get_real_seconds(); + lock->l_activity = ktime_get_real_seconds(); /* lock not sent to server yet */ if (reqp == NULL || *reqp == NULL) { -- 1.8.3.1