From e09d273cb5f218a857d22011ab3035f7b178b023 Mon Sep 17 00:00:00 2001
From: Alexander Boyko <c17825@cray.com>
Date: Tue, 24 Apr 2018 03:06:42 -0400
Subject: [PATCH] LU-10945 ldlm: fix l_last_activity usage

When race happen between ldlm_server_blocking_ast() and
ldlm_request_cancel(), the at_measured() is called with wrong
value equal to current time. And even worse, ldlm_bl_timeout() can
return current_time*1.5.
Before a time functions was fixed by LU-9019(e920be681) for 64bit,
this race leads to ETIMEDOUT at ptlrpc_import_delay_req() and
client eviction during bl ast sending. The wrong type conversion
take a place at pltrpc_send_limit_expired() at cfs_time_seconds().

We should not take cancels into accoount if the BLAST is not send,
just because the last_activity is not properly initialised - it
destroys the AT completely.
The patch devides l_last_activity to the client l_activity and
server l_blast_sent for better understanding. The l_blast_sent is
used for blocking ast only to measure time between BLAST and
cancel request.

For example:
 server cancels blocked lock after 1518731697s
 waiting_locks_callback()) ### lock callback timer expired after 0s:
 evicting client

Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I44962d2b3675b77e09182bbe062bdd78d6cb0af5
Cray-bug-id: LUS-5736
Reviewed-on: https://review.whamcloud.com/32133
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Vitaly Fertman <c17818@cray.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---
 lustre/include/lustre_dlm.h | 17 +++++++++++------
 lustre/ldlm/ldlm_lock.c     |  1 +
 lustre/ldlm/ldlm_lockd.c    | 15 +++++----------
 lustre/ldlm/ldlm_request.c  | 14 +++++++-------
 4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index ae5a599..ac3ec39 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -825,12 +825,6 @@ struct ldlm_lock {
 	wait_queue_head_t	l_waitq;
 
 	/**
-	 * Seconds. It will be updated if there is any activity related to
-	 * the lock, e.g. enqueue the lock or send blocking AST.
-	 */
-	time64_t		l_last_activity;
-
-	/**
 	 * Time, in nanoseconds, last used by e.g. being matched by lock match.
 	 */
 	ktime_t			l_last_used;
@@ -853,6 +847,17 @@ struct ldlm_lock {
 
 	/** Private storage for lock user. Opaque to LDLM. */
 	void			*l_ast_data;
+
+	union {
+	/**
+	 * Seconds. It will be updated if there is any activity related to
+	 * the lock at client, e.g. enqueue the lock. For server it is the
+	 * time when blocking ast was sent.
+	 */
+		time64_t	l_activity;
+		time64_t	l_blast_sent;
+	};
+
 	/* separate ost_lvb used mostly by Data-on-MDT for now.
 	 * It is introduced to don't mix with layout lock data. */
 	struct ost_lvb		 l_ost_lvb;
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index 41f4907..6b00671 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -486,6 +486,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
         lu_ref_init(&lock->l_reference);
         lu_ref_add(&lock->l_reference, "hash", lock);
         lock->l_callback_timeout = 0;
+	lock->l_activity = 0;
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
 	INIT_LIST_HEAD(&lock->l_exp_refs_link);
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index 22b1a18..45c3b67 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -247,7 +247,7 @@ static int expired_lock_main(void *arg)
 				LDLM_ERROR(lock,
 					   "lock callback timer expired after %llds: evicting client at %s ",
 					   ktime_get_real_seconds() -
-					   lock->l_last_activity,
+					   lock->l_blast_sent,
 					   obd_export_nid2str(export));
 				ldlm_lock_to_ns(lock)->ns_timeouts++;
 				do_dump++;
@@ -443,7 +443,7 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock, time64_t timeout)
 	}
 
 	ldlm_set_waited(lock);
-	lock->l_last_activity = ktime_get_real_seconds();
+	lock->l_blast_sent = ktime_get_real_seconds();
 	ret = __ldlm_add_waiting_lock(lock, timeout);
 	if (ret) {
 		/* grab ref on the lock if it has been added to the
@@ -918,8 +918,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
-	lock->l_last_activity = ktime_get_real_seconds();
-
         if (lock->l_export && lock->l_export->exp_nid_stats &&
             lock->l_export->exp_nid_stats->nid_ldlm_stats)
                 lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
@@ -1008,8 +1006,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
 		}
         }
 
-	lock->l_last_activity = ktime_get_real_seconds();
-
 	LDLM_DEBUG(lock, "server preparing completion AST");
 
         ptlrpc_request_set_replen(req);
@@ -1118,8 +1114,6 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
-	lock->l_last_activity = ktime_get_real_seconds();
-
 	req->rq_interpret_reply = ldlm_cb_interpret;
 
         if (lock->l_export && lock->l_export->exp_nid_stats &&
@@ -1600,9 +1594,10 @@ int ldlm_request_cancel(struct ptlrpc_request *req,
                         pres = res;
                 }
 
-		if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock)) {
+		if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock) &&
+		    lock->l_blast_sent != 0) {
 			time64_t delay = ktime_get_real_seconds() -
-					 lock->l_last_activity;
+					 lock->l_blast_sent;
 			LDLM_DEBUG(lock, "server cancels blocked lock after %llds",
 				   (s64)delay);
 			at_measured(&lock->l_export->exp_bl_lock_at, delay);
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 014b67a..1ab84a5 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -124,9 +124,9 @@ int ldlm_expired_completion_wait(void *data)
 
 		LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago); "
 			   "not entering recovery in server code, just going back to sleep",
-			   (s64)lock->l_last_activity,
+			   (s64)lock->l_activity,
 			   (s64)(ktime_get_real_seconds() -
-				 lock->l_last_activity));
+				 lock->l_activity));
 		if (ktime_get_seconds() > next_dump) {
                         last_dump = next_dump;
 			next_dump = ktime_get_seconds() + 300;
@@ -142,8 +142,8 @@ int ldlm_expired_completion_wait(void *data)
         imp = obd->u.cli.cl_import;
         ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
 	LDLM_ERROR(lock, "lock timed out (enqueued at %lld, %llds ago), entering recovery for %s@%s",
-		  (s64)lock->l_last_activity,
-		  (s64)(ktime_get_real_seconds() - lock->l_last_activity),
+		  (s64)lock->l_activity,
+		  (s64)(ktime_get_real_seconds() - lock->l_activity),
                   obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
@@ -191,7 +191,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock, void *data)
 		LDLM_DEBUG(lock, "client-side enqueue: granted");
 	} else {
 		/* Take into AT only CP RPC, not immediately granted locks */
-		delay = ktime_get_real_seconds() - lock->l_last_activity;
+		delay = ktime_get_real_seconds() - lock->l_activity;
 		LDLM_DEBUG(lock, "client-side enqueue: granted after %llds",
 			   (s64)delay);
 
@@ -282,7 +282,7 @@ noreproc:
 	timeout = ldlm_cp_timeout(lock);
 
 	lwd.lwd_lock = lock;
-	lock->l_last_activity = ktime_get_real_seconds();
+	lock->l_activity = ktime_get_real_seconds();
 
 	if (ldlm_is_no_timeout(lock)) {
                 LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
@@ -946,7 +946,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
 	lock->l_export = NULL;
 	lock->l_blocking_ast = einfo->ei_cb_bl;
 	lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
-	lock->l_last_activity = ktime_get_real_seconds();
+	lock->l_activity = ktime_get_real_seconds();
 
 	/* lock not sent to server yet */
 	if (reqp == NULL || *reqp == NULL) {
-- 
1.8.3.1