From 71f156e09259ef1e6e78b83cf68442b49c9ab25e Mon Sep 17 00:00:00 2001
From: James Nunez <jnunez@whamcloud.com>
Date: Wed, 10 Apr 2019 14:55:20 +0000
Subject: [PATCH] Revert "LU-11771 ldlm: use hrtimer for recovery to fix
 timeout messages"

This reverts commit 1ba794f6ec9e7ce7ad65fd74f170089fffc31d91.

We've seen several new or an increase in test failures for:
LU-12175 for sanity test 208 failures
LU-11560 for recovery-small test 134 failures
LU-12176 is for recovery-small test 107 failures
LU-12177 is for sanity test 160a failures

We are trying to pinpoint what patch landed that is causing these failures.

Test-Parameters: trivial testgroup=review-dne-part-1
Test-Parameters: testgroup=review-dne-zfs-part-1
Test-Parameters: testgroup=review-dne-part-1
Test-Parameters: testgroup=review-dne-zfs-part-1
Test-Parameters: testgroup=review-dne-part-1
Test-Parameters: testgroup=review-dne-zfs-part-1
Test-Parameters: testgroup=review-dne-part-1
Test-Parameters: testgroup=review-dne-zfs-part-1

Change-Id: I2ed33fb14726e29cae8745d671d4e25e276a7a66
Signed-off-by: James Nunez <jnunez@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/34629
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 lustre/include/obd.h   |   2 +-
 lustre/ldlm/ldlm_lib.c | 225 +++++++++++++++++++++++++------------------------
 2 files changed, 114 insertions(+), 113 deletions(-)

diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 0d04870..242bfb8 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -687,7 +687,7 @@ struct obd_device {
 	int			obd_requests_queued_for_recovery;
 	wait_queue_head_t	obd_next_transno_waitq;
 	/* protected by obd_recovery_task_lock */
-	struct hrtimer		obd_recovery_timer;
+	struct timer_list	obd_recovery_timer;
 	/* seconds */
 	time64_t		obd_recovery_start;
 	/* seconds, for lprocfs_status */
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 00e1542..5a587c7 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -766,8 +766,9 @@ static int target_handle_reconnect(struct lustre_handle *conn,
 {
 	struct obd_device *target;
 	struct lustre_handle *hdl;
-	ktime_t remaining;
-	s64 timeout;
+	time64_t deadline;
+	time64_t timeout;
+	time64_t now;
 	int rc = 0;
 
 	ENTRY;
@@ -804,21 +805,16 @@ static int target_handle_reconnect(struct lustre_handle *conn,
 		GOTO(out_already, rc);
 	}
 
-	remaining = hrtimer_expires_remaining(&target->obd_recovery_timer);
-	timeout = ktime_divns(remaining, NSEC_PER_SEC);
-	if (timeout > 0) {
-		LCONSOLE_WARN("%s: Client %s (at %s) reconnected, waiting for %d clients in recovery for %lld:%.02lld\n",
-			      target->obd_name,
-			      obd_uuid2str(&exp->exp_client_uuid),
-			      obd_export_nid2str(exp),
-			      target->obd_max_recoverable_clients,
-			      timeout / 60, timeout % 60);
-	} else {
+	now = ktime_get_seconds();
+	deadline = jiffies_to_msecs(target->obd_recovery_timer.expires) /
+		   MSEC_PER_SEC;
+	if (now < deadline) {
 		struct target_distribute_txn_data *tdtd;
 		int size = 0;
 		int count = 0;
 		char *buf = NULL;
 
+		timeout = deadline - now;
 		tdtd = class_exp2tgt(exp)->lut_tdtd;
 		if (tdtd && tdtd->tdtd_show_update_logs_retrievers)
 			buf = tdtd->tdtd_show_update_logs_retrievers(
@@ -826,21 +822,30 @@ static int target_handle_reconnect(struct lustre_handle *conn,
 				&size, &count);
 
 		if (count > 0)
-			LCONSOLE_WARN("%s: Client %s (at %s) reconnecting, waiting for %d MDTs (%s) in recovery for %lld:%.02lld. Please wait until all MDTs recovered or you may force MDT evicition via 'lctl --device %s abort_recovery.\n",
-				      target->obd_name,
-				      obd_uuid2str(&exp->exp_client_uuid),
-				      obd_export_nid2str(exp), count,
-				      buf ? buf : "unknown (not enough RAM)",
-				      (abs(timeout) + target->obd_recovery_timeout) / 60,
-				      (abs(timeout) + target->obd_recovery_timeout) % 60,
-				      target->obd_name);
+			LCONSOLE_WARN("%s: Recovery already passed deadline "
+				      "%lld:%.02lld. It is due to DNE recovery "
+				      "failed/stuck on the %d MDT(s):%s. "
+				      "Please wait until all MDTs recovered "
+				      "or abort the recovery by force.\n",
+				      target->obd_name, timeout / 60,
+				      timeout % 60, count,
+				      buf ? buf : "unknown (not enough RAM)");
 		else
-			LCONSOLE_WARN("%s: Recovery already passed deadline %lld:%.02lld. If you do not want to wait more, you may force taget eviction via 'lctl --device %s abort_recovery.\n",
-				      target->obd_name, abs(timeout) / 60,
-				      abs(timeout) % 60, target->obd_name);
+			LCONSOLE_WARN("%s: Recovery already passed deadline "
+				      "%lld:%.02lld. If you do not want to wait "
+				      "more, please abort the recovery by "
+				      "force.\n", target->obd_name,
+				      timeout / 60, timeout % 60);
 
 		if (buf != NULL)
 			OBD_FREE(buf, size);
+	} else {
+		timeout = now - deadline;
+		LCONSOLE_WARN("%s: Recovery already passed deadline"
+			" %lld:%.02lld, It is most likely due to DNE"
+			" recovery is failed or stuck, please wait a"
+			" few more minutes or abort the recovery.\n",
+			target->obd_name, timeout / 60, timeout % 60);
 	}
 
 out_already:
@@ -1261,34 +1266,36 @@ no_export:
 		/* allow "new" MDT to be connected during recovery, since we
 		 * need retrieve recovery update records from it */
 		if (target->obd_recovering && !lw_client && !mds_mds_conn) {
-			struct hrtimer *timer = &target->obd_recovery_timer;
-			ktime_t remaining;
-			s64 timeout, left;
-			int in_progress;
-			int connected;
-			int known;
-			int stale;
+			time64_t now;
+			time64_t t;
 			char *msg;
-
-			connected = atomic_read(&target->obd_connected_clients);
-			in_progress = atomic_read(&target->obd_lock_replay_clients);
-			known = target->obd_max_recoverable_clients;
-			stale = target->obd_stale_clients;
-			remaining = hrtimer_expires_remaining(timer);
-			left = ktime_divns(remaining, NSEC_PER_SEC);
-			if (ktime_to_ns(remaining) > 0) {
-				msg = "to recover in";
-				timeout = left;
-			} else {
+			int c; /* connected */
+			int i; /* in progress */
+			int k; /* known */
+			int s; /* stale/evicted */
+
+			c = atomic_read(&target->obd_connected_clients);
+			i = atomic_read(&target->obd_lock_replay_clients);
+			k = target->obd_max_recoverable_clients;
+			s = target->obd_stale_clients;
+			t = jiffies_to_msecs(target->obd_recovery_timer.expires);
+			t /= MSEC_PER_SEC;
+			now = ktime_get_seconds();
+			if (now > t) {
+				t = now - t;
 				msg = "already passed deadline";
-				timeout = -left;
+			} else {
+				t -= now;
+				msg = "to recover in";
 			}
 
-			LCONSOLE_WARN("%s: Denying connection for new client %s (at %s), waiting for %d known clients (%d recovered, %d in progress, and %d evicted) %s %lld:%.02lld\n",
+			LCONSOLE_WARN("%s: Denying connection for new client %s"
+				      "(at %s), waiting for %d known clients "
+				      "(%d recovered, %d in progress, and %d "
+				      "evicted) %s %lld:%.02lld\n",
 				      target->obd_name, cluuid.uuid,
-				      libcfs_nid2str(req->rq_peer.nid), known,
-				      connected - in_progress, in_progress,
-				      stale, msg, timeout / 60, timeout % 60);
+				      libcfs_nid2str(req->rq_peer.nid), k,
+				      c - i, i, s, msg, t / 60, t % 60);
 			rc = -EBUSY;
 		} else {
 dont_check_exports:
@@ -1705,14 +1712,12 @@ EXPORT_SYMBOL(target_cleanup_recovery);
 /* obd_recovery_task_lock should be held */
 void target_cancel_recovery_timer(struct obd_device *obd)
 {
-	CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
-	hrtimer_cancel(&obd->obd_recovery_timer);
+        CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
+	del_timer(&obd->obd_recovery_timer);
 }
 
 static void target_start_recovery_timer(struct obd_device *obd)
 {
-	ktime_t delay;
-
 	if (obd->obd_recovery_start != 0)
 		return;
 
@@ -1729,8 +1734,8 @@ static void target_start_recovery_timer(struct obd_device *obd)
 		return;
 	}
 
-	delay = ktime_set(obd->obd_recovery_timeout, 0);
-	hrtimer_start(&obd->obd_recovery_timer, delay, HRTIMER_MODE_REL);
+	mod_timer(&obd->obd_recovery_timer,
+		  jiffies + cfs_time_seconds(obd->obd_recovery_timeout));
 	obd->obd_recovery_start = ktime_get_real_seconds();
 	spin_unlock(&obd->obd_dev_lock);
 
@@ -1749,12 +1754,13 @@ static void target_start_recovery_timer(struct obd_device *obd)
  * if @extend is true, extend recovery window to have @drt remaining at least;
  * otherwise, make sure the recovery timeout value is not less than @drt.
  */
-static void extend_recovery_timer(struct obd_device *obd, time_t drt,
+static void extend_recovery_timer(struct obd_device *obd, time64_t drt,
 				  bool extend)
 {
-	ktime_t left_ns;
-	time_t left;
-	time_t to;
+	time64_t now;
+	time64_t end;
+	time64_t left;
+	time64_t to;
 
 	spin_lock(&obd->obd_dev_lock);
 	if (!obd->obd_recovering || obd->obd_abort_recovery) {
@@ -1763,34 +1769,33 @@ static void extend_recovery_timer(struct obd_device *obd, time_t drt,
 	}
 	LASSERT(obd->obd_recovery_start != 0);
 
+	now = ktime_get_real_seconds();
 	to = obd->obd_recovery_timeout;
-	left_ns = hrtimer_expires_remaining(&obd->obd_recovery_timer);
-	left = ktime_divns(left_ns, NSEC_PER_SEC);
-	if (extend && (drt > left))
-		to += drt - left;
-	else if (!extend && (drt > to))
-		to = drt;
+	end = obd->obd_recovery_start + to;
+	left = end - now;
+
+        if (extend && (drt > left)) {
+                to += drt - left;
+        } else if (!extend && (drt > to)) {
+                to = drt;
+        }
 
 	if (to > obd->obd_recovery_time_hard) {
 		to = obd->obd_recovery_time_hard;
-		CWARN("%s: extended recovery timer reaching hard limit: %ld, extend: %d\n",
+		CWARN("%s: extended recovery timer reaching hard limit: %lld, extend: %d\n",
 		      obd->obd_name, to, extend);
 	}
 
 	if (obd->obd_recovery_timeout < to) {
-		ktime_t now = ktime_get_real();
-		ktime_t end;
-
-		obd->obd_recovery_timeout = to;
-		end = ktime_set(obd->obd_recovery_start + to, 0);
-		left_ns = ktime_sub(end, now);
-		hrtimer_forward_now(&obd->obd_recovery_timer, left_ns);
-		left = ktime_divns(left_ns, NSEC_PER_MSEC);
-	}
+                obd->obd_recovery_timeout = to;
+		end = obd->obd_recovery_start + to;
+		mod_timer(&obd->obd_recovery_timer,
+			  jiffies + cfs_time_seconds(end - now));
+        }
 	spin_unlock(&obd->obd_dev_lock);
 
-	CDEBUG(D_HA, "%s: recovery timer will expire in %ld seconds\n",
-		obd->obd_name, left);
+	CDEBUG(D_HA, "%s: recovery timer will expire in %lld seconds\n",
+		obd->obd_name, (s64)(end - now));
 }
 
 /* Reset the timer with each new client connection */
@@ -1803,22 +1808,22 @@ static void extend_recovery_timer(struct obd_device *obd, time_t drt,
  * be extended to make sure the client could be reconnected, in the
  * process, the timeout from the new client should be ignored.
  */
+
 static void
 check_and_start_recovery_timer(struct obd_device *obd,
-			       struct ptlrpc_request *req,
-			       int new_client)
+                               struct ptlrpc_request *req,
+                               int new_client)
 {
-	time_t service_time = lustre_msg_get_service_time(req->rq_reqmsg);
-	struct obd_device_target *obt = &obd->u.obt;
+	time64_t service_time = lustre_msg_get_service_time(req->rq_reqmsg);
+        struct obd_device_target *obt = &obd->u.obt;
 
-	if (!new_client && service_time)
-		/* Teach server about old server's estimates, as first guess
-		 * at how long new requests will take.
-		 */
+        if (!new_client && service_time)
+                /* Teach server about old server's estimates, as first guess
+                 * at how long new requests will take. */
 		at_measured(&req->rq_rqbd->rqbd_svcpt->scp_at_estimate,
-			    service_time);
+                            service_time);
 
-	target_start_recovery_timer(obd);
+        target_start_recovery_timer(obd);
 
 	/* Convert the service time to RPC timeout,
 	 * and reuse service_time to limit stack usage.
@@ -2174,15 +2179,15 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
         (void)handler(req);
         lu_context_exit(&thread->t_env->le_ctx);
 
-	/* don't reset timer for final stage */
-	if (!exp_finished(req->rq_export)) {
-		time_t to = obd_timeout;
+        /* don't reset timer for final stage */
+        if (!exp_finished(req->rq_export)) {
+		time64_t to = obd_timeout;
 
-		/**
-		 * Add request timeout to the recovery time so next request from
-		 * this client may come in recovery time
-		 */
-		if (!AT_OFF) {
+                /**
+                 * Add request timeout to the recovery time so next request from
+                 * this client may come in recovery time
+                 */
+                if (!AT_OFF) {
 			struct ptlrpc_service_part *svcpt;
 
 			svcpt = req->rq_rqbd->rqbd_svcpt;
@@ -2190,10 +2195,10 @@ static void handle_recovery_req(struct ptlrpc_thread *thread,
 			 * the client will recalculate the timeout according to
 			 * current server estimate service time, so we will
 			 * use the maxium timeout here for waiting the client
-			 * sending the next req
-			 */
-			to = max_t(time_t, at_est2timeout(at_get(&svcpt->scp_at_estimate)),
-				   lustre_msg_get_timeout(req->rq_reqmsg));
+			 * sending the next req */
+			to = max((int)at_est2timeout(
+				 at_get(&svcpt->scp_at_estimate)),
+				 (int)lustre_msg_get_timeout(req->rq_reqmsg));
 			/* Add 2 net_latency, one for balance rq_deadline
 			 * (see ptl_send_rpc), one for resend the req to server,
 			 * Note: client will pack net_latency in replay req
@@ -2629,20 +2634,17 @@ void target_recovery_fini(struct obd_device *obd)
 }
 EXPORT_SYMBOL(target_recovery_fini);
 
-static enum hrtimer_restart target_recovery_expired(struct hrtimer *timer)
+static void target_recovery_expired(cfs_timer_cb_arg_t data)
 {
-	struct obd_device *obd = container_of(timer, struct obd_device,
-					      obd_recovery_timer);
-
-	CDEBUG(D_HA,
-	       "%s: recovery timed out; %d clients are still in recovery after %llu seconds (%d clients connected)\n",
+	struct obd_device *obd = cfs_from_timer(obd, data, obd_recovery_timer);
+	CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
+	       " after %llus (%d clients connected)\n",
 	       obd->obd_name, atomic_read(&obd->obd_lock_replay_clients),
-	       ktime_get_real_seconds() - obd->obd_recovery_start,
+	       (s64)(ktime_get_real_seconds() - obd->obd_recovery_start),
 	       atomic_read(&obd->obd_connected_clients));
 
 	obd->obd_recovery_expired = 1;
 	wake_up(&obd->obd_next_transno_waitq);
-	return HRTIMER_NORESTART;
 }
 
 void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
@@ -2661,14 +2663,13 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
 	CDEBUG(D_HA, "RECOVERY: service %s, %d recoverable clients, "
 	       "last_transno %llu\n", obd->obd_name,
 	       obd->obd_max_recoverable_clients, obd->obd_last_committed);
-	LASSERT(obd->obd_stopping == 0);
-	obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
-	obd->obd_recovery_start = 0;
-	obd->obd_recovery_end = 0;
-
-	hrtimer_init(&obd->obd_recovery_timer, CLOCK_REALTIME,
-		     HRTIMER_MODE_REL);
-	obd->obd_recovery_timer.function = &target_recovery_expired;
+        LASSERT(obd->obd_stopping == 0);
+        obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
+        obd->obd_recovery_start = 0;
+        obd->obd_recovery_end = 0;
+
+	cfs_timer_setup(&obd->obd_recovery_timer, target_recovery_expired,
+			(unsigned long)obd, 0);
 	target_start_recovery_thread(lut, handler);
 }
 EXPORT_SYMBOL(target_recovery_init);
-- 
1.8.3.1