From bc871f8ff53068bfe69ad7653479b42e6a6d2d93 Mon Sep 17 00:00:00 2001
From: Alexander Boyko <c17825@cray.com>
Date: Thu, 7 Nov 2019 06:13:50 -0500
Subject: [PATCH] LU-12949 obdclass: don't extend timer if obd stops

During umount all clients become stale, so the first check in
check_for_recovery_ready() passes, but there is no guarantee
that the recovery timer was started. So we need to check obd_stopping.

Test 138 is added to recovery-small.sh.
It reproduces the issue where the MDT is waiting for clients during
recovery and an MDT umount happens:
extend_recovery_timer()) ASSERTION( obd->obd_recovery_start != 0 )
failed

Cray-bug-id: LUS-7917
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I1906fdfcc10606912a1f81560bb60b9d424db149
Reviewed-on: https://review.whamcloud.com/36703
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Sergey Cheremencev <c17829@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 lustre/include/obd_support.h   |  1 +
 lustre/ldlm/ldlm_lib.c         |  9 +++++++--
 lustre/lod/lod_dev.c           | 10 +++++++++-
 lustre/tests/recovery-small.sh | 26 ++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 347be7a..6e11bd8 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -489,6 +489,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_TGT_RCVD_EIO		 0x720
 #define OBD_FAIL_TGT_RECOVERY_REQ_RACE	 0x721
 #define OBD_FAIL_TGT_REPLY_DATA_RACE	 0x722
+#define OBD_FAIL_TGT_RECOVERY_CONNECT    0x724
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 3ffe771..e94d72a 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1244,6 +1244,10 @@ int target_handle_connect(struct ptlrpc_request *req)
 		rc = -EALREADY;
 		class_export_put(export);
 		export = NULL;
+	} else if (OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT) &&
+		   !lw_client) {
+		spin_unlock(&export->exp_lock);
+		rc = -EAGAIN;
 	} else {
 		export->exp_connecting = 1;
 		spin_unlock(&export->exp_lock);
@@ -1830,7 +1834,8 @@ static void extend_recovery_timer(struct obd_device *obd, time_t dr_timeout,
 	time_t left;
 
 	spin_lock(&obd->obd_dev_lock);
-	if (!obd->obd_recovering || obd->obd_abort_recovery) {
+	if (!obd->obd_recovering || obd->obd_abort_recovery ||
+	    obd->obd_stopping) {
 		spin_unlock(&obd->obd_dev_lock);
 		return;
 	}
@@ -2328,7 +2333,7 @@ static int check_for_recovery_ready(struct lu_target *lut)
 
 	if (lut->lut_tdtd != NULL) {
 		if (!lut->lut_tdtd->tdtd_replay_ready &&
-		    !obd->obd_abort_recovery) {
+		    !obd->obd_abort_recovery && !obd->obd_stopping) {
 			/*
 			 * Let's extend recovery timer, in case the recovery
 			 * timer expired, and some clients got evicted
diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c
index 927c764..5b2b38f 100644
--- a/lustre/lod/lod_dev.c
+++ b/lustre/lod/lod_dev.c
@@ -386,7 +386,14 @@ static int lod_sub_recovery_thread(void *arg)
 	start = ktime_get_real_seconds();
 
 again:
-	rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
+
+	if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT)) &&
+	    lrd->lrd_ltd) {
+		OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val);
+		rc = -EIO;
+	} else {
+		rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
+	}
 	if (!rc && !lod->lod_child->dd_rdonly) {
 		/* Process the recovery record */
 		ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
@@ -1039,6 +1046,7 @@ static int lod_process_config(const struct lu_env *env,
 	case LCFG_PRE_CLEANUP: {
 		lod_sub_process_config(env, lod, &lod->lod_mdt_descs, lcfg);
 		lod_sub_process_config(env, lod, &lod->lod_ost_descs, lcfg);
+		OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val * 2);
 		next = &lod->lod_child->dd_lu_dev;
 		rc = next->ld_ops->ldo_process_config(env, next, lcfg);
 		if (rc != 0)
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 48de1b9..389d530 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -2878,6 +2878,32 @@ test_137() {
 }
 run_test 137 "late resend must be skipped if already applied"
 
+test_138() {
+	remote_mds_nodsh && skip "remote MDS with nodsh"
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	[[ "$MDS1_VERSION" -ge $(version_code 2.12.59) ]] ||
+		skip "Need server version newer than 2.12.59"
+
+	zconf_umount_clients $CLIENTS $MOUNT
+
+#define OBD_FAIL_TGT_RECOVERY_CONNECT 0x724
+	#delay the first step of recovery while the MDS is waiting for clients
+	#and failing to get osp logs
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x724 fail_val=5
+
+	facet_failover $SINGLEMDS
+
+	#wait for failover and the recovery timer
+	#the value is based on target_recovery_overseer() wait_event timeout
+	sleep 55
+	stop $SINGLEMDS || error "stop MDS failed"
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+	start $SINGLEMDS $(mdsdevname ${SINGLEMDS//mds/}) ||
+		error "start MDS failed"
+	zconf_mount_clients $CLIENTS $MOUNT
+}
+run_test 138 "Umount MDT during recovery"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status
-- 
1.8.3.1