From: Di Wang <di.wang@intel.com>
Date: Thu, 8 Oct 2015 23:58:35 +0000 (-0700)
Subject: LU-7285 update: update next transno only if recovery succeeds
X-Git-Tag: 2.7.62~1
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e55fb7193651d205ad94761d6b3ed9b12e910daa;ds=sidebyside

LU-7285 update: update next transno only if recovery succeeds

Update obd_next_recovery_transno only if update recovery
succeeds; otherwise, if a client sends a replay request with
the same transno, it will cause a panic in
check_for_next_transno():

LustreError: 4529:0:(ldlm_lib.c:1826:check_for_next_transno())
ASSERTION( req_transno >= next_transno ) failed: req_transno:
1404455952555, next_transno: 1404455952556

LustreError: 4529:0:(ldlm_lib.c:1826:check_for_next_transno()) LBUG
Call Trace:
[<ffffffffa074c875>] libcfs_debug_dumpstack+0x55/0x80 [libcfs]
[<ffffffffa074ce77>] lbug_with_loc+0x47/0xb0 [libcfs]
[<ffffffffa0a8640c>] check_for_next_transno+0x68c/0x6d0 [ptlrpc]
[<ffffffffa089a6ed>] ? keys_fini+0x16d/0x240 [obdclass]
[<ffffffffa0a85d80>] ? check_for_next_transno+0x0/0x6d0 [ptlrpc]
[<ffffffffa0a82883>] target_recovery_overseer+0x93/0x320 [ptlrpc]
[<ffffffffa0a81000>] ? exp_req_replay_healthy+0x0/0x30 [ptlrpc]
[<ffffffffa0a89510>] target_recovery_thread+0x6d0/0x2380 [ptlrpc]
[<ffffffffa0a88e40>] ? target_recovery_thread+0x0/0x2380 [ptlrpc]
[<ffffffff8109e78e>] kthread+0x9e/0xc0

Add replay-single.sh test 71a to verify double-MDT failover.

Signed-off-by: Di Wang <di.wang@intel.com>
Change-Id: Id74768a851985a1cec53e6bce28a0bf00b3fc1c7
Reviewed-on: http://review.whamcloud.com/16799
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
---

diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index ae8c386..cb67f82 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -2271,6 +2271,7 @@ static void replay_request_or_update(struct lu_env *env,
 			obd->obd_replayed_requests++;
 		} else if (type == UPDATE_RECOVERY && transno != 0) {
 			struct distribute_txn_replay_req *dtrq;
+			bool update_transno = false;
 
 			spin_unlock(&obd->obd_recovery_task_lock);
 
@@ -2289,18 +2290,22 @@ static void replay_request_or_update(struct lu_env *env,
 				       dtrq->dtrq_master_transno);
 				list_add(&dtrq->dtrq_list,
 					 &tdtd->tdtd_replay_finish_list);
+				update_transno = true;
 			} else {
 				dtrq_destroy(dtrq);
 			}
 			spin_unlock(&tdtd->tdtd_replay_list_lock);
 
-			spin_lock(&obd->obd_recovery_task_lock);
-			if (transno == obd->obd_next_recovery_transno)
-				obd->obd_next_recovery_transno++;
-			else if (transno > obd->obd_next_recovery_transno)
-				obd->obd_next_recovery_transno = transno + 1;
-			spin_unlock(&obd->obd_recovery_task_lock);
-
+			if (update_transno) {
+				spin_lock(&obd->obd_recovery_task_lock);
+				if (transno == obd->obd_next_recovery_transno)
+					obd->obd_next_recovery_transno++;
+				else if (transno >
+					 obd->obd_next_recovery_transno)
+					obd->obd_next_recovery_transno =
+								transno + 1;
+				spin_unlock(&obd->obd_recovery_task_lock);
+			}
 		} else {
 			spin_unlock(&obd->obd_recovery_task_lock);
 			LASSERT(list_empty(&obd->obd_req_replay_queue));
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index e4de09b..7855b67 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -2305,6 +2305,77 @@ test_70e () {
 }
 run_test 70e "rename cross-MDT with random fails"
 
+cleanup_71a() {
+	trap 0
+	kill -9 $mkdir_71a_pid
+}
+
+random_double_fail_mdt() {
+	local max_index=$1
+	local duration=$2
+	local monitor_pid=$3
+	local elapsed
+	local start_ts=$(date +%s)
+	local num_failovers=0
+	local fail_index
+	local second_index
+
+	elapsed=$(($(date +%s) - start_ts))
+	while [ $elapsed -lt $duration ]; do
+		fail_index=$((RANDOM%max_index + 1))
+		if [ $fail_index -eq $max_index ]; then
+			second_index=1
+		else
+			second_index=$((fail_index + 1))
+		fi
+		kill -0 $monitor_pid ||
+			error "$monitor_pid stopped"
+		sleep 120
+		replay_barrier mds$fail_index
+		replay_barrier mds$second_index
+		sleep 10
+		# Increment the number of failovers
+		num_failovers=$((num_failovers+1))
+		log "fail mds$fail_index mds$second_index $num_failovers times"
+		fail mds${fail_index},mds${second_index}
+		elapsed=$(($(date +%s) - start_ts))
+	done
+}
+
+test_71a () {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local clients=${CLIENTS:-$HOSTNAME}
+	local rc=0
+
+	zconf_mount_clients $clients $MOUNT
+
+	local duration=300
+	[ "$SLOW" = "no" ] && duration=180
+	# set duration to 900 because it takes some time to boot node
+	[ "$FAILURE_MODE" = HARD ] && duration=900
+
+	mkdir -p $DIR/$tdir
+
+	local elapsed
+	local start_ts=$(date +%s)
+
+	trap cleanup_71a EXIT
+	(
+		while true; do
+			$LFS mkdir -i0 -c2 $DIR/$tdir/test
+			rmdir $DIR/$tdir/test
+		done
+	)&
+	mkdir_71a_pid=$!
+	echo "Started  $mkdir_71a_pid"
+
+	random_double_fail_mdt 2 $duration $mkdir_71a_pid
+	kill -0 $mkdir_71a_pid || error "mkdir/rmdir $mkdir_71a_pid stopped"
+
+	cleanup_71a
+	true
+}
+run_test 71a "mkdir/rmdir striped dir with 2 mdts recovery"
 
 test_73a() {
 	multiop_bg_pause $DIR/$tfile O_tSc ||