From ba6398629c6aca2f5978b12283f554a3410092cd Mon Sep 17 00:00:00 2001
From: Elena Gryaznova <elena.gryaznova@hpe.com>
Date: Mon, 23 Dec 2024 13:51:33 +0300
Subject: [PATCH] LU-18594 tests: add vmstat and recovery status info

Patch adds:
   -- the ability to collect vmstat and recovery status
      info. Setting VMSTAT_DELAY=value starts vmstat with delay=value;
      setting RECOVERY_STATUS_DELAY=value runs:
          lctl get_param *.*.recovery_status
      every "value" seconds on victim server nodes and their pairs;

   -- a return code check for precmd and postcmd; the test stops
      if either command fails;

   -- minor cleanup:
      a slightly more verbose ha_sleep()
      a tunable tmp directory.

Test-Parameters: trivial
Signed-off-by: Elena Gryaznova <elena.gryaznova@hpe.com>
HPE-bug-id: LUS-12232
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Change-Id: I4087c73f58bf58b163f164e28b267a536569268a
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57573
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 lustre/tests/ha.sh | 177 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 165 insertions(+), 12 deletions(-)

diff --git a/lustre/tests/ha.sh b/lustre/tests/ha.sh
index 1471a0c..8b604f0 100755
--- a/lustre/tests/ha.sh
+++ b/lustre/tests/ha.sh
@@ -159,6 +159,29 @@ ha_touch()
 		true
 }
 
+# Periodically append "date" and recovery_status of every victim node and
+# its pair to <ha_tmp_dir>/<node>.recovery.status until the stop file exists.
+ha_recovery_status ()
+{
+	local -a nodes=(${ha_victims[*]} ${ha_victims_pair[*]})
+	local node log lock
+
+	until [ -e "$ha_stop_file" ]; do
+		for node in "${nodes[@]}"; do
+			log=$ha_tmp_dir/${node}.recovery.status
+			lock=${log}.lock
+			# a lock file marks a node whose collection is paused
+			[ -e $lock ] ||
+				ha_on $node \
+					"date; \
+					lctl get_param *.*.recovery_status" >>\
+					"$log" 2>&1 || true
+		done
+		ha_sleep $ha_recovery_status_delay \
+			"recovery status each $ha_recovery_status_delay sec"
+	done
+}
+
 ha_log()
 {
 	local nodes=${1// /,}
@@ -166,6 +189,48 @@ ha_log()
 	ha_on $nodes "lctl mark $*"
 }
 
+declare -A ha_node_vmstat_pids
+
+# Start "vmstat -t <delay>" on node $1 with interval $2, appending its output
+# to $ha_vmstat_dir/<node>.vmstat there; the PID (2nd field of ha_on output)
+# is stored in ha_node_vmstat_pids[<node>].
+ha_start_vmstat_node()
+{
+	local node=$1
+	local delay=$2
+	local log=$ha_vmstat_dir/${node}.vmstat
+	rm -f $ha_tmp_dir/${node}.vmstat.lock
+	local pid=$(ha_on $node "mkdir -p $ha_vmstat_dir; vmstat -t $delay >> \
+		$log 2>/dev/null </dev/null & echo \$!" | awk '{print $2}')
+	echo "VMSTAT started on $node PID: $pid, log: ${node}:$log"
+	ha_on $node ps aux | grep vmstat || true # diagnostic, keep "set -e" quiet
+	ha_node_vmstat_pids[$node]=$pid
+}
+
+ha_start_vmstat()
+{
+	local node	# walk victims and their pairs without a global index
+	for node in ${ha_victims[*]} ${ha_victims_pair[*]}; do
+		ha_start_vmstat_node $node $ha_vmstat_delay
+	done
+}
+
+# Best-effort stop of the vmstat PIDs kept in ha_node_vmstat_pids on all
+# victim nodes and their pairs ("grep -v grep" avoids matching the check).
+ha_stop_vmstat()
+{
+	local node pid
+	for node in ${ha_victims[*]} ${ha_victims_pair[*]}; do
+		ha_info "Stopping vmstat on $node ... "
+		ha_on $node "ps aux | grep vmstat | grep -v grep" || continue
+		pid=${ha_node_vmstat_pids[$node]}
+		ha_on $node "kill -s TERM $pid; \
+				tail --pid=$pid -f /dev/null" || true
+		ha_info "Check is vmstat still running on $node ..."
+		ha_on $node "ps aux | grep vmstat | grep -v grep" || true
+	done
+}
+
 ha_error()
 {
     ha_info "$@" >&2
@@ -186,8 +251,23 @@ ha_trap_err()
 trap ha_trap_err ERR
 set -eE
 
+declare TMP=${TMP:-/tmp}
+
+# Set RECOVERY_STATUS_DELAY (VMSTAT_DELAY) to "value" to gather
+# recovery_status (vmstat) info every "value" secs.
+# 0 means "do not collect vmstat and recovery status info"
+declare     ha_recovery_status_delay=${RECOVERY_STATUS_DELAY:-0}
+declare     ha_recovery_status_pid
+
+declare     ha_vmstat_delay=${VMSTAT_DELAY:-0}
 declare     ha_power_down_pids
-declare     ha_tmp_dir=/tmp/$(basename $0)-$$
+declare     ha_test_subdir=$(basename $0)-$$
+declare     ha_tmp_dir=$TMP/$ha_test_subdir
+
+# It is useless to store vmstat results in /tmp because there is
+# no guarantee that the files survive a node crash
+declare     ha_vmstat_dir=${VMSTATDIR:-$TMP}/$ha_test_subdir
+
 declare     ha_stop_file=$ha_tmp_dir/stop
 declare     ha_fail_file=$ha_tmp_dir/fail
 declare     ha_pm_states=$ha_tmp_dir/ha_pm_states
@@ -391,6 +471,14 @@ ha_trap_exit()
 {
 	ha_touch stop
 	trap 0
+	if (( ha_vmstat_delay != 0 )); then
+		ha_stop_vmstat
+	fi
+	if (( ha_recovery_status_delay != 0 )); then
+		# the process $ha_recovery_status_pid
+		# could be completed by ha_stop_loads()->wait
+		wait $ha_recovery_status_pid || true
+	fi
 	if [ -e "$ha_fail_file" ]; then
 		ha_info "Test directories ${ha_testdirs[@]} not removed"
 		ha_info "Temporary directory $ha_tmp_dir not removed"
@@ -410,9 +498,13 @@ ha_trap_stop_signals()
 
 ha_sleep()
 {
-    local n=$1
+	local n=$1
+	local reason=$2
+
+	[[ -n $reason ]] &&
+		reason=", Reason: $reason"
 
-    ha_info "Sleeping for ${n}s"
+    ha_info "Sleeping for ${n}s$reason"
     #
     # sleep(1) could interrupted.
     #
@@ -447,7 +539,7 @@ ha_unlock()
 ha_dump_logs()
 {
 	local nodes=${1// /,}
-	local file=/tmp/$(basename $0)-$$-$(date +%s).dk
+	local file=${ha_tmp_dir}-$(date +%s).dk
 	local lock=$ha_tmp_dir/lock-dump-logs
 	local rc=0
 
@@ -483,6 +575,7 @@ ha_repeat_mpi_load()
 	local log=$ha_tmp_dir/$client-$tag
 	local rc=0
 	local rccheck=0
+	local rcprepostcmd=0
 	local nr_loops=0
 	local avg_loop_time=0
 	local start_time=$(date +%s)
@@ -512,9 +605,18 @@ ha_repeat_mpi_load()
 		else
 			dir_stripe_count=$ha_dir_stripe_count
 		fi
-		[[ -n "$ha_precmd" ]] && ha_info "$ha_precmd" &&
+		if [[ -n "$ha_precmd" ]]; then
+			ha_info "$ha_precmd"
 			ha_on $client "$ha_precmd" >>"$log" 2>&1
-		ha_info "$client Creates $dir with -i$mdt_index -c$dir_stripe_count "
+			rcprepostcmd=$?
+			if (( rcprepostcmd != 0 )); then
+				ha_touch stop,fail $client-$tag
+				ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}"
+				continue
+			fi
+		fi
+
+		ha_info "$client Creates $dir with -i$mdt_index -c$dir_stripe_count ; stripeparams: $stripeparams "
 		ha_on $client $LFS mkdir -i$mdt_index -c$dir_stripe_count "$dir" &&
 		ha_on $client $LFS getdirstripe "$dir" &&
 		ha_on $client $LFS setstripe $stripeparams $dir &&
@@ -526,8 +628,15 @@ ha_repeat_mpi_load()
 		ha_on ${ha_clients[0]} "$check_attrs &&                    \
 			$LFS df $dir &&                                    \
 			$check_attrs " && rccheck=1
-		[[ -n "$ha_postcmd" ]] && ha_info "$ha_postcmd" &&
+		if [[ -n "$ha_postcmd" ]]; then
+			ha_info "$ha_postcmd"
 			ha_on $client "$ha_postcmd" >>"$log" 2>&1
+			rcprepostcmd=$?
+			if (( rcprepostcmd != 0 )); then
+				ha_touch stop,fail $client-$tag
+				ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}"
+			fi
+		fi
 		if (( ((rc == 0)) && ((rccheck == 0)) && \
 			(( mustpass != 0 )) )) ||
 			(( ((rc != 0)) && ((rccheck == 0)) && \
@@ -821,7 +930,7 @@ ha_start_lfsck()
 		if [ -e $ha_lfsck_lock ]; then
 			rc=0
 			ha_wait_unlock $ha_lfsck_lock
-			ha_sleep 120
+			ha_sleep 120 "before lfsck restarting"
 			ha_on $ha_lfsck_node "lctl lfsck_start $params" || rc=1
 		fi
 	fi
@@ -948,6 +1057,22 @@ ha_power_down_cmd_fn()
 	# format is: POWER_DOWN=sysrqcrash
 	sysrqcrash)
 		cmd="pdsh -S -w $nodes -u 120 \"echo c > /proc/sysrq-trigger\" &"
+		# stop grab recovery status on crashed nodes
+		if (( ha_recovery_status_delay != 0 )); then
+			for n in ${nodes//,/ }; do
+				touch $ha_tmp_dir/${n}.recovery.status.lock
+				echo $(date) \
+					"recovery status collection is paused: \
+					$n is going to power down" >> \
+					$ha_tmp_dir/${n}.recovery.status
+			done
+		fi
+		# restart vmstat after node back
+		if (( ha_vmstat_delay != 0 )); then
+			for n in ${nodes//,/ }; do
+				touch $ha_tmp_dir/${n}.vmstat.lock
+			done
+		fi
 		eval $cmd
 		pid=$!
 		ha_power_down_pids=$(echo $ha_power_down_pids $pid)
@@ -989,7 +1114,7 @@ ha_power_down()
 	for (( i=0; i<10; i++ )) {
 		ha_info "attempt: $i"
 		ha_power_down_cmd_fn $nodes || rc=1
-		ha_sleep $ha_power_delay
+		ha_sleep $ha_power_delay "delay node status check after powerdown ..."
 		ha_powermanage $nodes $state && rc=0 && break
 	}
 	if [[ -n "$ha_power_down_pids" ]]; then
@@ -1025,7 +1150,7 @@ ha_power_up_delay()
 	local rc
 
 	if [[ ${#ha_victims_pair[@]} -eq 0 ]]; then
-		ha_sleep $ha_node_up_delay
+		ha_sleep $ha_node_up_delay "before node power up"
 		return 0
 	fi
 
@@ -1177,7 +1302,7 @@ ha_failback()
 			$ha_failback_delay sec, attempt: $i ($attempts); \
 			cmd: $ha_failback_cmd $nodes"
 
-		ha_sleep $ha_failback_delay
+		ha_sleep $ha_failback_delay "delay before failback"
 		[ "$ha_failback_cmd" ] ||
 		{
 			ha_info "No failback command set, skiping"
@@ -1186,6 +1311,21 @@ ha_failback()
 		if $ha_failback_cmd $nodes ; then
 			rc=0
 			ha_info "Failback succesfully started: attempt: $i"
+			for n in ${nodes//,/ }; do
+				if (( ha_recovery_status_delay != 0 )); then
+					local lock=$ha_tmp_dir/${n}.recovery.status.lock
+					ls -al $lock
+					rm -f $lock
+					echo $(date) \
+						"recovery status collection is \
+						resumed" >> \
+						$ha_tmp_dir/${n}.recovery.status
+				fi
+				lock=$ha_tmp_dir/${n}.vmstat.lock
+				if (( ha_vmstat_delay != 0 )) && [[ -e $lock ]]; then
+					ha_start_vmstat_node $n $ha_vmstat_delay
+				fi
+			done
 			break
 		fi
 	done
@@ -1216,7 +1356,8 @@ ha_killer()
 		ha_info "Failing $nodes"
 		$ha_workloads_only && ha_info "    is skipped: workload only..."
 
-		ha_sleep $(ha_rand $ha_max_failover_period)
+		ha_sleep $(ha_rand $ha_max_failover_period) \
+			"random of max failover set ($ha_max_failover_period)"
 		$ha_workloads_only || ha_power_down $nodes
 		ha_sleep 10
 		ha_wait_loads || return
@@ -1278,6 +1419,18 @@ ha_main()
 			$LFS setstripe $ha_stripe_params $test_dir"
 	done
 
+	if (( ha_recovery_status_delay != 0 )); then
+		ha_info "Dumping recovery status info \
+			each $ha_recovery_status_delay sec"
+		ha_recovery_status &
+		ha_recovery_status_pid=$!
+	fi
+
+	if (( ha_vmstat_delay != 0 )); then
+		ha_info "Starting vmstat with delay $ha_vmstat_delay"
+		ha_start_vmstat
+	fi
+
 	ha_start_loads
 	ha_wait_loads
 
-- 
1.8.3.1