From 54fc56da54aa727ea8218ff1cba71d909189037a Mon Sep 17 00:00:00 2001 From: Elena Gryaznova Date: Wed, 10 Oct 2018 16:29:00 +0300 Subject: [PATCH] LU-11489 tests: retry power off/on several times Sometimes the BMC hangs for ~1 minute. This patch makes ha.sh attempt power off/on up to 5 times, sleeping $ha_power_delay seconds (set via POWER_DELAY, default 60) after each failed attempt. Test-Parameters:trivial Signed-off-by: Elena Gryaznova Cray-bug-id: LU-6486 Reviewed-by: Andrew Perepechko Reviewed-by: Alexander Boyko Change-Id: Id192e2a39a229979484b0d209fa8fdc5fafbbfc6 Reviewed-on: https://review.whamcloud.com/33332 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alexandr Boyko Reviewed-by: Oleg Drokin --- lustre/tests/ha.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lustre/tests/ha.sh b/lustre/tests/ha.sh index bd47b7f..6f7cb19 100755 --- a/lustre/tests/ha.sh +++ b/lustre/tests/ha.sh @@ -176,6 +176,7 @@ declare -a ha_status_files declare ha_machine_file=$ha_tmp_dir/machine_file declare ha_power_down_cmd=${POWER_DOWN:-"pm -0"} declare ha_power_up_cmd=${POWER_UP:-"pm -1"} +declare ha_power_delay=${POWER_DELAY:-60} declare ha_failback_delay=${DELAY:-5} declare ha_failback_cmd=${FAILBACK:-""} declare ha_stripe_params=${STRIPEPARAMS:-"-c 0"} @@ -570,17 +571,31 @@ ha_wait_loads() ha_power_down() { local nodes=$1 + local rc=1 + local i ha_info "Powering down $nodes" - $ha_power_down_cmd $nodes + for i in $(seq 1 5); do + $ha_power_down_cmd $nodes && rc=0 && break + sleep $ha_power_delay + done + + [ $rc -eq 0 ] || ha_info "Failed Powering down in $i attempts" } ha_power_up() { local nodes=$1 + local rc=1 + local i ha_info "Powering up $nodes" - $ha_power_up_cmd $nodes + for i in $(seq 1 5); do + $ha_power_up_cmd $nodes && rc=0 && break + sleep $ha_power_delay + done + + [ $rc -eq 0 ] || ha_info "Failed Powering up in $i attempts" } # -- 1.8.3.1