Whamcloud - gitweb
LU-11489 tests: retry power off/on several times 32/33332/3
author Elena Gryaznova <c17455@cray.com>
Wed, 10 Oct 2018 13:29:00 +0000 (16:29 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 13 Nov 2018 06:18:13 +0000 (06:18 +0000)
Sometimes the BMC hangs for about a minute.
This patch makes ha.sh retry power off/on up to five times,
sleeping $ha_power_delay seconds (POWER_DELAY, default 60) after each failed attempt.

Test-Parameters:trivial
Signed-off-by: Elena Gryaznova <c17455@cray.com>
Cray-bug-id: LU-6486
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Change-Id: Id192e2a39a229979484b0d209fa8fdc5fafbbfc6
Reviewed-on: https://review.whamcloud.com/33332
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/ha.sh

index bd47b7f..6f7cb19 100755 (executable)
@@ -176,6 +176,7 @@ declare -a  ha_status_files
 declare     ha_machine_file=$ha_tmp_dir/machine_file
 declare     ha_power_down_cmd=${POWER_DOWN:-"pm -0"}
 declare     ha_power_up_cmd=${POWER_UP:-"pm -1"}
 declare     ha_machine_file=$ha_tmp_dir/machine_file
 declare     ha_power_down_cmd=${POWER_DOWN:-"pm -0"}
 declare     ha_power_up_cmd=${POWER_UP:-"pm -1"}
+declare     ha_power_delay=${POWER_DELAY:-60}
 declare     ha_failback_delay=${DELAY:-5}
 declare     ha_failback_cmd=${FAILBACK:-""}
 declare     ha_stripe_params=${STRIPEPARAMS:-"-c 0"}
 declare     ha_failback_delay=${DELAY:-5}
 declare     ha_failback_cmd=${FAILBACK:-""}
 declare     ha_stripe_params=${STRIPEPARAMS:-"-c 0"}
@@ -570,17 +571,31 @@ ha_wait_loads()
 ha_power_down()
 {
        local nodes=$1
 ha_power_down()
 {
        local nodes=$1
+       local rc=1
+       local i
 
        ha_info "Powering down $nodes"
 
        ha_info "Powering down $nodes"
-       $ha_power_down_cmd $nodes
+       for i in $(seq 1 5); do
+               $ha_power_down_cmd $nodes && rc=0 && break
+               sleep $ha_power_delay
+       done
+
+       [ $rc -eq 0 ] || ha_info "Failed Powering down in $i attempts"
 }
 
 ha_power_up()
 {
        local nodes=$1
 }
 
 ha_power_up()
 {
        local nodes=$1
+       local rc=1
+       local i
 
        ha_info "Powering up $nodes"
 
        ha_info "Powering up $nodes"
-       $ha_power_up_cmd $nodes
+       for i in $(seq 1 5); do
+               $ha_power_up_cmd $nodes && rc=0 && break
+               sleep $ha_power_delay
+       done
+
+       [ $rc -eq 0 ] || ha_info "Failed Powering up in $i attempts"
 }
 
 #
 }
 
 #