Sometimes the BMC hangs for about 1 minute.
This patch improves ha.sh to retry power off/on up to 5 times,
sleeping $ha_power_delay seconds (POWER_DELAY, default 60) between attempts.
Test-Parameters:trivial
Signed-off-by: Elena Gryaznova <c17455@cray.com>
Cray-bug-id: LU-6486
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Change-Id: Id192e2a39a229979484b0d209fa8fdc5fafbbfc6
Reviewed-on: https://review.whamcloud.com/33332
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexandr Boyko <c17825@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
declare ha_machine_file=$ha_tmp_dir/machine_file
declare ha_power_down_cmd=${POWER_DOWN:-"pm -0"}
declare ha_power_up_cmd=${POWER_UP:-"pm -1"}
declare ha_machine_file=$ha_tmp_dir/machine_file
declare ha_power_down_cmd=${POWER_DOWN:-"pm -0"}
declare ha_power_up_cmd=${POWER_UP:-"pm -1"}
+declare ha_power_delay=${POWER_DELAY:-60}
declare ha_failback_delay=${DELAY:-5}
declare ha_failback_cmd=${FAILBACK:-""}
declare ha_stripe_params=${STRIPEPARAMS:-"-c 0"}
declare ha_failback_delay=${DELAY:-5}
declare ha_failback_cmd=${FAILBACK:-""}
declare ha_stripe_params=${STRIPEPARAMS:-"-c 0"}
ha_power_down()
{
local nodes=$1
ha_power_down()
{
local nodes=$1
ha_info "Powering down $nodes"
ha_info "Powering down $nodes"
- $ha_power_down_cmd $nodes
+ # rc stays non-zero unless one attempt succeeds. Both variables are
+ # local so that a stale global rc=0 left by another function cannot
+ # hide a failure, and so the loop index does not leak to callers.
+ local rc=1
+ local i
+
+ # The BMC may hang for about a minute: retry the power-down command
+ # up to 5 times, sleeping $ha_power_delay seconds after each failure.
+ for i in $(seq 1 5); do
+ $ha_power_down_cmd $nodes && rc=0 && break
+ sleep $ha_power_delay
+ done
+
+ [ $rc -eq 0 ] || ha_info "Failed Powering down in $i attempts"
}
ha_power_up()
{
local nodes=$1
}
ha_power_up()
{
local nodes=$1
ha_info "Powering up $nodes"
ha_info "Powering up $nodes"
- $ha_power_up_cmd $nodes
+ # rc stays non-zero unless one attempt succeeds. Both variables are
+ # local so that a stale global rc=0 left by another function cannot
+ # hide a failure, and so the loop index does not leak to callers.
+ local rc=1
+ local i
+
+ # The BMC may hang for about a minute: retry the power-up command
+ # up to 5 times, sleeping $ha_power_delay seconds after each failure.
+ for i in $(seq 1 5); do
+ $ha_power_up_cmd $nodes && rc=0 && break
+ sleep $ha_power_delay
+ done
+
+ [ $rc -eq 0 ] || ha_info "Failed Powering up in $i attempts"