check_and_setup_lustre
rm -rf $DIR/[df][0-9]*
+max_recov_time=$(max_recovery_time)
+
# the test node needs to be insulated from a lustre failure as much as possible,
# so not even loading the lustre modules is ideal.
# -- umount lustre
exit 3
fi
-START_TS=$(date +%s)
-CURRENT_TS=$START_TS
-
MINSLEEP=${MINSLEEP:-120}
REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug17839 comment 62
REQFAIL=${REQFAIL:-$(( DURATION / SERVER_FAILOVER_PERIOD * REQFAIL_PERCENT / 100))}
reqfail=0
sleep=0
+
+START_TS=$(date +%s)
+CURRENT_TS=$START_TS
+
while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
# In order to perform the
log "WARNING: failover and two check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
Failed to load the filesystem with I/O for a minimum period of $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
This iteration, the load was only applied for sleep=$sleep seconds.
+Estimated max recovery time : $max_recov_time
Probably the hardware is taking excessively long to boot.
Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
[ $reqfail -gt $REQFAIL ] && exit 6
return $rc
}
+# target_start_and_reset_recovery_timer()
+# service_time = at_est2timeout(service_time);
+# service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC +
+# INITIAL_CONNECT_TIMEOUT);
+# CONNECTION_SWITCH_MAX : min(25U, max(CONNECTION_SWITCH_MIN,obd_timeout))
+#define CONNECTION_SWITCH_INC 1
+#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20)
+#define CONNECTION_SWITCH_MIN 5U
+
+# Print an estimate, in seconds, of the longest time recovery may take.
+# Globals:   TIMEOUT is read, an unset value counts as 0
+# Arguments: none
+# Outputs:   the estimate on stdout
+# The estimate follows the formula quoted above, with the connection switch
+# maximum taken at its upper bound of 25 and the increment taken as 1.
+max_recovery_time () {
+ local init_connect_timeout=$(( TIMEOUT / 20 ))
+ # Compare numerically. Inside [[ ]] the > operator compares strings, so
+ # a value such as 10 sorted before 5 and was wrongly clamped down to 5.
+ [[ $init_connect_timeout -gt 5 ]] || init_connect_timeout=5
+
+ # NOTE(review): at_max_get is defined elsewhere, presumably it prints the
+ # adaptive timeout maximum of the client in seconds. Confirm.
+ local service_time=$(( $(at_max_get client) + 2 * (25 + 1 + init_connect_timeout) ))
+
+ echo $service_time
+}
+
get_clients_mount_count () {
local clients=${CLIENTS:-`hostname`}