X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Frecovery-random-scale.sh;h=6cedb2bdae7c5cee929fd88aa1a3649dd241c1d4;hb=2bcb82cd77f5cb616ede36a3fabf351222f069b7;hp=2458aff826ef71afd330930ad46cf06f4306c208;hpb=d753c060099e12655344fc461e4b6f7142b3b5a1;p=fs%2Flustre-release.git

diff --git a/lustre/tests/recovery-random-scale.sh b/lustre/tests/recovery-random-scale.sh
index 2458aff..6cedb2b 100644
--- a/lustre/tests/recovery-random-scale.sh
+++ b/lustre/tests/recovery-random-scale.sh
@@ -18,6 +18,7 @@ CLEANUP=${CLEANUP:-""}
 init_test_env $@
 
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
 
 TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
 DEBUGLOG=$TESTSUITELOG.debug
@@ -189,12 +190,8 @@ fi
 start_client_loads $NODES_TO_USE
 
 echo clients load pids:
-if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $LOAD_PID_FILE"; then
-    if [ -e $DEBUGLOG ]; then
-        exec 2<&-
-        cat $DEBUGLOG
+if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then
         exit 3
-    fi
 fi
 
 START_TS=$(date +%s)
@@ -291,9 +288,11 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
     # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
     if [ $sleep -lt $MINSLEEP ]; then
         reqfail=$((reqfail +1))
-        log "WARNING: failover, client reintegration and check_client_loads time
-exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
-Failed to meet interval $reqfail times ( REQFAIL=$REQFAIL ); have sleep=$sleep"
+        log "WARNING: failover, client reintegration and check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
+Failed to load the filesystem with I/O for a minimum period of $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
+This iteration, the load was only applied for sleep=$sleep seconds.
+Probably the hardware is taking excessively long to boot.
+Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
         [ $reqfail -gt $REQFAIL ] && exit 6
     fi
 