X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Frunracer;h=083a56e5c44b0d0912a536c98e58d6c78b9042e0;hb=64401149ef68b9291ae84acefe1681441d4bdb83;hp=feca24b57855949b4d1c5c97c8977117a1546d32;hpb=d23880c787ee66eca0eb936f584bca94c846a47e;p=fs%2Flustre-release.git diff --git a/lustre/tests/runracer b/lustre/tests/runracer index feca24b..083a56e 100644 --- a/lustre/tests/runracer +++ b/lustre/tests/runracer @@ -2,16 +2,18 @@ #set -vx set -e +ONLY=${ONLY:-"$*"} LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging -racer=`which racer.sh` +racer=`which racer.sh 2> /dev/null` +echo racer: $racer [ -z "$racer" ] && echo racer is not installed && exit 1 CLIENTS=${CLIENTS:-$HOSTNAME} -RACERDIRS=$@ RACERDIRS=${RACERDIRS:-$DIR} echo RACERDIRS=$RACERDIRS for d in ${RACERDIRS}; do @@ -21,6 +23,7 @@ for d in ${RACERDIRS}; do done DURATION=${DURATION:-120} +PIDFILE=$TMP/racer.$$ assert_env CLIENTS @@ -37,6 +40,7 @@ do_racer_cleanup () { local INTERVAL=5 local pids local rc=0 + local TMAX local RDIR=$1 @@ -45,11 +49,19 @@ do_racer_cleanup () { # Check if all processes are killed local clients=$CLIENTS + local num_clients=$(get_node_count ${clients//,/ }) + if at_is_enabled; then + TMAX=$(at_max_get mds) + else + TMAX=$(lctl get_param -n timeout) + fi + + [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60)) # 1.Let chance to racer to kill all it's processes # FIXME: not sure how long does it take for racer to kill all processes # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec - while [ $WAIT -lt 90 ]; do + while [ $WAIT -lt $TMAX ]; do running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true) [ -z "$running" ] && rc=0 && break echo "clients $clients are still running the racer processes. Waited $WAIT secs" @@ -68,8 +80,8 @@ do_racer_cleanup () { echo "client $C still running racer processes after $WAIT seconds. Killing $pids" do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" do_node $C kill -TERM $pids || true - # let processes to be killed - sleep 2 + # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap + sleep 20 # 3. Check if the processes were killed # exit error if the processes still exist for pid in $pids; do @@ -96,7 +108,7 @@ racer_cleanup () { done else echo "Racer completed before DURATION=$DURATION expired. Cleaning up..." - kill $TIMERPID + kill $TIMERPID || true for dir in $RDIRS; do do_racer_cleanup $dir done @@ -105,36 +117,47 @@ racer_cleanup () { racer_timeout () { timeout="timeout" + RACERPID=$(cat $PIDFILE) + rm -f $PIDFILE racer_cleanup echo "$0: completed $RC" - exit $RC + return $RC } -# run racer -log "Start racer on clients: $CLIENTS DURATION=$DURATION" -RC=0 - +build_test_filter +check_and_setup_lustre trap racer_timeout ALRM -timer_on $((DURATION + 5)) - -RACERPID="" -for rdir in $RDIRS; do - do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & - pid=$! - RACERPID="$RACERPID $pid" -done - -echo RACERPID=$RACERPID -for rpid in $RACERPID; do - wait $rpid - rc=$? - echo "rpid=$rpid rc=$rc" - if [ $rc != 0 ]; then - RC=$((RC + 1)) - fi -done +# run racer +test_1() { + RC=0 + + timer_on $((DURATION + 5)) + + RACERPID="" + for rdir in $RDIRS; do + do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & + pid=$! + RACERPID="$RACERPID $pid" + done + + echo RACERPID=$RACERPID + echo $RACERPID > $PIDFILE + for rpid in $RACERPID; do + wait $rpid + rc=$? + echo "rpid=$rpid rc=$rc" + if [ $rc != 0 ]; then + RC=$((RC + 1)) + fi + done + + racer_cleanup + + return $RC +} +run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION" -racer_cleanup -echo "$0: completed $RC" -exit $RC +equals_msg `basename $0`: test complete, cleaning up +check_and_cleanup_lustre +[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true