X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Frunracer;h=083a56e5c44b0d0912a536c98e58d6c78b9042e0;hb=fb19103b08d413c228a755e639daadba427d25e6;hp=fcc26edf1bf5d2088025ec7c258b1e012eabad75;hpb=aa4f18ebc8eac8635ebd13bbcf4857e4d386d95d;p=fs%2Flustre-release.git diff --git a/lustre/tests/runracer b/lustre/tests/runracer index fcc26ed..083a56e 100644 --- a/lustre/tests/runracer +++ b/lustre/tests/runracer @@ -2,18 +2,28 @@ #set -vx set -e +ONLY=${ONLY:-"$*"} LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging -racer=`which racer.sh` +racer=`which racer.sh 2> /dev/null` +echo racer: $racer [ -z "$racer" ] && echo racer is not installed && exit 1 CLIENTS=${CLIENTS:-$HOSTNAME} -RDIR=$DIR/racer -mkdir -p $RDIR +RACERDIRS=${RACERDIRS:-$DIR} +echo RACERDIRS=$RACERDIRS +for d in ${RACERDIRS}; do + RDIRS="$RDIRS $d/racer" + mkdir -p $d/racer +# lfs setstripe $d/racer -c -1 +done + DURATION=${DURATION:-120} +PIDFILE=$TMP/racer.$$ assert_env CLIENTS @@ -30,17 +40,28 @@ do_racer_cleanup () { local INTERVAL=5 local pids local rc=0 + local TMAX + + local RDIR=$1 echo "DOING RACER CLEANUP ... " # Check if all processes are killed local clients=$CLIENTS + local num_clients=$(get_node_count ${clients//,/ }) + + if at_is_enabled; then + TMAX=$(at_max_get mds) + else + TMAX=$(lctl get_param -n timeout) + fi + [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60)) # 1.Let chance to racer to kill all it's processes # FIXME: not sure how long does it take for racer to kill all processes # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec - while [ $WAIT -lt 90 ]; do + while [ $WAIT -lt $TMAX ]; do running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true) [ -z "$running" ] && rc=0 && break echo "clients $clients are still running the racer processes. Waited $WAIT secs" @@ -59,8 +80,8 @@ do_racer_cleanup () { echo "client $C still running racer processes after $WAIT seconds. Killing $pids" do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" do_node $C kill -TERM $pids || true - # let processes to be killed - sleep 2 + # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap + sleep 20 # 3. Check if the processes were killed # exit error if the processes still exist for pid in $pids; do @@ -81,33 +102,62 @@ racer_cleanup () { echo $timeout killing RACERPID=$RACERPID kill $RACERPID || true sleep 2 # give chance racer to kill it's processes - do_racer_cleanup + local dir + for dir in $RDIRS; do + do_racer_cleanup $dir + done else echo "Racer completed before DURATION=$DURATION expired. Cleaning up..." - kill $TIMERPID - do_racer_cleanup + kill $TIMERPID || true + for dir in $RDIRS; do + do_racer_cleanup $dir + done fi } racer_timeout () { timeout="timeout" + RACERPID=$(cat $PIDFILE) + rm -f $PIDFILE racer_cleanup echo "$0: completed $RC" - exit $RC + return $RC } -# run racer -log "Start racer on clients: $CLIENTS DURATION=$DURATION" -RC=0 - +build_test_filter +check_and_setup_lustre trap racer_timeout ALRM -timer_on $((DURATION + 5)) +# run racer +test_1() { + RC=0 + + timer_on $((DURATION + 5)) + + RACERPID="" + for rdir in $RDIRS; do + do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & + pid=$! + RACERPID="$RACERPID $pid" + done + + echo RACERPID=$RACERPID + echo $RACERPID > $PIDFILE + for rpid in $RACERPID; do + wait $rpid + rc=$? + echo "rpid=$rpid rc=$rc" + if [ $rc != 0 ]; then + RC=$((RC + 1)) + fi + done + + racer_cleanup + + return $RC +} +run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION" -do_nodes $CLIENTS "DURATION=$DURATION $racer $RDIR" & -RACERPID=$! -echo RACERPID=$RACERPID -wait $RACERPID || RC=2 -racer_cleanup -echo "$0: completed $RC" -exit $RC +equals_msg `basename $0`: test complete, cleaning up +check_and_cleanup_lustre +[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true