-#!/bin/bash
-#set -vx
-set -e
-
-ONLY=${ONLY:-"$*"}
-LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
-. $LUSTRE/tests/test-framework.sh
-init_test_env $@
-. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
-init_logging
-
-racer=`which racer.sh 2> /dev/null`
-echo racer: $racer
-[ -z "$racer" ] && echo racer is not installed && exit 1
-
-CLIENTS=${CLIENTS:-$HOSTNAME}
-RACERDIRS=${RACERDIRS:-$DIR}
-echo RACERDIRS=$RACERDIRS
-for d in ${RACERDIRS}; do
- RDIRS="$RDIRS $d/racer"
- mkdir -p $d/racer
-# lfs setstripe $d/racer -c -1
-done
-
-DURATION=${DURATION:-120}
-PIDFILE=$TMP/racer.$$
-
-assert_env CLIENTS
-
-timer_on () {
- sleep $1 && kill -s ALRM $$ &
- TIMERPID=$!
- echo TIMERPID=$TIMERPID
-}
-
-do_racer_cleanup () {
- trap 0
-
- local WAIT=0
- local INTERVAL=5
- local pids
- local rc=0
- local TMAX
-
- local RDIR=$1
-
- echo "DOING RACER CLEANUP ... "
-
- # Check if all processes are killed
-
- local clients=$CLIENTS
- local num_clients=$(get_node_count ${clients//,/ })
-
- if at_is_enabled; then
- TMAX=$(at_max_get mds)
- else
- TMAX=$(lctl get_param -n timeout)
- fi
-
- [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60))
- # 1.Let chance to racer to kill all it's processes
- # FIXME: not sure how long does it take for racer to kill all processes
- # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec
- while [ $WAIT -lt $TMAX ]; do
- running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
- [ -z "$running" ] && rc=0 && break
- echo "clients $clients are still running the racer processes. Waited $WAIT secs"
- echo $running
- rc=1
- [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
- sleep $INTERVAL
- WAIT=$((WAIT + INTERVAL))
- done
-
- # 2. Kill the remaining processes
- if [ $rc -ne 0 ]; then
- for C in ${clients//,/ } ; do
- pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
- if [ ! -z "$pids" ]; then
- echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
- do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
- do_node $C kill -TERM $pids || true
- # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap
- sleep 20
- # 3. Check if the processes were killed
- # exit error if the processes still exist
- for pid in $pids; do
- do_node $C "ps -P $pid" && RC=1 || true
- done
- else
- echo "All processes on client $C exited after $WAIT seconds. OK."
- fi
- done
- else
- echo "No racer processes running after $WAIT seconds. OK."
- wait_remote_prog $racer 10
- fi
-}
-
-racer_cleanup () {
- if [ "$timeout" == "timeout" ]; then
- echo $timeout killing RACERPID=$RACERPID
- kill $RACERPID || true
- sleep 2 # give chance racer to kill it's processes
- local dir
- for dir in $RDIRS; do
- do_racer_cleanup $dir
- done
- else
- echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
- kill $TIMERPID || true
- for dir in $RDIRS; do
- do_racer_cleanup $dir
- done
- fi
-}
-
-racer_timeout () {
- timeout="timeout"
- RACERPID=$(cat $PIDFILE)
- rm -f $PIDFILE
- racer_cleanup
- echo "$0: completed $RC"
- return $RC
-}
-
-build_test_filter
-check_and_setup_lustre
-trap racer_timeout ALRM
-
-# run racer
-test_1() {
- RC=0
-
- timer_on $((DURATION + 5))
-
- RACERPID=""
- for rdir in $RDIRS; do
- do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
- pid=$!
- RACERPID="$RACERPID $pid"
- done
-
- echo RACERPID=$RACERPID
- echo $RACERPID > $PIDFILE
- for rpid in $RACERPID; do
- wait $rpid
- rc=$?
- echo "rpid=$rpid rc=$rc"
- if [ $rc != 0 ]; then
- RC=$((RC + 1))
- fi
- done
-
- racer_cleanup
-
- return $RC
-}
-run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION"
-
-complete $(basename $0) $SECONDS
-check_and_cleanup_lustre
-exit_status