# Suite setup: locate the Lustre tree, load the test framework and the
# configuration file, find the racer driver, and derive the run parameters.
# NOTE(review): this excerpt appears to omit lines of the original script
# (for example, the `done` closing the loop below is not visible) - confirm
# against the full file before editing.

# Default LUSTRE to the parent of the directory that contains this script.
6 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
7 . $LUSTRE/tests/test-framework.sh
# Source the config file; if CONFIG is unset or empty it is assigned
# $LUSTRE/tests/cfg/$NAME.sh first.
9 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Look racer.sh up on PATH; lookup errors are discarded, so $racer is empty
# when it is not found and the next line exits with status 1.
12 racer=`which racer.sh 2> /dev/null`
14 [ -z "$racer" ] && echo racer is not installed && exit 1
# Defaults: CLIENTS falls back to this host, RACERDIRS falls back to $DIR.
16 CLIENTS=${CLIENTS:-$HOSTNAME}
17 RACERDIRS=${RACERDIRS:-$DIR}
18 echo RACERDIRS=$RACERDIRS
# Build RDIRS: one "<dir>/racer" entry per word in RACERDIRS.
19 for d in ${RACERDIRS}; do
20 RDIRS="$RDIRS $d/racer"
22 # lfs setstripe $d/racer -c -1
# Default DURATION is 120; it is passed to racer and used to arm the timer.
25 DURATION=${DURATION:-120}
# NOTE(review): fragment - the enclosing function header is not visible here;
# presumably this is the body of timer_on (called elsewhere with DURATION + 5)
# - confirm.
# In the background: sleep for $1, then send SIGALRM to this shell ($$).
31 sleep $1 && kill -s ALRM $$ &
# NOTE(review): TIMERPID is not assigned in the visible lines; presumably it
# is set from $! on a line missing from this excerpt - confirm.
33 echo TIMERPID=$TIMERPID
# NOTE(review): fragment - the function header and several structural lines
# (local declarations, else/fi/done, sleeps) are not visible in this excerpt;
# presumably this is do_racer_cleanup, which is called with one racer
# directory and reads it as $RDIR - confirm.
# Visible flow:
#   1. poll the clients until no process line matching $RDIR remains or the
#      accumulated WAIT reaches TMAX;
#   2. if rc is non-zero afterwards, send TERM to the matching PIDs per client;
#   3. re-check each of those PIDs and set RC=1 when the check succeeds.
# Note that lowercase rc drives this polling logic, while uppercase RC is the
# value reported elsewhere in the script ("completed $RC").
47 echo "DOING RACER CLEANUP ... "
49 # Check if all processes are killed
51 local clients=$CLIENTS
# CLIENTS is comma-separated; commas become spaces before counting nodes.
52 local num_clients=$(get_node_count ${clients//,/ })
# Choose the wait ceiling TMAX: from at_max_get when at_is_enabled succeeds,
# and from the lctl "timeout" parameter (the `else` separating the two
# assignments is presumably on a line not visible here - confirm).
54 if at_is_enabled; then
55 TMAX=$(at_max_get mds)
57 TMAX=$(lctl get_param -n timeout)
# Raise TMAX to at least 60 per client.
60 [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60))
61 # 1. Give racer a chance to kill all of its processes
62 # FIXME: not sure how long it takes for racer to kill all processes
63 # 80 sec is sometimes enough for 2 clients; sometimes it takes more than 150 sec
64 while [ $WAIT -lt $TMAX ]; do
# Collect ps lines on the clients that match $RDIR, dropping lines that mention
# acceptance, grep, pdsh or bash; `|| true` tolerates an empty result.
65 running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
# Nothing left: record success and stop polling.
66 [ -z "$running" ] && rc=0 && break
67 echo "clients $clients are still running the racer processes. Waited $WAIT secs"
# Double the polling interval while it is still below 40.
70 [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
72 WAIT=$((WAIT + INTERVAL))
75 # 2. Kill the remaining processes
76 if [ $rc -ne 0 ]; then
77 for C in ${clients//,/ } ; do
# Second column of the filtered `ps uax` output (the PID) for each match.
78 pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
79 if [ ! -z "$pids" ]; then
80 echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
# Print the surviving process lines before killing them.
81 do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
# Best effort: a failing kill is ignored.
82 do_node $C kill -TERM $pids || true
83 # give the processes time to die; there may be many threads to kill, so allow a 20 sec gap
85 # 3. Check if the processes were killed
86 # flag an error (RC=1) if the processes still exist
# NOTE(review): `ps -P $pid` is presumably meant to test whether $pid still
# exists; confirm the flag (procps selects by PID with lowercase -p).
88 do_node $C "ps -P $pid" && RC=1 || true
91 echo "All processes on client $C exited after $WAIT seconds. OK."
95 echo "No racer processes running after $WAIT seconds. OK."
# NOTE(review): wait_remote_prog is defined outside this excerpt; presumably it
# waits for the racer program on remote nodes, with 10 as a limit - confirm.
96 wait_remote_prog $racer 10
# NOTE(review): fragment - the enclosing function header and the
# else/done/fi lines are not visible in this excerpt - confirm the structure
# against the full file.
# Timeout path ($timeout equals "timeout"): kill the recorded racer PIDs,
# pause briefly, then run do_racer_cleanup for every entry in RDIRS.
101 if [ "$timeout" == "timeout" ]; then
102 echo $timeout killing RACERPID=$RACERPID
103 kill $RACERPID || true
104 sleep 2 # give racer a chance to kill its processes
106 for dir in $RDIRS; do
107 do_racer_cleanup $dir
# Normal-completion path: racer finished first, so cancel the timer process
# (best effort) and run the same per-directory cleanup.
110 echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
111 kill $TIMERPID || true
112 for dir in $RDIRS; do
113 do_racer_cleanup $dir
# NOTE(review): fragment - presumably part of racer_timeout, the handler that
# is installed for ALRM elsewhere in this script; header not visible - confirm.
# Reload the racer PIDs from PIDFILE (the launch code writes them there).
120 RACERPID=$(cat $PIDFILE)
# Report the script name and the current RC value.
123 echo "$0: completed $RC"
# Run check_and_setup_lustre (defined outside this excerpt), then install
# racer_timeout as the handler for SIGALRM.
128 check_and_setup_lustre
129 trap racer_timeout ALRM
# NOTE(review): fragment - presumably the body of the test started by
# `run_test 1`; the header and several lines (done/fi, assignments) are not
# visible in this excerpt - confirm.
# Arm the timer for DURATION plus 5.
135 timer_on $((DURATION + 5))
# Start racer through do_nodes for $CLIENTS, in the background, once per
# racer directory; DURATION is passed to it through the environment prefix.
138 for rdir in $RDIRS; do
139 do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
# NOTE(review): $pid is not assigned in the visible lines; presumably it is
# taken from $! right after the background launch - confirm.
141 RACERPID="$RACERPID $pid"
# Print the collected PIDs and persist them to PIDFILE, which is read back
# with `cat $PIDFILE` elsewhere in this script.
144 echo RACERPID=$RACERPID
145 echo $RACERPID > $PIDFILE
# Inspect the result of each launched job.
# NOTE(review): rc is not assigned in the visible lines; presumably it comes
# from waiting on $rpid - confirm.
146 for rpid in $RACERPID; do
149 echo "rpid=$rpid rc=$rc"
150 if [ $rc != 0 ]; then
# Run test 1 with a description naming the clients and duration, announce
# completion, and run check_and_cleanup_lustre (defined outside this excerpt).
159 run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION"
161 equals_msg `basename $0`: test complete, cleaning up
162 check_and_cleanup_lustre
# If the suite log exists, print it and exit 1 when it contains FAIL; the
# trailing `|| true` leaves a zero status in every other case.
163 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true