# Environment setup for the racer run: locate the Lustre tree, load the test
# framework and the configuration, find racer.sh, and derive the list of
# working directories.
# NOTE(review): every line in this chunk begins with a bare number (5, 6, 8, ...)
# and the numbering has gaps. This looks like extraction residue with some
# original lines missing -- confirm against the real file before running it.
#
# LUSTRE defaults to the parent of the directory that contains this script.
5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Load the shared test framework. Presumably this is where helpers used later
# (do_nodes, do_node, get_node_count, at_is_enabled, log, ...) come from; none
# of them is defined in the visible lines.
6 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. The := form also assigns the default path to
# CONFIG when CONFIG is unset or empty. NAME is not set in the visible lines.
8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Resolve racer.sh through PATH; stop with exit status 1 when it is not found.
10 racer=`which racer.sh`
11 [ -z "$racer" ] && echo racer is not installed && exit 1
# Client list; defaults to the local host name. Later code splits it on commas.
13 CLIENTS=${CLIENTS:-$HOSTNAME}
# Base directories for the run; defaults to DIR (not set in the visible lines).
15 RACERDIRS=${RACERDIRS:-$DIR}
16 echo RACERDIRS=$RACERDIRS
# Build RDIRS by appending a "racer" subdirectory path for each base directory.
# NOTE(review): the closing "done" of this loop is not visible in this chunk.
17 for d in ${RACERDIRS}; do
18 RDIRS="$RDIRS $d/racer"
20 # lfs setstripe $d/racer -c -1
# Run length; defaults to 120. Later passed to racer.sh through its environment
# and used (plus 5) as the timer argument.
23 DURATION=${DURATION:-120}
# Watchdog timer fragment. Presumably the body of timer_on, which is called
# further down with DURATION + 5; the function header is not visible here.
# A background job sleeps for $1 seconds and then sends SIGALRM to the process
# whose ID is $$ (in bash, $$ keeps the PID of the invoking shell even inside
# a background subshell).
28 sleep $1 && kill -s ALRM $$ &
# NOTE(review): the assignment of TIMERPID is not visible in this chunk
# (presumably taken from $! right after the background job is started).
30 echo TIMERPID=$TIMERPID
# Cleanup fragment. Presumably the body of do_racer_cleanup, which is invoked
# elsewhere with one racer directory as its argument; the function header and
# the initialisation of RDIR, WAIT, INTERVAL and rc are not visible here.
# Visible flow: (1) poll the clients until no process matching RDIR remains or
# a time limit is reached, (2) send SIGTERM to whatever is left, (3) check
# whether the signalled processes are gone.
44 echo "DOING RACER CLEANUP ... "
46 # Check if all processes are killed
# Work on a local copy of the comma-separated client list and count its nodes
# (commas are replaced by spaces before calling get_node_count).
48 local clients=$CLIENTS
49 local num_clients=$(get_node_count ${clients//,/ })
# Choose the time limit TMAX: the value reported by at_max_get for mds when
# at_is_enabled succeeds, otherwise (the else line is not visible) the value of
# the lctl "timeout" parameter.
51 if at_is_enabled; then
52 TMAX=$(at_max_get mds)
54 TMAX=$(lctl get_param -n timeout)
# Raise TMAX to at least 60 per client.
57 [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60))
58 # 1.Let chance to racer to kill all it's processes
59 # FIXME: not sure how long does it take for racer to kill all processes
60 # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec
# Poll loop, bounded by TMAX. The ps/grep pipeline runs on the clients through
# do_nodes; the egrep -v filter runs locally on the collected output and drops
# lines mentioning acceptance, grep, pdsh or bash. The trailing "|| true"
# keeps an empty match from producing a failing status.
61 while [ $WAIT -lt $TMAX ]; do
62 running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
# Nothing left running: record success and leave the loop.
63 [ -z "$running" ] && rc=0 && break
64 echo "clients $clients are still running the racer processes. Waited $WAIT secs"
# Back-off: INTERVAL is doubled as long as it is still below 40.
# NOTE(review): the lines between 64 and 69 of the original numbering are only
# partly visible; the actual sleep and any rc update are presumably there.
67 [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
69 WAIT=$((WAIT + INTERVAL))
72 # 2. Kill the remaining processes
# Only entered when rc is non-zero, i.e. the poll loop did not record success.
73 if [ $rc -ne 0 ]; then
74 for C in ${clients//,/ } ; do
# Per client: collect the second column of the ps uax output (the PID) for
# lines matching RDIR. Note this filter list (acceptance, grep, PATH) differs
# from the one used in the poll loop above.
75 pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
76 if [ ! -z "$pids" ]; then
77 echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
# Print the matching process lines, then send SIGTERM; a failing kill is
# ignored.
78 do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
79 do_node $C kill -TERM $pids || true
80 # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap
82 # 3. Check if the processes were killed
83 # exit error if the processes still exist
# RC (upper case, distinct from the lower-case rc used above) is set to 1 when
# the remote ps command succeeds for the given pid.
# NOTE(review): the loop that binds pid is not visible here (presumably it
# iterates over pids). Also confirm that upper-case -P is the intended ps
# option; lower-case -p is the usual select-by-PID flag.
85 do_node $C "ps -P $pid" && RC=1 || true
# NOTE(review): presumably the else branch of the non-empty pids test; the
# else line itself is not visible.
88 echo "All processes on client $C exited after $WAIT seconds. OK."
# NOTE(review): presumably the else branch of the rc test above (nothing had
# to be killed); the else line itself is not visible.
92 echo "No racer processes running after $WAIT seconds. OK."
# Framework helper, called with the racer.sh path and 10; its semantics are not
# visible in this chunk.
93 wait_remote_prog $racer 10
# Handler fragment that runs the per-directory cleanup either after a timeout
# or after normal completion.
# NOTE(review): function headers are not visible. These lines presumably span
# a cleanup helper plus the racer_timeout handler that is installed on ALRM
# further down; confirm the boundaries against the real file.
#
# Timeout path: taken when the variable timeout holds the string "timeout"
# (its assignment is not visible in this chunk).
98 if [ "$timeout" == "timeout" ]; then
99 echo $timeout killing RACERPID=$RACERPID
# RACERPID is expanded unquoted, so each collected PID becomes its own argument
# to kill; a failing kill is ignored.
100 kill $RACERPID || true
101 sleep 2 # give chance racer to kill it's processes
# Run the cleanup once per racer directory.
103 for dir in $RDIRS; do
104 do_racer_cleanup $dir
# NOTE(review): presumably the else branch (racer finished before the timer
# fired); the else line itself is not visible.
107 echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
# Same per-directory cleanup as in the timeout path.
109 for dir in $RDIRS; do
110 do_racer_cleanup $dir
# Report the accumulated result code RC.
118 echo "$0: completed $RC"
# Main sequence: announce the run, arm the timeout, start racer.sh on the
# clients for every racer directory, then report a result per started job.
123 log "Start racer on clients: $CLIENTS DURATION=$DURATION"
# When SIGALRM arrives, racer_timeout is invoked.
126 trap racer_timeout ALRM
# Arm the timer with DURATION + 5 as its argument.
128 timer_on $((DURATION + 5))
# Start one background do_nodes job per racer directory. DURATION is passed to
# racer.sh through the command environment; the directory and
# NUM_RACER_THREADS are passed as arguments. NUM_RACER_THREADS is not set in
# the visible lines and is expanded unquoted, so it disappears when empty.
131 for rdir in $RDIRS; do
132 do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
# Collect the job PIDs in a space-separated list.
# NOTE(review): the assignment of pid is not visible (presumably from $!).
134 RACERPID="$RACERPID $pid"
137 echo RACERPID=$RACERPID
# Report the result for each started job.
# NOTE(review): the statement that sets rc for each rpid is not visible
# (presumably a wait on rpid followed by reading its status), and neither is
# the body of the rc test below.
138 for rpid in $RACERPID; do
141 echo "rpid=$rpid rc=$rc"
142 if [ $rc != 0 ]; then
# Final status line with the accumulated result code RC.
148 echo "$0: completed $RC"