2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:autoindent:shiftwidth=4:tabstop=4:
# Top-level setup: locate the Lustre tree, load the test framework and its
# configuration, and derive the client list, working directories and run
# duration used by the rest of the script.

# LUSTRE defaults to the parent of the directory containing this script,
# unless the caller already exported it.
9 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Pull in the shared test helpers from the Lustre tests directory.
10 . $LUSTRE/tests/test-framework.sh
# Source the configuration file; ":=" also assigns the default path to CONFIG
# when CONFIG is unset or empty. NAME is not set in the visible lines --
# presumably provided by the environment or the framework; TODO confirm.
12 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Path of the racer driver script that is launched on the clients.
15 racer=$LUSTRE/tests/racer/racer.sh
# NOTE(review): this only tests whether the variable is empty. Given the
# assignment just above (as far as the visible lines show) it is never empty,
# so this does not verify that the script exists -- confirm whether a file
# test was intended.
17 [ -z "$racer" ] && echo racer is not installed && exit 1
# Clients default to the local host when CLIENTS is not supplied.
19 CLIENTS=${CLIENTS:-$HOSTNAME}
# Directories to run racer in; defaults to $DIR (not set in the visible lines).
20 RACERDIRS=${RACERDIRS:-$DIR}
21 echo RACERDIRS=$RACERDIRS
# Build RDIRS by appending "<dir>/racer" for every whitespace-separated entry
# of RACERDIRS (the expansion is unquoted, so it is word-split).
# NOTE(review): the loop's closing "done" is not among the visible lines.
22 for d in ${RACERDIRS}; do
23 RDIRS="$RDIRS $d/racer"
25 # lfs setstripe $d/racer -c -1
# Run length for the test (presumably seconds -- TODO confirm); defaults to 900.
28 DURATION=${DURATION:-900}
# When SLOW is "no" the duration is forced to 300, overriding any value the
# caller supplied for DURATION.
29 [ "$SLOW" = "no" ] && DURATION=300
36 sleep $1 && kill -s ALRM $$ &
38 echo TIMERPID=$TIMERPID
52 echo "DOING RACER CLEANUP ... "
54 # Check if all processes are killed
56 local clients=$CLIENTS
57 local num_clients=$(get_node_count ${clients//,/ })
59 if at_is_enabled; then
60 TMAX=$(at_max_get mds)
62 TMAX=$(lctl get_param -n timeout)
65 [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60))
66 # 1. Give racer a chance to kill all of its processes
67 # FIXME: not sure how long it takes for racer to kill all processes
68 # 80 sec is sometimes enough for 2 clients; sometimes it takes more than 150 sec
69 while [ $WAIT -lt $TMAX ]; do
70 running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
71 [ -z "$running" ] && rc=0 && break
72 echo "clients $clients are still running the racer processes. Waited $WAIT secs"
75 [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
77 WAIT=$((WAIT + INTERVAL))
80 # 2. Kill the remaining processes
81 if [ $rc -ne 0 ]; then
82 for C in ${clients//,/ } ; do
83 pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
84 if [ ! -z "$pids" ]; then
85 echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
86 do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
87 do_node $C kill -TERM $pids || true
88 # give the processes time to die; there may be many threads to kill, so allow a 20 sec gap
90 # 3. Check if the processes were killed
91 # exit error if the processes still exist
93 do_node $C "ps -P $pid" && RC=1 || true
96 echo "All processes on client $C exited after $WAIT seconds. OK."
100 echo "No racer processes running after $WAIT seconds. OK."
101 wait_remote_prog $racer 10
106 if [ "$timeout" == "timeout" ]; then
107 echo $timeout killing RACERPID=$RACERPID
108 kill $RACERPID || true
109 sleep 2 # give chance racer to kill it's processes
111 for dir in $RDIRS; do
112 do_racer_cleanup $dir
115 echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
116 kill $TIMERPID || true
117 for dir in $RDIRS; do
118 do_racer_cleanup $dir
125 RACERPID=$(cat $PIDFILE)
128 echo "$0: completed $RC"
133 check_and_setup_lustre
134 trap racer_timeout ALRM
140 timer_on $((DURATION + 5))
143 for rdir in $RDIRS; do
144 do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
146 RACERPID="$RACERPID $pid"
149 echo RACERPID=$RACERPID
150 echo $RACERPID > $PIDFILE
151 for rpid in $RACERPID; do
154 echo "rpid=$rpid rc=$rc"
155 if [ $rc != 0 ]; then
164 run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION"
166 complete $(basename $0) $SECONDS
167 check_and_cleanup_lustre