Whamcloud - gitweb
b=16488
author grev <grev>
Wed, 12 Nov 2008 16:07:34 +0000 (16:07 +0000)
committer grev <grev>
Wed, 12 Nov 2008 16:07:34 +0000 (16:07 +0000)
i=Oleg.Drokin
new runracer script

lustre/tests/runracer [new file with mode: 0644]

diff --git a/lustre/tests/runracer b/lustre/tests/runracer
new file mode 100644 (file)
index 0000000..fcc26ed
--- /dev/null
@@ -0,0 +1,113 @@
+#!/bin/bash
+#set -vx
+set -e
+
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+
+racer=`which racer.sh`
+[ -z "$racer" ] && echo racer is not installed && exit 1
+
+CLIENTS=${CLIENTS:-$HOSTNAME}
+RDIR=$DIR/racer
+mkdir -p $RDIR
+DURATION=${DURATION:-120}
+
+assert_env CLIENTS
+
+timer_on () {
+       sleep $1 && kill -s ALRM $$ &
+       TIMERPID=$!
+       echo TIMERPID=$TIMERPID
+}
+
+do_racer_cleanup () {
+       trap 0
+
+       local WAIT=0
+       local INTERVAL=5
+        local pids
+       local rc=0
+
+       echo "DOING RACER CLEANUP ... "
+
+       # Check if all processes are killed
+
+       local clients=$CLIENTS
+
+       # 1.Let chance to racer to kill all it's processes
+       # FIXME: not sure how long does it take for racer to kill all processes
+       # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec
+       while [ $WAIT -lt 90 ]; do
+               running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
+               [ -z "$running" ] && rc=0 && break
+               echo "clients $clients are still running the racer processes. Waited $WAIT secs"
+               echo $running
+               rc=1
+               [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
+               sleep $INTERVAL
+               WAIT=$((WAIT + INTERVAL))
+       done
+
+       # 2. Kill the remaining processes
+       if [ $rc -ne 0 ]; then
+               for C in ${clients//,/ } ; do
+                       pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
+                       if [ ! -z "$pids" ]; then
+                               echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
+                               do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
+                               do_node $C kill -TERM $pids || true
+                               # let processes to be killed
+                               sleep 2
+       # 3. Check if the processes were killed
+       # exit error if the processes still exist
+                               for pid in $pids; do
+                                       do_node $C "ps -P $pid" && RC=1 || true
+                               done
+                       else
+                               echo "All processes on client $C exited after $WAIT seconds. OK."
+                       fi
+               done
+       else
+               echo "No racer processes running after $WAIT seconds. OK."
+               wait_remote_prog $racer 10
+       fi
+}
+
+racer_cleanup () {
+       if [ "$timeout" == "timeout" ]; then
+               echo $timeout killing RACERPID=$RACERPID
+               kill $RACERPID || true
+               sleep 2 # give chance racer to kill it's processes
+               do_racer_cleanup
+       else
+               echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
+               kill $TIMERPID
+               do_racer_cleanup
+       fi
+}
+
+racer_timeout () {
+       timeout="timeout"
+       racer_cleanup
+       echo "$0: completed $RC"
+       exit $RC
+}
+
+# run racer
+log "Start racer on clients: $CLIENTS DURATION=$DURATION"
+RC=0
+
+trap racer_timeout ALRM
+
+timer_on $((DURATION + 5))
+
+do_nodes $CLIENTS "DURATION=$DURATION $racer $RDIR" &
+RACERPID=$!
+echo RACERPID=$RACERPID
+wait $RACERPID || RC=2
+racer_cleanup
+echo "$0: completed $RC"
+exit $RC