From: Elena Gryaznova Date: Wed, 2 Mar 2011 21:04:39 +0000 (+0300) Subject: b=24451 racer test cleanup X-Git-Tag: 1.8.5.55~4 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=10ccc927fb3e95bf83fa746a8308804913ab1281;p=fs%2Flustre-release.git b=24451 racer test cleanup i=Vladimir.Saveliev i=Andrew.Perepechko - modify racer/racer.sh to wait the process killed, exit 1 if the process are still existing; - remove runracer; --- diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 7903112..2262fa4 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -13,7 +13,7 @@ noinst_SCRIPTS += sanity.sh rundbench acceptance-small.sh compile.sh noinst_SCRIPTS += conf-sanity.sh insanity.sh lfsck.sh oos.sh oos2.sh noinst_SCRIPTS += llog-test.sh recovery-small.sh replay-dual.sh sanity-quota.sh noinst_SCRIPTS += replay-ost-single.sh replay-single.sh run-llog.sh sanityn.sh -noinst_SCRIPTS += large-scale.sh runracer racer.sh performance-sanity.sh +noinst_SCRIPTS += large-scale.sh racer.sh performance-sanity.sh noinst_SCRIPTS += mdsrate-create-small.sh mdsrate-create-large.sh noinst_SCRIPTS += mdsrate-lookup-1dir.sh mdsrate-lookup-10dirs.sh noinst_SCRIPTS += mdsrate-stat-small.sh mdsrate-stat-large.sh replay-vbr.sh diff --git a/lustre/tests/racer.sh b/lustre/tests/racer.sh index f862012..3567ebd 100644 --- a/lustre/tests/racer.sh +++ b/lustre/tests/racer.sh @@ -1,3 +1,61 @@ #!/bin/bash +#set -vx +set -e -bash $(dirname $0)/runracer $@ +ONLY=${ONLY:-"$*"} +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + +racer=$LUSTRE/tests/racer/racer.sh +echo racer: $racer + +CLIENTS=${CLIENTS:-$HOSTNAME} +RACERDIRS=${RACERDIRS:-$DIR} +echo RACERDIRS=$RACERDIRS +for d in ${RACERDIRS}; do + RDIRS="$RDIRS $d/racer" + mkdir -p $d/racer +# lfs setstripe $d/racer -c -1 +done + +DURATION=${DURATION:-900} +[ "$SLOW" = "no" ] && DURATION=300 + +build_test_filter +check_and_setup_lustre + +# run racer +test_1() { + local rrc=0 + local rc=0 + local clients=${CLIENTS:-$(hostname)} + + check_progs_installed $clients $racer || \ + { skip_env "$racer not found" && return 0; } + + local rpids="" + for rdir in $RDIRS; do + do_nodes $clients "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & + pid=$! + rpids="$rpids $pid" + done + + echo racers pids: $rpids + for pid in $rpids; do + wait $pid + rc=$? + echo "pid=$pid rc=$rc" + if [ $rc != 0 ]; then + rrc=$((rrc + 1)) + fi + done + + return $rrc +} +run_test 1 "racer on clients: ${CLIENTS:-$(hostname)} DURATION=$DURATION" + +complete $(basename $0) $SECONDS +check_and_cleanup_lustre +exit_status diff --git a/lustre/tests/racer/racer.sh b/lustre/tests/racer/racer.sh index 1274d02..efd3bcd 100755 --- a/lustre/tests/racer/racer.sh +++ b/lustre/tests/racer/racer.sh @@ -1,4 +1,5 @@ #!/bin/bash +#set -x MAX_FILES=${MAX_FILES:-20} DIR=${DIR:-$1} @@ -10,23 +11,52 @@ NUM_THREADS=${NUM_THREADS:-3} mkdir -p $DIR -RACER_PROGS="file_create dir_create file_rm file_rename file_link file_symlink +RACER_PROGS="file_create dir_create file_rm file_rename file_link file_symlink \ file_list file_concat" racer_cleanup() { + echo "racer cleanup" for P in $RACER_PROGS; do killall $P.sh done trap 0 + + local TOT_WAIT=0 + local MAX_WAIT=$DURATION + local SHORT_WAIT=5 + + local rc + while [[ $TOT_WAIT -le $MAX_WAIT ]]; do + rc=0 + echo sleeping $SHORT_WAIT sec ... + sleep $SHORT_WAIT + # this only checks whether processes exist + for P in $RACER_PROGS; do + killall -0 $P.sh + [[ $? -eq 0 ]] && (( rc+=1 )) + done + if [[ $rc -eq 0 ]]; then + echo there should be NO racer processes: + ps aux | grep -E "${RACER_PROGS// /|}" + return 0 + fi + echo -n "Waited $(( TOT_WAIT + SHORT_WAIT)), rc=$rc " + (( SHORT_WAIT+=SHORT_WAIT )) + (( TOT_WAIT+=SHORT_WAIT )) + done + ps aux | grep -E "${RACER_PROGS// /|}" + return 1 } +RC=0 + echo "Running $0 for $DURATION seconds. CTRL-C to exit" trap " echo \"Cleaning up\" racer_cleanup exit 0 -" 2 15 +" INT TERM cd `dirname $0` for N in `seq 1 $NUM_THREADS`; do @@ -36,11 +66,11 @@ for N in `seq 1 $NUM_THREADS`; do done sleep $DURATION -racer_cleanup +racer_cleanup || RC=$? # Check our to see whether our test DIR is still available. df $DIR -RC=$? +(( RC+=$? )) if [ $RC -eq 0 ]; then echo "We survived $0 for $DURATION seconds." fi diff --git a/lustre/tests/rpc.sh b/lustre/tests/rpc.sh index 11c5181..15e960a 100755 --- a/lustre/tests/rpc.sh +++ b/lustre/tests/rpc.sh @@ -4,7 +4,7 @@ NAME=${NAME:-local} LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh -init_test_env $@ +init_test_env . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} cmd=$1 diff --git a/lustre/tests/runracer b/lustre/tests/runracer deleted file mode 100644 index d81d4b1..0000000 --- a/lustre/tests/runracer +++ /dev/null @@ -1,149 +0,0 @@ -#!/bin/bash -#set -vx -set -e - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} -. $LUSTRE/tests/test-framework.sh -init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} - -racer=`which racer.sh` -[ -z "$racer" ] && echo racer is not installed && exit 1 - -CLIENTS=${CLIENTS:-$HOSTNAME} -RACERDIRS=$@ -RACERDIRS=${RACERDIRS:-$DIR} -echo RACERDIRS=$RACERDIRS -for d in ${RACERDIRS}; do - RDIRS="$RDIRS $d/racer" - mkdir -p $d/racer -# lfs setstripe $d/racer -c -1 -done - -DURATION=${DURATION:-120} - -assert_env CLIENTS - -timer_on () { - sleep $1 && kill -s ALRM $$ & - TIMERPID=$! - echo TIMERPID=$TIMERPID -} - -do_racer_cleanup () { - trap 0 - - local WAIT=0 - local INTERVAL=5 - local pids - local rc=0 - local TMAX - - local RDIR=$1 - - echo "DOING RACER CLEANUP ... " - - # Check if all processes are killed - - local clients=$CLIENTS - local num_clients=$(get_node_count ${clients//,/ }) - - if at_is_enabled; then - TMAX=$(at_max_get mds) - else - TMAX=$(lctl get_param -n timeout) - fi - - [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60)) - # 1.Let chance to racer to kill all it's processes - # FIXME: not sure how long does it take for racer to kill all processes - # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec - while [ $WAIT -lt $TMAX ]; do - running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true) - [ -z "$running" ] && rc=0 && break - echo "clients $clients are still running the racer processes. Waited $WAIT secs" - echo $running - rc=1 - [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL)) - sleep $INTERVAL - WAIT=$((WAIT + INTERVAL)) - done - - # 2. Kill the remaining processes - if [ $rc -ne 0 ]; then - for C in ${clients//,/ } ; do - pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true) - if [ ! -z "$pids" ]; then - echo "client $C still running racer processes after $WAIT seconds. Killing $pids" - do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" - do_node $C kill -TERM $pids || true - # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap - sleep 20 - # 3. Check if the processes were killed - # exit error if the processes still exist - for pid in $pids; do - do_node $C "ps -P $pid" && RC=1 || true - done - else - echo "All processes on client $C exited after $WAIT seconds. OK." - fi - done - else - echo "No racer processes running after $WAIT seconds. OK." - wait_remote_prog $racer 10 - fi -} - -racer_cleanup () { - if [ "$timeout" == "timeout" ]; then - echo $timeout killing RACERPID=$RACERPID - kill $RACERPID || true - sleep 2 # give chance racer to kill it's processes - local dir - for dir in $RDIRS; do - do_racer_cleanup $dir - done - else - echo "Racer completed before DURATION=$DURATION expired. Cleaning up..." - kill $TIMERPID - for dir in $RDIRS; do - do_racer_cleanup $dir - done - fi -} - -racer_timeout () { - timeout="timeout" - racer_cleanup - echo "$0: completed $RC" - exit $RC -} - -# run racer -log "Start racer on clients: $CLIENTS DURATION=$DURATION" -RC=0 - -trap racer_timeout ALRM - -timer_on $((DURATION + 5)) - -RACERPID="" -for rdir in $RDIRS; do - do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & - pid=$! - RACERPID="$RACERPID $pid" -done - -echo RACERPID=$RACERPID -for rpid in $RACERPID; do - wait $rpid - rc=$? - echo "rpid=$rpid rc=$rc" - if [ $rc != 0 ]; then - RC=$((RC + 1)) - fi -done - -racer_cleanup -complete $(basename $0) $SECONDS -exit $RC diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 5414b27..8f5443a 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -59,7 +59,6 @@ print_summary () { local o=$(echo $O | tr "[:upper:]" "[:lower:]") o=${o//_/-} local log=${TMP}/${o}.log - [ "$o" = racer ] && log=${TMP}/runracer.log if is_sanity_benchmark $o; then log=${TMP}/sanity-benchmark.log fi