From f6796fea971503083308076ce78acfc385271ae4 Mon Sep 17 00:00:00 2001 From: grev Date: Tue, 16 Dec 2008 17:20:53 +0000 Subject: [PATCH] b=17839 i=Brian cmd3-11 ported to acc-sm t-f -- new RECOVERY_MDS_SCALE -- rundbench modified -- new t-f functions, run loads scripts --- lustre/tests/Makefile.am | 2 + lustre/tests/acceptance-small.sh | 11 +- lustre/tests/cfg/ncli.sh | 7 + lustre/tests/recovery-mds-scale.sh | 256 +++++++++++++++++++++++++++++++++++++ lustre/tests/run_dbench.sh | 65 ++++++++++ lustre/tests/run_dd.sh | 62 +++++++++ lustre/tests/run_iozone.sh | 77 +++++++++++ lustre/tests/run_tar.sh | 68 ++++++++++ lustre/tests/rundbench | 19 ++- lustre/tests/test-framework.sh | 116 ++++++++++++++++- 10 files changed, 676 insertions(+), 7 deletions(-) create mode 100644 lustre/tests/recovery-mds-scale.sh create mode 100755 lustre/tests/run_dbench.sh create mode 100755 lustre/tests/run_dd.sh create mode 100755 lustre/tests/run_iozone.sh create mode 100755 lustre/tests/run_tar.sh diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index ebda53f..c9c8800 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -17,6 +17,8 @@ noinst_SCRIPTS += mdsrate-create-large.sh mdsrate-lookup-1dir.sh noinst_SCRIPTS += mdsrate-stat-small.sh mdsrate-stat-large.sh noinst_SCRIPTS += lockorder.sh socketclient socketserver runmultiop_bg_pause noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh setup_kerberos.sh +noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh +noinst_SCRIPTS += run_dbench.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 7b320d8..7edce6f 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -23,7 +23,7 @@ fi [ "$DEBUG_OFF" ] || DEBUG_OFF="eval lctl set_param debug=\"$DEBUG_LVL\"" [ "$DEBUG_ON" ] || DEBUG_ON="eval lctl set_param debug=0x33f0484" -export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY" +export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY RECOVERY_MDS_SCALE" if [ "$ACC_SM_ONLY" ]; then for O in $TESTSUITE_LIST; do @@ -432,6 +432,15 @@ if [ "$PERFORMANCE_SANITY" != "no" ]; then PERFORMANCE_SANITY="done" fi +[ "$SLOW" = no ] && RECOVERY_MDS_SCALE="no" +[ "$RECOVERY_MDS_SCALE" != "no" ] && skip_remmds recovery-mds-scale && RECOVERY_MDS_SCALE=no && MSKIPPED=1 +[ "$RECOVERY_MDS_SCALE" != "no" ] && skip_remost recovery-mds-scale && RECOVERY_MDS_SCALE=no && OSKIPPED=1 +if [ "$RECOVERY_MDS_SCALE" != "no" ]; then + title recovery-mds-scale + bash recovery-mds-scale.sh + RECOVERY_MDS_SCALE="done" +fi + RC=$? 
 title FINISHED
 echo "Finished at `date` in $((`date +%s` - $STARTTIME))s"
diff --git a/lustre/tests/cfg/ncli.sh b/lustre/tests/cfg/ncli.sh
index c583c5f..a3fff9f 100644
--- a/lustre/tests/cfg/ncli.sh
+++ b/lustre/tests/cfg/ncli.sh
@@ -15,3 +15,10 @@ MPIBIN=${MPIBIN:-/testsuite/tests/`arch`/bin}
 export PATH=:$PATH:$MPIBIN
 MPIRUN=$(which mpirun) || true
 MPI_USER=${MPI_USER:-mpiuser}
+
+# for recovery scale tests
+# default boulder cluster iozone location
+export PATH=/opt/iozone/bin:$PATH
+SHARED_DIRECTORY=${SHARED_DIRECTORY:-""} # bug 17839 comment 65
+LOADS="dd tar dbench iozone"
+CLIENT_LOADS=($LOADS)
diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh
new file mode 100644
index 0000000..7440ed5
--- /dev/null
+++ b/lustre/tests/recovery-mds-scale.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+
+# Was Test 11 in cmd3.
+# For a duration of 24 hours, repeatedly fail over a random MDS at
+# 10-minute intervals and verify that no application errors occur.
+
+# The test runs one of the CLIENT_LOADS programs on each remote client.
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+SETUP=${SETUP:-""}
+CLEANUP=${CLEANUP:-""}
+. $LUSTRE/tests/test-framework.sh
+
+init_test_env $@
+
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+
+TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-mds-scale}
+DEBUGLOG=$TESTSUITELOG.debug
+exec 2>$DEBUGLOG
+echo "--- env ---" >&2
+env >&2
+echo "--- env ---" >&2
+set -x
+
+[ "$SHARED_DIRECTORY" ] || \
+    { skip "$0: Empty SHARED_DIRECTORY" && exit 0; }
+
+[ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
+[ $CLIENTCOUNT -ge 3 ] || \
+    { skip "$0 Need two or more clients, have $CLIENTCOUNT" && exit 0; }
+
+END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
+LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
+
+remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+
+build_test_filter
+
+check_and_setup_lustre
+rm -rf $DIR/[df][0-9]*
+
+# The test node needs to be insulated from a Lustre failure as much as
+# possible, so ideally not even the Lustre modules would be loaded here:
+# -- umount lustre
+# -- remove hostname from the clients list
+zconf_umount $(hostname) $MOUNT
+NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
+NODES_TO_USE=$(exclude_item_from_list $NODES_TO_USE $(hostname))
+
+check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
+
+MDTS=""
+for ((i=1; i<=$MDSCOUNT; i++)) do
+    MDTS="$MDTS mds$i"
+done
+MDTS=$(comma_list $MDTS)
+
+OSTS=""
+for ((i=1; i<=$OSTCOUNT; i++)) do
+    OSTS="$OSTS ost$i"
+done
+OSTS=$(comma_list $OSTS)
+
+ERRORS_OK="" # No application failures should occur during this test.
+FLAVOR=${FLAVOR:-"MDS"}
+
+rm -f $END_RUN_FILE
+
+vmstatLOG=${TESTSUITELOG}_$(basename $0 .sh).vmstat
+
+server_numfailovers () {
+    local facet
+    local var
+
+    for facet in $MDTS ${OSTS//,/ }; do
+        var=${facet}_nums
+        val=${!var}
+        if [ "$val" ] ; then
+            echo "$facet failed over $val times"
+        fi
+    done
+}
+
+summary_and_cleanup () {
+
+    local rc=$?
+    local var
+    trap 0
+
+    # A non-empty END_RUN_FILE means that at least one client load failed.
+    if [ -s $END_RUN_FILE ]; then
+        echo "Found the END_RUN_FILE file: $END_RUN_FILE"
+        cat $END_RUN_FILE
+        local END_RUN_NODE=
+        read END_RUN_NODE < $END_RUN_FILE
+
+        # A client load will stop (i.e. fail) when it finds the end run
+        # file.  That does not necessarily mean the load on that client
+        # actually failed; the first node recorded in the END_RUN_FILE
+        # is the one we are really interested in.
+        if [ -n "$END_RUN_NODE" ]; then
+            var=${END_RUN_NODE}_load
+            echo "Client load failed on node $END_RUN_NODE"
+            echo
+            echo "client $END_RUN_NODE load stdout and debug files :
+              ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}
+              ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug"
+        fi
+        rc=1
+    fi
+
+    echo $(date +'%F %H:%M:%S') Terminating client loads ...
+    echo "$0" >> $END_RUN_FILE
+    local result=PASS
+    [ $rc -eq 0 ] || result=FAIL
+
+    log "Duration: $DURATION
+Server failover period: $SERVER_FAILOVER_PERIOD seconds
+Exited after: $ELAPSED seconds
+Number of failovers before exit:
+$(server_numfailovers)
+Status: $result: rc=$rc"
+
+    # stop the vmstats on the OSTs
+    if [ "$VMSTAT" ]; then
+        do_nodes $(comma_list $(osts_nodes)) "test -f /tmp/vmstat.pid && \
+            { kill -s TERM \$(cat /tmp/vmstat.pid); rm -f /tmp/vmstat.pid; \
+            gzip -f9 $vmstatLOG-\$(hostname); }"
+    fi
+
+    # make sure the client loads die
+    do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE && \
+        { kill -s TERM \$(cat $LOAD_PID_FILE) || true; }"
+
+    # and free up the pdshes that started them, if any are still around
+    if [ -n "$CLIENT_LOAD_PIDS" ]; then
+        kill $CLIENT_LOAD_PIDS || true
+        sleep 5
+        kill -9 $CLIENT_LOAD_PIDS || true
+    fi
+    [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
+
+    exit $rc
+}
+
+#
+# MAIN
+#
+log "-----============= $0 starting =============-----"
+
+trap summary_and_cleanup EXIT INT
+
+DURATION=${DURATION:-$((60*60*24))}
+ELAPSED=0
+NUM_FAILOVERS=0
+
+# vmstat the OSTs
+if [ "$VMSTAT" ]; then
+    do_nodes $(comma_list $(osts_nodes)) "vmstat 1 > $vmstatLOG-\$(hostname) 2>/dev/null </dev/null & echo \$! > /tmp/vmstat.pid"
+fi
+
+# Start client loads.
+start_client_loads $NODES_TO_USE
+
+echo client load pids:
+if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $LOAD_PID_FILE"; then
+    if [ -e $DEBUGLOG ]; then
+        exec 2<&-
+        cat $DEBUGLOG
+        exit 3
+    fi
+fi
+
+START_TS=$(date +%s)
+CURRENT_TS=$START_TS
+
+if [ "$FLAVOR" == "MDS" ]; then
+    SERVER_FAILOVER_PERIOD=$MDS_FAILOVER_PERIOD
+    SERVERS=$MDTS
+else
+    SERVER_FAILOVER_PERIOD=$OSS_FAILOVER_PERIOD
+    SERVERS=$OSTS
+fi
+
+SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
+
+MINSLEEP=${MINSLEEP:-120}
+REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug 17839 comment 62
+REQFAIL=${REQFAIL:-$(( DURATION / SERVER_FAILOVER_PERIOD * REQFAIL_PERCENT / 100))}
+reqfail=0
+sleep=0
+while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
+
+    # In order to perform the expected number of failovers, we need to
+    # account for the following:
+    # 1) the time that has elapsed during the client load checking
+    # 2) the time it takes to perform a failover
+
+    it_time_start=$(date +%s)
+
+    SERVERFACET=$(get_random_entry $SERVERS)
+    var=${SERVERFACET}_nums
+
+    # Check that our client loads are still running. If any have died,
+    # that means they have died outside of recovery, which is unacceptable.
+
+    log "==== Checking the client loads BEFORE failover -- failure NOT OK \
+         ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
+
+    if ! check_client_loads $NODES_TO_USE; then
+        exit 4
+    fi
+
+    log "Starting failover on $SERVERFACET"
+
+    facet_failover "$SERVERFACET" || exit 1
+
+    # Check that our client loads are still running during failover.
+    # No application failures should occur.
+
+    log "==== Checking the client loads AFTER failover -- failure NOT OK"
+    if ! check_client_loads $NODES_TO_USE; then
+        log "Client load failed during failover. Exiting"
+        exit 5
+    fi
+
+    # Increment the number of failovers
+    NUM_FAILOVERS=$((NUM_FAILOVERS+1))
+    val=$((${!var} + 1))
+    eval $var=$val
+
+    CURRENT_TS=$(date +%s)
+    ELAPSED=$((CURRENT_TS - START_TS))
+
+    sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
+
+    # Keep count of the number of iterations when the time spent on
+    # failover plus the two client load checks exceeded
+    # ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
+    if [ $sleep -lt $MINSLEEP ]; then
+        reqfail=$((reqfail +1))
+        log "WARNING: failover and two check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP!
+Failed to meet interval $reqfail times ( REQFAIL=$REQFAIL ); have sleep=$sleep"
+        [ $reqfail -gt $REQFAIL ] && exit 6
+    fi
+
+    log "$SERVERFACET has failed over ${!var} times, and counting..."
+    if [ $sleep -gt 0 ]; then
+        echo "sleeping $sleep seconds ... "
+        sleep $sleep
+    fi
+done
+
+exit 0
diff --git a/lustre/tests/run_dbench.sh b/lustre/tests/run_dbench.sh
new file mode 100755
index 0000000..f82d9dd
--- /dev/null
+++ b/lustre/tests/run_dbench.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+set -x
+
+TMP=${TMP:-/tmp}
+
+TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-mds-scale}
+LOG=${TESTSUITELOG}_$(basename $0)-$(hostname)
+DEBUGLOG=${LOG}.debug
+
+mkdir -p ${LOG%/*}
+
+rm -f $LOG $DEBUGLOG
+exec 2>$DEBUGLOG
+
+if [ -z "$MOUNT" -o -z "$END_RUN_FILE" -o -z "$LOAD_PID_FILE" ]; then
+    echo "The following must be set: MOUNT END_RUN_FILE LOAD_PID_FILE"
+    exit 1
+fi
+
+echoerr () { echo "$@" 1>&2 ; }
+
+signaled() {
+    trap 0
+    echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
+    kill $load_pid
+    kill -TERM -$PPID
+    sleep 5
+    kill -KILL -$PPID
+}
+
+trap signaled TERM
+
+# recovery-mds-scale uses this to signal the client loads to die
+echo $$ >$LOAD_PID_FILE
+
+TESTDIR=$MOUNT/dbench-$(hostname)
+
+CONTINUE=true
+
+while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do
+    echoerr "$(date +'%F %H:%M:%S'): dbench run starting"
+
+    mkdir -p $TESTDIR
+    rundbench -D $TESTDIR 2 1>$LOG &
+    load_pid=$!
+
+    wait $load_pid
+    if [ ${PIPESTATUS[0]} -eq 0 ]; then
+        echoerr "$(date +'%F %H:%M:%S'): dbench succeeded"
+        cd $TMP
+        rm -rf $TESTDIR
+        echoerr "$(date +'%F %H:%M:%S'): dbench run finished"
+    else
+        echoerr "$(date +'%F %H:%M:%S'): dbench failed"
+        if [ -z "$ERRORS_OK" ]; then
+            echo $(hostname) >> $END_RUN_FILE
+        fi
+        if [ $BREAK_ON_ERROR ]; then
+            # break
+            CONTINUE=false
+        fi
+    fi
+done
+
+echoerr "$(date +'%F %H:%M:%S'): dbench run exiting"
diff --git a/lustre/tests/run_dd.sh b/lustre/tests/run_dd.sh
new file mode 100755
index 0000000..96a4950
--- /dev/null
+++ b/lustre/tests/run_dd.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -x
+
+TMP=${TMP:-/tmp}
+
+TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-mds-scale}
+LOG=${TESTSUITELOG}_$(basename $0)-$(hostname)
+DEBUGLOG=${LOG}.debug
+
+mkdir -p ${LOG%/*}
+
+rm -f $LOG $DEBUGLOG
+exec 2>$DEBUGLOG
+
+if [ -z "$MOUNT" -o -z "$END_RUN_FILE" -o -z "$LOAD_PID_FILE" ]; then
+    echo "The following must be set: MOUNT END_RUN_FILE LOAD_PID_FILE"
+    exit 1
+fi
+
+echoerr () { echo "$@" 1>&2 ; }
+
+signaled() {
+    echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
+    kill -TERM -$PPID
+    sleep 5
+    kill -KILL -$PPID
+}
+
+trap signaled TERM
+
+# recovery-mds-scale uses this to signal the client loads to die
+echo $$ >$LOAD_PID_FILE
+
+TESTDIR=$MOUNT/dd-$(hostname)
+
+CONTINUE=true
+while [ !
-e "$END_RUN_FILE" ] && $CONTINUE; do + echoerr "$(date +'%F %H:%M:%S'): dd run starting" + mkdir -p $TESTDIR + cd $TESTDIR + dd bs=4k count=1000000 if=/dev/zero of=$TESTDIR/dd-file 1>$LOG & + load_pid=$! + wait $load_pid + + if [ $? -eq 0 ]; then + echoerr "$(date +'%F %H:%M:%S'): dd succeeded" + cd $TMP + rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): dd run finished" + else + echoerr "$(date +'%F %H:%M:%S'): dd failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break + CONTINUE=false + fi + fi +done + +echoerr "$(date +'%F %H:%M:%S'): dd run exiting" diff --git a/lustre/tests/run_iozone.sh b/lustre/tests/run_iozone.sh new file mode 100755 index 0000000..2b71118 --- /dev/null +++ b/lustre/tests/run_iozone.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -x + +TMP=${TMP:-/tmp} + +TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-mds-scale} +LOG=${TESTSUITELOG}_$(basename $0)-$(hostname) +DEBUGLOG=${LOG}.debug + +mkdir -p ${LOG%/*} + +rm -f $LOG $DEBUGLOG +exec 2>$DEBUGLOG + +if [ -z "$MOUNT" -o -z "$END_RUN_FILE" -o -z "$LOAD_PID_FILE" ]; then + echo "The following must be set: MOUNT END_RUN_FILE LOAD_PID_FILE" + exit 1 +fi + +echoerr () { echo "$@" 1>&2 ; } + +signaled() { + echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate" + kill -TERM -$PPID + sleep 5 + kill -KILL -$PPID +} + +trap signaled TERM + +# recovery-mds-scale uses this to signal the client loads to die +echo $$ >$LOAD_PID_FILE + +TESTDIR=$MOUNT/iozone-$(hostname) + +# needed to debug oom problem +#echo 1 > /proc/sys/vm/vm_gfp_debug +#killpids="" +#vmstat 1 1000000 >$TMP/iozone.vmstat.out & +#killpids="$killpids $!" +#$LUSTRE_TESTS/runvmstat > $TMP/iozone.runvmstat.out & +#killpids="$killpids $!" + +CONTINUE=true +while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do + echoerr "$(date +'%F %H:%M:%S'): iozone run starting" + mkdir -p $TESTDIR + cd $TESTDIR + iozone -a -M -R -V 0xab -g 100M -q 512k -i0 -i1 -f $TESTDIR/iozone-file 1>$LOG & + load_pid=$! 
+ wait $load_pid + if [ ${PIPESTATUS[0]} -eq 0 ]; then + echoerr "$(date +'%F %H:%M:%S'): iozone succeeded" + cd $TMP + rm -rf $TESTDIR + if [ -d $TESTDIR ]; then + echoerr "$(date +'%F %H:%M:%S'): failed to remove $TESTDIR" + echo $(hostname) >> $END_RUN_FILE + CONTINUE=false + fi + echoerr "$(date +'%F %H:%M:%S'): iozone run finished" + else + echoerr "$(date +'%F %H:%M:%S'): iozone failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break + CONTINUE=false + fi + fi +done + +echoerr "$(date +'%F %H:%M:%S'): iozone run exiting" +#kill $killpids +#sleep 5 +#kill -9 $killpids diff --git a/lustre/tests/run_tar.sh b/lustre/tests/run_tar.sh new file mode 100755 index 0000000..7502c241 --- /dev/null +++ b/lustre/tests/run_tar.sh @@ -0,0 +1,68 @@ +#!/bin/bash +set -x + +TMP=${TMP:-/tmp} + +TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-mds-scale} +LOG=${TESTSUITELOG}_$(basename $0)-$(hostname) +DEBUGLOG=${LOG}.debug + +mkdir -p ${LOG%/*} + +rm -f $LOG $DEBUGLOG +exec 2>$DEBUGLOG + +if [ -z "$MOUNT" -o -z "$END_RUN_FILE" -o -z "$LOAD_PID_FILE" ]; then + echo "The following must be set: MOUNT END_RUN_FILE LOAD_PID_FILE" + exit 1 +fi + +echoerr () { echo "$@" 1>&2 ; } + +signaled() { + echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate" + kill -TERM -$PPID + sleep 5 + kill -KILL -$PPID +} + +trap signaled TERM + +# recovery-mds-scale uses this to signal the client loads to die +echo $$ >$LOAD_PID_FILE + +TESTDIR=$MOUNT/tar-$(hostname) + +CONTINUE=true +while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do + echoerr "$(date +'%F %H:%M:%S'): tar run starting" + mkdir -p $TESTDIR + cd $TESTDIR + tar cf - /etc | tar xf - 2>&1 | tee $LOG & + load_pid=$! +ps -e f -o "pid ppid pgrp comm" >$TMP/client-load.ps-list + wait $load_pid + RC=${PIPESTATUS[0]} + PREV_ERRORS=$(grep "exit delayed from previous errors" $LOG) || true + if [ $RC -ne 0 -a "$ERRORS_OK" -a "$PREV_ERRORS" ]; then + echoerr "$(date +'%F %H:%M:%S'): tar errors earlier, ignoring" + RC=0 + fi + if [ $RC -eq 0 ]; then + echoerr "$(date +'%F %H:%M:%S'): tar succeeded" + cd $TMP + rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): tar run finished" + else + echoerr "$(date +'%F %H:%M:%S'): tar failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break + CONTINUE=false + fi + fi +done + +echoerr "$(date +'%F %H:%M:%S'): tar run exiting" diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench index fb21863..c3fa9cb 100755 --- a/lustre/tests/rundbench +++ b/lustre/tests/rundbench @@ -27,7 +27,11 @@ mkdir -p $DIR TGT=$DIR/client.txt CLIENT_PREFIX="${DBENCH_LIB} /usr/share/dbench /usr/local/share /usr/lib/dbench" CLIENT_FILE="client.txt client_plain.txt dbench_client" -which dbench > /dev/null 2>&1 || { skip "$0: dbench not installed" && exit 0; } +if ! which dbench > /dev/null 2>&1 ; then + [ "$MISSING_DBENCH_OK" ] || { error "dbench is not installed !" && exit 3; } + skip "$0: dbench is not installed" + exit 0 +fi CLIENT="" for prefix in $CLIENT_PREFIX; do @@ -65,9 +69,20 @@ fi shift $((OPTIND - 1)) +trap ' +echo kill dbench main pid=$DBENCHPID +kill $DBENCHPID +rm -rf dbench $LIBS71 client.txt +exit 0 +' TERM + cd $DIR echo "running 'dbench $@' $PREFIX $PWD at `date`" -$RUN dbench -c client.txt $@ + +$RUN dbench -c client.txt $@ & +DBENCHPID=$! +echo "dbench PID=$DBENCHPID" +wait $DBENCHPID RC=$? 
[ $RC -ne 0 ] && killall -9 dbench diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 440887e..4f19d4f 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -580,7 +580,7 @@ zconf_umount_clients() { } shutdown_facet() { - facet=$1 + local facet=$1 if [ "$FAILURE_MODE" = HARD ]; then $POWER_DOWN `facet_active_host $facet` sleep 2 @@ -605,6 +605,92 @@ boot_node() { fi } +# recovery-scale functions +check_progs_installed () { + local clients=$1 + shift + local progs=$@ + + do_nodes $clients "set -x ; PATH=:$PATH status=true; for prog in $progs; do + which \\\$prog || { echo \\\$prog missing on \\\$(hostname) && status=false; } + done; + eval \\\$status" +} + +start_client_load() { + local list=(${1//,/ }) + local nodenum=$2 + + local numloads=${#CLIENT_LOADS[@]} + local testnum=$((nodenum % numloads)) + + do_node ${list[nodenum]} "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \ + BREAK_ON_ERROR=$BREAK_ON_ERROR \ + END_RUN_FILE=$END_RUN_FILE \ + LOAD_PID_FILE=$LOAD_PID_FILE \ + TESTSUITELOG=$TESTSUITELOG \ + run_${CLIENT_LOADS[testnum]}.sh" & + CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $!" + log "Started client load: ${CLIENT_LOADS[testnum]} on ${list[nodenum]}" + + eval export ${list[nodenum]}_load=${CLIENT_LOADS[testnum]} + return 0 +} + +start_client_loads () { + local clients=(${1//,/ }) + + for ((num=0; num < ${#clients[@]}; num++ )); do + start_client_load $1 $num + done +} + +# only for remote client +check_client_load () { + local client=$1 + local var=${client}_load + + local TESTLOAD=run_${!var}.sh + + ps auxww | grep -v grep | grep $client | grep -q "$TESTLOAD" || return 1 + + check_catastrophe $client || return 2 + + # see if the load is still on the client + local tries=3 + local RC=254 + while [ $RC = 254 -a $tries -gt 0 ]; do + let tries=$tries-1 + # assume success + RC=0 + if ! do_node $client "ps auxwww | grep -v grep | grep -q $TESTLOAD"; then + RC=${PIPESTATUS[0]} + sleep 30 + fi + done + if [ $RC = 254 ]; then + echo "got a return status of $RC from do_node while checking (i.e. with 'ps') the client load on the remote system" + # see if we can diagnose a bit why this is + fi + + return $RC +} +check_client_loads () { + local clients=${1//,/ } + local client= + local rc=0 + + for client in $clients; do + check_client_load $client + rc=$? 
+ if [ "$rc" != 0 ]; then + log "Client load failed on node $client, rc=$rc" + return $rc + fi + done +} +# End recovery-scale functions + # verify that lustre actually cleaned up properly cleanup_check() { [ -f $CATASTROPHE ] && [ `cat $CATASTROPHE` -ne 0 ] && \ @@ -1403,6 +1489,16 @@ comma_list() { echo "$*" | tr -s " " "\n" | sort -b -u | tr "\n" " " | sed 's/ \([^$]\)/,\1/g' } +# list is comma separated list +exclude_item_from_list () { + local list=$1 + local excluded=$2 + + list=${list//,/ } + list=$(echo " $list " | sed -re "s/\s+$excluded\s+/ /g") + echo $(comma_list $list) +} + absolute_path() { (cd `dirname $1`; echo $PWD/`basename $1`) } @@ -1982,6 +2078,18 @@ init_clients_lists () { CLIENTCOUNT=$((${#remoteclients[@]} + 1)) } +get_random_entry () { + local rnodes=$1 + + rnodes=${rnodes//,/ } + + local nodes=($rnodes) + local num=${#nodes[@]} + local i=$((RANDOM * num / 65536)) + + echo ${nodes[i]} +} + is_patchless () { lctl get_param version | grep -q patchless @@ -2156,11 +2264,11 @@ restore_lustre_params() { } check_catastrophe () { - local rnodes=$(comma_list $(remote_nodes_list)) + local rnodes=${1:-$(comma_list $(remote_nodes_list))} - [ -f $CATASTROPHE ] && [ `cat $CATASTROPHE` -ne 0 ] && return 1 + [ -f $CATASTROPHE ] && [ $(cat $CATASTROPHE) -ne 0 ] && return 1 if [ $rnodes ]; then - do_nodes $rnodes "[ -f $CATASTROPHE ] && { [ \`cat $CATASTROPHE\` -eq 0 ] || false; } || true" + do_nodes $rnodes "set -x; [ -f $CATASTROPHE ] && { [ \`cat $CATASTROPHE\` -eq 0 ] || false; } || true" fi } -- 1.8.3.1
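A brief usage sketch for reviewers, not part of the patch itself: the invocation below is hypothetical and only illustrates the knobs the new suite reads. The shared directory path, the shortened DURATION and SERVER_FAILOVER_PERIOD values, and the choice of NAME=ncli are placeholders; the script's own defaults are 24 hours and 10 minutes, and the remote client list is expected to come from the environment or the multi-client config (cfg/ncli.sh).

    cd lustre/tests

    # Hypothetical direct run. SHARED_DIRECTORY must be visible to every
    # client node (bug 17839 comment 65), and the config must provide two
    # or more remote clients (CLIENTCOUNT >= 3 including the local node).
    NAME=ncli \
    SHARED_DIRECTORY=/export/shared \
    DURATION=$((60 * 60)) \
    SERVER_FAILOVER_PERIOD=300 \
    bash recovery-mds-scale.sh

    # Or let acceptance-small.sh drive it; the suite is skipped when SLOW=no.
    SLOW=yes ACC_SM_ONLY=RECOVERY_MDS_SCALE bash acceptance-small.sh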