From fb05f2d177fbcfc2499008a4bb04fdf64ab19466 Mon Sep 17 00:00:00 2001
From: Yu Jian
Date: Wed, 7 Mar 2012 22:45:27 +0800
Subject: [PATCH] LU-734 tests: add sub-tests into recovery-*-scale tests

This patch adds sub-tests to the recovery-*-scale tests so that
test results and logs can be gathered properly and uploaded to Maloo.
The patch also does some cleanup work on the test scripts and moves
some common functions into test-framework.sh.

Signed-off-by: Yu Jian
Change-Id: I2c23b641e62a56882171982b8a3157ba023ea36c
Reviewed-on: http://review.whamcloud.com/2265
Tested-by: Hudson
Reviewed-by: Cliff White
Reviewed-by: Minh Diep
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 lustre/tests/recovery-double-scale.sh | 338 ++++++++++++++-----------------
 lustre/tests/recovery-mds-scale.sh    | 335 ++++++++++++++----------------
 lustre/tests/recovery-random-scale.sh | 370 +++++++++++++++------------------
 lustre/tests/run_IOR.sh               |  27 +--
 lustre/tests/run_dbench.sh            |  27 +--
 lustre/tests/run_dd.sh                |  27 +--
 lustre/tests/run_iozone.sh            |  31 +--
 lustre/tests/run_tar.sh               |  27 +--
 lustre/tests/test-framework.sh        |  88 ++++++--
 9 files changed, 620 insertions(+), 650 deletions(-)

diff --git a/lustre/tests/recovery-double-scale.sh b/lustre/tests/recovery-double-scale.sh
index 805fddc..18ca852 100644
--- a/lustre/tests/recovery-double-scale.sh
+++ b/lustre/tests/recovery-double-scale.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
 
 # All pairwise combinations of node failures.
 # Was cmd3-17
@@ -8,97 +9,73 @@
 # Script fails pair of nodes:
 # -- in parallel by default
 # -- in series if SERIAL is set
+set -e
 
-LUSTRE=${LUSTRE:-`dirname $0`/..}
-SETUP=${SETUP:-""}
-CLEANUP=${CLEANUP:-""}
-. $LUSTRE/tests/test-framework.sh
+ONLY=${ONLY:-"$*"}
 
-init_test_env $@
+# bug number for skipped test:
+ALWAYS_EXCEPT="$RECOVERY_DOUBLE_SCALE_EXCEPT"
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
-DEBUGLOG=$TESTLOG_PREFIX.suite_debug_log.$(hostname -s).log
-
-exec 2>$DEBUGLOG
-echo "--- env ---" >&2
-env >&2
-echo "--- env ---" >&2
-set -x
+remote_mds_nodsh && skip_env "remote MDS with nodsh" && exit 0
+remote_ost_nodsh && skip_env "remote OST with nodsh" && exit 0
 
-[ "$SHARED_DIRECTORY" ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; }
+[ -z "$CLIENTS" -o $CLIENTCOUNT -lt 3 ] &&
+    skip_env "need three or more clients" && exit 0
 
-check_shared_dir $SHARED_DIRECTORY ||
-    error "$SHARED_DIRECTORY isn't a shared directory"
-
-[ -n "$CLIENTS" ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; }
-
-[ $CLIENTCOUNT -ge 3 ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; }
-
-END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
-LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
-
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
-
-check_timeout || exit 1
+if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then
+    skip_env "SHARED_DIRECTORY should be specified with a shared directory \
+which is accessible on all of the nodes"
+    exit 0
+fi
 
 [[ $FAILURE_MODE = SOFT ]] && \
     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
 
-build_test_filter
-
-check_and_setup_lustre
-rm -rf $DIR/[df][0-9]*
-
-# the test node needs to be insulated from a lustre failure as much as possible,
-# so not even loading the lustre modules is ideal.
-# -- umount lustre
-# -- remove hostname from clients list
-zconf_umount $(hostname) $MOUNT
-NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
-NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
+# Set SERIAL to serialize the failures: wait for the first failure to
+# recover before introducing the second.
+SERIAL=${SERIAL:-""}
+ERRORS_OK="yes"
 
-check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
+[ "$SERIAL" ] && ERRORS_OK=""
 
-MDTS=$(get_facets MDS)
-OSTS=$(get_facets OST)
+FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60 * 5))} # 5 minutes
 
-rm -f $END_RUN_FILE
+END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
+LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
 
 reboot_recover_node () {
     # item var contains a pair of clients if nodetype=clients
     # I would prefer to have a list here
     local item=$1
-    local nodetype=$2
-    local timeout=$($LCTL get_param -n timeout)
+    local nodetype=$2
+    local c
 
     # MDS, OST item contains the facet
     case $nodetype in
-        MDS|OST ) facet_failover $item
-                  [ "$SERIAL" ] && wait_recovery_complete $item || true
-                  ;;
-        clients) for c in ${item//,/ }; do
-                     # make sure the client loads die
-                     do_nodes $c "set -x; test -f $LOAD_PID_FILE &&
-                         { kill -s TERM \\\$(cat $LOAD_PID_FILE);
-                           rm -f $LOAD_PID_FILE || true; }"
-                     shutdown_client $c
-                     boot_node $c
-                     echo "Reintegrating $c"
-                     # one client fails; need dk logs from this client only
-                     zconf_mount $c $MOUNT || NODES="$c $(mdts_nodes) $(osts_nodes)" error_exit "zconf_mount failed"
-                 done
-                 start_client_loads $item
-                 ;;
-        # script failure:
-        # don't use error (), the logs from all nodes not needed
-        * ) echo "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
-            exit 1;;
+        MDS|OST ) facet_failover $item
+                  [ "$SERIAL" ] && wait_recovery_complete $item || true
+                  ;;
+        clients) for c in ${item//,/ }; do
+                     # make sure the client loads die
+                     stop_process $c $LOAD_PID_FILE
+                     shutdown_client $c
+                     boot_node $c
+                     echo "Reintegrating $c"
+                     zconf_mount $c $MOUNT ||
+                         error "mount $MOUNT on $c failed"
+                     client_up $c || error "start client on $c failed"
+                 done
+                 start_client_loads $item
+                 ;;
+        * ) echo "ERROR: invalid nodetype=$nodetype." \
+                "Must be one of 'MDS', 'OST', or 'clients'."
+            exit 1;;
     esac
 }
 
@@ -110,11 +87,9 @@ get_item_type () {
     case $type in
         MDS ) list=$MDTS;;
         OST ) list=$OSTS;;
-        clients) list=$NODES_TO_USE
-                 ;;
-        # script failure:
-        # don't use error (), the logs from all nodes not needed
-        * ) echo "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
+        clients) list=$NODES_TO_USE;;
+        * ) echo "ERROR: invalid type=$type." \
+                "Must be one of 'MDS', 'OST', or 'clients'."
exit 1;; esac @@ -125,8 +100,8 @@ get_item_type () { return fi - item=$(get_random_entry $list) - if [ "$type" = clients ] ; then + local item=$(get_random_entry $list) + if [ "$type" = "clients" ]; then item="$item $(get_random_entry $(exclude_items_from_list $list $item))" item=$(comma_list $item) fi @@ -150,29 +125,26 @@ failover_pair() { local client2= log " -==== START === $title " +==== START === $title" item1=$(get_item_type $type1) [ "$item1" ] || \ { echo "type1=$type1 item1 is empty" && return 0; } item2=$(get_item_type $type2 $item1) [ "$item2" ] || \ - { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; } + { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" \ + && return 0; } # Check that our client loads are still running. If any have died, # that means they have died outside of recovery, which is unacceptable. log "==== Checking the clients loads BEFORE failover -- failure NOT OK" - # FIXME. need print summary on exit - if ! check_client_loads $NODES_TO_USE; then - exit 4 - fi + check_client_loads $NODES_TO_USE || exit $? log "Done checking client loads. Failing type1=$type1 item1=$item1 ... " + reboot_recover_node $item1 $type1 || exit $? - reboot_recover_node $item1 $type1 - - # Hendrix test17 description: + # Hendrix test17 description: # Introduce a failure, wait at # least 5 minutes (for recovery), # introduce a 2nd @@ -184,40 +156,27 @@ failover_pair() { # We have a "double failures" if SERIAL is not set, # do not need a sleep between failures for "double failures" - log " Failing type2=$type2 item2=$item2 ... " - reboot_recover_node $item2 $type2 + log " Failing type2=$type2 item2=$item2 ... " + reboot_recover_node $item2 $type2 || exit $? # Client loads are allowed to die while in recovery, so we just # restart them. - log "==== Checking the clients loads AFTER failovers -- ERRORS_OK=$ERRORS_OK" - restart_client_loads $NODES_TO_USE $ERRORS_OK || return $? - log "Done checking / re-Starting client loads. PASS" + log "==== Checking the clients loads AFTER failovers -- ERRORS_OK=$ERRORS_OK" + restart_client_loads $NODES_TO_USE $ERRORS_OK || exit $? + log "Done checking / re-starting client loads. PASS" return 0 } summary_and_cleanup () { local rc=$? - local var trap 0 + CURRENT_TS=$(date +%s) + ELAPSED=$((CURRENT_TS - START_TS)) + # Having not empty END_RUN_FILE means the failed loads only if [ -s $END_RUN_FILE ]; then - echo "Found the END_RUN_FILE file: $END_RUN_FILE" - cat $END_RUN_FILE - local END_RUN_NODE= - read END_RUN_NODE < $END_RUN_FILE - - # A client load will stop if it found the END_RUN_FILE file. - # That does not mean the client load actually failed though. - # The first node in END_RUN_FILE is the one we are interested in. 
- if [ -n "$END_RUN_NODE" ]; then - var=$(node_var_name $END_RUN_NODE)_load - echo "Client load failed on node $END_RUN_NODE" - echo - echo "Client $END_RUN_NODE load stdout and debug files: - $TESTLOG_PREFIX.run_${!var}_stdout.$END_RUN_NODE.log - $TESTLOG_PREFIX.run_${!var}_debug.$END_RUN_NODE.log" - fi + print_end_run_file $END_RUN_FILE rc=1 fi @@ -231,116 +190,123 @@ Server failover period: $FAILOVER_PERIOD seconds Exited after: $ELAPSED seconds Status: $result: rc=$rc" - # make sure the client loads die - do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE && - { kill -s TERM \\\$(cat $LOAD_PID_FILE); - rm -f $LOAD_PID_FILE || true; }" - - # and free up the pdshes that started them, if any are still around - if [ -n "$CLIENT_LOAD_PIDS" ]; then - kill $CLIENT_LOAD_PIDS || true - sleep 5 - kill -9 $CLIENT_LOAD_PIDS || true - fi + # stop the client loads + stop_client_loads $NODES_TO_USE $LOAD_PID_FILE if [ $rc -ne 0 ]; then # we are interested in only on failed clients and servers local failedclients=$(cat $END_RUN_FILE | grep -v $0) # FIXME: need ostfailover-s nodes also for FLAVOR=OST - local product=$(gather_logs $(comma_list $(osts_nodes) \ - $(mdts_nodes) $mdsfailover_HOST $failedclients) 1) - echo $product + gather_logs $(comma_list $(osts_nodes) $(mdts_nodes) \ + $mdsfailover_HOST $failedclients) fi - [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT exit $rc } -trap summary_and_cleanup EXIT TERM INT +################################## Main Flow ################################### +build_test_filter -# -# MAIN -# -log "-----============= $0 starting =============-----" +check_and_setup_lustre +rm -rf $DIR/[Rdfs][0-9]* -START_TS=$(date +%s) -CURRENT_TS=$START_TS -ELAPSED=0 +check_timeout || exit 1 -# Set SERIAL to serialize the failure through a recovery of the first failure. -SERIAL=${SERIAL:-""} -ERRORS_OK="yes" +# The test node needs to be insulated from a lustre failure as much as possible, +# so not even loading the lustre modules is ideal. +# -- umount lustre +# -- remove hostname from clients list +zconf_umount $HOSTNAME $MOUNT +NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} +NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME) -[ "$SERIAL" ] && ERRORS_OK="" +check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]} -FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes +MDTS=$(get_facets MDS) +OSTS=$(get_facets OST) -# Start client loads. -start_client_loads $NODES_TO_USE +ELAPSED=0 +START_TS=$(date +%s) +CURRENT_TS=$START_TS -echo clients load pids: -if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then - exit 3 -fi +# Every pairwise combination of client failures (2 clients), +# MDS failure, and OST failure will be tested. +test_pairwise_fail() { + trap summary_and_cleanup EXIT TERM INT -# FIXME: Do we want to have an initial sleep period where the clients -# just run before introducing a failure? -sleep $FAILOVER_PERIOD + # Start client loads. + rm -f $END_RUN_FILE + start_client_loads $NODES_TO_USE -#CMD_TEST_NUM=17.1 -failover_pair MDS OST "test 1: failover MDS, then OST ==========" -sleep $FAILOVER_PERIOD + echo clients load pids: + do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3 -#CMD_TEST_NUM=17.2 -failover_pair MDS clients "test 2: failover MDS, then 2 clients ====" -sleep $FAILOVER_PERIOD + # FIXME: Do we want to have an initial sleep period where the clients + # just run before introducing a failure? 
+ sleep $FAILOVER_PERIOD -#CMD_TEST_NUM=17.3 -if [ $MDSCOUNT -gt 1 ]; then - failover_pair MDS MDS "test 3: failover MDS, then another MDS ==" + # CMD_TEST_NUM=17.1 + failover_pair MDS OST "test 1: failover MDS, then OST ==========" sleep $FAILOVER_PERIOD -else - skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped" -fi -#CMD_TEST_NUM=17.4 -if [ $OSTCOUNT -gt 1 ]; then - failover_pair OST OST "test 4: failover OST, then another OST ==" + # CMD_TEST_NUM=17.2 + failover_pair MDS clients "test 2: failover MDS, then 2 clients ====" sleep $FAILOVER_PERIOD -else - skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped" -fi -#CMD_TEST_NUM=17.5 -failover_pair OST clients "test 5: failover OST, then 2 clients ====" -sleep $FAILOVER_PERIOD + # CMD_TEST_NUM=17.3 + if [ $MDSCOUNT -gt 1 ]; then + failover_pair MDS MDS "test 3: failover MDS, then another MDS ==" + sleep $FAILOVER_PERIOD + else + skip_env "has less than 2 MDTs, test 3 skipped" + fi -#CMD_TEST_NUM=17.6 -failover_pair OST MDS "test 6: failover OST, then MDS ==========" -sleep $FAILOVER_PERIOD + # CMD_TEST_NUM=17.4 + if [ $OSTCOUNT -gt 1 ]; then + failover_pair OST OST "test 4: failover OST, then another OST ==" + sleep $FAILOVER_PERIOD + else + skip_env "has less than 2 OSTs, test 4 skipped" + fi -#CMD_TEST_NUM=17.7 -failover_pair clients MDS "test 7: failover 2 clients, then MDS ====" -sleep $FAILOVER_PERIOD + # CMD_TEST_NUM=17.5 + failover_pair OST clients "test 5: failover OST, then 2 clients ====" + sleep $FAILOVER_PERIOD -#CMD_TEST_NUM=17.8 -#failover_pair clients OST "test 8: failover 2 clients, then OST ====" -sleep $FAILOVER_PERIOD + # CMD_TEST_NUM=17.6 + failover_pair OST MDS "test 6: failover OST, then MDS ==========" + sleep $FAILOVER_PERIOD -#CMD_TEST_NUM=17.9 -if [ $CLIENTCOUNT -ge 5 ]; then - failover_pair clients clients "test 9: failover 2 clients, then 2 different clients ==" + # CMD_TEST_NUM=17.7 + failover_pair clients MDS "test 7: failover 2 clients, then MDS ====" sleep $FAILOVER_PERIOD -fi -log "==== Checking the clients loads AFTER all failovers -- failure NOT OK" -if ! check_client_loads $NODES_TO_USE; then - log "Client load failed after failover. Exiting" - exit 5 -fi -CURRENT_TS=$(date +%s) -ELAPSED=$((CURRENT_TS - START_TS)) + # CMD_TEST_NUM=17.8 + failover_pair clients OST "test 8: failover 2 clients, then OST ====" + sleep $FAILOVER_PERIOD + + # CMD_TEST_NUM=17.9 + if [ $CLIENTCOUNT -gt 4 ]; then + failover_pair clients clients \ + "test 9: failover 2 clients, then 2 different clients ==" + sleep $FAILOVER_PERIOD + else + skip_env "has less than 5 Clients, test 9 skipped" + fi + + log "==== Checking the clients loads AFTER all failovers -- failure NOT OK" + if ! check_client_loads $NODES_TO_USE; then + log "Client load failed after failover. Exiting..." + exit 5 + fi + + exit 0 +} +run_test pairwise_fail "pairwise combination of clients, MDS, and OST failures" -log "Completed successfully in $ELAPSED seconds" +zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed" +client_up || error "start client on $HOSTNAME failed" -exit 0 +complete $(basename $0) $SECONDS +check_and_cleanup_lustre +exit_status diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh index 3b016b9..6a914b3 100644 --- a/lustre/tests/recovery-mds-scale.sh +++ b/lustre/tests/recovery-mds-scale.sh @@ -1,86 +1,54 @@ #!/bin/bash +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: # Was Test 11 in cmd3. 
 # For duration of 24 hours repeatedly failover a random MDS at
 # 10 minute intervals and verify that no application errors occur.
 
 # Test runs one of CLIENT_LOAD progs on remote clients.
+set -e
 
-LUSTRE=${LUSTRE:-`dirname $0`/..}
-SETUP=${SETUP:-""}
-CLEANUP=${CLEANUP:-""}
-. $LUSTRE/tests/test-framework.sh
+ONLY=${ONLY:-"$*"}
 
-init_test_env $@
+# bug number for skipped test:
+ALWAYS_EXCEPT="$RECOVERY_MDS_SCALE_EXCEPT"
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
-DEBUGLOG=$TESTLOG_PREFIX.suite_debug_log.$(hostname -s).log
-
-exec 2>$DEBUGLOG
-echo "--- env ---" >&2
-env >&2
-echo "--- env ---" >&2
-set -x
+remote_mds_nodsh && skip_env "remote MDS with nodsh" && exit 0
+remote_ost_nodsh && skip_env "remote OST with nodsh" && exit 0
 
-[ "$SHARED_DIRECTORY" ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; }
+[ -z "$CLIENTS" -o $CLIENTCOUNT -lt 3 ] &&
+    skip_env "need three or more clients" && exit 0
 
-check_shared_dir $SHARED_DIRECTORY ||
-    error "$SHARED_DIRECTORY isn't a shared directory"
-
-[ -n "$CLIENTS" ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; }
-
-[ $CLIENTCOUNT -ge 3 ] || \
-    { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; }
-
-END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
-LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
-VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
-
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
-
-build_test_filter
-
-check_and_setup_lustre
-rm -rf $DIR/[df][0-9]*
-
-max_recov_time=$(max_recovery_time)
-
-# the test node needs to be insulated from a lustre failure as much as possible,
-# so not even loading the lustre modules is ideal.
-# -- umount lustre
-# -- remove hostname from clients list
-zconf_umount $(hostname) $MOUNT
-NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
-NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
-
-check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
-
-MDTS=$(get_facets MDS)
-OSTS=$(get_facets OST)
+if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then
+    skip_env "SHARED_DIRECTORY should be specified with a shared directory \
+which is accessible on all of the nodes"
+    exit 0
+fi
 
 ERRORS_OK=""  # No application failures should occur during this test.
 
-FLAVOR=${FLAVOR:-"MDS"}
-if [ "$FLAVOR" == "MDS" ]; then
-    SERVERS=$MDTS
-else
-    SERVERS=$OSTS
-fi
-
 if [ "$SLOW" = "no" ]; then
     DURATION=${DURATION:-$((60 * 30))}
-    SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 5))}
 else
     DURATION=${DURATION:-$((60 * 60 * 24))}
-    SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
 fi
+SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
 
-rm -f $END_RUN_FILE
+MINSLEEP=${MINSLEEP:-120}
+REQFAIL_PERCENT=${REQFAIL_PERCENT:-3}  # bug17839 comment 62
+REQFAIL=${REQFAIL:-$((DURATION / SERVER_FAILOVER_PERIOD *
+    REQFAIL_PERCENT / 100))}
+
+END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
+LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
+VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
 
 server_numfailovers () {
     local facet=$1
@@ -102,27 +70,11 @@ servers_numfailovers () {
 
 summary_and_cleanup () {
     local rc=$?
-    local var
 
     trap 0
 
     # Having not empty END_RUN_FILE means the failed loads only
     if [ -s $END_RUN_FILE ]; then
-        echo "Found the END_RUN_FILE file: $END_RUN_FILE"
-        cat $END_RUN_FILE
-        local END_RUN_NODE=
-        read END_RUN_NODE < $END_RUN_FILE
-
-        # A client load will stop if it found the END_RUN_FILE file.
-        # That does not mean the client load actually failed though.
-        # The first node in END_RUN_FILE is the one we are interested in.
-        if [ -n "$END_RUN_NODE" ]; then
-            var=$(node_var_name $END_RUN_NODE)_load
-            echo "Client load failed on node $END_RUN_NODE"
-            echo
-            echo "Client $END_RUN_NODE load stdout and debug files:
-              $TESTLOG_PREFIX.run_${!var}_stdout.$END_RUN_NODE.log
-              $TESTLOG_PREFIX.run_${!var}_debug.$END_RUN_NODE.log"
-        fi
+        print_end_run_file $END_RUN_FILE
         rc=1
     fi
 
@@ -131,159 +83,170 @@ summary_and_cleanup () {
     local result=PASS
     [ $rc -eq 0 ] || result=FAIL
 
-    log "Duration: $DURATION
+    log "Duration: $DURATION
 Server failover period: $SERVER_FAILOVER_PERIOD seconds
 Exited after: $ELAPSED seconds
 Number of failovers before exit: $(servers_numfailovers)
 Status: $result: rc=$rc"
 
-    # stop the vmstats on the OSTs
-    if [ "$VMSTAT" ]; then
-        do_nodes $(comma_list $(osts_nodes)) "test -f $VMSTAT_PID_FILE &&
-            { kill -s TERM \\\$(cat $VMSTAT_PID_FILE);
-              rm -f $VMSTAT_PID_FILE || true; }"
-    fi
-
-    # make sure the client loads die
-    do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE &&
-        { kill -s TERM \\\$(cat $LOAD_PID_FILE);
-          rm -f $LOAD_PID_FILE || true; }"
+    # stop vmstat on OSS nodes
+    [ "$VMSTAT" ] && stop_process $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
 
-    # and free up the pdshes that started them, if any are still around
-    if [ -n "$CLIENT_LOAD_PIDS" ]; then
-        kill $CLIENT_LOAD_PIDS || true
-        sleep 5
-        kill -9 $CLIENT_LOAD_PIDS || true
-    fi
+    # stop the client loads
+    stop_client_loads $NODES_TO_USE $LOAD_PID_FILE
 
     if [ $rc -ne 0 ]; then
         # we are interested in only on failed clients and servers
         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
-        local product=$(gather_logs $(comma_list $(osts_nodes) \
-                        $(mdts_nodes) $mdsfailover_HOST $failedclients) 1)
-        echo $product
+        gather_logs $(comma_list $(osts_nodes) $(mdts_nodes) \
+            $mdsfailover_HOST $failedclients)
     fi
 
-    [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
-
     exit $rc
 }
 
-#
-# MAIN
-#
-log "-----============= $0 starting =============-----"
+failover_target() {
+    local flavor=${1:-"MDS"}
+    local servers
+    local serverfacet
+    local var
 
-trap summary_and_cleanup EXIT INT
+    [ "$flavor" = "MDS" ] && servers=$MDTS || servers=$OSTS
 
-ELAPSED=0
+    trap summary_and_cleanup EXIT INT
 
-# vmstat the osts
-if [ "$VMSTAT" ]; then
-    do_nodes $(comma_list $(osts_nodes)) \
-        "vmstat 1 > $TESTLOG_PREFIX.vmstat.\\\$(hostname -s).log \
-        2>/dev/null </dev/null & echo \\\$! > $VMSTAT_PID_FILE"
-fi
+    # start vmstat on OSS nodes
+    [ "$VMSTAT" ] && start_vmstat $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
 
-# Start client loads.
-start_client_loads $NODES_TO_USE
+    # start client loads
+    rm -f $END_RUN_FILE
+    start_client_loads $NODES_TO_USE
 
-echo clients load pids:
-if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then
-    exit 3
-fi
+    echo client load pids:
+    do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3
 
-MINSLEEP=${MINSLEEP:-120}
-REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug17839 comment 62
-REQFAIL=${REQFAIL:-$(( DURATION / SERVER_FAILOVER_PERIOD * REQFAIL_PERCENT / 100))}
-reqfail=0
-sleep=0
+    ELAPSED=0
+    local sleep=0
+    local reqfail=0
+    local it_time_start
+    local start_ts=$(date +%s)
+    local current_ts=$start_ts
+
+    while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
+        # In order to perform the
+        # expected number of failovers, we need to account for the following:
+        # 1) the time that has elapsed during the client load checking
+        # 2) the time it takes for failover
+        it_time_start=$(date +%s)
 
-START_TS=$(date +%s)
-CURRENT_TS=$START_TS
+        serverfacet=$(get_random_entry $servers)
+        var=${serverfacet}_numfailovers
 
-while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
+        # Check that our client loads are still running. If any have died,
+        # that means they have died outside of recovery, which is unacceptable.
+        log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
+             ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
+        check_client_loads $NODES_TO_USE || exit 4
 
-    # In order to perform the
-    # expected number of failovers, we need to account the following :
-    # 1) the time that has elapsed during the client load checking
-    # 2) time takes for failover
+        log "Wait for $serverfacet recovery to complete before the next failover..."
+        if ! wait_recovery_complete $serverfacet; then
+            echo "$serverfacet recovery is not completed!"
+            exit 7
+        fi
 
-    it_time_start=$(date +%s)
+        log "Checking that clients are in FULL state before the next failover..."
+        if ! wait_clients_import_state $NODES_TO_USE $serverfacet FULL; then
+            echo "Clients import not FULL, please consider increasing \
+SERVER_FAILOVER_PERIOD=$SERVER_FAILOVER_PERIOD!"
+        fi
 
-    SERVERFACET=$(get_random_entry $SERVERS)
-    var=${SERVERFACET}_numfailovers
+        log "Starting failover on $serverfacet"
+        facet_failover "$serverfacet" || exit 1
 
-    # Check that our client loads are still running. If any have died,
-    # that means they have died outside of recovery, which is unacceptable.
+        # Check that our client loads are still running during failover.
+        # No application failures should occur.
+        log "==== Checking the clients loads AFTER failover -- failure NOT OK"
+        if ! check_client_loads $NODES_TO_USE; then
+            log "Client load failed during failover. Exiting..."
+            exit 5
+        fi
 
-    log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
-    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
+        # Increment the number of failovers.
+        val=$((${!var} + 1))
+        eval $var=$val
 
-    if ! check_client_loads $NODES_TO_USE; then
-        exit 4
-    fi
+        current_ts=$(date +%s)
+        ELAPSED=$((current_ts - start_ts))
 
-    log "Wait $SERVERFACET recovery complete before doing next failover ...."
+        sleep=$((SERVER_FAILOVER_PERIOD - (current_ts - it_time_start)))
 
-    if ! wait_recovery_complete $SERVERFACET ; then
-        echo "$SERVERFACET recovery is not completed!"
-        exit 7
-    fi
+        # Keep counting the number of iterations when the time spent
+        # on failover and the two client load checks exceeded
+        # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
+        if [ $sleep -lt $MINSLEEP ]; then
+            reqfail=$((reqfail + 1))
+            log "WARNING: failover and two check_client_loads time exceeded \
+SERVER_FAILOVER_PERIOD - MINSLEEP!
+Failed to load the filesystem with I/O for a minimum period of \
+$MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
+This iteration, the load was only applied for sleep=$sleep seconds.
+Estimated max recovery time: $MAX_RECOV_TIME
+Probably the hardware is taking an excessively long time to boot.
+Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), \
+bug 20918"
+            [ $reqfail -gt $REQFAIL ] && exit 6
+        fi
 
-    log "Checking clients are in FULL state before doing next failover"
-    if ! wait_clients_import_state $NODES_TO_USE $SERVERFACET FULL; then
-        echo "Clients import not FULL, please consider to increase SERVER_FAILOVER_PERIOD=$SERVER_FAILOVER_PERIOD !"
+        log "$serverfacet has failed over ${!var} times, and counting..."
 
-    fi
-    log "Starting failover on $SERVERFACET"
+        [ $((ELAPSED + sleep)) -ge $DURATION ] && break
 
-    facet_failover "$SERVERFACET" || exit 1
+        if [ $sleep -gt 0 ]; then
+            echo "sleeping $sleep seconds... "
+            sleep $sleep
+        fi
+    done
+    exit 0
+}
 
-    # Check that our client loads are still running during failover.
-    # No application failures should occur.
+################################## Main Flow ###################################
+build_test_filter
 
-    log "==== Checking the clients loads AFTER failover -- failure NOT OK"
-    if ! check_client_loads $NODES_TO_USE; then
-        log "Client load failed during failover. Exiting"
-        exit 5
-    fi
+check_and_setup_lustre
+rm -rf $DIR/[Rdfs][0-9]*
 
-    # Increment the number of failovers
-    val=$((${!var} + 1))
-    eval $var=$val
+MAX_RECOV_TIME=$(max_recovery_time)
 
-    CURRENT_TS=$(date +%s)
-    ELAPSED=$((CURRENT_TS - START_TS))
+# The test node needs to be insulated from a lustre failure as much as possible,
+# so not even loading the lustre modules is ideal.
+# -- umount lustre
+# -- remove hostname from clients list
+zconf_umount $HOSTNAME $MOUNT
+NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
+NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME)
 
-    sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
+check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
 
-    # keep count the number of itterations when
-    # time spend to failover and two client loads check exceeded
-    # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
-    if [ $sleep -lt $MINSLEEP ]; then
-        reqfail=$((reqfail +1))
-        log "WARNING: failover and two check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
-Failed to load the filesystem with I/O for a minimum period of $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
-This iteration, the load was only applied for sleep=$sleep seconds.
-Estimated max recovery time : $max_recov_time
-Probably the hardware is taking excessively long to boot.
-Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6
-    fi
+MDTS=$(get_facets MDS)
+OSTS=$(get_facets OST)
 
-    log "$SERVERFACET has failed over ${!var} times, and counting..."
+test_failover_mds() {
+    # failover a random MDS
+    failover_target MDS
+}
+run_test failover_mds "failover MDS"
 
-    if [ $((ELAPSED + sleep)) -ge $DURATION ]; then
-        break
-    fi
+test_failover_ost() {
+    # failover a random OST
+    failover_target OST
+}
+run_test failover_ost "failover OST"
 
-    if [ $sleep -gt 0 ]; then
-        echo "sleeping $sleep seconds ... "
" - sleep $sleep - fi -done +zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed" +client_up || error "start client on $HOSTNAME failed" -exit 0 +complete $(basename $0) $SECONDS +check_and_cleanup_lustre +exit_status diff --git a/lustre/tests/recovery-random-scale.sh b/lustre/tests/recovery-random-scale.sh index 8b4506f..9de55c0 100644 --- a/lustre/tests/recovery-random-scale.sh +++ b/lustre/tests/recovery-random-scale.sh @@ -1,4 +1,5 @@ #!/bin/bash +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: # client failure does not affect other clients @@ -9,111 +10,75 @@ # 10 minute intervals and verify that no application errors occur. # Test runs one of CLIENT_LOAD progs on remote clients. +set -e -LUSTRE=${LUSTRE:-`dirname $0`/..} -SETUP=${SETUP:-""} -CLEANUP=${CLEANUP:-""} -. $LUSTRE/tests/test-framework.sh +ONLY=${ONLY:-"$*"} -init_test_env $@ +# bug number for skipped test: +ALWAYS_EXCEPT="$RECOVERY_RANDOM_SCALE_EXCEPT" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -DEBUGLOG=$TESTLOG_PREFIX.suite_debug_log.$(hostname -s).log - -exec 2>$DEBUGLOG -echo "--- env ---" >&2 -env >&2 -echo "--- env ---" >&2 -set -x +remote_mds_nodsh && skip_env "remote MDS with nodsh" && exit 0 +remote_ost_nodsh && skip_env "remote OST with nodsh" && exit 0 -[ "$SHARED_DIRECTORY" ] || \ - { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; } +[ -z "$CLIENTS" -o $CLIENTCOUNT -lt 3 ] && + skip_env "need three or more clients" && exit 0 -check_shared_dir $SHARED_DIRECTORY || - error "$SHARED_DIRECTORY isn't a shared directory" - -[ -n "$CLIENTS" ] || \ - { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; } - -[ $CLIENTCOUNT -ge 3 ] || \ - { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; } - -END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file} -LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid} -VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid} - -remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 +if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then + skip_env "SHARED_DIRECTORY should be specified with a shared directory \ +which is accessable on all of the nodes" + exit 0 +fi [[ $FAILURE_MODE = SOFT ]] && \ log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797" -build_test_filter - -check_and_setup_lustre -rm -rf $DIR/[df][0-9]* - -max_recov_time=$(max_recovery_time) - -# the test node needs to be insulated from a lustre failure as much as possible, -# so not even loading the lustre modules is ideal. -# -- umount lustre -# -- remove hostname from clients list -zconf_umount $(hostname) $MOUNT -NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} -NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname)) - -check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]} - -MDTS=$(get_facets MDS) +# Application failures are allowed for the failed client +# but not for other clients. 
+ERRORS_OK="yes" if [ "$SLOW" = "no" ]; then DURATION=${DURATION:-$((60 * 30))} - SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 5))} else DURATION=${DURATION:-$((60 * 60 * 24))} - SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes fi +SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes + +MINSLEEP=${MINSLEEP:-120} +REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug17839 comment 62 +REQFAIL=${REQFAIL:-$((DURATION / SERVER_FAILOVER_PERIOD * + REQFAIL_PERCENT / 100))} -rm -f $END_RUN_FILE +END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file} +LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid} +VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid} numfailovers () { local facet local var - for facet in $MDTS ${failed_clients//,/ }; do + for facet in $MDTS ${FAILED_CLIENTS//,/ }; do var=${facet}_nums val=${!var} if [ "$val" ] ; then - echo "$facet failed over $val times" + echo "$facet failed over $val times" fi done } summary_and_cleanup () { local rc=$? - local var trap 0 # Having not empty END_RUN_FILE means the failed loads only if [ -s $END_RUN_FILE ]; then - echo "Found the END_RUN_FILE file: $END_RUN_FILE" - cat $END_RUN_FILE - local END_RUN_NODE= - read END_RUN_NODE < $END_RUN_FILE - - # A client load will stop if it found the END_RUN_FILE file. - # That does not mean the client load actually failed though. - # The first node in END_RUN_FILE is the one we are interested in. - if [ -n "$END_RUN_NODE" ]; then - var=$(node_var_name $END_RUN_NODE)_load - echo "Client load failed on node $END_RUN_NODE" - echo - echo "Client $END_RUN_NODE load stdout and debug files: - $TESTLOG_PREFIX.run_${!var}_stdout.$END_RUN_NODE.log - $TESTLOG_PREFIX.run_${!var}_debug.$END_RUN_NODE.log" - fi + print_end_run_file $END_RUN_FILE rc=1 fi @@ -122,183 +87,184 @@ summary_and_cleanup () { local result=PASS [ $rc -eq 0 ] || result=FAIL - log "Duration: $DURATION + log "Duration: $DURATION Server failover period: $SERVER_FAILOVER_PERIOD seconds Exited after: $ELAPSED seconds Number of failovers before exit: $(numfailovers) Status: $result: rc=$rc" - # stop the vmstats on the OSTs - if [ "$VMSTAT" ]; then - do_nodes $(comma_list $(osts_nodes)) "test -f $VMSTAT_PID_FILE && - { kill -s TERM \\\$(cat $VMSTAT_PID_FILE); - rm -f $VMSTAT_PID_FILE || true; }" - fi + # stop vmstat on OSS nodes + [ "$VMSTAT" ] && stop_process $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE - # make sure the client loads die - do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE && - { kill -s TERM \\\$(cat $LOAD_PID_FILE); - rm -f $LOAD_PID_FILE || true; }" - - # and free up the pdshes that started them, if any are still around - if [ -n "$CLIENT_LOAD_PIDS" ]; then - kill $CLIENT_LOAD_PIDS || true - sleep 5 - kill -9 $CLIENT_LOAD_PIDS || true - fi + # stop the client loads + stop_client_loads $NODES_TO_USE $LOAD_PID_FILE if [ $rc -ne 0 ]; then # we are interested in only on failed clients and servers local failedclients=$(cat $END_RUN_FILE | grep -v $0) # FIXME: need ostfailover-s nodes also for FLAVOR=OST - local product=$(gather_logs $(comma_list $(osts_nodes) \ - $(mdts_nodes) $mdsfailover_HOST $failedclients) 1) - echo $product + gather_logs $(comma_list $(osts_nodes) $(mdts_nodes) \ + $mdsfailover_HOST $failedclients) fi - [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT - exit $rc } -# -# MAIN -# -log "-----============= $0 starting =============-----" +################################## Main Flow ################################### +build_test_filter -trap summary_and_cleanup 
EXIT # INT
 
+check_and_setup_lustre
+rm -rf $DIR/[Rdfs][0-9]*
 
-ELAPSED=0
+MAX_RECOV_TIME=$(max_recovery_time)
 
-# vmstat the osts
-if [ "$VMSTAT" ]; then
-    do_nodes $(comma_list $(osts_nodes)) \
-        "vmstat 1 > $TESTLOG_PREFIX.vmstat.\\\$(hostname -s).log \
-        2>/dev/null </dev/null & echo \\\$! > $VMSTAT_PID_FILE"
-fi
+# The test node needs to be insulated from a lustre failure as much as possible,
+# so not even loading the lustre modules is ideal.
+# -- umount lustre
+# -- remove hostname from clients list
+zconf_umount $HOSTNAME $MOUNT
+NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
+NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME)
 
-# Start client loads.
-start_client_loads $NODES_TO_USE
+check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
 
-echo clients load pids:
-if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then
-    exit 3
-fi
+MDTS=$(get_facets MDS)
 
-START_TS=$(date +%s)
-CURRENT_TS=$START_TS
+# Fail a random client and then failover a random MDS.
+test_fail_client_mds() {
+    local fail_client
+    local serverfacet
+    local client_var
+    local var
 
-MINSLEEP=${MINSLEEP:-120}
-REQFAIL_PERCENT=${REQFAIL_PERCENT:-3} # bug17839 comment 62
-REQFAIL=${REQFAIL:-$(( DURATION / SERVER_FAILOVER_PERIOD * REQFAIL_PERCENT / 100))}
-reqfail=0
-sleep=0
+    trap summary_and_cleanup EXIT INT
 
-# This is used for FAIL_CLIENT only
-ERRORS_OK="yes"
-while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
+    # start vmstat on OSS nodes
+    [ "$VMSTAT" ] && start_vmstat $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
 
-    # In order to perform the
-    # expected number of failovers, we need to account the following :
-    # 1) the time that has elapsed during the client load checking
-    # 2) time takes for failover
+    # start client loads
+    rm -f $END_RUN_FILE
+    start_client_loads $NODES_TO_USE
 
-    it_time_start=$(date +%s)
-
-    FAIL_CLIENT=$(get_random_entry $NODES_TO_USE)
-    client_var=$(node_var_name $FAIL_CLIENT)_nums
+    echo client load pids:
+    do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3
 
-    # store the list of failed clients
-    # lists are comma separated
-    failed_clients=$(expand_list $failed_clients $FAIL_CLIENT)
+    ELAPSED=0
+    local sleep=0
+    local reqfail=0
+    local it_time_start
+    local start_ts=$(date +%s)
+    local current_ts=$start_ts
 
-    SERVERFACET=$(get_random_entry $MDTS)
-    var=${SERVERFACET}_nums
+    while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
+        # In order to perform the
+        # expected number of failovers, we need to account for the following:
+        # 1) the time that has elapsed during the client load checking
+        # 2) the time it takes for failover
+        it_time_start=$(date +%s)
 
-    # Check that our client loads are still running. If any have died,
-    # that means they have died outside of recovery, which is unacceptable.
+        fail_client=$(get_random_entry $NODES_TO_USE)
+        client_var=$(node_var_name $fail_client)_nums
 
-    log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
-    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
+        # store the list of failed clients
+        # lists are comma separated
+        FAILED_CLIENTS=$(expand_list $FAILED_CLIENTS $fail_client)
 
-    if ! check_client_loads $NODES_TO_USE; then
-        exit 4
-    fi
+        serverfacet=$(get_random_entry $MDTS)
+        var=${serverfacet}_nums
 
-    log "FAIL CLIENT $FAIL_CLIENT ... "
-    shutdown_client $FAIL_CLIENT
+        # Check that our client loads are still running. If any have died,
+        # that means they have died outside of recovery, which is unacceptable.
+        log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
+             ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
+        check_client_loads $NODES_TO_USE || exit 4
 
-    log "Starting failover on $SERVERFACET"
+        log "FAIL CLIENT $fail_client..."
+        shutdown_client $fail_client
 
-    facet_failover "$SERVERFACET" || exit 1
-    if ! wait_recovery_complete $SERVERFACET ; then
-        echo "$SERVERFACET recovery is not completed!"
-        exit 7
-    fi
+        log "Starting failover on $serverfacet"
+        facet_failover "$serverfacet" || exit 1
 
-    boot_node $FAIL_CLIENT
-    echo "Reintegrating $FAIL_CLIENT"
-    zconf_mount $FAIL_CLIENT $MOUNT || exit $?
-
-    # Increment the number of failovers
-    val=$((${!var} + 1))
-    eval $var=$val
-    val=$((${!client_var} + 1))
-    eval $client_var=$val
-
-    # load script on failed clients could create END_RUN_FILE
-    # We shuold remove it and ignore the failure if this
-    # file contains the failed client only.
-    # We can not use ERRORS_OK when start all loads at the start of this script
-    # because the application errors allowed for random failed client only, but
-    # not for all clients.
-    if [ -e $END_RUN_FILE ]; then
-        read END_RUN_NODE < $END_RUN_FILE
-        [[ $END_RUN_NODE = $FAIL_CLIENT ]] &&
-            rm -f $END_RUN_FILE || exit 13
-    fi
+        if ! wait_recovery_complete $serverfacet; then
+            echo "$serverfacet recovery is not completed!"
+            exit 7
+        fi
 
-    restart_client_loads $FAIL_CLIENT $ERRORS_OK || exit $?
+        boot_node $fail_client
+        echo "Reintegrating $fail_client"
+        zconf_mount $fail_client $MOUNT || exit $?
+        client_up $fail_client || exit $?
+
+        # Increment the number of failovers
+        val=$((${!var} + 1))
+        eval $var=$val
+        val=$((${!client_var} + 1))
+        eval $client_var=$val
+
+        # load script on failed clients could create END_RUN_FILE
+        # We should remove it and ignore the failure if this
+        # file contains the failed client only.
+        # We cannot use ERRORS_OK when starting all loads at the start of
+        # this script because application errors are allowed only for the
+        # randomly failed client, not for all clients.
+        if [ -e $END_RUN_FILE ]; then
+            local end_run_node
+            read end_run_node < $END_RUN_FILE
+            [[ $end_run_node = $fail_client ]] &&
+                rm -f $END_RUN_FILE || exit 13
+        fi
 
-    # Check that not failed clients loads are still running.
-    # No application failures should occur on clients that was not failed.
+        restart_client_loads $fail_client $ERRORS_OK || exit $?
 
-    log "==== Checking the clients loads AFTER failed client reintegrated -- failure NOT OK"
-    if ! ERRORS_OK= check_client_loads $(exclude_items_from_list $NODES_TO_USE $FAIL_CLIENT); then
-        log "Client load failed. Exiting"
-        exit 5
-    fi
+        # Check that the loads on the clients that were not failed are still
+        # running. No application failures should occur on those clients.
+        log "==== Checking the clients loads AFTER failed client reintegrated \
+-- failure NOT OK"
+        if ! ERRORS_OK= check_client_loads \
+            $(exclude_items_from_list $NODES_TO_USE $fail_client); then
+            log "Client load failed. Exiting..."
+            exit 5
+        fi
 
-    CURRENT_TS=$(date +%s)
-    ELAPSED=$((CURRENT_TS - START_TS))
-    sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
-
-    # keep count the number of itterations when
-    # time spend to failover and two client loads check exceeded
-    # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
-    if [ $sleep -lt $MINSLEEP ]; then
-        reqfail=$((reqfail +1))
-        log "WARNING: failover, client reintegration and check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
-Failed to load the filesystem with I/O for a minimum period of $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
+        current_ts=$(date +%s)
+        ELAPSED=$((current_ts - start_ts))
+        sleep=$((SERVER_FAILOVER_PERIOD - (current_ts - it_time_start)))
+
+        # Keep counting the number of iterations when the time spent
+        # on failover and the two client load checks exceeded
+        # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
+        if [ $sleep -lt $MINSLEEP ]; then
+            reqfail=$((reqfail + 1))
+            log "WARNING: failover, client reintegration and \
+check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP!
+Failed to load the filesystem with I/O for a minimum period of \
+$MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
 This iteration, the load was only applied for sleep=$sleep seconds.
-Estimated max recovery time : $max_recov_time
-Probably the hardware is taking excessively long to boot.
-Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6
-    fi
+Estimated max recovery time: $MAX_RECOV_TIME
+Probably the hardware is taking an excessively long time to boot.
+Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), \
+bug 20918"
+            [ $reqfail -gt $REQFAIL ] && exit 6
+        fi
 
-    log " Number of failovers:
+        log "Number of failovers:
 $(numfailovers)        and counting..."
 
-    if [ $((ELAPSED + sleep)) -ge $DURATION ]; then
-        break
-    fi
+        [ $((ELAPSED + sleep)) -ge $DURATION ] && break
 
-    if [ $sleep -gt 0 ]; then
-        echo "sleeping $sleep seconds ... "
-        sleep $sleep
-    fi
-done
+        if [ $sleep -gt 0 ]; then
+            echo "sleeping $sleep seconds... "
+            sleep $sleep
+        fi
+    done
+    exit 0
+}
+run_test fail_client_mds "fail client, then failover MDS"
+
+zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed"
+client_up || error "start client on $HOSTNAME failed"
 
-exit 0
+complete $(basename $0) $SECONDS
+check_and_cleanup_lustre
+exit_status
diff --git a/lustre/tests/run_IOR.sh b/lustre/tests/run_IOR.sh
index 4cd6933..9f8f816 100755
--- a/lustre/tests/run_IOR.sh
+++ b/lustre/tests/run_IOR.sh
@@ -1,9 +1,11 @@
 #!/bin/bash
-set -x
 
 TMP=${TMP:-/tmp}
 
 TESTLOG_PREFIX=${TESTLOG_PREFIX:-$TMP/recovery-mds-scale}
+TESTNAME=${TESTNAME:-""}
+[ -n "$TESTNAME" ] && TESTLOG_PREFIX=$TESTLOG_PREFIX.$TESTNAME
+
 LOG=$TESTLOG_PREFIX.$(basename $0 .sh)_stdout.$(hostname -s).log
 DEBUGLOG=$(echo $LOG | sed 's/\(.*\)stdout/\1debug/')
 
@@ -11,6 +13,7 @@ mkdir -p ${LOG%/*}
 rm -f $LOG $DEBUGLOG
 exec 2>$DEBUGLOG
+set -x
 
 . $(dirname $0)/functions.sh
 
@@ -46,19 +49,19 @@ while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do
     load_pid=$!
wait $load_pid if [ ${PIPESTATUS[0]} -eq 0 ]; then - echoerr "$(date +'%F %H:%M:%S'): IOR succeeded" - cd $TMP - rm -rf $TESTDIR - echoerr "$(date +'%F %H:%M:%S'): IOR run finished" + echoerr "$(date +'%F %H:%M:%S'): IOR succeeded" + cd $TMP + rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): IOR run finished" else - echoerr "$(date +'%F %H:%M:%S'): IOR failed" - if [ -z "$ERRORS_OK" ]; then - echo $(hostname) >> $END_RUN_FILE - fi - if [ $BREAK_ON_ERROR ]; then - # break + echoerr "$(date +'%F %H:%M:%S'): IOR failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break CONTINUE=false - fi + fi fi done diff --git a/lustre/tests/run_dbench.sh b/lustre/tests/run_dbench.sh index d1a4a38..b6c2ac1 100755 --- a/lustre/tests/run_dbench.sh +++ b/lustre/tests/run_dbench.sh @@ -1,9 +1,11 @@ #!/bin/bash -set -x TMP=${TMP:-/tmp} TESTLOG_PREFIX=${TESTLOG_PREFIX:-$TMP/recovery-mds-scale} +TESTNAME=${TESTNAME:-""} +[ -n "$TESTNAME" ] && TESTLOG_PREFIX=$TESTLOG_PREFIX.$TESTNAME + LOG=$TESTLOG_PREFIX.$(basename $0 .sh)_stdout.$(hostname -s).log DEBUGLOG=$(echo $LOG | sed 's/\(.*\)stdout/\1debug/') @@ -11,6 +13,7 @@ mkdir -p ${LOG%/*} rm -f $LOG $DEBUGLOG exec 2>$DEBUGLOG +set -x . $(dirname $0)/functions.sh @@ -34,19 +37,19 @@ while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do wait $load_pid if [ ${PIPESTATUS[0]} -eq 0 ]; then - echoerr "$(date +'%F %H:%M:%S'): dbench succeeded" - cd $TMP - rm -rf $TESTDIR - echoerr "$(date +'%F %H:%M:%S'): dbench run finished" + echoerr "$(date +'%F %H:%M:%S'): dbench succeeded" + cd $TMP + rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): dbench run finished" else - echoerr "$(date +'%F %H:%M:%S'): dbench failed" - if [ -z "$ERRORS_OK" ]; then - echo $(hostname) >> $END_RUN_FILE - fi - if [ $BREAK_ON_ERROR ]; then - # break + echoerr "$(date +'%F %H:%M:%S'): dbench failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break CONTINUE=false - fi + fi fi done diff --git a/lustre/tests/run_dd.sh b/lustre/tests/run_dd.sh index 0f2a1f9..36af6ae 100755 --- a/lustre/tests/run_dd.sh +++ b/lustre/tests/run_dd.sh @@ -1,9 +1,11 @@ #!/bin/bash -set -x TMP=${TMP:-/tmp} TESTLOG_PREFIX=${TESTLOG_PREFIX:-$TMP/recovery-mds-scale} +TESTNAME=${TESTNAME:-""} +[ -n "$TESTNAME" ] && TESTLOG_PREFIX=$TESTLOG_PREFIX.$TESTNAME + LOG=$TESTLOG_PREFIX.$(basename $0 .sh)_stdout.$(hostname -s).log DEBUGLOG=$(echo $LOG | sed 's/\(.*\)stdout/\1debug/') @@ -11,6 +13,7 @@ mkdir -p ${LOG%/*} rm -f $LOG $DEBUGLOG exec 2>$DEBUGLOG +set -x . $(dirname $0)/functions.sh @@ -35,19 +38,19 @@ while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do wait $load_pid if [ $? 
-eq 0 ]; then - echoerr "$(date +'%F %H:%M:%S'): dd succeeded" - cd $TMP - rm -rf $TESTDIR - echoerr "$(date +'%F %H:%M:%S'): dd run finished" + echoerr "$(date +'%F %H:%M:%S'): dd succeeded" + cd $TMP + rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): dd run finished" else - echoerr "$(date +'%F %H:%M:%S'): dd failed" - if [ -z "$ERRORS_OK" ]; then - echo $(hostname) >> $END_RUN_FILE - fi - if [ $BREAK_ON_ERROR ]; then - # break + echoerr "$(date +'%F %H:%M:%S'): dd failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break CONTINUE=false - fi + fi fi done diff --git a/lustre/tests/run_iozone.sh b/lustre/tests/run_iozone.sh index 01eb9fe..642303c 100755 --- a/lustre/tests/run_iozone.sh +++ b/lustre/tests/run_iozone.sh @@ -1,9 +1,11 @@ #!/bin/bash -set -x TMP=${TMP:-/tmp} TESTLOG_PREFIX=${TESTLOG_PREFIX:-$TMP/recovery-mds-scale} +TESTNAME=${TESTNAME:-""} +[ -n "$TESTNAME" ] && TESTLOG_PREFIX=$TESTLOG_PREFIX.$TESTNAME + LOG=$TESTLOG_PREFIX.$(basename $0 .sh)_stdout.$(hostname -s).log DEBUGLOG=$(echo $LOG | sed 's/\(.*\)stdout/\1debug/') @@ -11,6 +13,7 @@ mkdir -p ${LOG%/*} rm -f $LOG $DEBUGLOG exec 2>$DEBUGLOG +set -x . $(dirname $0)/functions.sh @@ -32,24 +35,24 @@ while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do load_pid=$! wait $load_pid if [ ${PIPESTATUS[0]} -eq 0 ]; then - echoerr "$(date +'%F %H:%M:%S'): iozone succeeded" - cd $TMP - rm -rf $TESTDIR + echoerr "$(date +'%F %H:%M:%S'): iozone succeeded" + cd $TMP + rm -rf $TESTDIR if [ -d $TESTDIR ]; then - echoerr "$(date +'%F %H:%M:%S'): failed to remove $TESTDIR" - echo $(hostname) >> $END_RUN_FILE + echoerr "$(date +'%F %H:%M:%S'): failed to remove $TESTDIR" + echo $(hostname) >> $END_RUN_FILE CONTINUE=false fi - echoerr "$(date +'%F %H:%M:%S'): iozone run finished" + echoerr "$(date +'%F %H:%M:%S'): iozone run finished" else - echoerr "$(date +'%F %H:%M:%S'): iozone failed" - if [ -z "$ERRORS_OK" ]; then - echo $(hostname) >> $END_RUN_FILE - fi - if [ $BREAK_ON_ERROR ]; then - # break + echoerr "$(date +'%F %H:%M:%S'): iozone failed" + if [ -z "$ERRORS_OK" ]; then + echo $(hostname) >> $END_RUN_FILE + fi + if [ $BREAK_ON_ERROR ]; then + # break CONTINUE=false - fi + fi fi done diff --git a/lustre/tests/run_tar.sh b/lustre/tests/run_tar.sh index 9ad3a58..0b82ce1 100755 --- a/lustre/tests/run_tar.sh +++ b/lustre/tests/run_tar.sh @@ -1,9 +1,11 @@ #!/bin/bash -set -x TMP=${TMP:-/tmp} TESTLOG_PREFIX=${TESTLOG_PREFIX:-$TMP/recovery-mds-scale} +TESTNAME=${TESTNAME:-""} +[ -n "$TESTNAME" ] && TESTLOG_PREFIX=$TESTLOG_PREFIX.$TESTNAME + LOG=$TESTLOG_PREFIX.$(basename $0 .sh)_stdout.$(hostname -s).log DEBUGLOG=$(echo $LOG | sed 's/\(.*\)stdout/\1debug/') @@ -11,6 +13,7 @@ mkdir -p ${LOG%/*} rm -f $LOG $DEBUGLOG exec 2>$DEBUGLOG +set -x . $(dirname $0)/functions.sh @@ -42,19 +45,19 @@ while [ ! 
-e "$END_RUN_FILE" ] && $CONTINUE; do
         RC=0
     fi
     if [ $RC -eq 0 ]; then
-        echoerr "$(date +'%F %H:%M:%S'): tar succeeded"
-        cd $TMP
-        rm -rf $TESTDIR
-        echoerr "$(date +'%F %H:%M:%S'): tar run finished"
+        echoerr "$(date +'%F %H:%M:%S'): tar succeeded"
+        cd $TMP
+        rm -rf $TESTDIR
+        echoerr "$(date +'%F %H:%M:%S'): tar run finished"
     else
-        echoerr "$(date +'%F %H:%M:%S'): tar failed"
-        if [ -z "$ERRORS_OK" ]; then
-            echo $(hostname) >> $END_RUN_FILE
-        fi
-        if [ $BREAK_ON_ERROR ]; then
-            # break
+        echoerr "$(date +'%F %H:%M:%S'): tar failed"
+        if [ -z "$ERRORS_OK" ]; then
+            echo $(hostname) >> $END_RUN_FILE
+        fi
+        if [ $BREAK_ON_ERROR ]; then
+            # break
             CONTINUE=false
-        fi
+        fi
     fi
 done
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 9146697..880e55b 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -1111,6 +1111,7 @@ start_client_load() {
             END_RUN_FILE=$END_RUN_FILE \
             LOAD_PID_FILE=$LOAD_PID_FILE \
             TESTLOG_PREFIX=$TESTLOG_PREFIX \
+            TESTNAME=$TESTNAME \
             run_${load}.sh" &
     local ppid=$!
     log "Started client load: ${load} on $client"
 
@@ -1134,14 +1135,14 @@ start_client_loads () {
     sleep 2
 }
 
-# only for remote client 
+# only for remote client
 check_client_load () {
     local client=$1
     local var=$(node_var_name $client)_load
     local TESTLOAD=run_${!var}.sh
 
     ps auxww | grep -v grep | grep $client | grep -q "$TESTLOAD" || return 1
-    
+
     # bug 18914: try to connect several times not only when
     # check ps, but while check_catastrophe also
     local tries=3
@@ -1219,7 +1220,7 @@ restart_client_loads () {
         if [ "$rc" != 0 ]; then
             log "Client load failed to restart on node $client, rc=$rc"
             # failure one client load means test fail
-            # we do not need to check other 
+            # we do not need to check other
             return $rc
         fi
     else
@@ -1227,6 +1228,70 @@ restart_client_loads () {
         fi
     done
 }
+
+# Start vmstat and save its process ID in a file.
+start_vmstat() {
+    local nodes=$1
+    local pid_file=$2
+
+    [ -z "$nodes" -o -z "$pid_file" ] && return 0
+
+    do_nodes $nodes \
+        "vmstat 1 > $TESTLOG_PREFIX.$TESTNAME.vmstat.\\\$(hostname -s).log \
+        2>/dev/null </dev/null & echo \\\$! > $pid_file"
+}
+
+# Display the nodes on which client loads failed.
+print_end_run_file() {
+    local file=$1
+    local node
+
+    [ -s $file ] || return 0
+
+    echo "Found the END_RUN_FILE file: $file"
+    cat $file
+
+    # A client load will stop if it finds the END_RUN_FILE file.
+    # That does not mean the client load actually failed though.
+    # The first node in END_RUN_FILE is the one we are interested in.
+    read node < $file
+
+    if [ -n "$node" ]; then
+        local var=$(node_var_name $node)_load
+
+        local prefix=$TESTLOG_PREFIX
+        [ -n "$TESTNAME" ] && prefix=$prefix.$TESTNAME
+        local stdout_log=$prefix.run_${!var}_stdout.$node.log
+        local debug_log=$(echo $stdout_log | sed 's/\(.*\)stdout/\1debug/')
+
+        echo "Client load ${!var} failed on node $node:"
+        echo "$stdout_log"
+        echo "$debug_log"
+    fi
+}
+
+# Stop the process which had its PID saved in a file.
+stop_process() {
+    local nodes=$1
+    local pid_file=$2
+
+    [ -z "$nodes" -o -z "$pid_file" ] && return 0
+
+    do_nodes $nodes "test -f $pid_file &&
+        { kill -s TERM \\\$(cat $pid_file); rm -f $pid_file; }" || true
+}
+
+# Stop all client loads.
+stop_client_loads() { + local nodes=${1:-$CLIENTS} + local pid_file=$2 + + # stop the client loads + stop_process $nodes $pid_file + + # clean up the processes that started them + [ -n "$CLIENT_LOAD_PIDS" ] && kill -9 $CLIENT_LOAD_PIDS 2>/dev/null || true +} # End recovery-scale functions # verify that lustre actually cleaned up properly @@ -2262,6 +2327,7 @@ setupall() { [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE mount_client $MOUNT [ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT + clients_up if [ "$MOUNT_2" ]; then mount_client $MOUNT2 @@ -2705,7 +2771,8 @@ check_and_cleanup_lustre() { fi if is_mounted $MOUNT; then - [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* + [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* || + error "remove sub-test dirs failed" [ "$ENABLE_QUOTA" ] && restore_quota_type || true fi @@ -3067,9 +3134,10 @@ error_noexit() { log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ " + mkdir -p $LOGDIR # We need to dump the logs on all nodes if $dump; then - gather_logs $(comma_list $(nodes_list)) 0 + gather_logs $(comma_list $(nodes_list)) fi debugrestore @@ -4248,12 +4316,11 @@ cleanup_pools () { gather_logs () { local list=$1 - local tar_logs=$2 local ts=$(date +%s) local docp=true [ -f $LOGDIR/shared ] && docp=false - + # dump lustre logs, dmesg prefix="$TESTLOG_PREFIX.$TESTNAME" @@ -4272,13 +4339,6 @@ gather_logs () { dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}" if [ ! -f $LOGDIR/shared ]; then do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR - fi - - if [ $tar_logs == 1 ]; then - local archive=$LOGDIR/${TESTSUITE}-$ts.tar.bz2 - tar -jcf $archive $LOGDIR/*$ts* $LOGDIR/*${TESTSUITE}* - - echo $archive fi } -- 1.8.3.1