From: grev
Date: Tue, 24 Feb 2009 19:54:59 +0000 (+0000)
Subject: b=17839
X-Git-Tag: v1_9_162~21
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=852f34ac50727d3a012b9b325f9614b2b4fa7db7

b=17839
i=Brian

cmd3-17 port to acc-sm
---

diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am
index fae5f37..884d9fe 100644
--- a/lustre/tests/Makefile.am
+++ b/lustre/tests/Makefile.am
@@ -18,7 +18,7 @@ noinst_SCRIPTS += mdsrate-stat-small.sh mdsrate-stat-large.sh
 noinst_SCRIPTS += lockorder.sh socketclient socketserver runmultiop_bg_pause
 noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh setup_kerberos.sh
 noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh
-noinst_SCRIPTS += run_dbench.sh
+noinst_SCRIPTS += run_dbench.sh recovery-double-scale.sh
 nobase_noinst_SCRIPTS = cfg/local.sh
 nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh
 nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh
diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh
index b21af68..710e6f0 100755
--- a/lustre/tests/acceptance-small.sh
+++ b/lustre/tests/acceptance-small.sh
@@ -23,7 +23,7 @@ fi
 [ "$DEBUG_OFF" ] || DEBUG_OFF="eval lctl set_param debug=\"$DEBUG_LVL\""
 [ "$DEBUG_ON" ] || DEBUG_ON="eval lctl set_param debug=0x33f0484"
 
-export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY RECOVERY_MDS_SCALE"
+export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY RECOVERY_MDS_SCALE RECOVERY_DOUBLE_SCALE"
 
 if [ "$ACC_SM_ONLY" ]; then
     for O in $TESTSUITE_LIST; do
@@ -440,6 +440,14 @@ if [ "$RECOVERY_MDS_SCALE" != "no" ]; then
 	RECOVERY_MDS_SCALE="done"
 fi
 
+[ "$RECOVERY_DOUBLE_SCALE" != "no" ] && skip_remmds recovery-double-scale && RECOVERY_DOUBLE_SCALE=no && MSKIPPED=1
+[ "$RECOVERY_DOUBLE_SCALE" != "no" ] && skip_remost recovery-double-scale && RECOVERY_DOUBLE_SCALE=no && OSKIPPED=1
+if [ "$RECOVERY_DOUBLE_SCALE" != "no" ]; then
+	title recovery-double-scale
+	bash recovery-double-scale.sh
+	RECOVERY_DOUBLE_SCALE="done"
+fi
+
 RC=$?
 title FINISHED
 echo "Finished at `date` in $((`date +%s` - $STARTTIME))s"
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh
index ebf5cb8..06b5ca3 100755
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -60,19 +60,6 @@ set_fail_client() {
     echo "fail $FAIL_CLIENT, next is $FAIL_NEXT"
 }
 
-shutdown_client() {
-    client=$1
-    if [ "$FAILURE_MODE" = HARD ]; then
-        $POWER_DOWN $client
-        while ping -w 3 -c 1 $client > /dev/null 2>&1; do
-            echo "waiting for node $client to fail"
-            sleep 1
-        done
-    elif [ "$FAILURE_MODE" = SOFT ]; then
-        zconf_umount $client $MOUNT -f
-    fi
-}
-
 fail_clients() {
     num=$1
diff --git a/lustre/tests/recovery-double-scale.sh b/lustre/tests/recovery-double-scale.sh
new file mode 100644
index 0000000..d98dc65
--- /dev/null
+++ b/lustre/tests/recovery-double-scale.sh
@@ -0,0 +1,314 @@
+#!/bin/bash
+
+# All pairwise combinations of node failures.
+# Was cmd3-17
+#
+# Author: Chris Cooper
+#
+# Script fails a pair of nodes:
+# -- in parallel by default
+# -- in series if SERIAL is set
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+SETUP=${SETUP:-""}
+CLEANUP=${CLEANUP:-""}
+. $LUSTRE/tests/test-framework.sh
+
+init_test_env $@
+
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+
+TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-double-scale}
+DEBUGLOG=$TESTSUITELOG.debug
+exec 2>$DEBUGLOG
+echo "--- env ---" >&2
+env >&2
+echo "--- env ---" >&2
+set -x
+
+[ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
+[ $CLIENTCOUNT -ge 3 ] || \
+    { skip "$0 Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
+
+END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
+LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
+
+remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+
+check_timeout || exit 1
+
+build_test_filter
+
+check_and_setup_lustre
+rm -rf $DIR/[df][0-9]*
+
+# The test node needs to be insulated from a Lustre failure as much as
+# possible, so not even loading the Lustre modules is ideal.
+# -- umount lustre
+# -- remove hostname from clients list
+zconf_umount $(hostname) $MOUNT
+NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
+NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
+
+check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
+
+MDTS=$(get_facets MDS)
+OSTS=$(get_facets OST)
+
+rm -f $END_RUN_FILE
+
+reboot_recover_node () {
+    # item var contains a pair of clients if nodetype=clients
+    # I would prefer to have a list here
+    local item=$1
+    local nodetype=$2
+    local timeout=$($LCTL get_param -n timeout)
+
+    # MDS, OST item contains the facet
+    case $nodetype in
+        MDS|OST ) facet_failover $item
+                  [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
+                  ;;
+        clients ) for c in ${item//,/ }; do
+                      shutdown_client $c
+                      boot_node $c
+                  done
+                  start_client_loads $item || return $?
+                  ;;
+        * )       error "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
+                  exit 1;;
+    esac
+}
+
+get_item_type () {
+    local type=$1
+    local excluded=${2:-""}
+
+    local list
+    case $type in
+        MDS )     list=$MDTS;;
+        OST )     list=$OSTS;;
+        clients ) list=$NODES_TO_USE
+                  ;;
+        * )       error "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
+                  exit 1;;
+    esac
+
+    [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
+    # empty list
+    if [ ! "$(echo $list)" ]; then
+        echo
+        return
+    fi
+
+    item=$(get_random_entry $list)
+    if [ "$type" = clients ] ; then
+        item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
+        item=$(comma_list $item)
+    fi
+    echo $item
+}
+
+# failover_pair
+#
+# For the two node types specified, chooses a random node (or pair of
+# client nodes) from each class, fails the chosen nodes in turn, and
+# then restarts Lustre on them.
+failover_pair() {
+    local type1=$1
+    local type2=$2
+    local title=$3
+
+    local client_nodes=""
+    local item1=
+    local item2=
+    local client1=
+    local client2=
+
+    log "
+==== START === $title "
+
+    item1=$(get_item_type $type1)
+    [ "$item1" ] || \
+        { echo "type1=$type1 item1 is empty" && return 0; }
+    item2=$(get_item_type $type2 $item1)
+    [ "$item2" ] || \
+        { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
+
+    # Check that our client loads are still running. If any have died,
+    # that means they have died outside of recovery, which is unacceptable.
+    log "==== Checking the client loads BEFORE failover -- failure NOT OK"
+
+    # FIXME: need to print a summary on exit
+    if ! check_client_loads $NODES_TO_USE; then
+        exit 4
+    fi
+
+    log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
+
+    reboot_recover_node $item1 $type1 || return $?
+
+    # Hendrix test17 description:
+    # Introduce a failure, wait at least 5 minutes (for recovery),
+    # introduce a second failure, and wait another 5 minutes.
+
+    # reboot_recover_node waits for recovery according to the SERIAL
+    # setting. If SERIAL is not set we have a true "double failure",
+    # so no sleep is needed between the failures.
+
+    log " Failing type2=$type2 item2=$item2 ... "
+    reboot_recover_node $item2 $type2 || return $?
+
+    # Client loads are allowed to die while in recovery, so we just
+    # restart them.
+    log "==== Checking the client loads AFTER failovers -- ERRORS_OK=$ERRORS_OK"
+    restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
+    log "Done checking / restarting client loads. PASS"
+    return 0
+}
+
+summary_and_cleanup () {
+    local rc=$?
+    trap 0
+
+    # A non-empty END_RUN_FILE means that some client loads failed.
+    if [ -s $END_RUN_FILE ]; then
+        echo "Found the END_RUN_FILE file: $END_RUN_FILE"
+        cat $END_RUN_FILE
+        local END_RUN_NODE=
+        read END_RUN_NODE < $END_RUN_FILE
+
+        # A client load will end (i.e. fail) if it finds the end-run
+        # file. That does not mean the load on that client actually
+        # failed, though; the first node recorded in END_RUN_FILE is
+        # the one we are really interested in.
+        if [ -n "$END_RUN_NODE" ]; then
+            var=${END_RUN_NODE}_load
+            echo "Client load failed on node $END_RUN_NODE"
+            echo
+            echo "client $END_RUN_NODE load debug output :"
+            local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
+            do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
+        fi
+        rc=1
+    fi
+
+    echo $(date +'%F %H:%M:%S') Terminating client loads ...
+    echo "$0" >> $END_RUN_FILE
+    local result=PASS
+    [ $rc -eq 0 ] || result=FAIL
+
+    log "
+Server failover period: $FAILOVER_PERIOD seconds
+Exited after:           $ELAPSED seconds
+Status: $result: rc=$rc"
+
+    # make sure the client loads die
+    do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
+        { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
+
+    # and free up the pdshes that started them, if any are still around
+    if [ -n "$CLIENT_LOAD_PIDS" ]; then
+        kill $CLIENT_LOAD_PIDS || true
+        sleep 5
+        kill -9 $CLIENT_LOAD_PIDS || true
+    fi
+    [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
+    exit $rc
+}
+
+trap summary_and_cleanup EXIT TERM INT
+
+#
+# MAIN
+#
+log "-----============= $0 starting =============-----"
+
+START_TS=$(date +%s)
+CURRENT_TS=$START_TS
+ELAPSED=0
+
+# Set SERIAL to serialize the failures: wait for recovery from the
+# first failure before introducing the second.
+SERIAL=${SERIAL:-""}
+ERRORS_OK="yes"
+
+[ "$SERIAL" ] && ERRORS_OK=""
+
+FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60 * 5))} # 5 minutes
+
+# Start client loads.
+start_client_loads $NODES_TO_USE
+echo client load pids:
+if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $TMP/client-load.pid"; then
+    if [ -e $DEBUGLOG ]; then
+        exec 2<&-
+        cat $DEBUGLOG
+        exit 3
+    fi
+fi
+
+# FIXME: Do we want to have an initial sleep period where the clients
+# just run before introducing a failure?
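+
+# The sequence below walks the pairwise failure matrix (tests 1-9):
+# each failover_pair call picks random victims of the two given node
+# types, fails them back to back (or serially, if SERIAL is set), and
+# then verifies that the surviving client loads are still running.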
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.1
+failover_pair MDS OST     "test 1: failover MDS, then OST =========="
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.2
+failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.3
+if [ $MDSCOUNT -gt 1 ]; then
+    failover_pair MDS MDS "test 3: failover MDS, then another MDS =="
+    sleep $FAILOVER_PERIOD
+else
+    skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped"
+fi
+
+#CMD_TEST_NUM=17.4
+if [ $OSTCOUNT -gt 1 ]; then
+    failover_pair OST OST "test 4: failover OST, then another OST =="
+    sleep $FAILOVER_PERIOD
+else
+    skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
+fi
+
+#CMD_TEST_NUM=17.5
+failover_pair OST clients "test 5: failover OST, then 2 clients ===="
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.6
+failover_pair OST MDS     "test 6: failover OST, then MDS =========="
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.7
+failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
+sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.8
+#failover_pair clients OST "test 8: failover 2 clients, then OST ===="
+#sleep $FAILOVER_PERIOD
+
+#CMD_TEST_NUM=17.9
+if [ $CLIENTCOUNT -ge 5 ]; then
+    failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
+    sleep $FAILOVER_PERIOD
+fi
+
+log "==== Checking the client loads AFTER all failovers -- failure NOT OK"
+if ! check_client_loads $NODES_TO_USE; then
+    log "Client load failed after failover. Exiting"
+    exit 5
+fi
+
+CURRENT_TS=$(date +%s)
+ELAPSED=$((CURRENT_TS - START_TS))
+
+log "Completed successfully in $ELAPSED seconds"
+
+exit 0
diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh
index 4d6bb7c..598620b 100644
--- a/lustre/tests/recovery-mds-scale.sh
+++ b/lustre/tests/recovery-mds-scale.sh
@@ -47,21 +47,12 @@ rm -rf $DIR/[df][0-9]*
 # -- remove hostname from clients list
 zconf_umount $(hostname) $MOUNT
 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
-NODES_TO_USE=$(exclude_item_from_list $NODES_TO_USE $(hostname))
+NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
 
 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
 
-MDTS=""
-for ((i=1; i<=$MDSCOUNT; i++)) do
-    MDTS="$MDTS mds$i"
-done
-MDTS=$(comma_list $MDTS)
-
-OSTS=""
-for ((i=1; i<=$OSTCOUNT; i++)) do
-    OSTS="$OSTS ost$i"
-done
-OSTS=$(comma_list $OSTS)
+MDTS=$(get_facets MDS)
+OSTS=$(get_facets OST)
 
 ERRORS_OK=""    # No application failures should occur during this test.
 FLAVOR=${FLAVOR:-"MDS"}
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 54f5305..c6428c7 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -436,21 +436,6 @@ test_21a() {
 }
 run_test 21a "commit on sharing"
 
-shutdown_client() {
-    local client=$1
-    local mnt=$2
-
-    if [ "$FAILURE_MODE" = HARD ]; then
-        $POWER_DOWN $client
-        while ping -w 3 -c 1 $client > /dev/null 2>&1; do
-            echo "waiting for node $client to fail"
-            sleep 1
-        done
-    else
-        zconf_umount_clients $client $mnt -f
-    fi
-}
-
 test_21b_sub () {
     local mds=$1
     do_node $CLIENT1 rm -f $MOUNT1/$tfile-*
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 46bf4e7..5b618e0 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -475,7 +475,7 @@ test_20b() { # bug 10480
 
     fail $SINGLEMDS                    # start orphan recovery
     df -P $DIR || df -P $DIR || true   # reconnect
-    wait_mds_recovery_done || error "MDS recovery not done"
+    wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
 
     # FIXME just because recovery is done doesn't mean we've finished
     # orphan cleanup. Fake it with a sleep for now...
diff --git a/lustre/tests/run_dbench.sh b/lustre/tests/run_dbench.sh
index f82d9dd..45cfceb 100755
--- a/lustre/tests/run_dbench.sh
+++ b/lustre/tests/run_dbench.sh
@@ -33,7 +33,7 @@ trap signaled TERM
 # recovery-mds-scale uses this to signal the client loads to die
 echo $$ >$LOAD_PID_FILE
 
-TESTDIR=$MOUNT/dbench-$(hostname)
+TESTDIR=$MOUNT/d0.dbench-$(hostname)
 
 CONTINUE=true
diff --git a/lustre/tests/run_dd.sh b/lustre/tests/run_dd.sh
index 96a4950..f4f1a54 100755
--- a/lustre/tests/run_dd.sh
+++ b/lustre/tests/run_dd.sh
@@ -31,7 +31,7 @@ trap signaled TERM
 # recovery-mds-scale uses this to signal the client loads to die
 echo $$ >$LOAD_PID_FILE
 
-TESTDIR=$MOUNT/dd-$(hostname)
+TESTDIR=$MOUNT/d0.dd-$(hostname)
 
 CONTINUE=true
 while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do
diff --git a/lustre/tests/run_iozone.sh b/lustre/tests/run_iozone.sh
index 2b71118..2d075d7 100755
--- a/lustre/tests/run_iozone.sh
+++ b/lustre/tests/run_iozone.sh
@@ -31,7 +31,7 @@ trap signaled TERM
 # recovery-mds-scale uses this to signal the client loads to die
 echo $$ >$LOAD_PID_FILE
 
-TESTDIR=$MOUNT/iozone-$(hostname)
+TESTDIR=$MOUNT/d0.iozone-$(hostname)
 
 # needed to debug oom problem
 #echo 1 > /proc/sys/vm/vm_gfp_debug
diff --git a/lustre/tests/run_tar.sh b/lustre/tests/run_tar.sh
index 7502c241..5f40e68 100755
--- a/lustre/tests/run_tar.sh
+++ b/lustre/tests/run_tar.sh
@@ -31,7 +31,7 @@ trap signaled TERM
 # recovery-mds-scale uses this to signal the client loads to die
 echo $$ >$LOAD_PID_FILE
 
-TESTDIR=$MOUNT/tar-$(hostname)
+TESTDIR=$MOUNT/d0.tar-$(hostname)
 
 CONTINUE=true
 while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 88dadb1..e081f8d 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -622,11 +622,36 @@ fi
 fi"
 }
 
+shutdown_node_hard () {
+    local host=$1
+    local attempts=3
+
+    for i in $(seq $attempts) ; do
+        $POWER_DOWN $host
+        sleep 1
+        ping -w 3 -c 1 $host > /dev/null 2>&1 || return 0
+        echo "waiting for $host to fail, attempt $i of $attempts"
+        [ $i -lt $attempts ] || \
+            { echo "$host still pingable after power down! attempts=$attempts" && return 1; }
+    done
+}
+
+shutdown_client() {
+    local client=$1
+    local mnt=${2:-$MOUNT}
+    local attempts=3
+
+    if [ "$FAILURE_MODE" = HARD ]; then
+        shutdown_node_hard $client
+    else
+        zconf_umount_clients $client $mnt -f
+    fi
+}
+
 shutdown_facet() {
     local facet=$1
     if [ "$FAILURE_MODE" = HARD ]; then
-        $POWER_DOWN `facet_active_host $facet`
-        sleep 2
+        shutdown_node_hard $(facet_active_host $facet)
     elif [ "$FAILURE_MODE" = SOFT ]; then
         stop $facet
     fi
@@ -661,30 +686,30 @@ check_progs_installed () {
 }
 
 start_client_load() {
-    local list=(${1//,/ })
-    local nodenum=$2
-
-    local numloads=${#CLIENT_LOADS[@]}
-    local testnum=$((nodenum % numloads))
+    local client=$1
+    local var=${client}_load
 
-    do_node ${list[nodenum]} "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \
+    do_node $client "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \
 BREAK_ON_ERROR=$BREAK_ON_ERROR \
 END_RUN_FILE=$END_RUN_FILE \
 LOAD_PID_FILE=$LOAD_PID_FILE \
 TESTSUITELOG=$TESTSUITELOG \
-run_${CLIENT_LOADS[testnum]}.sh" &
+run_${!var}.sh" &
     CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $!"
-    log "Started client load: ${CLIENT_LOADS[testnum]} on ${list[nodenum]}"
+    log "Started client load: ${!var} on $client"
 
-    eval export ${list[nodenum]}_load=${CLIENT_LOADS[testnum]}
     return 0
 }
 
 start_client_loads () {
     local clients=(${1//,/ })
+    local numloads=${#CLIENT_LOADS[@]}
+    local testnum
 
-    for ((num=0; num < ${#clients[@]}; num++ )); do
-        start_client_load $1 $num
+    for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do
+        testnum=$((nodenum % numloads))
+        eval export ${clients[nodenum]}_load=${CLIENT_LOADS[testnum]}
+        start_client_load ${clients[nodenum]}
     done
 }
 
@@ -725,13 +750,39 @@ check_client_loads () {
 
     for client in $clients; do
         check_client_load $client
-        rc=$?
+        rc=${PIPESTATUS[0]}
        if [ "$rc" != 0 ]; then
          log "Client load failed on node $client, rc=$rc"
          return $rc
        fi
     done
 }
+
+restart_client_loads () {
+    local clients=${1//,/ }
+    local expectedfail=${2:-""}
+    local client=
+    local rc=0
+
+    for client in $clients; do
+        check_client_load $client
+        rc=${PIPESTATUS[0]}
+        if [ "$rc" != 0 -a "$expectedfail" ]; then
+            start_client_load $client
+            echo "Restarted client load: on $client. Checking ..."
+            check_client_load $client
+            rc=${PIPESTATUS[0]}
+            if [ "$rc" != 0 ]; then
+                log "Client load failed to restart on node $client, rc=$rc"
+                # the failure of one client load means the test fails;
+                # we do not need to check the others
+                return $rc
+            fi
+        elif [ "$rc" != 0 ]; then
+            # load died and failures were not expected
+            return $rc
+        fi
+    done
+}
 # End recovery-scale functions
 
 # verify that lustre actually cleaned up properly
@@ -805,32 +856,39 @@ wait_delete_completed () {
 }
 
 wait_for_host() {
-    local HOST=$1
-    check_network "$HOST" 900
-    while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done
+    local host=$1
+    check_network "$host" 900
+    while ! do_node $host "ls -d $LUSTRE " > /dev/null; do sleep 5; done
 }
 
 wait_for() {
     local facet=$1
-    local HOST=`facet_active_host $facet`
-    wait_for_host $HOST
+    local host=`facet_active_host $facet`
+    wait_for_host $host
 }
 
-wait_mds_recovery_done () {
-    local timeout=`do_facet $SINGLEMDS lctl get_param -n timeout`
-#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
-# as we are in process of changing obd_timeout in different ways
-# let's set MAX longer than that
-    local MAX=$(( timeout * 4 ))
+wait_recovery_complete () {
+    local facet=$1
+
+    # Use the default policy if $2 is not passed by the caller.
+    #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2)
+    # As we are in the process of changing obd_timeout in different
+    # ways, let's set MAX longer than that.
+    local MAX=${2:-$(( TIMEOUT * 4 ))}
+
+    local var_svc=${facet}_svc
+    local procfile="*.${!var_svc}.recovery_status"
     local WAIT=0
+    local STATUS=
+
     while [ $WAIT -lt $MAX ]; do
-        STATUS=`do_facet $SINGLEMDS "lctl get_param -n mdt.*-MDT0000.recovery_status | grep status"`
-        echo $STATUS | grep COMPLETE && return 0
+        STATUS=$(do_facet $facet lctl get_param -n $procfile | grep status)
+        [[ $STATUS = "status: COMPLETE" ]] && return 0
         sleep 5
         WAIT=$((WAIT + 5))
-        echo "Waiting $(($MAX - $WAIT)) secs for MDS recovery done"
+        echo "Waiting $((MAX - WAIT)) secs for $facet recovery done. $STATUS"
     done
-    echo "MDS recovery not done in $MAX sec"
+    echo "$facet recovery not done in $MAX sec. $STATUS"
     return 1
 }
 
@@ -919,7 +977,7 @@ facet_failover() {
     DFPID=$!
     echo "df pid is $DFPID"
     change_active $facet
-    TO=`facet_active_host $facet`
+    local TO=`facet_active_host $facet`
     echo "Failover $facet to $TO"
     wait_for $facet
     mount_facet $facet || error "Restart of $facet failed"
@@ -1560,13 +1618,16 @@ comma_list() {
     echo "$*" | tr -s " " "\n" | sort -b -u | tr "\n" " " | sed 's/ \([^$]\)/,\1/g'
 }
 
-# list is a comma-separated list
-exclude_item_from_list () {
+# list and excluded are comma-separated lists
+exclude_items_from_list () {
     local list=$1
     local excluded=$2
+    local item
 
     list=${list//,/ }
-    list=$(echo " $list " | sed -re "s/\s+$excluded\s+/ /g")
+    for item in ${excluded//,/ }; do
+        list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g")
+    done
     echo $(comma_list $list)
 }
 
@@ -1574,6 +1635,18 @@ absolute_path() {
     (cd `dirname $1`; echo $PWD/`basename $1`)
 }
 
+get_facets () {
+    local name=$(echo $1 | tr "[:upper:]" "[:lower:]")
+    local type=$(echo $1 | tr "[:lower:]" "[:upper:]")
+
+    local list=""
+    local count=${type}COUNT
+    for ((i=1; i <= ${!count}; i++)); do
+        list="$list ${name}$i"
+    done
+    echo $(comma_list $list)
+}
+
 ##################################
 # Adaptive Timeouts funcs
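
A minimal usage sketch of the generalized helpers above (illustrative only, not part of the patch), assuming a configured test-framework environment where MDSCOUNT=2, OSTCOUNT=2, and the per-facet *_svc variables are set:

    . $LUSTRE/tests/test-framework.sh

    MDTS=$(get_facets MDS)                           # -> "mds1,mds2"
    OSTS=$(get_facets OST)                           # -> "ost1,ost2"

    # Drop ost1 before picking a second victim from the same class.
    remaining=$(exclude_items_from_list $OSTS ost1)  # -> "ost2"

    # Wait for ost2 to finish recovery; MAX defaults to 4 * TIMEOUT
    # when no second argument is passed.
    wait_recovery_complete ost2 || echo "ost2 recovery not done"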