+# recovery-scale functions
+# Verify that a set of programs is available on every node of a client list.
+#
+# Arguments:
+#   $1    - node list, handed unchanged to do_nodes
+#   $2... - names of the programs to look for
+#
+# For each name the remote script accepts either non-empty output from
+# `which <name>` or a non-empty variable called <name> (looked up through
+# ${!prog}); otherwise it prints "<name> missing on <hostname>".
+#
+# Returns: the status of do_nodes.  The remote script finishes by running
+# `true` or `false`, so presumably do_nodes fails when a program is missing
+# on some node -- confirm against do_nodes, which is not shown here.
+check_progs_installed () {
+ local clients=$1
+ shift
+ # all remaining arguments, kept together in one string
+ local progs=$@
+
+ # $PATH and $progs are expanded here, on the calling side.  Every \\\" and
+ # \\\$ collapses to \" and \$ in the string given to do_nodes, i.e. those
+ # parts are still escaped once more.
+ # NOTE(review): presumably do_nodes passes the text through one further
+ # shell before it is executed -- confirm the required escape depth there.
+ # "PATH=:$PATH" puts an empty entry first, which PATH lookup treats as the
+ # current directory.
+ do_nodes $clients "PATH=:$PATH; status=true;
+for prog in $progs; do
+ if ! [ \\\"\\\$(which \\\$prog)\\\" -o \\\"\\\${!prog}\\\" ]; then
+ echo \\\$prog missing on \\\$(hostname);
+ status=false;
+ fi
+done;
+eval \\\$status"
+}
+
+client_var_name() {
+ echo __$(echo $1 | tr '-' 'X')
+}
+
+start_client_load() {
+ local client=$1
+ local load=$2
+ local var=$(client_var_name $client)_load
+ eval export ${var}=$load
+
+ do_node $client "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \
+ BREAK_ON_ERROR=$BREAK_ON_ERROR \
+ END_RUN_FILE=$END_RUN_FILE \
+ LOAD_PID_FILE=$LOAD_PID_FILE \
+ TESTSUITELOG=$TESTSUITELOG \
+ run_${load}.sh" &
+ CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $!"
+ log "Started client load: ${load} on $client"
+
+ return 0
+}
+
+# Start one load on every client, cycling through CLIENT_LOADS.
+#
+# Arguments:
+#   $1 - comma-separated list of client node names
+#
+# Globals:
+#   CLIENT_LOADS (read) - array of load names; client number i gets entry
+#                         i modulo the array length
+#
+# Returns: 1 if there are clients but CLIENT_LOADS is empty (the modulo below
+#          would otherwise divide by zero); otherwise the status of the last
+#          start_client_load call.
+start_client_loads () {
+    local -a clients=(${1//,/ })
+    local numloads=${#CLIENT_LOADS[@]}
+    local nodenum    # kept local so the loop counter does not leak to callers
+    local testnum
+
+    if [ ${#clients[@]} -gt 0 ] && [ $numloads -eq 0 ]; then
+        echo "start_client_loads: CLIENT_LOADS is empty, no load to start" >&2
+        return 1
+    fi
+
+    for ((nodenum = 0; nodenum < ${#clients[@]}; nodenum++)); do
+        testnum=$((nodenum % numloads))
+        start_client_load ${clients[nodenum]} ${CLIENT_LOADS[testnum]}
+    done
+}
+
+# only for remote client
+check_client_load () {
+ local client=$1
+ local var=$(client_var_name $client)_load
+ local TESTLOAD=run_${!var}.sh
+
+ ps auxww | grep -v grep | grep $client | grep -q "$TESTLOAD" || return 1
+
+ # bug 18914: try to connect several times not only when
+ # check ps, but while check_catastrophe also
+ local tries=3
+ local RC=254
+ while [ $RC = 254 -a $tries -gt 0 ]; do
+ let tries=$tries-1
+ # assume success
+ RC=0
+ if ! check_catastrophe $client; then
+ RC=${PIPESTATUS[0]}
+ if [ $RC -eq 254 ]; then
+ # FIXME: not sure how long we shuold sleep here
+ sleep 10
+ continue
+ fi
+ echo "check catastrophe failed: RC=$RC "
+ return $RC
+ fi
+ done
+ # We can continue try to connect if RC=254
+ # Just print the warning about this
+ if [ $RC = 254 ]; then
+ echo "got a return status of $RC from do_node while checking catastrophe on $client"
+ fi
+
+ # see if the load is still on the client
+ tries=3
+ RC=254
+ while [ $RC = 254 -a $tries -gt 0 ]; do
+ let tries=$tries-1
+ # assume success
+ RC=0
+ if ! do_node $client "ps auxwww | grep -v grep | grep -q $TESTLOAD"; then
+ RC=${PIPESTATUS[0]}
+ sleep 30
+ fi
+ done
+ if [ $RC = 254 ]; then
+ echo "got a return status of $RC from do_node while checking (catastrophe and 'ps') the client load on $client"
+ # see if we can diagnose a bit why this is
+ fi
+
+ return $RC
+}
+check_client_loads () {
+ local clients=${1//,/ }
+ local client=
+ local rc=0
+
+ for client in $clients; do
+ check_client_load $client
+ rc=${PIPESTATUS[0]}
+ if [ "$rc" != 0 ]; then
+ log "Client load failed on node $client, rc=$rc"
+ return $rc
+ fi
+ done
+}
+
+# Check the load on every client and restart the ones that have stopped.
+#
+# Arguments:
+#   $1 - comma-separated list of client node names
+#   $2 - optional; when non-empty a stopped load is expected and is
+#        restarted, when empty a stopped load is a failure
+#
+# The load to restart is the one remembered in <prefix>_load, <prefix> being
+# the output of client_var_name for the client.
+#
+# Returns: 0 when every client ends up with a running load; otherwise the
+#          check_client_load status of the first client that failed
+#          unexpectedly, has no remembered load, or did not come back after
+#          the restart.
+restart_client_loads () {
+    local clients=${1//,/ }
+    local expectedfail=${2:-""}
+    local client=
+    local var
+    local load
+    local rc=0
+
+    for client in $clients; do
+        check_client_load $client
+        rc=${PIPESTATUS[0]}
+        # a healthy load needs no action; keep checking the other clients
+        [ "$rc" = 0 ] && continue
+
+        # the failure was not announced by the caller
+        if [ -z "$expectedfail" ]; then
+            return $rc
+        fi
+
+        # restart the same load that was running before
+        var=$(client_var_name $client)_load
+        load=${!var}
+        if [ -z "$load" ]; then
+            log "No load recorded for node $client, cannot restart, rc=$rc"
+            return $rc
+        fi
+        start_client_load $client $load
+        echo "Restarted client load: on $client. Checking ..."
+        check_client_load $client
+        rc=${PIPESTATUS[0]}
+        if [ "$rc" != 0 ]; then
+            log "Client load failed to restart on node $client, rc=$rc"
+            # one failed client load fails the whole test, so the
+            # remaining clients need not be checked
+            return $rc
+        fi
+    done
+
+    return 0
+}
+# End recovery-scale functions
+