lustre/tests/recovery-random-scale.sh

   1 #!/bin/bash
   2
   3 # client failure does not affect other clients
   4
   5 # Start load on clients (each client works on it's own directory).
   6 # At defined (5-10 minutes) interval fail one random client and then fail mds.
   7 # Reintegrate failed client after recovery completed,
   8 # application errors are allowed for that client but not on other clients.
   9 # 10 minute intervals and verify that no application errors occur.
  10
  11 # Test runs one of CLIENT_LOAD progs on remote clients.
  12
  13 LUSTRE=${LUSTRE:-`dirname $0`/..}
  14 SETUP=${SETUP:-""}
  15 CLEANUP=${CLEANUP:-""}
  16 . $LUSTRE/tests/test-framework.sh
  17
  18 init_test_env $@
  19
  20 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  21 init_logging
  22
  23 DEBUGLOG=$TESTLOG_PREFIX.suite_debug_log.$(hostname -s).log
  24
  25 exec 2>$DEBUGLOG
  26 echo "--- env ---" >&2
  27 env >&2
  28 echo "--- env ---" >&2
  29 set -x
  30
  31 [ "$SHARED_DIRECTORY" ] || \
  32     { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; }
  33
  34 check_shared_dir $SHARED_DIRECTORY ||
  35     error "$SHARED_DIRECTORY isn't a shared directory"
  36
  37 [ -n "$CLIENTS" ] || \
  38     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; }
  39
  40 [ $CLIENTCOUNT -ge 3 ] || \
  41     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; }
  42
  43 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  44 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  45 VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
  46
  47 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  48
  49 [[ $FAILURE_MODE = SOFT ]] && \
  50     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  51
  52 build_test_filter
  53
  54 check_and_setup_lustre
  55 rm -rf $DIR/[df][0-9]*
  56
  57 max_recov_time=$(max_recovery_time)
  58
  59 # the test node needs to be insulated from a lustre failure as much as possible,
  60 # so not even loading the lustre modules is ideal.
  61 # -- umount lustre
  62 # -- remove hostname from clients list
  63 zconf_umount $(hostname) $MOUNT
  64 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  65 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  66
  67 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  68
  69 MDTS=$(get_facets MDS)
  70
  71 if [ "$SLOW" = "no" ]; then
  72     DURATION=${DURATION:-$((60 * 30))}
  73     SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 5))}
  74 else
  75     DURATION=${DURATION:-$((60 * 60 * 24))}
  76     SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
  77 fi
  78
  79 rm -f $END_RUN_FILE
  80
  81 numfailovers () {
  82     local facet
  83     local var
  84
  85     for facet in $MDTS ${failed_clients//,/ }; do
  86         var=${facet}_nums
  87         val=${!var}
  88         if [ "$val" ] ; then
  89             echo "$facet failed  over  $val times"
  90         fi
  91     done
  92 }
  93
  94 summary_and_cleanup () {
  95     local rc=$?
  96     local var
  97     trap 0
  98
  99     # Having not empty END_RUN_FILE means the failed loads only
 100     if [ -s $END_RUN_FILE ]; then
 101         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 102         cat $END_RUN_FILE
 103         local END_RUN_NODE=
 104         read END_RUN_NODE < $END_RUN_FILE
 105
 106         # A client load will stop if it found the END_RUN_FILE file.
 107         # That does not mean the client load actually failed though.
 108         # The first node in END_RUN_FILE is the one we are interested in.
 109         if [ -n "$END_RUN_NODE" ]; then
 110             var=$(node_var_name $END_RUN_NODE)_load
 111             echo "Client load failed on node $END_RUN_NODE"
 112             echo
 113             echo "Client $END_RUN_NODE load stdout and debug files:
 114                 $TESTLOG_PREFIX.run_${!var}_stdout.$END_RUN_NODE.log
 115                 $TESTLOG_PREFIX.run_${!var}_debug.$END_RUN_NODE.log"
 116         fi
 117         rc=1
 118     fi
 119
 120     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 121     echo "$0" >> $END_RUN_FILE
 122     local result=PASS
 123     [ $rc -eq 0 ] || result=FAIL
 124
 125     log "Duration:                $DURATION
 126 Server failover period: $SERVER_FAILOVER_PERIOD seconds
 127 Exited after:           $ELAPSED seconds
 128 Number of failovers before exit:
 129 $(numfailovers)
 130 Status: $result: rc=$rc"
 131
 132     # stop the vmstats on the OSTs
 133     if [ "$VMSTAT" ]; then
 134         do_nodes $(comma_list $(osts_nodes)) "test -f $VMSTAT_PID_FILE &&
 135             { kill -s TERM \\\$(cat $VMSTAT_PID_FILE);
 136             rm -f $VMSTAT_PID_FILE || true; }"
 137     fi
 138
 139     # make sure the client loads die
 140     do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE &&
 141         { kill -s TERM \\\$(cat $LOAD_PID_FILE);
 142         rm -f $LOAD_PID_FILE || true; }"
 143
 144     # and free up the pdshes that started them, if any are still around
 145     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 146         kill $CLIENT_LOAD_PIDS || true
 147         sleep 5
 148         kill -9 $CLIENT_LOAD_PIDS || true
 149     fi
 150
 151     if [ $rc -ne 0 ]; then
 152         # we are interested in only on failed clients and servers
 153         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 154         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
 155         local product=$(gather_logs $(comma_list $(osts_nodes) \
 156                         $(mdts_nodes) $mdsfailover_HOST $failedclients) 1)
 157         echo $product
 158     fi
 159
 160     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 161
 162     exit $rc
 163 }
 164
 165 #
 166 # MAIN
 167 #
 168 log "-----============= $0 starting =============-----"
 169
 170 trap summary_and_cleanup EXIT # INT
 171
 172 ELAPSED=0
 173
 174 # vmstat the osts
 175 if [ "$VMSTAT" ]; then
 176     do_nodes $(comma_list $(osts_nodes)) \
 177         "vmstat 1 > $TESTLOG_PREFIX.vmstat.\\\$(hostname -s).log \
 178         2>/dev/null </dev/null & echo \\\$! > $VMSTAT_PID_FILE"
 179 fi
 180
 181 # Start client loads.
 182 start_client_loads $NODES_TO_USE
 183
 184 echo clients load pids:
 185 if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then
 186     exit 3
 187 fi
 188
 189 START_TS=$(date +%s)
 190 CURRENT_TS=$START_TS
 191
 192 MINSLEEP=${MINSLEEP:-120}
 193 REQFAIL_PERCENT=${REQFAIL_PERCENT:-3}   # bug17839 comment 62
 194 REQFAIL=${REQFAIL:-$(( DURATION / SERVER_FAILOVER_PERIOD * REQFAIL_PERCENT / 100))}
 195 reqfail=0
 196 sleep=0
 197
 198 # This is used for FAIL_CLIENT only
 199 ERRORS_OK="yes"
 200 while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
 201
 202     # In order to perform the
 203     # expected number of failovers, we need to account the following :
 204     # 1) the time that has elapsed during the client load checking
 205     # 2) time takes for failover
 206
 207     it_time_start=$(date +%s)
 208
 209     FAIL_CLIENT=$(get_random_entry $NODES_TO_USE)
 210     client_var=$(node_var_name $FAIL_CLIENT)_nums
 211
 212     # store the list of failed clients
 213     # lists are comma separated
 214     failed_clients=$(expand_list $failed_clients $FAIL_CLIENT)
 215
 216     SERVERFACET=$(get_random_entry $MDTS)
 217     var=${SERVERFACET}_nums
 218
 219     # Check that our client loads are still running. If any have died,
 220     # that means they have died outside of recovery, which is unacceptable.
 221
 222     log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
 223     ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
 224
 225     if ! check_client_loads $NODES_TO_USE; then
 226         exit 4
 227     fi
 228
 229     log "FAIL CLIENT $FAIL_CLIENT ... "
 230     shutdown_client $FAIL_CLIENT
 231
 232     log "Starting failover on $SERVERFACET"
 233
 234     facet_failover "$SERVERFACET" || exit 1
 235     if ! wait_recovery_complete $SERVERFACET ; then
 236         echo "$SERVERFACET recovery is not completed!"
 237         exit 7
 238     fi
 239
 240     boot_node $FAIL_CLIENT
 241     echo "Reintegrating $FAIL_CLIENT"
 242     zconf_mount $FAIL_CLIENT $MOUNT || exit $?
 243
 244     # Increment the number of failovers
 245     val=$((${!var} + 1))
 246     eval $var=$val
 247     val=$((${!client_var} + 1))
 248     eval $client_var=$val
 249
 250     # load script on failed clients could create END_RUN_FILE
 251     # We shuold remove it and ignore the failure if this
 252     # file contains the failed client only.
 253     # We can not use ERRORS_OK when start all loads at the start of this script
 254     # because the application errors allowed for random failed client only, but
 255     # not for all clients.
 256     if [ -e $END_RUN_FILE ]; then
 257         read END_RUN_NODE < $END_RUN_FILE
 258         [[ $END_RUN_NODE = $FAIL_CLIENT ]] &&
 259             rm -f $END_RUN_FILE || exit 13
 260     fi
 261
 262     restart_client_loads $FAIL_CLIENT $ERRORS_OK || exit $?
 263
 264     # Check that not failed clients loads are still running.
 265     # No application failures should occur on clients that was not failed.
 266
 267     log "==== Checking the clients loads AFTER failed client reintegrated -- failure NOT OK"
 268     if ! ERRORS_OK= check_client_loads $(exclude_items_from_list $NODES_TO_USE $FAIL_CLIENT); then
 269         log "Client load failed. Exiting"
 270         exit 5
 271     fi
 272
 273     CURRENT_TS=$(date +%s)
 274     ELAPSED=$((CURRENT_TS - START_TS))
 275     sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
 276
 277     # keep count the number of itterations when
 278     # time spend to failover and two client loads check exceeded
 279     # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
 280     if [ $sleep -lt $MINSLEEP ]; then
 281         reqfail=$((reqfail +1))
 282         log "WARNING: failover, client reintegration and check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP !
 283 Failed to load the filesystem with I/O for a minimum period of $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
 284 This iteration, the load was only applied for sleep=$sleep seconds.
 285 Estimated max recovery time : $max_recov_time
 286 Probably the hardware is taking excessively long to boot.
 287 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
 288         [ $reqfail -gt $REQFAIL ] && exit 6
 289     fi
 290
 291     log " Number of failovers:
 292 $(numfailovers)                and counting..."
 293
 294     if [ $((ELAPSED + sleep)) -ge $DURATION ]; then
 295          break
 296     fi
 297
 298     if [ $sleep -gt 0 ]; then
 299         echo "sleeping $sleep seconds ... "
 300         sleep $sleep
 301     fi
 302 done
 303
 304 exit 0