lustre/tests/recovery-random-scale.sh

   1 #!/bin/bash
   2 # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
   3
   4 # client failure does not affect other clients
   5
   6 # Start load on clients (each client works on it's own directory).
   7 # At defined (5-10 minutes) interval fail one random client and then fail mds.
   8 # Reintegrate failed client after recovery completed,
   9 # application errors are allowed for that client but not on other clients.
  10 # 10 minute intervals and verify that no application errors occur.
  11
  12 # Test runs one of CLIENT_LOAD progs on remote clients.
  13 set -e
  14
  15 ONLY=${ONLY:-"$*"}
  16
  17 # bug number for skipped test:
  18 ALWAYS_EXCEPT="$RECOVERY_RANDOM_SCALE_EXCEPT"
  19 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
  20
  21 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  22 . $LUSTRE/tests/test-framework.sh
  23 init_test_env $@
  24 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  25 init_logging
  26
  27 remote_mds_nodsh && skip_env "remote MDS with nodsh" && exit 0
  28 remote_ost_nodsh && skip_env "remote OST with nodsh" && exit 0
  29
  30 [ -z "$CLIENTS" -o $CLIENTCOUNT -lt 3 ] &&
  31     skip_env "need three or more clients" && exit 0
  32
  33 if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then
  34     skip_env "SHARED_DIRECTORY should be specified with a shared directory \
  35 which is accessable on all of the nodes"
  36     exit 0
  37 fi
  38
  39 [[ $FAILURE_MODE = SOFT ]] && \
  40     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  41
  42 # Application failures are allowed for the failed client
  43 # but not for other clients.
  44 ERRORS_OK="yes"
  45
  46 if [ "$SLOW" = "no" ]; then
  47     DURATION=${DURATION:-$((60 * 30))}
  48 else
  49     DURATION=${DURATION:-$((60 * 60 * 24))}
  50 fi
  51 SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
  52
  53 MINSLEEP=${MINSLEEP:-120}
  54 REQFAIL_PERCENT=${REQFAIL_PERCENT:-3}    # bug17839 comment 62
  55 REQFAIL=${REQFAIL:-$((DURATION / SERVER_FAILOVER_PERIOD *
  56                       REQFAIL_PERCENT / 100))}
  57
  58 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  59 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  60 VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
  61
  62 numfailovers () {
  63     local facet
  64     local var
  65
  66     for facet in $MDTS ${FAILED_CLIENTS//,/ }; do
  67         var=${facet}_nums
  68         val=${!var}
  69         if [ "$val" ] ; then
  70             echo "$facet failed over $val times"
  71         fi
  72     done
  73 }
  74
  75 summary_and_cleanup () {
  76     local rc=$?
  77     trap 0
  78
  79     # Having not empty END_RUN_FILE means the failed loads only
  80     if [ -s $END_RUN_FILE ]; then
  81         print_end_run_file $END_RUN_FILE
  82         rc=1
  83     fi
  84
  85     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
  86     echo "$0" >> $END_RUN_FILE
  87     local result=PASS
  88     [ $rc -eq 0 ] || result=FAIL
  89
  90     log "Duration:               $DURATION
  91 Server failover period: $SERVER_FAILOVER_PERIOD seconds
  92 Exited after:           $ELAPSED seconds
  93 Number of failovers before exit:
  94 $(numfailovers)
  95 Status: $result: rc=$rc"
  96
  97     # stop vmstat on OSS nodes
  98     [ "$VMSTAT" ] && stop_process $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
  99
 100     # stop the client loads
 101     stop_client_loads $NODES_TO_USE $LOAD_PID_FILE
 102
 103     if [ $rc -ne 0 ]; then
 104         # we are interested in only on failed clients and servers
 105         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 106         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
 107         gather_logs $(comma_list $(osts_nodes) $(mdts_nodes) \
 108                       $mdsfailover_HOST $failedclients)
 109     fi
 110
 111     exit $rc
 112 }
 113
 114 ################################## Main Flow ###################################
 115 build_test_filter
 116
 117 check_and_setup_lustre
 118 rm -rf $DIR/[Rdfs][0-9]*
 119
 120 MAX_RECOV_TIME=$(max_recovery_time)
 121
 122 # The test node needs to be insulated from a lustre failure as much as possible,
 123 # so not even loading the lustre modules is ideal.
 124 # -- umount lustre
 125 # -- remove hostname from clients list
 126 zconf_umount $HOSTNAME $MOUNT
 127 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
 128 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME)
 129
 130 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
 131
 132 MDTS=$(get_facets MDS)
 133
 134 # Fail a random client and then failover a random MDS.
 135 test_fail_client_mds() {
 136     local fail_client
 137     local serverfacet
 138     local client_var
 139     local var
 140
 141     trap summary_and_cleanup EXIT INT
 142
 143     # start vmstat on OSS nodes
 144     [ "$VMSTAT" ] && start_vmstat $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
 145
 146     # start client loads
 147     rm -f $END_RUN_FILE
 148     start_client_loads $NODES_TO_USE
 149
 150     echo client loads pids:
 151     do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3
 152
 153     ELAPSED=0
 154     local sleep=0
 155     local reqfail=0
 156     local it_time_start
 157     local start_ts=$(date +%s)
 158     local current_ts=$start_ts
 159
 160     while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
 161         # In order to perform the
 162         # expected number of failovers, we need to account the following:
 163         # 1) the time that has elapsed during the client load checking
 164         # 2) time takes for failover
 165         it_time_start=$(date +%s)
 166
 167         fail_client=$(get_random_entry $NODES_TO_USE)
 168         client_var=$(node_var_name $fail_client)_nums
 169
 170         # store the list of failed clients
 171         # lists are comma separated
 172         FAILED_CLIENTS=$(expand_list $FAILED_CLIENTS $fail_client)
 173
 174         serverfacet=$(get_random_entry $MDTS)
 175         var=${serverfacet}_nums
 176
 177         # Check that our client loads are still running. If any have died,
 178         # that means they have died outside of recovery, which is unacceptable.
 179         log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
 180              ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
 181         check_client_loads $NODES_TO_USE || exit 4
 182
 183         log "FAIL CLIENT $fail_client..."
 184         shutdown_client $fail_client
 185
 186         log "Starting failover on $serverfacet"
 187         facet_failover "$serverfacet" || exit 1
 188
 189         if ! wait_recovery_complete $serverfacet; then
 190             echo "$serverfacet recovery is not completed!"
 191             exit 7
 192         fi
 193
 194         boot_node $fail_client
 195         echo "Reintegrating $fail_client"
 196         zconf_mount $fail_client $MOUNT || exit $?
 197         client_up $fail_client || exit $?
 198
 199         # Increment the number of failovers
 200         val=$((${!var} + 1))
 201         eval $var=$val
 202         val=$((${!client_var} + 1))
 203         eval $client_var=$val
 204
 205         # load script on failed clients could create END_RUN_FILE
 206         # We shuold remove it and ignore the failure if this
 207         # file contains the failed client only.
 208         # We can not use ERRORS_OK when start all loads at the start of
 209         # this script because the application errors allowed for random
 210         # failed client only, but not for all clients.
 211         if [ -e $END_RUN_FILE ]; then
 212             local end_run_node
 213             read end_run_node < $END_RUN_FILE
 214             [[ $end_run_node = $fail_client ]] &&
 215                 rm -f $END_RUN_FILE || exit 13
 216         fi
 217
 218         restart_client_loads $fail_client $ERRORS_OK || exit $?
 219
 220         # Check that not failed clients loads are still running.
 221         # No application failures should occur on clients that were not failed.
 222         log "==== Checking the clients loads AFTER failed client reintegrated \
 223 -- failure NOT OK"
 224         if ! ERRORS_OK= check_client_loads \
 225             $(exclude_items_from_list $NODES_TO_USE $fail_client); then
 226             log "Client load failed. Exiting..."
 227             exit 5
 228         fi
 229
 230         current_ts=$(date +%s)
 231         ELAPSED=$((current_ts - start_ts))
 232         sleep=$((SERVER_FAILOVER_PERIOD - (current_ts - it_time_start)))
 233
 234         # Keep counting the number of iterations when
 235         # time spent to failover and two client loads check exceeded
 236         # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
 237         if [ $sleep -lt $MINSLEEP ]; then
 238             reqfail=$((reqfail + 1))
 239             log "WARNING: failover, client reintegration and \
 240 check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP!
 241 Failed to load the filesystem with I/O for a minimum period of \
 242 $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
 243 This iteration, the load was only applied for sleep=$sleep seconds.
 244 Estimated max recovery time : $MAX_RECOV_TIME
 245 Probably the hardware is taking excessively long time to boot.
 246 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), \
 247 bug 20918"
 248             [ $reqfail -gt $REQFAIL ] && exit 6
 249         fi
 250
 251         log "Number of failovers:
 252 $(numfailovers)                and counting..."
 253
 254         [ $((ELAPSED + sleep)) -ge $DURATION ] && break
 255
 256         if [ $sleep -gt 0 ]; then
 257             echo "sleeping $sleep seconds... "
 258             sleep $sleep
 259         fi
 260     done
 261     exit 0
 262 }
 263 run_test fail_client_mds "fail client, then failover MDS"
 264
 265 zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed"
 266 client_up || error "start client on $HOSTNAME failed"
 267
 268 complete $(basename $0) $SECONDS
 269 check_and_cleanup_lustre
 270 exit_status