lustre/tests/recovery-random-scale.sh

   1 #!/bin/bash
   2 #
   3 # client failure does not affect other clients
   4
   5 # Start load on clients (each client works on it's own directory).
   6 # At defined (5-10 minutes) interval fail one random client and then fail mds.
   7 # Reintegrate failed client after recovery completed,
   8 # application errors are allowed for that client but not on other clients.
   9 # 10 minute intervals and verify that no application errors occur.
  10
  11 # Test runs one of CLIENT_LOAD progs on remote clients.
  12 set -e
  13
  14 ONLY=${ONLY:-"$*"}
  15
  16 LUSTRE=${LUSTRE:-$(dirname $0)/..}
  17 . $LUSTRE/tests/test-framework.sh
  18 init_test_env "$@"
  19 init_logging
  20
  21 # bug number for skipped test:
  22 ALWAYS_EXCEPT="$RECOVERY_RANDOM_SCALE_EXCEPT"
  23 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
  24
  25 build_test_filter
  26
  27 remote_mds_nodsh && skip_env "remote MDS with nodsh"
  28 remote_ost_nodsh && skip_env "remote OST with nodsh"
  29
  30 [ $CLIENTCOUNT -lt 3 ] &&
  31     skip_env "need three or more clients"
  32
  33 if [ -z "$SHARED_DIRECTORY" ] || ! check_shared_dir $SHARED_DIRECTORY; then
  34         skip_env "SHARED_DIRECTORY should be specified with a shared directory \
  35 which is accessible on all of the nodes"
  36 fi
  37
  38 [[ $FAILURE_MODE = SOFT ]] && \
  39         log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  40
  41 # Application failures are allowed for the failed client
  42 # but not for other clients.
  43 ERRORS_OK="yes"
  44
  45 if [ "$SLOW" = "no" ]; then
  46         DURATION=${DURATION:-$((60 * 30))}
  47 else
  48         DURATION=${DURATION:-$((60 * 60 * 24))}
  49 fi
  50 SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 10))} # 10 minutes
  51
  52 MINSLEEP=${MINSLEEP:-120}
  53 REQFAIL_PERCENT=${REQFAIL_PERCENT:-3}    # bug17839 comment 62
  54 # round up the result of integer division: C=(A + (B - 1)) / B
  55 REQFAIL=${REQFAIL:-$(((DURATION * REQFAIL_PERCENT + (SERVER_FAILOVER_PERIOD *
  56         100 - 1 )) / SERVER_FAILOVER_PERIOD / 100))}
  57
  58 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  59 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  60 VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid}
  61
  62 numfailovers () {
  63         local facet
  64         local var
  65
  66         for facet in ${MDTS//,/ } ${FAILED_CLIENTS//,/ }; do
  67                 var=$(node_var_name $facet)_nums
  68                 val=${!var}
  69                 if [ "$val" ] ; then
  70                         echo "$facet failed over $val times"
  71                 fi
  72         done
  73 }
  74
  75 summary_and_cleanup () {
  76         local rc=$?
  77
  78         # Having not empty END_RUN_FILE means the failed loads only
  79         if [ -s $END_RUN_FILE ]; then
  80                 print_end_run_file $END_RUN_FILE
  81                 rc=1
  82         fi
  83
  84         echo $(date +'%F %H:%M:%S') Terminating clients loads ...
  85         echo "$0" >> $END_RUN_FILE
  86         local result=PASS
  87         [ $rc -eq 0 ] || result=FAIL
  88
  89         log "Duration:               $DURATION
  90 Server failover period: $SERVER_FAILOVER_PERIOD seconds
  91 Exited after:           $ELAPSED seconds
  92 Number of failovers before exit:
  93 $(numfailovers)
  94 Status: $result: rc=$rc"
  95
  96         # stop vmstat on OSS nodes
  97         [ "$VMSTAT" ] &&
  98                 stop_process $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
  99
 100         # stop the client loads
 101         stop_client_loads $NODES_TO_USE $LOAD_PID_FILE
 102
 103         if [ $rc -ne 0 ]; then
 104                 # we are interested in only on failed clients and servers
 105                 local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 106                 gather_logs $(comma_list $(all_server_nodes) $failedclients)
 107         fi
 108
 109         exit $rc
 110 }
 111
 112 ################################## Main Flow ###################################
 113 check_and_setup_lustre
 114 rm -rf $DIR/[Rdfs][0-9]*
 115
 116 MAX_RECOV_TIME=$(max_recovery_time)
 117
 118 # The test node needs to be insulated from a lustre failure as much as possible,
 119 # so not even loading the lustre modules is ideal.
 120 # -- umount lustre
 121 # -- remove hostname from clients list
 122 zconf_umount $HOSTNAME $MOUNT
 123 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
 124 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $HOSTNAME)
 125
 126 check_progs_installed $NODES_TO_USE "${CLIENT_LOADS[@]}"
 127
 128 MDTS=$(get_facets MDS)
 129
 130 # Fail a random client and then failover a random MDS.
 131 test_fail_client_mds() {
 132         local fail_client
 133         local serverfacet
 134         local client_var
 135         local var
 136
 137         stack_trap summary_and_cleanup EXIT INT
 138
 139         # start vmstat on OSS nodes
 140         [ "$VMSTAT" ] &&
 141                 start_vmstat $(comma_list $(osts_nodes)) $VMSTAT_PID_FILE
 142
 143         # start client loads
 144         rm -f $END_RUN_FILE
 145         start_client_loads $NODES_TO_USE
 146
 147         echo client loads pids:
 148         do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE" || exit 3
 149
 150         ELAPSED=0
 151         local it_time_start
 152         local sleep=0
 153         local reqfail=0
 154         local start_ts=$(date +%s)
 155         local current_ts=$start_ts
 156
 157         while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
 158                 # In order to perform the
 159                 # expected number of failovers, we need to account the
 160                 # following:
 161                 # 1) the time that has elapsed during the client load checking
 162                 # 2) time takes for failover
 163                 it_time_start=$(date +%s)
 164
 165                 fail_client=$(get_random_entry $NODES_TO_USE)
 166                 client_var=$(node_var_name $fail_client)_nums
 167
 168                 # store the list of failed clients
 169                 # lists are comma separated
 170                 FAILED_CLIENTS=$(expand_list $FAILED_CLIENTS $fail_client)
 171
 172                 serverfacet=$(get_random_entry $MDTS)
 173                 var=$(node_var_name $serverfacet)_nums
 174
 175                 # Check that our client loads are still running. If any have
 176                 # died, that means they have died outside of recovery, which
 177                 # is unacceptable.
 178                 log "==== Checking clients loads BEFORE failover -- failure NOT OK \
 179                      ELAPSED=$ELAPSED DURATION=$DURATION \
 180                      PERIOD=$SERVER_FAILOVER_PERIOD"
 181                 check_client_loads $NODES_TO_USE || exit 4
 182
 183                 log "FAIL CLIENT $fail_client..."
 184                 shutdown_client $fail_client
 185
 186                 log "Starting failover on $serverfacet"
 187                 facet_failover "$serverfacet" || exit 1
 188
 189                 if ! wait_recovery_complete $serverfacet; then
 190                         echo "$serverfacet recovery is not completed!"
 191                         exit 7
 192                 fi
 193
 194                 boot_node $fail_client
 195                 echo "Reintegrating $fail_client"
 196                 zconf_mount $fail_client $MOUNT || exit $?
 197                 client_up $fail_client || exit $?
 198
 199                 # Increment the number of failovers
 200                 val=$((${!var} + 1))
 201                 eval $var=$val
 202                 val=$((${!client_var} + 1))
 203                 eval $client_var=$val
 204
 205                 # load script on failed clients could create END_RUN_FILE
 206                 # We shuold remove it and ignore the failure if this
 207                 # file contains the failed client only.
 208                 # We can not use ERRORS_OK when start all loads at the start of
 209                 # this script because the application errors allowed for random
 210                 # failed client only, but not for all clients.
 211                 if [ -e $END_RUN_FILE ]; then
 212                         local end_run_node
 213                         read end_run_node < $END_RUN_FILE
 214                         [[ $end_run_node = $fail_client ]] &&
 215                                 rm -f $END_RUN_FILE || exit 13
 216                 fi
 217
 218                 restart_client_loads $fail_client $ERRORS_OK || exit $?
 219
 220                 # Check that not failed clients loads are still running.
 221                 # No application failures should occur on clients that were
 222                 # not failed.
 223                 log "==== Checking clients loads AFTER failed client reintegrated \
 224                         -- failure NOT OK"
 225                 if ! ERRORS_OK= check_client_loads \
 226                         $(exclude_items_from_list $NODES_TO_USE $fail_client); then
 227                         log "Client load failed. Exiting..."
 228                         exit 5
 229                 fi
 230
 231                 current_ts=$(date +%s)
 232                 ELAPSED=$((current_ts - start_ts))
 233                 sleep=$((SERVER_FAILOVER_PERIOD - (current_ts - it_time_start)))
 234
 235                 # Keep counting the number of iterations when
 236                 # time spent to failover and two client loads check exceeded
 237                 # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP ).
 238                 if [ $sleep -lt $MINSLEEP ]; then
 239                         reqfail=$((reqfail + 1))
 240                         log "WARNING: failover, client reintegration and \
 241 check_client_loads time exceeded SERVER_FAILOVER_PERIOD - MINSLEEP!
 242 Failed to load the filesystem with I/O for a minimum period of \
 243 $MINSLEEP $reqfail times ( REQFAIL=$REQFAIL ).
 244 This iteration, the load was only applied for sleep=$sleep seconds.
 245 Estimated max recovery time : $MAX_RECOV_TIME
 246 Probably the hardware is taking excessively long time to boot.
 247 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), \
 248 bug 20918"
 249                         [ $reqfail -gt $REQFAIL ] && exit 6
 250                 fi
 251
 252                 log "Number of failovers:
 253 $(numfailovers)                and counting..."
 254
 255                 [ $((ELAPSED + sleep)) -ge $DURATION ] && break
 256
 257                 if [ $sleep -gt 0 ]; then
 258                         echo "sleeping $sleep seconds... "
 259                         sleep $sleep
 260                 fi
 261         done
 262         exit 0
 263 }
 264 run_test fail_client_mds "fail client, then failover MDS"
 265
 266 zconf_mount $HOSTNAME $MOUNT || error "mount $MOUNT on $HOSTNAME failed"
 267 client_up || error "start client on $HOSTNAME failed"
 268
 269 complete $SECONDS
 270 check_and_cleanup_lustre
 271 exit_status