lustre/tests/recovery-double-scale.sh

   1 #!/bin/bash
   2
   3 # All pairwise combinations of node failures.
   4 # Was cmd3-17
   5 #
   6 # Author: Chris Cooper <ccooper@clusterfs.com>
   7 #
   8 # Script fails pair of nodes:
   9 # --  in parallel by default
  10 # --  in series if SERIAL is set
  11
  12 LUSTRE=${LUSTRE:-`dirname $0`/..}
  13 SETUP=${SETUP:-""}
  14 CLEANUP=${CLEANUP:-""}
  15 . $LUSTRE/tests/test-framework.sh
  16
  17 init_test_env $@
  18
  19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  20 TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-double-scale}
  21 DEBUGLOG=$TESTSUITELOG.debug
  22 exec 2>$DEBUGLOG
  23 echo "--- env ---" >&2
  24 env >&2
  25 echo "--- env ---" >&2
  26 set -x
  27
  28 [ "$SHARED_DIRECTORY" ] || \
  29     { skip "$0: Empty SHARED_DIRECTORY" && exit 0; }
  30
  31 [ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
  32 [ $CLIENTCOUNT -ge 3 ] || \
  33     { skip "$0 Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
  34
  35 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  36 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  37
  38 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  39 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
  40
  41 check_timeout || exit 1
  42
  43 build_test_filter
  44
  45 check_and_setup_lustre
  46 rm -rf $DIR/[df][0-9]*
  47
  48 # the test node needs to be insulated from a lustre failure as much as possible,
  49 # so not even loading the lustre modules is ideal.
  50 # -- umount lustre
  51 # -- remove hostname from clients list
  52 zconf_umount $(hostname) $MOUNT
  53 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  54 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  55
  56 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  57
  58 MDTS=$(get_facets MDS)
  59 OSTS=$(get_facets OST)
  60
  61 rm -f $END_RUN_FILE
  62
  63 reboot_recover_node () {
  64     # item var contains a pair of clients if nodetype=clients
  65     # I would prefer to have a list here
  66     local item=$1
  67     local nodetype=$2
  68     local timeout=$($LCTL get_param  -n timeout)
  69
  70     # MDS, OST item contains the facet
  71     case $nodetype in
  72        MDS|OST )    facet_failover $item
  73                 [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
  74                 ;;
  75        clients) for c in ${item//,/ }; do
  76                       shutdown_client $c
  77                       boot_node $c
  78                       echo "Reintegrating $c"
  79                       # one client fails; need dk logs from this client only
  80                       zconf_mount $c $MOUNT || NODES="$c $(mdts_nodes) $(osts_nodes)" error_exit "zconf_mount failed"
  81                  done
  82                  start_client_loads $item
  83                  ;;
  84                 # script failure:
  85                 # don't use error (), the logs from all nodes not needed
  86        * )      echo "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
  87                 exit 1;;
  88     esac
  89 }
  90
  91 get_item_type () {
  92     local type=$1
  93     local excluded=${2:-""}
  94
  95     local list
  96     case $type in
  97        MDS )    list=$MDTS;;
  98        OST )    list=$OSTS;;
  99        clients) list=$NODES_TO_USE
 100                 ;;
 101                 # script failure:
 102                 # don't use error (), the logs from all nodes not needed
 103        * )      echo "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
 104                 exit 1;;
 105     esac
 106
 107     [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
 108     # empty list
 109     if [ ! "$(echo $list)" ]; then
 110         echo
 111         return
 112     fi
 113
 114     item=$(get_random_entry $list)
 115     if [ "$type" = clients ] ; then
 116         item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
 117         item=$(comma_list $item)
 118     fi
 119     echo $item
 120 }
 121
 122 # failover_pair
 123 #
 124 # for the two nodetypes specified, chooses a random node(s) from each
 125 # class, reboots the nodes sequentially, and then restarts lustre on
 126 # the nodes.
 127 failover_pair() {
 128     local type1=$1
 129     local type2=$2
 130     local title=$3
 131
 132     local client_nodes=""
 133     local item1=
 134     local item2=
 135     local client1=
 136     local client2=
 137
 138     log "
 139 ==== START === $title "
 140
 141     item1=$(get_item_type $type1)
 142     [ "$item1" ] || \
 143         { echo "type1=$type1 item1 is empty" && return 0; }
 144     item2=$(get_item_type $type2 $item1)
 145     [ "$item2" ] || \
 146         { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
 147
 148     # Check that our client loads are still running. If any have died,
 149     # that means they have died outside of recovery, which is unacceptable.
 150     log "==== Checking the clients loads BEFORE failover -- failure NOT OK"
 151
 152     # FIXME. need print summary on exit
 153     if ! check_client_loads $NODES_TO_USE; then
 154         exit 4
 155     fi
 156
 157     log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
 158
 159     reboot_recover_node $item1 $type1
 160
 161     # Hendrix test17 description:
 162     # Introduce a failure, wait at
 163     # least 5 minutes (for recovery),
 164     # introduce a 2nd
 165     # failure, and wait another 5
 166     # minutes
 167
 168     # reboot_recover_node waits recovery in according to
 169     # SERIAL value.
 170     # We have a "double failures" if SERIAL is not set,
 171     # do not need a sleep between failures for "double failures"
 172
 173     log "                            Failing type2=$type2 item2=$item2 ... "
 174     reboot_recover_node $item2 $type2
 175
 176     # Client loads are allowed to die while in recovery, so we just
 177     # restart them.
 178     log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
 179     restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
 180     log "Done checking / re-Starting client loads. PASS"
 181     return 0
 182 }
 183
 184 summary_and_cleanup () {
 185     local rc=$?
 186     trap 0
 187
 188     # Having not empty END_RUN_FILE means the failed loads only
 189     if [ -s $END_RUN_FILE ]; then
 190         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 191         cat $END_RUN_FILE
 192         local END_RUN_NODE=
 193         read END_RUN_NODE < $END_RUN_FILE
 194
 195         # a client load will end (i.e. fail) if it finds
 196         # the end run file.  that does not mean that that client load
 197         # actually failed though.  the first node in the END_RUN_NODE is
 198         # the one we are really interested in.
 199         if [ -n "$END_RUN_NODE" ]; then
 200             var=$(client_var_name $END_RUN_NODE)_load
 201             echo "Client load failed on node $END_RUN_NODE"
 202             echo
 203             echo "client $END_RUN_NODE load debug output :"
 204             local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
 205             do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
 206         fi
 207         rc=1
 208     fi
 209
 210     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 211     echo "$0" >> $END_RUN_FILE
 212     local result=PASS
 213     [ $rc -eq 0 ] || result=FAIL
 214
 215     log "
 216 Server failover period: $FAILOVER_PERIOD seconds
 217 Exited after:           $ELAPSED seconds
 218 Status: $result: rc=$rc"
 219
 220     # make sure the client loads die
 221     do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
 222         { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
 223
 224     # and free up the pdshes that started them, if any are still around
 225     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 226         kill $CLIENT_LOAD_PIDS || true
 227         sleep 5
 228         kill -9 $CLIENT_LOAD_PIDS || true
 229     fi
 230     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 231     exit $rc
 232 }
 233
 234 trap summary_and_cleanup EXIT TERM INT
 235
 236 #
 237 # MAIN
 238 #
 239 log "-----============= $0 starting =============-----"
 240
 241 START_TS=$(date +%s)
 242 CURRENT_TS=$START_TS
 243 ELAPSED=0
 244
 245 # Set SERIAL to serialize the failure through a recovery of the first failure.
 246 SERIAL=${SERIAL:-""}
 247 ERRORS_OK="yes"
 248
 249 [ "$SERIAL" ] && ERRORS_OK=""
 250
 251 FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
 252
 253 # Start client loads.
 254 start_client_loads $NODES_TO_USE
 255 echo clients load pids:
 256 if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $TMP/client-load.pid"; then
 257     if [ -e $DEBUGLOG ]; then
 258         exec 2<&-
 259         cat $DEBUGLOG
 260         exit 3
 261     fi
 262 fi
 263
 264 # FIXME: Do we want to have an initial sleep period where the clients
 265 # just run before introducing a failure?
 266 sleep $FAILOVER_PERIOD
 267
 268 #CMD_TEST_NUM=17.1
 269 failover_pair MDS OST     "test 1: failover MDS, then OST =========="
 270 sleep $FAILOVER_PERIOD
 271
 272 #CMD_TEST_NUM=17.2
 273 failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
 274 sleep $FAILOVER_PERIOD
 275
 276 #CMD_TEST_NUM=17.3
 277 if [ $MDSCOUNT -gt 1 ]; then
 278     failover_pair MDS MDS     "test 3: failover MDS, then another MDS =="
 279     sleep $FAILOVER_PERIOD
 280 else
 281     skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped"
 282 fi
 283
 284 #CMD_TEST_NUM=17.4
 285 if [ $OSTCOUNT -gt 1 ]; then
 286     failover_pair OST OST     "test 4: failover OST, then another OST =="
 287     sleep $FAILOVER_PERIOD
 288 else
 289     skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
 290 fi
 291
 292 #CMD_TEST_NUM=17.5
 293 failover_pair OST clients "test 5: failover OST, then 2 clients ===="
 294 sleep $FAILOVER_PERIOD
 295
 296 #CMD_TEST_NUM=17.6
 297 failover_pair OST MDS     "test 6: failover OST, then MDS =========="
 298 sleep $FAILOVER_PERIOD
 299
 300 #CMD_TEST_NUM=17.7
 301 failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
 302 sleep $FAILOVER_PERIOD
 303
 304 #CMD_TEST_NUM=17.8
 305 #failover_pair clients OST "test 8: failover 2 clients, then OST ===="
 306 sleep $FAILOVER_PERIOD
 307
 308 #CMD_TEST_NUM=17.9
 309 if [ $CLIENTCOUNT -ge 5 ]; then
 310     failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
 311     sleep $FAILOVER_PERIOD
 312 fi
 313 log "==== Checking the clients loads AFTER  all failovers -- failure NOT OK"
 314 if ! check_client_loads $NODES_TO_USE; then
 315     log "Client load failed after failover. Exiting"
 316     exit 5
 317 fi
 318
 319 CURRENT_TS=$(date +%s)
 320 ELAPSED=$((CURRENT_TS - START_TS))
 321
 322 log "Completed successfully in $ELAPSED seconds"
 323
 324 exit 0