lustre/tests/recovery-double-scale.sh

   1 #!/bin/bash
   2
   3 # All pairwise combinations of node failures.
   4 # Was cmd3-17
   5 #
   6 # Author: Chris Cooper <ccooper@clusterfs.com>
   7 #
   8 # Script fails pair of nodes:
   9 # --  in parallel by default
  10 # --  in series if SERIAL is set
  11
  12 LUSTRE=${LUSTRE:-`dirname $0`/..}
  13 SETUP=${SETUP:-""}
  14 CLEANUP=${CLEANUP:-""}
  15 . $LUSTRE/tests/test-framework.sh
  16
  17 init_test_env $@
  18
  19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  20 TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-double-scale}
  21 DEBUGLOG=$TESTSUITELOG.debug
  22 exec 2>$DEBUGLOG
  23 echo "--- env ---" >&2
  24 env >&2
  25 echo "--- env ---" >&2
  26 set -x
  27
  28 [ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
  29 [ $CLIENTCOUNT -ge 3 ] || \
  30     { skip "$0 Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
  31
  32 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY}/end_run_file}
  33 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  34
  35 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  36 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
  37
  38 check_timeout || exit 1
  39
  40 build_test_filter
  41
  42 check_and_setup_lustre
  43 rm -rf $DIR/[df][0-9]*
  44
  45 # the test node needs to be insulated from a lustre failure as much as possible,
  46 # so not even loading the lustre modules is ideal.
  47 # -- umount lustre
  48 # -- remove hostname from clients list
  49 zconf_umount $(hostname) $MOUNT
  50 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  51 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  52
  53 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  54
  55 MDTS=$(get_facets MDS)
  56 OSTS=$(get_facets OST)
  57
  58 rm -f $END_RUN_FILE
  59
  60 reboot_recover_node () {
  61     # item var contains a pair of clients if nodetype=clients
  62     # I would prefer to have a list here
  63     local item=$1
  64     local nodetype=$2
  65     local timeout=$($LCTL get_param  -n timeout)
  66
  67     # MDS, OST item contains the facet
  68     case $nodetype in
  69        MDS|OST )    facet_failover $item
  70                 [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
  71                 ;;
  72        clients) for c in ${item//,/ }; do
  73                       shutdown_client $c
  74                       boot_node $c
  75                  done
  76                  start_client_loads $list || return $?
  77                  ;;
  78        * )      error "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
  79                 exit 1;;
  80     esac
  81 }
  82
  83 get_item_type () {
  84     local type=$1
  85     local excluded=${2:-""}
  86
  87     local list
  88     case $type in
  89        MDS )    list=$MDTS;;
  90        OST )    list=$OSTS;;
  91        clients) list=$NODES_TO_USE
  92                 ;;
  93        * )      error "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
  94                 exit 1;;
  95     esac
  96
  97     [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
  98     # empty list
  99     if [ ! "$(echo $list)" ]; then
 100         echo
 101         return
 102     fi
 103
 104     item=$(get_random_entry $list)
 105     if [ "$type" = clients ] ; then
 106         item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
 107         item=$(comma_list $item)
 108     fi
 109     echo $item
 110 }
 111
 112 # failover_pair
 113 #
 114 # for the two nodetypes specified, chooses a random node(s) from each
 115 # class, reboots the nodes sequentially, and then restarts lustre on
 116 # the nodes.
 117 failover_pair() {
 118     local type1=$1
 119     local type2=$2
 120     local title=$3
 121
 122     local client_nodes=""
 123     local item1=
 124     local item2=
 125     local client1=
 126     local client2=
 127
 128     log "
 129 ==== START === $title "
 130
 131     item1=$(get_item_type $type1)
 132     [ "$item1" ] || \
 133         { echo "type1=$type1 item1 is empty" && return 0; }
 134     item2=$(get_item_type $type2 $item1)
 135     [ "$item2" ] || \
 136         { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
 137
 138     # Check that our client loads are still running. If any have died,
 139     # that means they have died outside of recovery, which is unacceptable.
 140     log "==== Checking the clients loads BEFORE failover -- failure NOT OK"
 141
 142     # FIXME. need print summary on exit
 143     if ! check_client_loads $NODES_TO_USE; then
 144         exit 4
 145     fi
 146
 147     log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
 148
 149     reboot_recover_node $item1 $type1 || return $?
 150
 151     # Hendrix test17 description:
 152     # Introduce a failure, wait at
 153     # least 5 minutes (for recovery),
 154     # introduce a 2nd
 155     # failure, and wait another 5
 156     # minutes
 157
 158     # reboot_recover_node waits recovery in according to
 159     # SERIAL value.
 160     # We have a "double failures" if SERIAL is not set,
 161     # do not need a sleep between failures for "double failures"
 162
 163     log "                            Failing type2=$type2 item2=$item2 ... "
 164     reboot_recover_node $item2 $type2 || return $?
 165
 166     # Client loads are allowed to die while in recovery, so we just
 167     # restart them.
 168     log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
 169     restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
 170     log "Done checking / re-Starting client loads. PASS"
 171     return 0
 172 }
 173
 174 summary_and_cleanup () {
 175     local rc=$?
 176     trap 0
 177
 178     # Having not empty END_RUN_FILE means the failed loads only
 179     if [ -s $END_RUN_FILE ]; then
 180         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 181         cat $END_RUN_FILE
 182         local END_RUN_NODE=
 183         read END_RUN_NODE < $END_RUN_FILE
 184
 185         # a client load will end (i.e. fail) if it finds
 186         # the end run file.  that does not mean that that client load
 187         # actually failed though.  the first node in the END_RUN_NODE is
 188         # the one we are really interested in.
 189         if [ -n "$END_RUN_NODE" ]; then
 190             var=${END_RUN_NODE}_load
 191             echo "Client load failed on node $END_RUN_NODE"
 192             echo
 193             echo "client $END_RUN_NODE load debug output :"
 194             local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
 195             do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
 196         fi
 197         rc=1
 198     fi
 199
 200     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 201     echo "$0" >> $END_RUN_FILE
 202     local result=PASS
 203     [ $rc -eq 0 ] || result=FAIL
 204
 205     log "
 206 Server failover period: $FAILOVER_PERIOD seconds
 207 Exited after:           $ELAPSED seconds
 208 Status: $result: rc=$rc"
 209
 210     # make sure the client loads die
 211     do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
 212         { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
 213
 214     # and free up the pdshes that started them, if any are still around
 215     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 216         kill $CLIENT_LOAD_PIDS || true
 217         sleep 5
 218         kill -9 $CLIENT_LOAD_PIDS || true
 219     fi
 220     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 221     exit $rc
 222 }
 223
 224 trap summary_and_cleanup EXIT TERM INT
 225
 226 #
 227 # MAIN
 228 #
 229 log "-----============= $0 starting =============-----"
 230
 231 START_TS=$(date +%s)
 232 CURRENT_TS=$START_TS
 233 ELAPSED=0
 234
 235 # Set SERIAL to serialize the failure through a recovery of the first failure.
 236 SERIAL=${SERIAL:-""}
 237 ERRORS_OK="yes"
 238
 239 [ "$SERIAL" ] && ERRORS_OK=""
 240
 241 FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
 242
 243 # Start client loads.
 244 start_client_loads $NODES_TO_USE
 245 echo clients load pids:
 246 if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $TMP/client-load.pid"; then
 247     if [ -e $DEBUGLOG ]; then
 248         exec 2<&-
 249         cat $DEBUGLOG
 250         exit 3
 251     fi
 252 fi
 253
 254 # FIXME: Do we want to have an initial sleep period where the clients
 255 # just run before introducing a failure?
 256 sleep $FAILOVER_PERIOD
 257
 258 #CMD_TEST_NUM=17.1
 259 failover_pair MDS OST     "test 1: failover MDS, then OST =========="
 260 sleep $FAILOVER_PERIOD
 261
 262 #CMD_TEST_NUM=17.2
 263 failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
 264 sleep $FAILOVER_PERIOD
 265
 266 #CMD_TEST_NUM=17.3
 267 if [ $MDSCOUNT -gt 1 ]; then
 268     failover_pair MDS MDS     "test 3: failover MDS, then another MDS =="
 269     sleep $FAILOVER_PERIOD
 270 else
 271     skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped"
 272 fi
 273
 274 #CMD_TEST_NUM=17.4
 275 if [ $OSTCOUNT -gt 1 ]; then
 276     failover_pair OST OST     "test 4: failover OST, then another OST =="
 277     sleep $FAILOVER_PERIOD
 278 else
 279     skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
 280 fi
 281
 282 #CMD_TEST_NUM=17.5
 283 failover_pair OST clients "test 5: failover OST, then 2 clients ===="
 284 sleep $FAILOVER_PERIOD
 285
 286 #CMD_TEST_NUM=17.6
 287 failover_pair OST MDS     "test 6: failover OST, then MDS =========="
 288 sleep $FAILOVER_PERIOD
 289
 290 #CMD_TEST_NUM=17.7
 291 failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
 292 sleep $FAILOVER_PERIOD
 293
 294 #CMD_TEST_NUM=17.8
 295 #failover_pair clients OST "test 8: failover 2 clients, then OST ===="
 296 sleep $FAILOVER_PERIOD
 297
 298 #CMD_TEST_NUM=17.9
 299 if [ $CLIENTCOUNT -ge 5 ]; then
 300     failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
 301     sleep $FAILOVER_PERIOD
 302 fi
 303 log "==== Checking the clients loads AFTER  all failovers -- failure NOT OK"
 304 if ! check_client_loads $NODES_TO_USE; then
 305     log "Client load failed after failover. Exiting"
 306     exit 5
 307 fi
 308
 309 CURRENT_TS=$(date +%s)
 310 ELAPSED=$((CURRENT_TS - START_TS))
 311
 312 log "Completed successfully in $ELAPSED seconds"
 313
 314 exit 0