lustre/tests/recovery-double-scale.sh

   1 #!/bin/bash
   2
   3 # All pairwise combinations of node failures.
   4 # Was cmd3-17
   5 #
   6 # Author: Chris Cooper <ccooper@clusterfs.com>
   7 #
   8 # Script fails pair of nodes:
   9 # --  in parallel by default
  10 # --  in series if SERIAL is set
  11
  12 LUSTRE=${LUSTRE:-`dirname $0`/..}
  13 SETUP=${SETUP:-""}
  14 CLEANUP=${CLEANUP:-""}
  15 . $LUSTRE/tests/test-framework.sh
  16
  17 init_test_env $@
  18
  19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  20 TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  21 DEBUGLOG=$TESTSUITELOG.debug
  22
  23 cleanup_logs
  24
  25 exec 2>$DEBUGLOG
  26 echo "--- env ---" >&2
  27 env >&2
  28 echo "--- env ---" >&2
  29 set -x
  30
  31 [ "$SHARED_DIRECTORY" ] || \
  32     { skip "$0: Empty SHARED_DIRECTORY" && exit 0; }
  33
  34 [ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
  35 [ $CLIENTCOUNT -ge 3 ] || \
  36     { skip "$0 Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
  37
  38 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  39 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  40
  41 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  42 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
  43
  44 check_timeout || exit 1
  45
  46 [[ $FAILURE_MODE = SOFT ]] && \
  47     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  48
  49 build_test_filter
  50
  51 check_and_setup_lustre
  52 rm -rf $DIR/[df][0-9]*
  53
  54 # the test node needs to be insulated from a lustre failure as much as possible,
  55 # so not even loading the lustre modules is ideal.
  56 # -- umount lustre
  57 # -- remove hostname from clients list
  58 zconf_umount $(hostname) $MOUNT
  59 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  60 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  61
  62 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  63
  64 MDTS=$(get_facets MDS)
  65 OSTS=$(get_facets OST)
  66
  67 rm -f $END_RUN_FILE
  68
  69 reboot_recover_node () {
  70     # item var contains a pair of clients if nodetype=clients
  71     # I would prefer to have a list here
  72     local item=$1
  73     local nodetype=$2
  74     local timeout=$($LCTL get_param  -n timeout)
  75
  76     # MDS, OST item contains the facet
  77     case $nodetype in
  78        MDS|OST )    facet_failover $item
  79                 [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
  80                 ;;
  81        clients) for c in ${item//,/ }; do
  82                       shutdown_client $c
  83                       boot_node $c
  84                       echo "Reintegrating $c"
  85                       # one client fails; need dk logs from this client only
  86                       zconf_mount $c $MOUNT || NODES="$c $(facet_host mds) $(osts_nodes)" error_exit "zconf_mount failed"
  87                  done
  88                  start_client_loads $item
  89                  ;;
  90                 # script failure:
  91                 # don't use error (), the logs from all nodes not needed
  92        * )      echo "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
  93                 exit 1;;
  94     esac
  95 }
  96
  97 get_item_type () {
  98     local type=$1
  99     local excluded=${2:-""}
 100
 101     local list
 102     case $type in
 103        MDS )    list=$MDTS;;
 104        OST )    list=$OSTS;;
 105        clients) list=$NODES_TO_USE
 106                 ;;
 107                 # script failure:
 108                 # don't use error (), the logs from all nodes not needed
 109        * )      echo "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
 110                 exit 1;;
 111     esac
 112
 113     [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
 114     # empty list
 115     if [ ! "$(echo $list)" ]; then
 116         echo
 117         return
 118     fi
 119
 120     item=$(get_random_entry $list)
 121     if [ "$type" = clients ] ; then
 122         item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
 123         item=$(comma_list $item)
 124     fi
 125     echo $item
 126 }
 127
 128 # failover_pair
 129 #
 130 # for the two nodetypes specified, chooses a random node(s) from each
 131 # class, reboots the nodes sequentially, and then restarts lustre on
 132 # the nodes.
 133 failover_pair() {
 134     local type1=$1
 135     local type2=$2
 136     local title=$3
 137
 138     local client_nodes=""
 139     local item1=
 140     local item2=
 141     local client1=
 142     local client2=
 143
 144     log "
 145 ==== START === $title "
 146
 147     item1=$(get_item_type $type1)
 148     [ "$item1" ] || \
 149         { echo "type1=$type1 item1 is empty" && return 0; }
 150     item2=$(get_item_type $type2 $item1)
 151     [ "$item2" ] || \
 152         { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
 153
 154     # Check that our client loads are still running. If any have died,
 155     # that means they have died outside of recovery, which is unacceptable.
 156     log "==== Checking the clients loads BEFORE failover -- failure NOT OK"
 157
 158     # FIXME. need print summary on exit
 159     if ! check_client_loads $NODES_TO_USE; then
 160         exit 4
 161     fi
 162
 163     log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
 164
 165     reboot_recover_node $item1 $type1
 166
 167     # Hendrix test17 description:
 168     # Introduce a failure, wait at
 169     # least 5 minutes (for recovery),
 170     # introduce a 2nd
 171     # failure, and wait another 5
 172     # minutes
 173
 174     # reboot_recover_node waits recovery in according to
 175     # SERIAL value.
 176     # We have a "double failures" if SERIAL is not set,
 177     # do not need a sleep between failures for "double failures"
 178
 179     log "                            Failing type2=$type2 item2=$item2 ... "
 180     reboot_recover_node $item2 $type2
 181
 182     # Client loads are allowed to die while in recovery, so we just
 183     # restart them.
 184     log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
 185     restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
 186     log "Done checking / re-Starting client loads. PASS"
 187     return 0
 188 }
 189
 190 summary_and_cleanup () {
 191     local rc=$?
 192     trap 0
 193
 194     # Having not empty END_RUN_FILE means the failed loads only
 195     if [ -s $END_RUN_FILE ]; then
 196         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 197         cat $END_RUN_FILE
 198         local END_RUN_NODE=
 199         read END_RUN_NODE < $END_RUN_FILE
 200
 201         # a client load will end (i.e. fail) if it finds
 202         # the end run file.  that does not mean that that client load
 203         # actually failed though.  the first node in the END_RUN_NODE is
 204         # the one we are really interested in.
 205         if [ -n "$END_RUN_NODE" ]; then
 206             var=$(client_var_name $END_RUN_NODE)_load
 207             echo "Client load failed on node $END_RUN_NODE"
 208             echo
 209             echo "client $END_RUN_NODE load debug output :"
 210             local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
 211             do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
 212         fi
 213         rc=1
 214     fi
 215
 216     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 217     echo "$0" >> $END_RUN_FILE
 218     local result=PASS
 219     [ $rc -eq 0 ] || result=FAIL
 220
 221     log "
 222 Server failover period: $FAILOVER_PERIOD seconds
 223 Exited after:           $ELAPSED seconds
 224 Status: $result: rc=$rc"
 225
 226     # make sure the client loads die
 227     do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
 228         { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
 229
 230     # and free up the pdshes that started them, if any are still around
 231     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 232         kill $CLIENT_LOAD_PIDS || true
 233         sleep 5
 234         kill -9 $CLIENT_LOAD_PIDS || true
 235     fi
 236
 237     if [ $rc -ne 0 ]; then
 238         # we are interested in only on failed clients and servers
 239         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 240         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
 241         local product=$(gather_logs $(comma_list $(osts_nodes) \
 242                                  $mds_HOST $mdsfailover_HOST $failedclients))
 243         echo logs files $product
 244     fi
 245
 246     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 247     exit $rc
 248 }
 249
 250 trap summary_and_cleanup EXIT TERM INT
 251
 252 #
 253 # MAIN
 254 #
 255 log "-----============= $0 starting =============-----"
 256
 257 START_TS=$(date +%s)
 258 CURRENT_TS=$START_TS
 259 ELAPSED=0
 260
 261 # Set SERIAL to serialize the failure through a recovery of the first failure.
 262 SERIAL=${SERIAL:-""}
 263 ERRORS_OK="yes"
 264
 265 [ "$SERIAL" ] && ERRORS_OK=""
 266
 267 FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
 268
 269 # Start client loads.
 270 start_client_loads $NODES_TO_USE
 271 echo clients load pids:
 272 if ! do_nodesv $NODES_TO_USE "cat $TMP/client-load.pid"; then
 273         exit 3
 274 fi
 275
 276 # FIXME: Do we want to have an initial sleep period where the clients
 277 # just run before introducing a failure?
 278 sleep $FAILOVER_PERIOD
 279
 280 #CMD_TEST_NUM=17.1
 281 failover_pair MDS OST     "test 1: failover MDS, then OST =========="
 282 sleep $FAILOVER_PERIOD
 283
 284 #CMD_TEST_NUM=17.2
 285 failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
 286 sleep $FAILOVER_PERIOD
 287
 288 #CMD_TEST_NUM=17.3
 289 # No test 3 for 1.8.x lustre version
 290
 291 #CMD_TEST_NUM=17.4
 292 if [ $OSTCOUNT -gt 1 ]; then
 293     failover_pair OST OST     "test 4: failover OST, then another OST =="
 294     sleep $FAILOVER_PERIOD
 295 else
 296     skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
 297 fi
 298
 299 #CMD_TEST_NUM=17.5
 300 failover_pair OST clients "test 5: failover OST, then 2 clients ===="
 301 sleep $FAILOVER_PERIOD
 302
 303 #CMD_TEST_NUM=17.6
 304 failover_pair OST MDS     "test 6: failover OST, then MDS =========="
 305 sleep $FAILOVER_PERIOD
 306
 307 #CMD_TEST_NUM=17.7
 308 failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
 309 sleep $FAILOVER_PERIOD
 310
 311 #CMD_TEST_NUM=17.8
 312 #failover_pair clients OST "test 8: failover 2 clients, then OST ===="
 313 sleep $FAILOVER_PERIOD
 314
 315 #CMD_TEST_NUM=17.9
 316 if [ $CLIENTCOUNT -ge 5 ]; then
 317     failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
 318     sleep $FAILOVER_PERIOD
 319 fi
 320 log "==== Checking the clients loads AFTER  all failovers -- failure NOT OK"
 321 if ! check_client_loads $NODES_TO_USE; then
 322     log "Client load failed after failover. Exiting"
 323     exit 5
 324 fi
 325
 326 CURRENT_TS=$(date +%s)
 327 ELAPSED=$((CURRENT_TS - START_TS))
 328
 329 log "Completed successfully in $ELAPSED seconds"
 330
 331 exit 0