lustre/tests/recovery-double-scale.sh

   1 #!/bin/bash
   2
   3 # All pairwise combinations of node failures.
   4 # Was cmd3-17
   5 #
   6 # Author: Chris Cooper <ccooper@clusterfs.com>
   7 #
   8 # Script fails pair of nodes:
   9 # --  in parallel by default
  10 # --  in series if SERIAL is set
  11
  12 LUSTRE=${LUSTRE:-`dirname $0`/..}
  13 SETUP=${SETUP:-""}
  14 CLEANUP=${CLEANUP:-""}
  15 . $LUSTRE/tests/test-framework.sh
  16
  17 init_test_env $@
  18
  19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  20 TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  21 DEBUGLOG=$TESTSUITELOG.debug
  22
  23 cleanup_logs
  24
  25 exec 2>$DEBUGLOG
  26 echo "--- env ---" >&2
  27 env >&2
  28 echo "--- env ---" >&2
  29 set -x
  30
  31 [ "$SHARED_DIRECTORY" ] || \
  32     { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; }
  33
  34 [ -n "$CLIENTS" ] || \
  35     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; }
  36
  37 [ $CLIENTCOUNT -ge 3 ] || \
  38     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; }
  39
  40 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  41 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  42
  43 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  44 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
  45
  46 check_timeout || exit 1
  47
  48 [[ $FAILURE_MODE = SOFT ]] && \
  49     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  50
  51 build_test_filter
  52
  53 check_and_setup_lustre
  54 rm -rf $DIR/[df][0-9]*
  55
  56 # the test node needs to be insulated from a lustre failure as much as possible,
  57 # so not even loading the lustre modules is ideal.
  58 # -- umount lustre
  59 # -- remove hostname from clients list
  60 zconf_umount $(hostname) $MOUNT
  61 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  62 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  63
  64 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  65
  66 MDTS=$(get_facets MDS)
  67 OSTS=$(get_facets OST)
  68
  69 rm -f $END_RUN_FILE
  70
  71 reboot_recover_node () {
  72     # item var contains a pair of clients if nodetype=clients
  73     # I would prefer to have a list here
  74     local item=$1
  75     local nodetype=$2
  76     local timeout=$($LCTL get_param  -n timeout)
  77
  78     # MDS, OST item contains the facet
  79     case $nodetype in
  80        MDS|OST )    facet_failover $item
  81                 [ "$SERIAL" ] && wait_recovery_complete $item || true
  82                 ;;
  83        clients) for c in ${item//,/ }; do
  84                       shutdown_client $c
  85                       boot_node $c
  86                       echo "Reintegrating $c"
  87                       # one client fails; need dk logs from this client only
  88                       zconf_mount $c $MOUNT || NODES="$c $(facet_host mds) $(osts_nodes)" error_exit "zconf_mount failed"
  89                  done
  90                  start_client_loads $item
  91                  ;;
  92                 # script failure:
  93                 # don't use error (), the logs from all nodes not needed
  94        * )      echo "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
  95                 exit 1;;
  96     esac
  97 }
  98
  99 get_item_type () {
 100     local type=$1
 101     local excluded=${2:-""}
 102
 103     local list
 104     case $type in
 105        MDS )    list=$MDTS;;
 106        OST )    list=$OSTS;;
 107        clients) list=$NODES_TO_USE
 108                 ;;
 109                 # script failure:
 110                 # don't use error (), the logs from all nodes not needed
 111        * )      echo "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
 112                 exit 1;;
 113     esac
 114
 115     [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
 116     # empty list
 117     if [ ! "$(echo $list)" ]; then
 118         echo
 119         return
 120     fi
 121
 122     item=$(get_random_entry $list)
 123     if [ "$type" = clients ] ; then
 124         item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
 125         item=$(comma_list $item)
 126     fi
 127     echo $item
 128 }
 129
 130 # failover_pair
 131 #
 132 # for the two nodetypes specified, chooses a random node(s) from each
 133 # class, reboots the nodes sequentially, and then restarts lustre on
 134 # the nodes.
 135 failover_pair() {
 136     local type1=$1
 137     local type2=$2
 138     local title=$3
 139
 140     local client_nodes=""
 141     local item1=
 142     local item2=
 143     local client1=
 144     local client2=
 145
 146     log "
 147 ==== START === $title "
 148
 149     item1=$(get_item_type $type1)
 150     [ "$item1" ] || \
 151         { echo "type1=$type1 item1 is empty" && return 0; }
 152     item2=$(get_item_type $type2 $item1)
 153     [ "$item2" ] || \
 154         { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
 155
 156     # Check that our client loads are still running. If any have died,
 157     # that means they have died outside of recovery, which is unacceptable.
 158     log "==== Checking the clients loads BEFORE failover -- failure NOT OK"
 159
 160     # FIXME. need print summary on exit
 161     if ! check_client_loads $NODES_TO_USE; then
 162         exit 4
 163     fi
 164
 165     log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
 166
 167     reboot_recover_node $item1 $type1
 168
 169     # Hendrix test17 description:
 170     # Introduce a failure, wait at
 171     # least 5 minutes (for recovery),
 172     # introduce a 2nd
 173     # failure, and wait another 5
 174     # minutes
 175
 176     # reboot_recover_node waits recovery in according to
 177     # SERIAL value.
 178     # We have a "double failures" if SERIAL is not set,
 179     # do not need a sleep between failures for "double failures"
 180
 181     log "                            Failing type2=$type2 item2=$item2 ... "
 182     reboot_recover_node $item2 $type2
 183
 184     # Client loads are allowed to die while in recovery, so we just
 185     # restart them.
 186     log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
 187     restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
 188     log "Done checking / re-Starting client loads. PASS"
 189     return 0
 190 }
 191
 192 summary_and_cleanup () {
 193     local rc=$?
 194     trap 0
 195
 196     # Having not empty END_RUN_FILE means the failed loads only
 197     if [ -s $END_RUN_FILE ]; then
 198         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 199         cat $END_RUN_FILE
 200         local END_RUN_NODE=
 201         read END_RUN_NODE < $END_RUN_FILE
 202
 203         # a client load will end (i.e. fail) if it finds
 204         # the end run file.  that does not mean that that client load
 205         # actually failed though.  the first node in the END_RUN_NODE is
 206         # the one we are really interested in.
 207         if [ -n "$END_RUN_NODE" ]; then
 208             var=$(node_var_name $END_RUN_NODE)_load
 209             echo "Client load failed on node $END_RUN_NODE"
 210             echo
 211             echo "client $END_RUN_NODE load debug output :"
 212             local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
 213             do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
 214         fi
 215         rc=1
 216     fi
 217
 218     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 219     echo "$0" >> $END_RUN_FILE
 220     local result=PASS
 221     [ $rc -eq 0 ] || result=FAIL
 222
 223     log "
 224 Server failover period: $FAILOVER_PERIOD seconds
 225 Exited after:           $ELAPSED seconds
 226 Status: $result: rc=$rc"
 227
 228     # make sure the client loads die
 229     do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
 230         { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
 231
 232     # and free up the pdshes that started them, if any are still around
 233     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 234         kill $CLIENT_LOAD_PIDS || true
 235         sleep 5
 236         kill -9 $CLIENT_LOAD_PIDS || true
 237     fi
 238
 239     if [ $rc -ne 0 ]; then
 240         # we are interested in only on failed clients and servers
 241         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 242         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
 243         local product=$(gather_logs $(comma_list $(osts_nodes) \
 244                                  $mds_HOST $mdsfailover_HOST $failedclients))
 245         echo logs files $product
 246     fi
 247
 248     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 249     exit $rc
 250 }
 251
 252 trap summary_and_cleanup EXIT TERM INT
 253
 254 #
 255 # MAIN
 256 #
 257 log "-----============= $0 starting =============-----"
 258
 259 START_TS=$(date +%s)
 260 CURRENT_TS=$START_TS
 261 ELAPSED=0
 262
 263 # Set SERIAL to serialize the failure through a recovery of the first failure.
 264 SERIAL=${SERIAL:-""}
 265 ERRORS_OK="yes"
 266
 267 [ "$SERIAL" ] && ERRORS_OK=""
 268
 269 FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
 270
 271 # Start client loads.
 272 start_client_loads $NODES_TO_USE
 273 echo clients load pids:
 274 if ! do_nodesv $NODES_TO_USE "cat $TMP/client-load.pid"; then
 275         exit 3
 276 fi
 277
 278 # FIXME: Do we want to have an initial sleep period where the clients
 279 # just run before introducing a failure?
 280 sleep $FAILOVER_PERIOD
 281
 282 #CMD_TEST_NUM=17.1
 283 failover_pair MDS OST     "test 1: failover MDS, then OST =========="
 284 sleep $FAILOVER_PERIOD
 285
 286 #CMD_TEST_NUM=17.2
 287 failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
 288 sleep $FAILOVER_PERIOD
 289
 290 #CMD_TEST_NUM=17.3
 291 # No test 3 for 1.8.x lustre version
 292
 293 #CMD_TEST_NUM=17.4
 294 if [ $OSTCOUNT -gt 1 ]; then
 295     failover_pair OST OST     "test 4: failover OST, then another OST =="
 296     sleep $FAILOVER_PERIOD
 297 else
 298     skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
 299 fi
 300
 301 #CMD_TEST_NUM=17.5
 302 failover_pair OST clients "test 5: failover OST, then 2 clients ===="
 303 sleep $FAILOVER_PERIOD
 304
 305 #CMD_TEST_NUM=17.6
 306 failover_pair OST MDS     "test 6: failover OST, then MDS =========="
 307 sleep $FAILOVER_PERIOD
 308
 309 #CMD_TEST_NUM=17.7
 310 failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
 311 sleep $FAILOVER_PERIOD
 312
 313 #CMD_TEST_NUM=17.8
 314 #failover_pair clients OST "test 8: failover 2 clients, then OST ===="
 315 sleep $FAILOVER_PERIOD
 316
 317 #CMD_TEST_NUM=17.9
 318 if [ $CLIENTCOUNT -ge 5 ]; then
 319     failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
 320     sleep $FAILOVER_PERIOD
 321 fi
 322 log "==== Checking the clients loads AFTER  all failovers -- failure NOT OK"
 323 if ! check_client_loads $NODES_TO_USE; then
 324     log "Client load failed after failover. Exiting"
 325     exit 5
 326 fi
 327
 328 CURRENT_TS=$(date +%s)
 329 ELAPSED=$((CURRENT_TS - START_TS))
 330
 331 log "Completed successfully in $ELAPSED seconds"
 332
 333 exit 0