lustre/tests/recovery-double-scale.sh

   1 #!/bin/bash
   2
   3 # All pairwise combinations of node failures.
   4 # Was cmd3-17
   5 #
   6 # Author: Chris Cooper <ccooper@clusterfs.com>
   7 #
   8 # Script fails pair of nodes:
   9 # --  in parallel by default
  10 # --  in series if SERIAL is set
  11
  12 LUSTRE=${LUSTRE:-`dirname $0`/..}
  13 SETUP=${SETUP:-""}
  14 CLEANUP=${CLEANUP:-""}
  15 . $LUSTRE/tests/test-framework.sh
  16
  17 init_test_env $@
  18
  19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  20 init_logging
  21 TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  22 DEBUGLOG=$TESTSUITELOG.debug
  23
  24 cleanup_logs
  25
  26 exec 2>$DEBUGLOG
  27 echo "--- env ---" >&2
  28 env >&2
  29 echo "--- env ---" >&2
  30 set -x
  31
  32 [ "$SHARED_DIRECTORY" ] || \
  33     { FAIL_ON_ERROR=true skip_env "$0 Empty SHARED_DIRECTORY" && exit 0; }
  34
  35 check_shared_dir $SHARED_DIRECTORY ||
  36     error "$SHARED_DIRECTORY isn't a shared directory"
  37
  38 [ -n "$CLIENTS" ] || \
  39     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients" && exit 0; }
  40
  41 [ $CLIENTCOUNT -ge 3 ] || \
  42     { FAIL_ON_ERROR=true skip_env "$0 Need two or more remote clients, have $((CLIENTCOUNT - 1))" && exit 0; }
  43
  44 END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
  45 LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
  46
  47 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
  48 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
  49
  50 check_timeout || exit 1
  51
  52 [[ $FAILURE_MODE = SOFT ]] && \
  53     log "WARNING: $0 is not functional with FAILURE_MODE = SOFT, bz22797"
  54
  55 build_test_filter
  56
  57 check_and_setup_lustre
  58 rm -rf $DIR/[df][0-9]*
  59
  60 # the test node needs to be insulated from a lustre failure as much as possible,
  61 # so not even loading the lustre modules is ideal.
  62 # -- umount lustre
  63 # -- remove hostname from clients list
  64 zconf_umount $(hostname) $MOUNT
  65 NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
  66 NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
  67
  68 check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
  69
  70 MDTS=$(get_facets MDS)
  71 OSTS=$(get_facets OST)
  72
  73 rm -f $END_RUN_FILE
  74
  75 reboot_recover_node () {
  76     # item var contains a pair of clients if nodetype=clients
  77     # I would prefer to have a list here
  78     local item=$1
  79     local nodetype=$2
  80     local timeout=$($LCTL get_param  -n timeout)
  81
  82     # MDS, OST item contains the facet
  83     case $nodetype in
  84        MDS|OST )    facet_failover $item
  85                 [ "$SERIAL" ] && wait_recovery_complete $item || true
  86                 ;;
  87        clients) for c in ${item//,/ }; do
  88                       # make sure the client loads die
  89                       do_nodes $c "set -x; test -f $TMP/client-load.pid && \
  90                              { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
  91                       shutdown_client $c
  92                       boot_node $c
  93                       echo "Reintegrating $c"
  94                       # one client fails; need dk logs from this client only
  95                       zconf_mount $c $MOUNT || NODES="$c $(mdts_nodes) $(osts_nodes)" error_exit "zconf_mount failed"
  96                  done
  97                  start_client_loads $item
  98                  ;;
  99                 # script failure:
 100                 # don't use error (), the logs from all nodes not needed
 101        * )      echo "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
 102                 exit 1;;
 103     esac
 104 }
 105
 106 get_item_type () {
 107     local type=$1
 108     local excluded=${2:-""}
 109
 110     local list
 111     case $type in
 112        MDS )    list=$MDTS;;
 113        OST )    list=$OSTS;;
 114        clients) list=$NODES_TO_USE
 115                 ;;
 116                 # script failure:
 117                 # don't use error (), the logs from all nodes not needed
 118        * )      echo "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'."
 119                 exit 1;;
 120     esac
 121
 122     [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded)
 123     # empty list
 124     if [ ! "$(echo $list)" ]; then
 125         echo
 126         return
 127     fi
 128
 129     item=$(get_random_entry $list)
 130     if [ "$type" = clients ] ; then
 131         item="$item $(get_random_entry $(exclude_items_from_list $list $item))"
 132         item=$(comma_list $item)
 133     fi
 134     echo $item
 135 }
 136
 137 # failover_pair
 138 #
 139 # for the two nodetypes specified, chooses a random node(s) from each
 140 # class, reboots the nodes sequentially, and then restarts lustre on
 141 # the nodes.
 142 failover_pair() {
 143     local type1=$1
 144     local type2=$2
 145     local title=$3
 146
 147     local client_nodes=""
 148     local item1=
 149     local item2=
 150     local client1=
 151     local client2=
 152
 153     log "
 154 ==== START === $title "
 155
 156     item1=$(get_item_type $type1)
 157     [ "$item1" ] || \
 158         { echo "type1=$type1 item1 is empty" && return 0; }
 159     item2=$(get_item_type $type2 $item1)
 160     [ "$item2" ] || \
 161         { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; }
 162
 163     # Check that our client loads are still running. If any have died,
 164     # that means they have died outside of recovery, which is unacceptable.
 165     log "==== Checking the clients loads BEFORE failover -- failure NOT OK"
 166
 167     # FIXME. need print summary on exit
 168     if ! check_client_loads $NODES_TO_USE; then
 169         exit 4
 170     fi
 171
 172     log "Done checking client loads. Failing type1=$type1 item1=$item1 ... "
 173
 174     reboot_recover_node $item1 $type1
 175
 176     # Hendrix test17 description:
 177     # Introduce a failure, wait at
 178     # least 5 minutes (for recovery),
 179     # introduce a 2nd
 180     # failure, and wait another 5
 181     # minutes
 182
 183     # reboot_recover_node waits recovery in according to
 184     # SERIAL value.
 185     # We have a "double failures" if SERIAL is not set,
 186     # do not need a sleep between failures for "double failures"
 187
 188     log "                            Failing type2=$type2 item2=$item2 ... "
 189     reboot_recover_node $item2 $type2
 190
 191     # Client loads are allowed to die while in recovery, so we just
 192     # restart them.
 193     log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
 194     restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
 195     log "Done checking / re-Starting client loads. PASS"
 196     return 0
 197 }
 198
 199 summary_and_cleanup () {
 200     local rc=$?
 201     trap 0
 202
 203     # Having not empty END_RUN_FILE means the failed loads only
 204     if [ -s $END_RUN_FILE ]; then
 205         echo "Found the END_RUN_FILE file: $END_RUN_FILE"
 206         cat $END_RUN_FILE
 207         local END_RUN_NODE=
 208         read END_RUN_NODE < $END_RUN_FILE
 209
 210         # a client load will end (i.e. fail) if it finds
 211         # the end run file.  that does not mean that that client load
 212         # actually failed though.  the first node in the END_RUN_NODE is
 213         # the one we are really interested in.
 214         if [ -n "$END_RUN_NODE" ]; then
 215             var=$(node_var_name $END_RUN_NODE)_load
 216             echo "Client load failed on node $END_RUN_NODE"
 217             echo
 218             echo "client $END_RUN_NODE load debug output :"
 219             local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
 220             do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
 221         fi
 222         rc=1
 223     fi
 224
 225     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
 226     echo "$0" >> $END_RUN_FILE
 227     local result=PASS
 228     [ $rc -eq 0 ] || result=FAIL
 229
 230     log "
 231 Server failover period: $FAILOVER_PERIOD seconds
 232 Exited after:           $ELAPSED seconds
 233 Status: $result: rc=$rc"
 234
 235     # make sure the client loads die
 236     do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \
 237         { kill -s TERM \$(cat $TMP/client-load.pid) || true; }"
 238
 239     # and free up the pdshes that started them, if any are still around
 240     if [ -n "$CLIENT_LOAD_PIDS" ]; then
 241         kill $CLIENT_LOAD_PIDS || true
 242         sleep 5
 243         kill -9 $CLIENT_LOAD_PIDS || true
 244     fi
 245
 246     if [ $rc -ne 0 ]; then
 247         # we are interested in only on failed clients and servers
 248         local failedclients=$(cat $END_RUN_FILE | grep -v $0)
 249         # FIXME: need ostfailover-s nodes also for FLAVOR=OST
 250         local product=$(gather_logs $(comma_list $(osts_nodes) \
 251                         $(mdts_nodes) $mdsfailover_HOST $failedclients) 1)
 252         echo logs files $product
 253     fi
 254
 255     [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT
 256     exit $rc
 257 }
 258
 259 trap summary_and_cleanup EXIT TERM INT
 260
 261 #
 262 # MAIN
 263 #
 264 log "-----============= $0 starting =============-----"
 265
 266 START_TS=$(date +%s)
 267 CURRENT_TS=$START_TS
 268 ELAPSED=0
 269
 270 # Set SERIAL to serialize the failure through a recovery of the first failure.
 271 SERIAL=${SERIAL:-""}
 272 ERRORS_OK="yes"
 273
 274 [ "$SERIAL" ] && ERRORS_OK=""
 275
 276 FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
 277
 278 # Start client loads.
 279 start_client_loads $NODES_TO_USE
 280 echo clients load pids:
 281 if ! do_nodesv $NODES_TO_USE "cat $TMP/client-load.pid"; then
 282         exit 3
 283 fi
 284
 285 # FIXME: Do we want to have an initial sleep period where the clients
 286 # just run before introducing a failure?
 287 sleep $FAILOVER_PERIOD
 288
 289 #CMD_TEST_NUM=17.1
 290 failover_pair MDS OST     "test 1: failover MDS, then OST =========="
 291 sleep $FAILOVER_PERIOD
 292
 293 #CMD_TEST_NUM=17.2
 294 failover_pair MDS clients "test 2: failover MDS, then 2 clients ===="
 295 sleep $FAILOVER_PERIOD
 296
 297 #CMD_TEST_NUM=17.3
 298 if [ $MDSCOUNT -gt 1 ]; then
 299     failover_pair MDS MDS     "test 3: failover MDS, then another MDS =="
 300     sleep $FAILOVER_PERIOD
 301 else
 302     skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped"
 303 fi
 304
 305 #CMD_TEST_NUM=17.4
 306 if [ $OSTCOUNT -gt 1 ]; then
 307     failover_pair OST OST     "test 4: failover OST, then another OST =="
 308     sleep $FAILOVER_PERIOD
 309 else
 310     skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
 311 fi
 312
 313 #CMD_TEST_NUM=17.5
 314 failover_pair OST clients "test 5: failover OST, then 2 clients ===="
 315 sleep $FAILOVER_PERIOD
 316
 317 #CMD_TEST_NUM=17.6
 318 failover_pair OST MDS     "test 6: failover OST, then MDS =========="
 319 sleep $FAILOVER_PERIOD
 320
 321 #CMD_TEST_NUM=17.7
 322 failover_pair clients MDS "test 7: failover 2 clients, then MDS ===="
 323 sleep $FAILOVER_PERIOD
 324
 325 #CMD_TEST_NUM=17.8
 326 #failover_pair clients OST "test 8: failover 2 clients, then OST ===="
 327 sleep $FAILOVER_PERIOD
 328
 329 #CMD_TEST_NUM=17.9
 330 if [ $CLIENTCOUNT -ge 5 ]; then
 331     failover_pair clients clients "test 9: failover 2 clients, then 2 different clients =="
 332     sleep $FAILOVER_PERIOD
 333 fi
 334 log "==== Checking the clients loads AFTER  all failovers -- failure NOT OK"
 335 if ! check_client_loads $NODES_TO_USE; then
 336     log "Client load failed after failover. Exiting"
 337     exit 5
 338 fi
 339
 340 CURRENT_TS=$(date +%s)
 341 ELAPSED=$((CURRENT_TS - START_TS))
 342
 343 log "Completed successfully in $ELAPSED seconds"
 344
 345 exit 0