X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Frecovery-mds-scale.sh;h=3b016b9529598dfb93edeb796fbdc0c22f498293;hp=0f8861213fead9ae815d24ace908f657aff19e67;hb=285389479125decaefa3d64a8460a7bfe0a7c889;hpb=f665b5f7691740cbdae6c96c5d959dfb7659f679 diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh index 0f88612..3b016b9 100644 --- a/lustre/tests/recovery-mds-scale.sh +++ b/lustre/tests/recovery-mds-scale.sh @@ -16,10 +16,7 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)} -DEBUGLOG=$TESTSUITELOG.debug - -cleanup_logs +DEBUGLOG=$TESTLOG_PREFIX.suite_debug_log.$(hostname -s).log exec 2>$DEBUGLOG echo "--- env ---" >&2 @@ -41,6 +38,7 @@ check_shared_dir $SHARED_DIRECTORY || END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file} LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid} +VMSTAT_PID_FILE=${VMSTAT_PID_FILE:-$TMP/vmstat.pid} remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0 @@ -84,8 +82,6 @@ fi rm -f $END_RUN_FILE -vmstatLOG=${TESTSUITELOG}_$(basename $0 .sh).vmstat - server_numfailovers () { local facet=$1 local var=${facet}_numfailovers @@ -105,7 +101,6 @@ servers_numfailovers () { } summary_and_cleanup () { - local rc=$? local var trap 0 @@ -117,17 +112,16 @@ summary_and_cleanup () { local END_RUN_NODE= read END_RUN_NODE < $END_RUN_FILE - # a client load will end (i.e. fail) if it finds - # the end run file. that does not mean that that client load - # actually failed though. the first node in the END_RUN_NODE is - # the one we are really interested in. + # A client load will stop if it found the END_RUN_FILE file. + # That does not mean the client load actually failed though. + # The first node in END_RUN_FILE is the one we are interested in. if [ -n "$END_RUN_NODE" ]; then var=$(node_var_name $END_RUN_NODE)_load - echo "Client load failed on node $END_RUN_NODE" + echo "Client load failed on node $END_RUN_NODE" echo - echo "client $END_RUN_NODE load stdout and debug files : - ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE} - ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug" + echo "Client $END_RUN_NODE load stdout and debug files: + $TESTLOG_PREFIX.run_${!var}_stdout.$END_RUN_NODE.log + $TESTLOG_PREFIX.run_${!var}_debug.$END_RUN_NODE.log" fi rc=1 fi @@ -146,14 +140,15 @@ Status: $result: rc=$rc" # stop the vmstats on the OSTs if [ "$VMSTAT" ]; then - do_nodes $(comma_list $(osts_nodes)) "test -f /tmp/vmstat.pid && \ - { kill -s TERM \$(cat /tmp/vmstat.pid); rm -f /tmp/vmstat.pid; \ - gzip -f9 $vmstatLOG-\$(hostname); }" + do_nodes $(comma_list $(osts_nodes)) "test -f $VMSTAT_PID_FILE && + { kill -s TERM \\\$(cat $VMSTAT_PID_FILE); + rm -f $VMSTAT_PID_FILE || true; }" fi # make sure the client loads die - do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE && \ - { kill -s TERM \$(cat $LOAD_PID_FILE) || true; }" + do_nodes $NODES_TO_USE "set -x; test -f $LOAD_PID_FILE && + { kill -s TERM \\\$(cat $LOAD_PID_FILE); + rm -f $LOAD_PID_FILE || true; }" # and free up the pdshes that started them, if any are still around if [ -n "$CLIENT_LOAD_PIDS" ]; then @@ -161,13 +156,14 @@ Status: $result: rc=$rc" sleep 5 kill -9 $CLIENT_LOAD_PIDS || true fi + if [ $rc -ne 0 ]; then # we are interested in only on failed clients and servers local failedclients=$(cat $END_RUN_FILE | grep -v $0) # FIXME: need ostfailover-s nodes also for FLAVOR=OST local product=$(gather_logs $(comma_list $(osts_nodes) \ $(mdts_nodes) $mdsfailover_HOST $failedclients) 1) - echo logs files $product + echo $product fi [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT @@ -186,7 +182,9 @@ ELAPSED=0 # vmstat the osts if [ "$VMSTAT" ]; then - do_nodes $(comma_list $(osts_nodes)) "vmstat 1 > $vmstatLOG-\$(hostname) 2>/dev/null /tmp/vmstat.pid" + do_nodes $(comma_list $(osts_nodes)) \ + "vmstat 1 > $TESTLOG_PREFIX.vmstat.\\\$(hostname -s).log \ + 2>/dev/null $VMSTAT_PID_FILE" fi # Start client loads. @@ -194,7 +192,7 @@ start_client_loads $NODES_TO_USE echo clients load pids: if ! do_nodesv $NODES_TO_USE "cat $LOAD_PID_FILE"; then - exit 3 + exit 3 fi MINSLEEP=${MINSLEEP:-120}