LU-123 Port yaml and auster to b1_8

author root <root@murdoch.sodor>

Tue, 21 Dec 2010 14:00:06 +0000 (14:00 +0000)

committer Johann Lombardi <johann@whamcloud.com>

Wed, 20 Apr 2011 13:36:09 +0000 (06:36 -0700)
author root <root@murdoch.sodor>
Tue, 21 Dec 2010 14:00:06 +0000 (14:00 +0000)
committer Johann Lombardi <johann@whamcloud.com>
Wed, 20 Apr 2011 13:36:09 +0000 (06:36 -0700)
diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am

index 2262fa4..98d7398 100644 (file)
--- a/lustre/tests/Makefile.am
+++ b/lustre/tests/Makefile.am
@@ -23,8 +23,9 @@ noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh
  noinst_SCRIPTS += run_dbench.sh run_IOR.sh recovery-double-scale.sh
  noinst_SCRIPTS += recovery-random-scale.sh parallel-scale.sh metadata-updates.sh
  noinst_SCRIPTS += ost-pools.sh rpc.sh lnet-selftest.sh obdfilter-survey.sh mmp.sh
-noinst_SCRIPTS += sgpdd-survey.sh
+noinst_SCRIPTS += sgpdd-survey.sh auster.sh yaml.sh maloo_upload.sh
  nobase_noinst_SCRIPTS = cfg/local.sh
+nobase_noinst_SCRIPTS += test-groups/regression test-groups/regression-mpi
  nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh
  nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh
  nobase_noinst_SCRIPTS += racer/file_rm.sh racer/racer.sh racer/file_concat.sh
@@ -40,7 +41,7 @@ if MPITESTS
  SUBDIRS = mpi
  endif
  noinst_PROGRAMS = openunlink truncate directio writeme mlink utime it_test
-noinst_PROGRAMS += tchmod fsx test_brw 
+noinst_PROGRAMS += tchmod fsx test_brw
  noinst_PROGRAMS += createmany chownmany statmany multifstat createtest
  noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany checkstat
  noinst_PROGRAMS += statone runas openfile rmdirmany
@@ -48,7 +49,7 @@ noinst_PROGRAMS += small_write multiop ll_sparseness_verify
  noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany
  noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2
  noinst_PROGRAMS += mmap_sanity flock_test writemany reads flocks_test
-# noinst_PROGRAMS += copy_attr mkdirdeep 
+# noinst_PROGRAMS += copy_attr mkdirdeep
  bin_PROGRAMS = mcreate munlink
  testdir = $(libdir)/lustre/tests
  test_SCRIPTS = $(noinst_SCRIPTS) $(noinst_PROGRAMS)
diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh

index 524cb9b..83a9532 100755 (executable)
--- a/lustre/tests/acceptance-small.sh
+++ b/lustre/tests/acceptance-small.sh
@@ -4,26 +4,37 @@
  #set -vx
  set -e
  
-export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL REPLAY_VBR INSANITY SANITY_QUOTA PERFORMANCE_SANITY LARGE_SCALE RECOVERY_MDS_SCALE RECOVERY_DOUBLE_SCALE RECOVERY_RANDOM_SCALE PARALLEL_SCALE METADATA_UPDATES OST_POOLS SANITY_BENCHMARK LNET_SELFTEST MMP OBDFILTER_SURVEY SGPDD_SURVEY"
+export MSKIPPED=0
+export OSKIPPED=0
+
+# This is the default set of tests to run.
+DEFAULT_SUITES="runtests sanity sanity-benchmark sanityn lfsck liblustre
+                racer replay-single conf-sanity recovery-small
+                replay-ost-single replay-dual replay-vbr insanity sanity-quota
+                performance-sanity large-scale recovery-mds-scale
+                recovery-double-scale recovery-random-scale parallel-scale
+                lustre_rsync-test metadata-updates ost-pools lnet-selftest
+                mmp obdfilter-survey sgpdd-survey"
+
+if [[ -n $@ ]]; then
+    ACC_SM_ONLY="${ACC_SM_ONLY} $@"
+fi
  
  if [ "$ACC_SM_ONLY" ]; then
-    for O in $TESTSUITE_LIST; do
-       export ${O}="no"
+    for O in $DEFAULT_SUITES; do
+        O=$(echo $O | tr "-" "_" | tr "[:lower:]" "[:upper:]")
+        export ${O}="no"
      done
      for O in $ACC_SM_ONLY; do
-       O=`echo ${O%.sh} | tr "-" "_"`
-       O=`echo $O | tr "[:lower:]" "[:upper:]"`
-       export ${O}="yes"
+        O=`echo ${O%.sh} | tr "-" "_"`
+        O=`echo $O | tr "[:lower:]" "[:upper:]"`
+        export ${O}="yes"
      done
  fi
  
-LIBLUSTRETESTS=${LIBLUSTRETESTS:-../liblustre/tests}
-
-RANTEST=""
-
  LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
-init_test_env $@
+init_test_env
  
  SETUP=${SETUP:-setupall}
  FORMAT=${FORMAT:-formatall}
@@ -65,57 +76,21 @@ find_in_path() {
  title() {
      # update titlebar if stdin is attached to an xterm
      if ${UPDATE_TITLEBAR:-false}; then
-       if tty -s; then
-           case $TERM in 
-               xterm*)
-                   echo -ne "\033]2; acceptance-small: $* \007" >&0
-                   ;;
-           esac
-       fi
-    fi 
+        if tty -s; then
+            case $TERM in
+            xterm*)
+                echo -ne "\033]2; acceptance-small: $* \007" >&0
+                ;;
+            esac
+        fi
+    fi
      log "-----============= acceptance-small: "$*" ============----- `date`"
-    RANTEST=${RANTEST}$*", "
-}
-
-skip_remost() {
-       remote_ost_nodsh && log "SKIP: $1: remote OST with nodsh" && return 0
-       return 1
-}
-
-skip_remmds() {
-       remote_mds_nodsh && log "SKIP: $1: remote MDS with nodsh" && return 0
-       return 1
-}
-
-# cleanup the logs of all suites
-cleanup_log () {
-    local suite
-    local o=$(echo $O | tr "[:upper:]" "[:lower:]")
-    o=${o//_/-}
-    
-    rm -f ${TMP}/${o}.log
  }
  
-cleanup_logs () {
-    local suite
-    for suite in ${ACC_SM_ONLY:-$TESTSUITE_LIST}; do
-        cleanup_log $suite
-    done
-}
-
-export NAME MOUNT START CLEAN
-. $LUSTRE/tests/cfg/$NAME.sh
-
-assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
-assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT
-assert_env FSNAME MOUNT MOUNT2
-
-setup_if_needed
-
-for s in ${ACC_SM_ONLY:-$TESTSUITE_LIST}; do
-    suite_name=$(echo ${s%.sh} | tr "[:upper:]_" "[:lower:]-" )
-    suite=$(echo ${suite_name} | tr "[:lower:]-" "[:upper:]_")
-    suite_only=ONLY # Change to ${suite}_ONLY after fixing YALA
+run_suite() {
+    local suite_name=$(echo ${1%.sh} | tr "[:upper:]_" "[:lower:]-" )
+    local suite=$(echo ${suite_name} | tr "[:lower:]-" "[:upper:]_")
+    local suite_only=ONLY # Change to ${suite}_ONLY after fixing YALA
  
      if is_sanity_benchmark ${suite_name}; then
          suite_only=suite_name
@@ -130,34 +105,55 @@ for s in ${ACC_SM_ONLY:-$TESTSUITE_LIST}; do
          suite_script=${suite_name}.sh
      else
          echo "Can't find test script for $suite_name"
-        exit 1
+        return 1
      fi
  
      echo "$suite_script located."
-
-    if [[ ${!suite} = no ]]; then
+    if [[ ${!suite} != no ]]; then
+        local rc
+        local status
+        local duration
+        local start_ts=$(date +%s)
+        rm -rf $TF_FAIL
+        title $suite_name
+        log_test $suite_name
+        bash $suite_script ${!suite_only}
+        rc=$?
+        duration=$(($(date +%s) - $start_ts))
+        if [ -f $TF_FAIL -o $rc -ne 0 ]; then
+            status="FAIL"
+        else
+            status="PASS"
+        fi
+        echo "Script: $status"
+        log_test_status $duration $status
+
+        $CLEANUP
+        [ x$suite = xSGPDD_SURVEY ] || $SETUP
+
+        eval ${suite}="done"
+    else
          echo "Skipping $suite_name"
-        continue
      fi
+}
  
-    start_ts=$(date +%s)
-    title $suite_name
-    bash $suite_script ${!suite_only}
-    rc=$?
-    duration=$(($(date +%s) - $start_ts))
-    if [ $rc -ne 0 ]; then
-        RC=$rc
-        status="FAIL"
-    else
-        status="PASS"
-    fi
-    echo "Script: $status"
+# Run every suite named in the arguments, in order, through run_suite.
+# The loop variable "suite" is not declared local.
+run_suites() {
+    for suite in $*; do
+        run_suite $suite
+    done
+}
+
+export NAME MOUNT START CLEAN
+. $LUSTRE/tests/cfg/$NAME.sh
  
+assert_env mds_HOST MDS_MKFS_OPTS
+assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT
+assert_env FSNAME MOUNT MOUNT2
+
+setup_if_needed
+init_logging
  
-    $CLEANUP
-    [ x$suite = xSGPDD_SURVEY ] || $SETUP
-    eval ${suite}="done"
-done
+run_suites ${ACC_SM_ONLY:-$DEFAULT_SUITES}
  
  RC=$?
  title FINISHED
diff --git a/lustre/tests/auster.sh b/lustre/tests/auster.sh

new file mode 100755 (executable)

index 0000000..17c60e1
--- /dev/null
+++ b/lustre/tests/auster.sh
@@ -0,0 +1,320 @@
+#!/bin/bash
+#
+#
+# auster - drive lustre tests
+# TODO
+#  1. --time-limit <seconds>  add per test time limit, kill test if it runs too long
+#  2. Read list of tests to run from a file. same syntax as cli, but one test per line
+#  3. Run test on remote node
+#  4. Use long opts for auster options
+
+set -e
+
+export TF_FAIL=/tmp/tf.fail
+
+# Show the command-line help through a pager and terminate the script.
+# "less -F" quits immediately when the whole text fits on one screen.
+# The here-document delimiter is unquoted so ${0##*/} expands to the
+# name this script was invoked as.
+usage() {
+    less -F <<EOF
+Usage ${0##*/} [options]  suite [suite options] [suite [suite options]]
+Run Lustre regression tests suites.
+      -c CONFIG Test environment config file
+      -d LOGDIR Top level directory for logs
+      -D FULLLOGDIR Full directory for logs
+      -f STR    Config name (cfg/<name>.sh)
+      -g GROUP  Test group file (Overrides tests listed on command line)
+      -i N      Repeat tests N times (default 1). A new directory
+                will be created under LOGDIR for each iteration.
+      -k        Don't stop when subtests fail
+      -n        Dry run: print the suites that would be run
+      -R        Remount lustre between tests
+      -r        Reformat (during initial configuration if needed)
+      -s        SLOW=yes
+      -v        Verbose mode
+      -l        Send logs to the Maloo database after run
+                  (can be done later by running maloo_upload.sh)
+      -h        This help.
+
+Suite options
+These are suite specific options that can be specified after each suite on
+the command line.
+   suite-name  [options]
+      --only LIST         Run only specific list of subtests
+      --except LIST       Skip list of subtests
+      --start-at SUBTEST  Start testing from subtest
+      --stop-at SUBTEST   Stop testing at subtest
+      --time-limit LIMIT  Don't allow this suite to run longer
+                          than LIMIT seconds. [UNIMPLEMENTED]
+
+Example usage:
+Run all of sanity and all of replay-single except for 70b with SLOW=y using
+the default "local" configuration.
+
+  auster -s sanity replay-single --except 70b
+
+Run all tests in the regression group 5 times using large config.
+
+  auster -f large -g test-groups/regression  -i 5
+
+EOF
+    exit
+}
+
+# Defaults for the command-line switches parsed below.  The boolean
+# switches hold a command name ("false", "true" or ":") so that they can
+# be tested directly with "if $flag".
+dry_run=false
+do_reset=false
+verbose=false
+repeat_count=1
+upload_logs=false
+reformat=false
+test_logs_dir=/tmp/test_logs/$(date +%Y-%m-%d)/$(date +%H%M%S)
+export SLOW=no
+# Config name defaults to "local".  Export NAME itself: the former
+# "export ${NAME:=local}" exported a variable named after NAME's *value*
+# and failed outright when that value was not a valid identifier.
+export NAME=${NAME:-local}
+while getopts "c:d:D:nkf:g:i:rRslhv" opt
+do
+    case "$opt" in
+        c) CONFIG=$OPTARG;;
+        d) test_logs_dir=$OPTARG/$(date +%Y-%m-%d)/$(date +%H%M%S);;
+        D) test_logs_dir=$OPTARG;;
+        g) test_group_file=$OPTARG;;
+        k) export FAIL_ON_ERROR=false;;
+        n) dry_run=:;;
+        v) verbose=:;;
+        i) repeat_count=$OPTARG;;
+        f) NAME=$OPTARG;;
+        R) do_reset=:;;
+        r) reformat=:;;
+        s) SLOW=yes;;
+        l) upload_logs=true;;
+        h|\?) usage;;
+    esac
+done
+
+# If a test_group_file is specified, then ignore rest of command line:
+# the positional parameters are replaced by the words of the group file
+# with "#" comments stripped.
+if [[ $test_group_file ]]; then
+    export TEST_GROUP=$(basename "$test_group_file")
+    # "--" is required: without it an empty group file makes "set" print
+    # every shell variable, and a word starting with "-" is taken as a
+    # shell option.  The substitution is left unquoted on purpose so the
+    # file content is split into separate words.
+    set -- $(sed 's/#.*$//' "$test_group_file")
+else
+    shift $((OPTIND -1))
+fi
+
+# Restart Lustre (stopall followed by setupall) when the -R switch set
+# do_reset; otherwise do nothing and succeed.
+reset_lustre() {
+    $do_reset || return 0
+    stopall
+    setupall
+}
+
+# Start time in seconds since the epoch; the closing "Finished at" line
+# subtracts it to report the elapsed time.
+STARTTIME=`date +%s`
+
+# LUSTRE defaults to the parent of the directory that holds this script.
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+# Pull in the shared test framework, then call its init_test_env.
+. $LUSTRE/tests/test-framework.sh
+init_test_env
+
+# Print the header row of the results summary, a separator rule and a
+# final "Done!" line.  "trap 0" with no action resets the EXIT trap.
+print_summary () {
+    trap 0
+    printf "%-13s %-17s %s\n" \
+        "status" "script" "skipped tests E(xcluded) S(low)"
+    printf '%s\n' \
+        "------------------------------------------------------------------------------------" \
+        "Done!"
+}
+
+
+# Make sure Lustre is mounted on $MOUNT before any suite runs.
+#
+# Nothing is done in NFS client mode.  If $MOUNT is already among the
+# mounted Lustre filesystems only the client configuration is checked.
+# Otherwise the filesystem is set up (and formatted first when -r was
+# given); the script exits with status 1 if it is still not mounted.
+#
+# Sets the global auster_cleanup to "true" only when this function did
+# the setup itself, so cleanup_if_needed knows whether to tear it down.
+setup_if_needed() {
+    # Assign the flag before any early return: an unset flag would expand
+    # to an empty command, which counts as success in "if $flag".
+    auster_cleanup=false
+    nfs_client_mode && return
+
+    local MOUNTED=$(mounted_lustre_filesystems)
+    # Test the pipeline status directly rather than executing the (empty)
+    # output of a command substitution.
+    if echo "$MOUNTED" | grep -w -q "$MOUNT"; then
+        check_config_clients $MOUNT
+        # init_facets_vars
+        # init_param_vars
+        return
+    fi
+
+    echo "Lustre is not mounted, trying to do setup ... "
+    $reformat && formatall
+    setupall
+
+    MOUNTED=$(mounted_lustre_filesystems)
+    if ! echo "$MOUNTED" | grep -w -q "$MOUNT"; then
+        echo "Lustre is not mounted after setup! "
+        exit 1
+    fi
+    auster_cleanup=true
+}
+
+# Run cleanupall, but only when setup_if_needed performed the setup
+# itself (auster_cleanup is "true").
+cleanup_if_needed() {
+    # Default to "false": setup_if_needed can return before assigning the
+    # flag, and an unset flag would expand to an empty command, which
+    # succeeds and would trigger the cleanup.
+    if ${auster_cleanup:-false}; then
+        cleanupall
+    fi
+}
+
+# Locate a script in a colon-separated list of directories.
+#
+# Arguments: $1 - script name, with or without the ".sh" suffix
+#            $2 - colon-separated directory list
+# Outputs:   the first match, "<dir>/<name>" or "<dir>/<name>.sh", on stdout
+# Returns:   0 when a match was printed, 1 when nothing was found
+find_script_in_path() {
+    local target=$1
+    local path=$2
+    local dir
+    # Split on ":" only, so directories containing blanks stay intact.
+    local IFS=:
+    for dir in $path; do
+        # Empty list entries are skipped.
+        [ -n "$dir" ] || continue
+        if [ -e "$dir/$target" ]; then
+            echo "$dir/$target"
+            return 0
+        fi
+        if [ -e "$dir/$target.sh" ]; then
+            echo "$dir/$target.sh"
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Pass a banner line containing the arguments and the current date to
+# the framework's log function.
+title() {
+    local now=$(date)
+    log "-----============= acceptance-small: "$*" ============----- $now"
+}
+
+# Run the command given as arguments.  With -n (dry run) the command is
+# only printed and 0 is returned; with -v it is printed before it runs.
+doit() {
+    $dry_run && { printf "Would have run: %s\n" "$*"; return 0; }
+    $verbose && printf "Running: %s\n" "$*"
+    "$@"
+}
+
+
+# Run one test suite script and record its result.
+#
+# Arguments: $1 - suite name (used for the banner and the test log)
+#            $2 - path of the suite script, run with bash through doit
+# Globals:   TF_FAIL (read) - marker file; its presence after the run
+#            counts as a failure even when the script exits 0
+# Returns:   0 when the suite passed, 1 when it failed
+run_suite() {
+    local suite_name=$1
+    local suite_script=$2
+    local rc=0
+    local duration
+    local status
+    title $suite_name
+    log_test $suite_name
+
+    # Remove a stale marker so only this run can create it.
+    rm -f $TF_FAIL
+    local start_ts=$(date +%s)
+    # "|| rc=$?" records the status without tripping "set -e".
+    doit bash $suite_script || rc=$?
+    duration=$(($(date +%s) - $start_ts))
+    if [ -f $TF_FAIL -o $rc -ne 0 ]; then
+        status="FAIL"
+    else
+        status="PASS"
+    fi
+    log_test_status $duration $status
+
+    reset_lustre
+
+    # Hand the verdict back to the caller.
+    [ "$status" = "PASS" ]
+}
+
+# Find the script for a suite and run it through run_suite with all
+# output captured in $LOGDIR/<suite>.suite_log.<hostname>.log.  In
+# verbose mode the output is also copied to the terminal.
+#
+# Arguments: $1 - suite name, with or without the ".sh" suffix
+# Returns:   1 when no script is found, otherwise run_suite's status
+run_suite_logged() {
+    local suite_name=${1%.sh}
+    local suite_script
+    local rc=0
+
+    # "|| true": a failed lookup is reported by the check just below.
+    suite_script=$(find_script_in_path $suite_name $PATH:$LUSTRE/tests) || true
+
+    if [[ -z $suite_script ]]; then
+        echo "Can't find test script for $suite_name"
+        return 1
+    fi
+
+    echo "run_suite $suite_name $suite_script"
+    local log_name=${suite_name}.suite_log.$(hostname).log
+    if $verbose; then
+        run_suite $suite_name $suite_script 2>&1 |tee  $LOGDIR/$log_name
+        # The pipeline's own status is tee's; take run_suite's instead.
+        rc=${PIPESTATUS[0]}
+    else
+        run_suite $suite_name $suite_script > $LOGDIR/$log_name 2>&1 || rc=$?
+    fi
+
+    return $rc
+}
+
+#
+# Add this to test-framework somewhere.
+# Point logging at a new directory: clear YAML_LOG, export the directory
+# given as $1 in LOGDIR, then call init_logging again.
+reset_logging() {
+    unset YAML_LOG
+    export LOGDIR=$1
+    init_logging
+}
+
+# Print the arguments with every comma replaced by a space.
+split_commas() {
+    local -a parts=("${@//,/ }")
+    echo "${parts[*]}"
+}
+
+# Run the suites named on the command line, repeat_count times.
+#
+# Arguments: suite [suite options] [suite [suite options]] ...
+#            The options (--only, --except, --start-at, --stop-at) are
+#            exported as ONLY, EXCEPT, START_AT and STOP_AT for the suite
+#            they follow; --time-limit is parsed but not acted upon.
+# Globals:   repeat_count, test_logs_dir, upload_logs, upload_script (read)
+# Returns:   0 when every suite succeeded, otherwise the status of the
+#            last suite that failed (over all iterations)
+run_suites() {
+    local n=0
+    local RC=0
+    local suite
+    local argv=("$@")
+    while ((n < repeat_count)); do
+        local logdir=${test_logs_dir}
+        # One log sub-directory per iteration when repeating.
+        ((repeat_count > 1)) && logdir="$logdir/$n"
+        reset_logging $logdir
+        # Rewind the argument list for this iteration.
+        set -- "${argv[@]}"
+        while [[ -n $1 ]]; do
+            unset ONLY EXCEPT START_AT STOP_AT
+            local opts=""
+            local time_limit=""
+            local suite_rc=0
+#           echo "argv: $*"
+            suite=$1
+            shift;
+            # Consume the options that belong to this suite; the first
+            # word that is not a known option starts the next suite.
+            while [[ -n $1 ]]; do
+                case "$1" in
+                    --only)
+                        shift;
+                        export ONLY=$(split_commas $1)
+                        opts+="ONLY=$ONLY ";;
+                    --except)
+                        shift;
+                        export EXCEPT=$(split_commas $1)
+                        opts+="EXCEPT=$EXCEPT ";;
+                    --start-at)
+                        shift;
+                        export START_AT=$1
+                        opts+="START_AT=$START_AT ";;
+                    --stop-at)
+                        shift;
+                        export STOP_AT=$1
+                        opts+="STOP_AT=$STOP_AT ";;
+                    --time-limit)
+                        shift;
+                        time_limit=$1;;
+                    *)
+                        break;;
+                esac
+                shift
+            done
+            echo "running: $suite $opts"
+            run_suite_logged $suite || suite_rc=$?
+            echo $suite returned $suite_rc
+            # Remember the failure without hiding later suites' results.
+            [ $suite_rc -eq 0 ] || RC=$suite_rc
+        done
+        if $upload_logs; then
+            $upload_script $LOGDIR
+        fi
+        n=$((n + 1))
+    done
+    return $RC
+}
+
+# When -l was given, verify up front that the upload can work: the
+# maloo_upload.sh script must be found and ~/.maloorc must be readable.
+if [ $upload_logs = true ] ; then
+    # "|| true": under "set -e" a failed lookup would end the script right
+    # here, before the explanation below could be printed.
+    upload_script=$(find_script_in_path maloo_upload.sh $PATH:$LUSTRE/tests) || true
+    if [[ -z $upload_script ]]; then
+        echo "Can't find maloo_upload.sh script"
+        exit 1
+    fi
+
+    if [ ! -r ~/.maloorc ] ; then
+        echo "A ~/.maloorc file is required in order to upload results."
+        echo "Visit your maloo web interface to download your .maloorc file"
+        exit 1
+    fi
+fi
+
+# Load the test environment: an explicit config file given with -c, or
+# cfg/<NAME>.sh from the tests directory.
+export NAME MOUNT START CLEAN
+. ${CONFIG:-$LUSTRE/tests/cfg/$NAME.sh}
+
+assert_env mds_HOST MDS_MKFS_OPTS
+assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT
+assert_env FSNAME MOUNT MOUNT2
+
+echo "Started at $(date)"
+setup_if_needed
+
+# Capture the status with "||": under "set -e" a bare failing call would
+# terminate the script before RC is recorded and the closing lines print.
+RC=0
+run_suites "$@" || RC=$?
+
+# Tear down only after a clean run.
+if [[ $RC -eq 0 ]]; then
+    cleanup_if_needed
+fi
+
+echo "Finished at $(date) in $(($(date +%s) - $STARTTIME))s"
+echo "$0: completed with rc $RC"
+exit $RC
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh

index d2c8f14..f2e34b4 100644 (file)
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -31,6 +31,7 @@ HOSTNAME=`hostname`
  
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
+init_logging
  # STORED_MDSSIZE is used in test_18
  if [ -n "$MDSSIZE" ]; then
      STORED_MDSSIZE=$MDSSIZE
@@ -40,15 +41,14 @@ MDSSIZE=40000
  OSTSIZE=40000
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  
+require_dsh_mds || exit 0
+require_dsh_ost || exit 0
+
  if ! combined_mgs_mds; then
      # bug number for skipped test:    23954
      ALWAYS_EXCEPT="$ALWAYS_EXCEPT       24b"
  fi
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
-
-#
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="30 31 45"
  
  assert_DIR
@@ -456,7 +456,7 @@ test_5f() {
  
         sleep 5
  
-       if ! ps -f -p $pid >/dev/null; then 
+       if ! ps -f -p $pid >/dev/null; then
                 wait $pid
                 rc=$?
                 grep " $MOUNT " /etc/mtab && echo "test 5f: mtab after mount"
@@ -469,7 +469,7 @@ test_5f() {
         # start mds
         start_mds
  
-       # mount should succeed after start mds 
+       # mount should succeed after start mds
         wait $pid
         rc=$?
         [ $rc -eq 0 ] || error "mount returned $rc"
@@ -649,7 +649,7 @@ test_18() {
          echo "mount mds with large journal..."
          local OLD_MDS_MKFS_OPTS=$MDS_MKFS_OPTS
  
-        local opts="--mdt --fsname=$FSNAME --device-size=$myMDSSIZE --param sys.timeout=$TIMEOUT $MDSOPT" 
+        local opts="--mdt --fsname=$FSNAME --device-size=$myMDSSIZE --param sys.timeout=$TIMEOUT $MDSOPT"
  
          if combined_mgs_mds ; then
              MDS_MKFS_OPTS="--mgs $opts"
@@ -983,7 +983,7 @@ test_27b() {
          setup
  
         # interop 1.8 <-> 2.0:
-       # 1.8: group_acquire_expire, 2.0: identity_acquire_expire 
+       # 1.8: group_acquire_expire, 2.0: identity_acquire_expire
         local acquire_expire=$(do_facet mds lctl get_param md*.$FSNAME-MDT0000.*acquire_expire | \
                 cut -d= -f1 | cut -d. -f3)
         facet_failover mds
@@ -1511,7 +1511,7 @@ test_35b() { # bug 18674
                 return 1
  
         local at_max_saved=0
-       # adaptive timeouts may prevent seeing the issue 
+       # adaptive timeouts may prevent seeing the issue
         if at_is_enabled; then
                 at_max_saved=$(at_max_get mds)
                 at_max_set 0 mds client
@@ -1869,7 +1869,7 @@ cleanup_46a() {
                 stop ost${count} -f || rc=$?
                 let count=count-1
         done    
-       stop_mds || rc=$? 
+       stop_mds || rc=$?
         cleanup_nocli || rc=$?
         #writeconf to remove all ost2 traces for subsequent tests
         writeconf
@@ -1887,7 +1887,7 @@ test_46a() {
         mount_client $MOUNT || return 3
         trap "cleanup_46a $OSTCOUNT" EXIT ERR
  
-       local i 
+       local i
         for (( i=2; i<=$OSTCOUNT; i++ )); do
             start ost$i `ostdevname $i` $OST_MOUNT_OPTS || return $((i+2))
         done
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh

index 8f40d52..8206a85 100755 (executable)
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -9,7 +9,7 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
-
+init_logging
  ALWAYS_EXCEPT="10 $INSANITY_EXCEPT"
  
  if [ "$FAILURE_MODE" = "HARD" ]; then
@@ -33,8 +33,8 @@ assert_env mds_HOST MDS_MKFS_OPTS MDSDEV
  assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT
  assert_env LIVE_CLIENT FSNAME
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+require_dsh_mds || exit 0
+require_dsh_ost || exit 0
  
  # FAIL_CLIENTS list should not contain the LIVE_CLIENT
  FAIL_CLIENTS=$(echo " $FAIL_CLIENTS " | sed -re "s/\s+$LIVE_CLIENT\s+/ /g")
@@ -64,9 +64,9 @@ fail_clients() {
  
      log "Request clients to fail: ${num}. Num of clients to fail: ${FAIL_NUM}, already failed: $DOWN_NUM"
      if [ -z "$num"  ] || [ "$num" -gt $((FAIL_NUM - DOWN_NUM)) ]; then
-       num=$((FAIL_NUM - DOWN_NUM)) 
+       num=$((FAIL_NUM - DOWN_NUM))
      fi
-    
+
      if [ -z "$num" ] || [ "$num" -le 0 ]; then
          log "No clients failed!"
          return
@@ -505,7 +505,7 @@ run_test 8 "Eighth Failure Mode: CLIENT/OST `date`"
  
  ############### Ninth Failure Mode ###############
  test_9() {
-    echo 
+    echo
  
      #Create files
      echo "Verify Lustre filesystem is up and running"
diff --git a/lustre/tests/large-scale.sh b/lustre/tests/large-scale.sh

index 51b8777..d7b6ce2 100644 (file)
--- a/lustre/tests/large-scale.sh
+++ b/lustre/tests/large-scale.sh
@@ -15,8 +15,9 @@ CLEANUP=${CLEANUP:-""}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
-remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
+require_dsh_mds || exit 0
  
  [ -n "$CLIENTS" ] || { skip_env "$0: Need two or more clients" && exit 0; }
  [ $CLIENTCOUNT -ge 2 ] || \
@@ -35,7 +36,7 @@ rm -rf $DIR/[df][0-9]*
  
  # VBR scale tests
  check_vbr () {
-    do_nodes $CLIENTS "$LCTL get_param mdc.*.connect_flags | grep version_recovery" 
+    do_nodes $CLIENTS "$LCTL get_param mdc.*.connect_flags | grep version_recovery"
  }
  
  check_vbr || \
@@ -119,7 +120,7 @@ test_1c() {
          replay_barrier mds
          do_nodes $CLIENTS "createmany -o $DIR/$tfile-\\\$(hostname)" 25
          # XXX For FAILURE_MODE=HARD it is better to exclude
-        # shutdown_facet and reboot_facet time 
+        # shutdown_facet and reboot_facet time
          fail_mds
  
          local current_ts=`date +%s`
@@ -178,7 +179,7 @@ test_3a() {
  
      local -a nodes=(${CLIENTS//,/ })
  
-    # INCREMENT is a number of clients 
+    # INCREMENT is a number of clients
      # a half of clients by default
      increment=${INCREMENT:-$(( CLIENTCOUNT / 2 ))}
  
@@ -205,7 +206,7 @@ test_3a() {
      local num=$increment
  
      while [ $num -le $CLIENTCOUNT ]; do
-        list=$(comma_list ${nodes[@]:0:$num}) 
+        list=$(comma_list ${nodes[@]:0:$num})
  
          generate_machine_file $list $machinefile ||
              { error "can not generate machinefile"; exit 1; }
@@ -231,7 +232,7 @@ test_3a() {
              fi
  
              duration=$(do_facet mds lctl get_param -n $procfile | grep recovery_duration)
-            
+
              res=( "${res[@]}" "$num" )
              res=( "${res[@]}" "$duration" )
              echo "RECOVERY TIME: NFILES=$nfiles number of clients: $num  $duration"
diff --git a/lustre/tests/lfsck.sh b/lustre/tests/lfsck.sh

index b23559d..926949e 100644 (file)
--- a/lustre/tests/lfsck.sh
+++ b/lustre/tests/lfsck.sh
@@ -9,6 +9,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  NUMFILES=${NUMFILES:-10}
  NUMDIRS=${NUMDIRS:-4}
@@ -156,7 +157,7 @@ get_files() {
      esac
  
      local files=""
-    local f 
+    local f
      for f in $(seq -f testfile.%g $first $last); do
          test_file=$test_dir/$f
          files="$files $test_file"
diff --git a/lustre/tests/liblustre.sh b/lustre/tests/liblustre.sh

index 12af4d7..0ad8c35 100644 (file)
--- a/lustre/tests/liblustre.sh
+++ b/lustre/tests/liblustre.sh
@@ -8,6 +8,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  LIBLUSTRETESTS=${LIBLUSTRETESTS:-$LUSTRE/liblustre/tests}
  
diff --git a/lustre/tests/lnet-selftest.sh b/lustre/tests/lnet-selftest.sh

index f4dd5b3..be4b2e8 100755 (executable)
--- a/lustre/tests/lnet-selftest.sh
+++ b/lustre/tests/lnet-selftest.sh
@@ -4,6 +4,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  #
  ALWAYS_EXCEPT="$ALWAYS_EXCEPT $LNET_SELFTEST_EXCEPT"
@@ -104,7 +105,6 @@ test_smoke_sub () {
      echo 'trap "cleanup $pid" INT TERM'
      echo sleep $smoke_DURATION
      echo 'cleanup $pid'
-    
  }
  
  run_lst () {
@@ -137,24 +137,23 @@ test_smoke () {
      local log=$TMP/$tfile.log
      local rc=0
  
-    test_smoke_sub $servers $clients 2>&1 > $runlst 
+    test_smoke_sub $servers $clients 2>&1 > $runlst
  
      cat $runlst
  
      run_lst $runlst | tee $log
      rc=${PIPESTATUS[0]}
      [ $rc = 0 ] || error "$runlst failed: $rc"
-    
+
      lst_end_session --verbose | tee -a $log
  
      # error counters in "lst show_error" should be checked
      check_lst_err $log
-    
  }
  run_test smoke "lst regression test"
  
  complete $(basename $0) $SECONDS
  if [ "$RESTORE_MOUNT" = yes ]; then
      setupall
-fi 
+fi
  exit_status
diff --git a/lustre/tests/maloo_upload.sh b/lustre/tests/maloo_upload.sh

new file mode 100755 (executable)

index 0000000..dc81ed0
--- /dev/null
+++ b/lustre/tests/maloo_upload.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# Upload a test-results tarball, or the contents of a results directory,
+# to the Maloo server.
+#
+# Usage: maloo_upload.sh <tarball or directory>
+#
+# ~/.maloorc is sourced first; the upload uses MALOO_URL, MALOO_USER_ID
+# and MALOO_UPLOAD_TOKEN, presumably defined by that file.
+#
+# Exit status: 1 - ~/.maloorc missing, 2 - no argument given,
+#              3 - input not readable or directory cannot be entered.
+#
+# Only POSIX sh constructs are used because of the #!/bin/sh line above
+# ("source", "pushd" and "popd" are bash extensions).
+
+FILENAME=$1
+
+if [ -r ~/.maloorc ] ; then
+        . ~/.maloorc
+else
+        echo "Error: ~/.maloorc not found.  Please obtain this file from the maloo web interface, under 'Upload results'"
+        exit 1
+fi
+
+if [ -z "$FILENAME" ] ; then
+        echo "Usage: ${0} <tarball or directory>"
+        exit 2
+fi
+
+
+if [ ! -r "$FILENAME" ] ; then
+        echo "Input file '$FILENAME' not found"
+        exit 3
+fi
+
+echo Uploading $FILENAME to $MALOO_URL
+if [ -d "$FILENAME" ] ; then
+        # Stop if the directory cannot be entered, so the contents of
+        # some other directory are never uploaded by mistake.  Nothing
+        # after this point depends on the working directory, so there is
+        # no need to change back.
+        if ! cd "$FILENAME" ; then
+                echo "Cannot enter directory '$FILENAME'"
+                exit 3
+        fi
+        tar czf - * | curl -F "user_id=${MALOO_USER_ID}" -F "upload=@-" -F "user_upload_token=${MALOO_UPLOAD_TOKEN}" "${MALOO_URL}" > /dev/null
+else
+        curl -F "user_id=${MALOO_USER_ID}" -F "upload=@${FILENAME}" -F "user_upload_token=${MALOO_UPLOAD_TOKEN}" "${MALOO_URL}" > /dev/null
+fi
+echo Complete.
diff --git a/lustre/tests/metadata-updates.sh b/lustre/tests/metadata-updates.sh

index 9ef46ee..a698981 100755 (executable)
--- a/lustre/tests/metadata-updates.sh
+++ b/lustre/tests/metadata-updates.sh
@@ -10,6 +10,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  TRACE=${TRACE:-"+x"}
  
@@ -71,7 +72,7 @@ do_write () {
      do_nodes $NODES_TO_USE "set $TRACE;
  TESTFILE=$TESTDIR/\\\$(hostname)/$FILE;
  dd if=/dev/zero of=\\\$TESTFILE bs=$FILE_SIZE count=1 2>/dev/null || exit 54;
-echo \\\$(hostname) | dd of=\\\$TESTFILE conv=notrunc 2>/dev/null || exit 55; 
+echo \\\$(hostname) | dd of=\\\$TESTFILE conv=notrunc 2>/dev/null || exit 55;
  md5sum \\\$TESTFILE >> $SUMFILE; " || return ${PIPESTATUS[0]}
      return 0
  }
@@ -90,7 +91,7 @@ do_truncate () {
  
       do_nodes $NODES_TO_USE "set $TRACE;
  TESTFILE=$TESTDIR/\\\$(hostname)/$FILE;
-$TRUNCATE \\\$TESTFILE 0" || return ${PIPESTATUS[0]} 
+$TRUNCATE \\\$TESTFILE 0" || return ${PIPESTATUS[0]}
  
      FILE_SIZE=0
      return 0
@@ -103,7 +104,7 @@ get_stat () {
      echo "Checking file(s) attributes ... "
  
      do_nodesv $NODES_TO_USE "set $TRACE;
-for HOST in ${HOSTS//,/ } ; do 
+for HOST in ${HOSTS//,/ } ; do
      TESTFILE=$TESTDIR/\\\$HOST/$FILE;
      tmp=\\\$(stat -c \\\"%u %g %s 0%a\\\" \\\$TESTFILE);
      echo \\\"\\\$TESTFILE [ uid gid size mode ] expected : $attr ;  got : \\\$tmp \\\";
@@ -112,7 +113,7 @@ for HOST in ${HOSTS//,/ } ; do
          exit 56;
      fi;
  done " || return ${PIPESTATUS[0]}
-    return 0 
+    return 0
  }
  
  do_chmod () {
@@ -121,7 +122,7 @@ do_chmod () {
      do_nodes $NODES_TO_USE "set $TRACE;
  TESTFILE=$TESTDIR/\\\$(hostname)/$FILE;
  chmod $NEW_MODE \\\$TESTFILE" || return ${PIPESTATUS[0]}
- 
+
      CURRENT_MODE=$NEW_MODE
      return 0
  }
@@ -146,7 +147,7 @@ do_check_timestamps () {
      echo "Checking atime, mtime ... "
  
      do_nodesv $NODES_TO_USE "set $TRACE;
-for HOST in ${HOSTS//,/ } ; do 
+for HOST in ${HOSTS//,/ } ; do
      TESTFILE=$TESTDIR/\\\$HOST/$FILE;
      tmp=\\\$(stat -c \\\"%X %Y\\\" \\\$TESTFILE);
      if [ x\\\"\\\$tmp\\\" != x\\\"$times\\\" ] ; then
@@ -155,7 +156,7 @@ for HOST in ${HOSTS//,/ } ; do
      fi;
  done;
  exit \\\$RC" || return ${PIPESTATUS[0]}
-    return 0 
+    return 0
  }
  
  do_fill_dir () {
@@ -176,7 +177,7 @@ check_dir_contents () {
  
      echo "Checking dir contents ... (should exist files : f$num_files ... f$NUM_FILES) ... "
      do_nodes $NODES_TO_USE "set $TRACE;
-for HOST in ${HOSTS//,/ } ; do 
+for HOST in ${HOSTS//,/ } ; do
      DIR=$TESTDIR/\\\$HOST;
      for i in \\\$(seq $NUM_FILES -1 $num_files) ; do
          if ! [ -f \\\$DIR/f\\\$i ] ; then
diff --git a/lustre/tests/mmp.sh b/lustre/tests/mmp.sh

index 6b7c256..4eca25c 100755 (executable)
--- a/lustre/tests/mmp.sh
+++ b/lustre/tests/mmp.sh
@@ -22,9 +22,10 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+require_dsh_mds || exit 0
+require_dsh_ost || exit 0
  
  # unmount and cleanup the Lustre filesystem
  MMP_RESTORE_MOUNT=false
@@ -164,7 +165,7 @@ mmp_init() {
      fi
  
      local var=${MMP_OSS}failover_HOST
- 
+
      if [ -z "${!var}" ]; then
          log "Failover is not used on OSS, enabling MMP manually..."
          enable_mmp $MMP_OSS $MMP_OSTDEV || \
@@ -204,7 +205,7 @@ mmp_fini() {
      return 0
  }
  
-# Mount the shared target on the failover server after some interval it's 
+# Mount the shared target on the failover server after some interval it's
  # mounted on the primary server.
  mount_after_interval_sub() {
      local interval=$1
@@ -269,7 +270,7 @@ mount_after_interval() {
      return 0
  }
  
-# Mount the shared target on the failover server 
+# Mount the shared target on the failover server
  # during unmounting it on the primary server.
  mount_during_unmount() {
      local device=$1
@@ -309,7 +310,7 @@ mount_during_unmount() {
      return 0
  }
  
-# Mount the shared target on the failover server 
+# Mount the shared target on the failover server
  # after clean unmounting it on the primary server.
  mount_after_unmount() {
      local device=$1
@@ -323,7 +324,7 @@ mount_after_unmount() {
      start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
  
      log "Unmounting $device on $facet..."
-    stop $facet || return ${PIPESTATUS[0]} 
+    stop $facet || return ${PIPESTATUS[0]}
  
      log "Mounting $device on $failover_facet..."
      start $failover_facet $device $mnt_opts || return ${PIPESTATUS[0]}
diff --git a/lustre/tests/obdfilter-survey.sh b/lustre/tests/obdfilter-survey.sh

index 043883f..cc84b4d 100644 (file)
--- a/lustre/tests/obdfilter-survey.sh
+++ b/lustre/tests/obdfilter-survey.sh
@@ -5,12 +5,13 @@ set -e
  LUSTRE=${LUSTRE:-`dirname $0`/..}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
+init_logging
  
  nobjhi=${nobjhi:-1}
-thrhi=${thrhi:-16} 
+thrhi=${thrhi:-16}
  size=${size:-1024}
  
-# the summary file a bit smaller than OSTSIZE  
+# the summary file a bit smaller than OSTSIZE
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  
  [ "$SLOW" = no ] && { nobjhi=1; thrhi=4; }
@@ -85,7 +86,7 @@ print_jbd () {
         local varsvc=${facet}_svc
         local dev=$(ldiskfs_canon "*.${!varsvc}.mntdev" $facet)
  
-       # ext4: /proc/fs/jbd2/sda1:8/history 
+       # ext4: /proc/fs/jbd2/sda1:8/history
         # ext3: /proc/fs/jbd/sdb1/history
  
         do_facet $facet cat /proc/fs/jbd*/${dev}*/$file
diff --git a/lustre/tests/ost-pools.sh b/lustre/tests/ost-pools.sh

index c47dd3e..79e9d9c 100644 (file)
--- a/lustre/tests/ost-pools.sh
+++ b/lustre/tests/ost-pools.sh
@@ -25,6 +25,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  check_and_setup_lustre
  
@@ -142,7 +143,7 @@ check_file_in_osts() {
          local ost_count=$($GETSTRIPE $file | grep 0x | wc -l)
          [[ -n "$count" ]] && [[ $ost_count -ne $count ]] && \
              { error "Stripe count $count expected; got $ost_count" && return 1; }
-                
+
          return 0
  }
  
@@ -681,10 +682,10 @@ test_12() {
      add_pool $POOL2 $FSNAME-OST[$TGT_FIRST] "$FIRST_UUID "
      do_facet $SINGLEMDS lctl pool_list $FSNAME.$POOL2
  
-    echo Checking the files again    
+    echo Checking the files again
      check_dir_in_pool $POOL_ROOT/dir1 $POOL
      check_dir_in_pool $POOL_ROOT/dir2 $POOL2
-    check_file_in_osts $POOL_ROOT/file1 "$TGT_LIST2"    
+    check_file_in_osts $POOL_ROOT/file1 "$TGT_LIST2"
      check_file_in_osts $POOL_ROOT/file2 "$(seq $start 2 $TGT_MAX)"
  
      echo Creating some more files
@@ -693,14 +694,14 @@ test_12() {
      create_file $POOL_ROOT/file3 $POOL
      create_file $POOL_ROOT/file4 $POOL2
  
-    echo Checking the new files 
+    echo Checking the new files
      check_file_in_pool $POOL_ROOT/file3 $POOL
      check_file_in_pool $POOL_ROOT/file4 $POOL2
  
      destroy_pool $POOL
      destroy_pool $POOL2
  
-    return 0    
+    return 0
  }
  run_test 12 "OST Pool Membership"
  
@@ -786,7 +787,7 @@ test_14() {
  
      create_dir $POOL_ROOT/dir1 $POOL 1
      create_file $POOL_ROOT/dir1/file $POOL 1
-    local OST=$($GETSTRIPE $POOL_ROOT/dir1/file | grep 0x | cut -f2)    
+    local OST=$($GETSTRIPE $POOL_ROOT/dir1/file | grep 0x | cut -f2)
      i=0
      while [[ $i -lt $numfiles ]];
      do
@@ -1297,7 +1298,7 @@ test_24() {
                error "Stripe count ($count) not inherited in $file ($count1)"
            [[ "$size" != "$size1" ]] && [[ "$size" != "0" ]] && \
                error "Stripe size ($size) not inherited in $file ($size1)"
-      done 
+      done
      done
  
      rm -rf $POOL_ROOT
diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh

index 061db6d..73e0040 100644 (file)
--- a/lustre/tests/parallel-scale.sh
+++ b/lustre/tests/parallel-scale.sh
@@ -6,6 +6,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  # bug number:
  ALWAYS_EXCEPT="$PARALLEL_SCALE_EXCEPT"
@@ -139,7 +140,7 @@ test_compilebench() {
      mkdir -p $testdir
  
      local savePWD=$PWD
-    cd $cbench_DIR 
+    cd $cbench_DIR
      local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
  
      log "$cmd"
@@ -147,7 +148,7 @@ test_compilebench() {
      local rc=0
      eval $cmd
      rc=$?
-        
+
      cd $savePWD
      [ $rc = 0 ] || error "compilebench failed: $rc"
      rm -rf $testdir
@@ -260,9 +261,9 @@ test_connectathon() {
      #    -s  special
      #    -l  lock
      #    -a  all of the above
-    #   
+    #
      # -f      a quick functionality test
-    # 
+    #
  
      tests="-b -g -s"
      # Include lock tests unless we're running on nfsv4
@@ -306,7 +307,7 @@ test_ior() {
  
          echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
      fi
- 
+
      generate_machine_file $clients $MACHINEFILE || return $?
  
      print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
@@ -316,13 +317,13 @@ test_ior() {
      # mpi_run uses mpiuser
      chmod 0777 $testdir
      if [ "$NFSCLIENT" ]; then
-        setstripe_nfsserver $testdir -c -1 || 
-            { error "setstripe on nfsserver failed" && return 1; } 
+        setstripe_nfsserver $testdir -c -1 ||
+            { error "setstripe on nfsserver failed" && return 1; }
      else
          $LFS setstripe $testdir -c -1 ||
              { error "setstripe failed" && return 2; }
      fi
-    # 
+    #
      # -b N  blockSize -- contiguous bytes to write per task  (e.g.: 8, 4k, 2m, 1g)"
      # -o S  testFileName
      # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
@@ -342,7 +343,7 @@ test_ior() {
      rm -rf $testdir
  }
  run_test ior "ior"
- 
+
  test_cascading_rw() {
      if [ "$NFSCLIENT" ]; then
          skip "skipped for NFSCLIENT mode"
@@ -369,7 +370,7 @@ test_cascading_rw() {
      # mpi_run uses mpiuser
      chmod 0777 $testdir
  
-    # -g: debug mode 
+    # -g: debug mode
      # -n: repeat test # times
  
      local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
@@ -391,7 +392,7 @@ test_write_append_truncate() {
          return
      fi
  
-    # location is lustre/tests dir 
+    # location is lustre/tests dir
      if ! which write_append_truncate > /dev/null 2>&1 ; then
          skip_env "write_append_truncate not found"
          return
@@ -578,9 +579,9 @@ test_statahead () {
  
      cancel_lru_locks mdc
  
-    local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"    
+    local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
      echo "+ $cmd"
-    
+
      mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
  
      local rc=$?
diff --git a/lustre/tests/performance-sanity.sh b/lustre/tests/performance-sanity.sh

index 918b891..b217d0d 100644 (file)
--- a/lustre/tests/performance-sanity.sh
+++ b/lustre/tests/performance-sanity.sh
@@ -11,13 +11,14 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  [ -x "$MDSRATE" ] || FAIL_ON_ERROR=true error "No mdsrate program. Aborting."
  which mpirun > /dev/null 2>&1 || \
-       FAIL_ON_ERROR=true error "No mpirun program. Aborting." 
+       FAIL_ON_ERROR=true error "No mpirun program. Aborting."
  
  # Skip these tests
-# bug number:  15266 15266 
+# bug number:  15266 15266
  ALWAYS_EXCEPT="1     2    $PERFORMANCE_SANITY_EXCEPT"
  
  build_test_filter
@@ -28,7 +29,7 @@ test_1() {
  }
  run_test 1 "single-client IO perf ====="
  
-# parallel-IOR-rates 
+# parallel-IOR-rates
  test_2() {
      echo "MPI coordinated test of parallel filesystem system calls and library functions"
  }
diff --git a/lustre/tests/racer.sh b/lustre/tests/racer.sh

index 3567ebd..327f051 100644 (file)
--- a/lustre/tests/racer.sh
+++ b/lustre/tests/racer.sh
@@ -1,4 +1,7 @@
  #!/bin/bash
+# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
+# vim:autoindent:shiftwidth=4:tabstop=4:
+
  #set -vx
  set -e
  
@@ -7,9 +10,11 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  racer=$LUSTRE/tests/racer/racer.sh
  echo racer: $racer
+[ -z "$racer" ] && echo racer is not installed && exit 1
  
  CLIENTS=${CLIENTS:-$HOSTNAME}
  RACERDIRS=${RACERDIRS:-$DIR}
@@ -23,38 +28,140 @@ done
  DURATION=${DURATION:-900}
  [ "$SLOW" = "no" ] && DURATION=300
  
+PIDFILE=$TMP/racer.$$
+
+assert_env CLIENTS
+
+timer_on () {
+       sleep $1 && kill -s ALRM $$ &
+       TIMERPID=$!
+       echo TIMERPID=$TIMERPID
+}
+
+do_racer_cleanup () {
+       trap 0
+
+       local WAIT=0
+       local INTERVAL=5
+        local pids
+       local rc=0
+       local TMAX
+
+       local RDIR=$1
+
+       echo "DOING RACER CLEANUP ... "
+
+       # Check if all processes are killed
+
+       local clients=$CLIENTS
+       local num_clients=$(get_node_count ${clients//,/ })
+
+       if at_is_enabled; then
+               TMAX=$(at_max_get mds)
+       else
+               TMAX=$(lctl get_param -n timeout)
+       fi
+
+       [ $TMAX -gt $((num_clients * 60)) ] || TMAX=$((num_clients * 60))
+       # 1.Let chance to racer to kill all it's processes
+       # FIXME: not sure how long does it take for racer to kill all processes
+       # 80 is sometimes are enough for 2 clients; sometimes it takes more than 150 sec
+       while [ $WAIT -lt $TMAX ]; do
+               running=$(do_nodes $clients "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|pdsh|bash)" || true)
+               [ -z "$running" ] && rc=0 && break
+               echo "clients $clients are still running the racer processes. Waited $WAIT secs"
+               echo $running
+               rc=1
+               [ $INTERVAL -lt 40 ] && INTERVAL=$((INTERVAL + INTERVAL))
+               sleep $INTERVAL
+               WAIT=$((WAIT + INTERVAL))
+       done
+
+       # 2. Kill the remaining processes
+       if [ $rc -ne 0 ]; then
+               for C in ${clients//,/ } ; do
+                       pids=$(do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)" | awk '{print $2}' || true)
+                       if [ ! -z "$pids" ]; then
+                               echo "client $C still running racer processes after $WAIT seconds. Killing $pids"
+                               do_node $C "ps uax | grep $RDIR " | egrep -v "(acceptance|grep|PATH)"
+                               do_node $C kill -TERM $pids || true
+                               # let processes to be killed, there maybe many threads to be killed, so give 20 sec gap
+                               sleep 20
+       # 3. Check if the processes were killed
+       # exit error if the processes still exist
+                               for pid in $pids; do
+                                       do_node $C "ps -P $pid" && RC=1 || true
+                               done
+                       else
+                               echo "All processes on client $C exited after $WAIT seconds. OK."
+                       fi
+               done
+       else
+               echo "No racer processes running after $WAIT seconds. OK."
+               wait_remote_prog $racer 10
+       fi
+}
+
+racer_cleanup () {
+       if [ "$timeout" == "timeout" ]; then
+               echo $timeout killing RACERPID=$RACERPID
+               kill $RACERPID || true
+               sleep 2 # give chance racer to kill it's processes
+               local dir
+               for dir in $RDIRS; do
+                       do_racer_cleanup $dir
+               done
+       else
+               echo "Racer completed before DURATION=$DURATION expired. Cleaning up..."
+               kill $TIMERPID || true
+               for dir in $RDIRS; do
+                       do_racer_cleanup $dir
+               done
+       fi
+}
+
+racer_timeout () {
+       timeout="timeout"
+       RACERPID=$(cat $PIDFILE)
+       rm -f $PIDFILE
+       racer_cleanup
+       echo "$0: completed $RC"
+       return $RC
+}
+
  build_test_filter
  check_and_setup_lustre
+trap racer_timeout ALRM
  
  # run racer
  test_1() {
-    local rrc=0
-    local rc=0
-    local clients=${CLIENTS:-$(hostname)}
+    RC=0
  
-    check_progs_installed $clients $racer || \
-        { skip_env "$racer not found" && return 0; }
+    timer_on $((DURATION + 5))
  
-    local rpids=""
+    RACERPID=""
      for rdir in $RDIRS; do
-        do_nodes $clients "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
+        do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" &
          pid=$!
-        rpids="$rpids $pid"
+        RACERPID="$RACERPID $pid"
      done
  
-    echo racers pids: $rpids
-    for pid in $rpids; do
-        wait $pid
+    echo RACERPID=$RACERPID
+    echo $RACERPID > $PIDFILE
+    for rpid in $RACERPID; do
+        wait $rpid
          rc=$?
-        echo "pid=$pid rc=$rc"
+        echo "rpid=$rpid rc=$rc"
          if [ $rc != 0 ]; then
-            rrc=$((rrc + 1))
+            RC=$((RC + 1))
          fi
      done
  
-    return $rrc
+    racer_cleanup
+
+    return $RC
  }
-run_test 1 "racer on clients: ${CLIENTS:-$(hostname)} DURATION=$DURATION"
+run_test 1 "racer on clients: $CLIENTS DURATION=$DURATION"
  
  complete $(basename $0) $SECONDS
  check_and_cleanup_lustre
diff --git a/lustre/tests/recovery-double-scale.sh b/lustre/tests/recovery-double-scale.sh

index 3f83867..4dff18f 100644 (file)
--- a/lustre/tests/recovery-double-scale.sh
+++ b/lustre/tests/recovery-double-scale.sh
@@ -17,6 +17,7 @@ CLEANUP=${CLEANUP:-""}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  DEBUGLOG=$TESTSUITELOG.debug
  
@@ -84,7 +85,7 @@ reboot_recover_node () {
                        shutdown_client $c
                        boot_node $c
                        echo "Reintegrating $c"
-                      # one client fails; need dk logs from this client only 
+                      # one client fails; need dk logs from this client only
                        zconf_mount $c $MOUNT || NODES="$c $(facet_host mds) $(osts_nodes)" error_exit "zconf_mount failed"
                   done
                   start_client_loads $item
@@ -166,7 +167,7 @@ failover_pair() {
  
      reboot_recover_node $item1 $type1
  
-    # Hendrix test17 description: 
+    # Hendrix test17 description:
      # Introduce a failure, wait at
      # least 5 minutes (for recovery),
      # introduce a 2nd
@@ -178,13 +179,13 @@ failover_pair() {
      # We have a "double failures" if SERIAL is not set,
      # do not need a sleep between failures for "double failures"
  
-    log "                            Failing type2=$type2 item2=$item2 ... "    
+    log "                            Failing type2=$type2 item2=$item2 ... "
      reboot_recover_node $item2 $type2
  
      # Client loads are allowed to die while in recovery, so we just
      # restart them.
      log "==== Checking the clients loads AFTER  failovers -- ERRORS_OK=$ERRORS_OK"
-    restart_client_loads $NODES_TO_USE $ERRORS_OK || return $? 
+    restart_client_loads $NODES_TO_USE $ERRORS_OK || return $?
      log "Done checking / re-Starting client loads. PASS"
      return 0
  }
@@ -209,7 +210,7 @@ summary_and_cleanup () {
              echo "Client load failed on node $END_RUN_NODE"
              echo
              echo "client $END_RUN_NODE load debug output :"
-            local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug 
+            local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug
              do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true
          fi
          rc=1
@@ -260,11 +261,11 @@ START_TS=$(date +%s)
  CURRENT_TS=$START_TS
  ELAPSED=0
  
-# Set SERIAL to serialize the failure through a recovery of the first failure. 
+# Set SERIAL to serialize the failure through a recovery of the first failure.
  SERIAL=${SERIAL:-""}
  ERRORS_OK="yes"
  
-[ "$SERIAL" ] && ERRORS_OK="" 
+[ "$SERIAL" ] && ERRORS_OK=""
  
  FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes
  
@@ -275,7 +276,7 @@ if ! do_nodesv $NODES_TO_USE "cat $TMP/client-load.pid"; then
          exit 3
  fi
  
-# FIXME: Do we want to have an initial sleep period where the clients 
+# FIXME: Do we want to have an initial sleep period where the clients
  # just run before introducing a failure?
  sleep $FAILOVER_PERIOD
  
@@ -296,7 +297,7 @@ if [ $OSTCOUNT -gt 1 ]; then
      sleep $FAILOVER_PERIOD
  else
      skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped"
-fi 
+fi
  
  #CMD_TEST_NUM=17.5
  failover_pair OST clients "test 5: failover OST, then 2 clients ===="
diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh

index 82a5507..496c71c 100644 (file)
--- a/lustre/tests/recovery-mds-scale.sh
+++ b/lustre/tests/recovery-mds-scale.sh
@@ -14,6 +14,7 @@ CLEANUP=${CLEANUP:-""}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  DEBUGLOG=$TESTSUITELOG.debug
@@ -69,7 +70,7 @@ if [ "$FLAVOR" == "MDS" ]; then
  else
      SERVERS=$OSTS
  fi
- 
+
  if [ "$SLOW" = "no" ]; then
      DURATION=${DURATION:-$((60 * 30))}
      SERVER_FAILOVER_PERIOD=${SERVER_FAILOVER_PERIOD:-$((60 * 5))}
@@ -119,7 +120,7 @@ summary_and_cleanup () {
      # the one we are really interested in.
          if [ -n "$END_RUN_NODE" ]; then
              var=$(node_var_name $END_RUN_NODE)_load
-            echo "Client load failed on node $END_RUN_NODE" 
+            echo "Client load failed on node $END_RUN_NODE"
              echo
              echo "client $END_RUN_NODE load stdout and debug files :
                ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}
@@ -127,7 +128,7 @@ summary_and_cleanup () {
          fi
          rc=1
      fi
-     
+
      echo $(date +'%F %H:%M:%S') Terminating clients loads ...
      echo "$0" >> $END_RUN_FILE
      local result=PASS
@@ -172,7 +173,7 @@ Status: $result: rc=$rc"
  }
  
  #
-# MAIN 
+# MAIN
  #
  log "-----============= $0 starting =============-----"
  
@@ -204,21 +205,21 @@ CURRENT_TS=$START_TS
  
  while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
  
-    # In order to perform the 
+    # In order to perform the
      # expected number of failovers, we need to account the following :
      # 1) the time that has elapsed during the client load checking
      # 2) time takes for failover
  
      it_time_start=$(date +%s)
-    
+
      SERVERFACET=$(get_random_entry $SERVERS)
      var=${SERVERFACET}_numfailovers
  
-    # Check that our client loads are still running. If any have died, 
-    # that means they have died outside of recovery, which is unacceptable.    
+    # Check that our client loads are still running. If any have died,
+    # that means they have died outside of recovery, which is unacceptable.
  
      log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
-    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD" 
+    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
  
      if ! check_client_loads $NODES_TO_USE; then
          exit 4
@@ -234,7 +235,7 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
      log "Checking clients are in FULL state before doing next failover"
      if ! wait_clients_import_state $NODES_TO_USE $SERVERFACET FULL; then
          echo "Clients import not FULL, please consider to increase SERVER_FAILOVER_PERIOD=$SERVER_FAILOVER_PERIOD !"
-        
+
      fi
      log "Starting failover on $SERVERFACET"
  
@@ -252,14 +253,14 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
      # Increment the number of failovers
      val=$((${!var} + 1))
      eval $var=$val
- 
+
      CURRENT_TS=$(date +%s)
      ELAPSED=$((CURRENT_TS - START_TS))
- 
+
      sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
  
      # keep count the number of itterations when
-    # time spend to failover and two client loads check exceeded 
+    # time spend to failover and two client loads check exceeded
      # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
      if [ $sleep -lt $MINSLEEP ]; then
          reqfail=$((reqfail +1))
@@ -269,8 +270,8 @@ This iteration, the load was only applied for sleep=$sleep seconds.
  Estimated max recovery time : $max_recov_time
  Probably the hardware is taking excessively long to boot.
  Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6 
-    fi  
+        [ $reqfail -gt $REQFAIL ] && exit 6
+    fi
  
      log "$SERVERFACET has failed over ${!var} times, and counting..."
  
@@ -278,7 +279,7 @@ Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug
           break
      fi
  
-    if [ $sleep -gt 0 ]; then 
+    if [ $sleep -gt 0 ]; then
          echo "sleeping $sleep seconds ... "
          sleep $sleep
      fi
diff --git a/lustre/tests/recovery-random-scale.sh b/lustre/tests/recovery-random-scale.sh

index 2fced26..fb281e1 100644 (file)
--- a/lustre/tests/recovery-random-scale.sh
+++ b/lustre/tests/recovery-random-scale.sh
@@ -18,6 +18,7 @@ CLEANUP=${CLEANUP:-""}
  init_test_env $@
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  TESTSUITELOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh)}
  DEBUGLOG=$TESTSUITELOG.debug
@@ -123,7 +124,7 @@ summary_and_cleanup () {
      # the one we are really interested in.
          if [ -n "$END_RUN_NODE" ]; then
              var=$(node_var_name $END_RUN_NODE)_load
-            echo "Client load failed on node $END_RUN_NODE" 
+            echo "Client load failed on node $END_RUN_NODE"
              echo
              echo "client $END_RUN_NODE load stdout and debug files :
                ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}
@@ -179,7 +180,7 @@ Status: $result: rc=$rc"
  }
  
  #
-# MAIN 
+# MAIN
  #
  log "-----============= $0 starting =============-----"
  
@@ -213,13 +214,13 @@ sleep=0
  ERRORS_OK="yes"
  while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
  
-    # In order to perform the 
+    # In order to perform the
      # expected number of failovers, we need to account the following :
      # 1) the time that has elapsed during the client load checking
      # 2) time takes for failover
  
      it_time_start=$(date +%s)
-    
+
      FAIL_CLIENT=$(get_random_entry $NODES_TO_USE)
      client_var=$(node_var_name $FAIL_CLIENT)_nums
  
@@ -230,11 +231,11 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
      SERVERFACET=$(get_random_entry $MDTS)
      var=${SERVERFACET}_nums
  
-    # Check that our client loads are still running. If any have died, 
-    # that means they have died outside of recovery, which is unacceptable.    
+    # Check that our client loads are still running. If any have died,
+    # that means they have died outside of recovery, which is unacceptable.
  
      log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
-    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD" 
+    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
  
      if ! check_client_loads $NODES_TO_USE; then
          exit 4
@@ -246,11 +247,11 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
      log "Starting failover on $SERVERFACET"
  
      facet_failover "$SERVERFACET" || exit 1
-    if ! wait_recovery_complete $SERVERFACET ; then 
+    if ! wait_recovery_complete $SERVERFACET ; then
          echo "$SERVERFACET recovery is not completed!"
          exit 7
      fi
- 
+
      boot_node $FAIL_CLIENT
      echo "Reintegrating $FAIL_CLIENT"
      zconf_mount $FAIL_CLIENT $MOUNT || exit $?
@@ -269,10 +270,10 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
      # not for all clients.
      if [ -e $END_RUN_FILE ]; then
          read END_RUN_NODE < $END_RUN_FILE
-        [[ $END_RUN_NODE = $FAIL_CLIENT ]] && 
+        [[ $END_RUN_NODE = $FAIL_CLIENT ]] &&
              rm -f $END_RUN_FILE || exit 13
      fi
-   
+
      restart_client_loads $FAIL_CLIENT $ERRORS_OK || exit $?
  
      # Check that not failed clients loads are still running.
@@ -286,11 +287,11 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
  
      CURRENT_TS=$(date +%s)
      ELAPSED=$((CURRENT_TS - START_TS))
- 
+
      sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
  
      # keep count the number of itterations when
-    # time spend to failover and two client loads check exceeded 
+    # time spend to failover and two client loads check exceeded
      # the value ( SERVER_FAILOVER_PERIOD - MINSLEEP )
      if [ $sleep -lt $MINSLEEP ]; then
          reqfail=$((reqfail +1))
@@ -300,8 +301,8 @@ This iteration, the load was only applied for sleep=$sleep seconds.
  Estimated max recovery time : $max_recov_time
  Probably the hardware is taking excessively long to boot.
  Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6 
-    fi  
+        [ $reqfail -gt $REQFAIL ] && exit 6
+    fi
  
      log " Number of failovers:
  $(numfailovers)                and counting..."
@@ -310,7 +311,7 @@ $(numfailovers)                and counting..."
           break
      fi
  
-    if [ $sleep -gt 0 ]; then 
+    if [ $sleep -gt 0 ]; then
          echo "sleeping $sleep seconds ... "
          sleep $sleep
      fi
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh

index 0897f01..e3558c1 100755 (executable)
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -10,8 +10,9 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+require_dsh_mds || exit 0
  
  # also long tests: 19, 21a, 21e, 21f, 23, 27
  #                                   1  2.5  2.5    4    4          (min)"
@@ -136,7 +137,7 @@ run_test 11 "wake up a thread waiting for completion after eviction (b=2460)"
  
  #b=2494
  test_12(){
-    $LCTL mark multiop $DIR/$tfile OS_c 
+    $LCTL mark multiop $DIR/$tfile OS_c
      do_facet mds "lctl set_param fail_loc=0x115"
      clear_failloc mds $((TIMEOUT * 2)) &
      multiop_bg_pause $DIR/$tfile OS_c || return 1
@@ -262,7 +263,7 @@ test_18a() {
      local osc2dev=`lctl get_param -n devices | grep ${ost2_svc}-osc- | awk '{print $1}'`
      $LCTL --device $osc2dev deactivate || return 3
      # my understanding is that there should be nothing in the page
-    # cache after the client reconnects?     
+    # cache after the client reconnects?
      rc=0
      pgcache_empty || rc=2
      $LCTL --device $osc2dev activate
@@ -383,7 +384,7 @@ test_20a() {        # bug 2983 - ldlm_handle_enqueue cleanup
         rc=$?
         [ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true
  }
-run_test 20a "ldlm_handle_enqueue error (should return error)" 
+run_test 20a "ldlm_handle_enqueue error (should return error)"
  
  test_20b() {   # bug 2986 - ldlm_handle_enqueue error during open
         remote_ost_nodsh && skip "remote OST with nodsh" && return 0
@@ -693,7 +694,7 @@ test_26a() {      # was test_26 bug 5921 - evict dead exports by pinger
         echo starting with $OST_NEXP OST exports
  # OBD_FAIL_PTLRPC_DROP_RPC 0x505
         do_facet client lctl set_param fail_loc=0x505
-       # evictor takes up to 2.25x to evict.  But if there's a 
+       # evictor takes up to 2.25x to evict.  But if there's a
         # race to start the evictor from various obds, the loser
         # might have to wait for the next ping.
  
@@ -732,8 +733,8 @@ test_26b() {      # bug 10140 - evict dead exports by pinger
         # PING_INTERVAL max(obd_timeout / 4, 1U)
         # PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
  
-       # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.  
-       # But if there's a race to start the evictor from various obds, 
+       # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+       # But if there's a race to start the evictor from various obds,
         # the loser might have to wait for the next ping.
         # = 9 * PING_INTERVAL + PING_INTERVAL
         # = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout
@@ -762,7 +763,7 @@ test_27() {
         facet_failover mds
         #no crashes allowed!
          kill -USR1 $CLIENT_PID
-       wait $CLIENT_PID 
+       wait $CLIENT_PID
         true
         FAILURE_MODE=$save_FAILURE_MODE
  }
@@ -802,7 +803,7 @@ test_50() {
         # client process should see no problems even though MDS went down
         sleep $TIMEOUT
          kill -USR1 $CLIENT_PID
-       wait $CLIENT_PID 
+       wait $CLIENT_PID
         rc=$?
         echo writemany returned $rc
         #these may fail because of eviction due to slow AST response.
@@ -833,7 +834,7 @@ test_51() {
         # and recovery was interrupted
         sleep $TIMEOUT
          kill -USR1 $CLIENT_PID
-       wait $CLIENT_PID 
+       wait $CLIENT_PID
         rc=$?
         echo writemany returned $rc
         [ $rc -eq 0 ] || error_ignore 13652 "writemany returned rc $rc" || true
@@ -931,8 +932,8 @@ test_55() {
         count=0
         echo  "step2: testing ......"
         while [ $count -le 64 ]; do
-           dd_name="`ps x | awk '$1 == '$DDPID' { print $5 }'`"            
-           if [ -z  $dd_name ]; then 
+           dd_name="`ps x | awk '$1 == '$DDPID' { print $5 }'`"
+           if [ -z  $dd_name ]; then
                  ls -l $DIR/$tdir
                 echo  "debug: (dd_name=$dd_name, dd_pid=$DDPID, time=$count)"
                 error "dd shouldn't be finished!"
@@ -971,7 +972,7 @@ test_56() { # b=11277
  run_test 56 "do not allow reconnect to busy exports"
  
  test_57_helper() {
-        # no oscs means no client or mdt 
+        # no oscs means no client or mdt
          while lctl get_param osc.*.* > /dev/null 2>&1; do
                  : # loop until proc file is removed
          done
@@ -1038,7 +1039,7 @@ test_61()
         $LFS setstripe -c 1 --index 0 $DIR/d61
  
         replay_barrier mds
-       createmany -o $DIR/d61/$tfile-%d 10 
+       createmany -o $DIR/d61/$tfile-%d 10
         local oid=`do_facet ost1 "lctl get_param -n obdfilter.${ost1_svc}.last_id"`
  
         fail_abort mds
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh

index 449c4ab..871ecd5 100755 (executable)
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -14,10 +14,10 @@ MOUNT_2=${MOUNT_2:-"yes"}
  . $LUSTRE/tests/test-framework.sh
  
  init_test_env $@
-
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+require_dsh_mds || exit 0
  
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="1 2 3 4 5 14"
  
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh

index 563a27a..40afe70 100755 (executable)
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -8,20 +8,21 @@ CLEANUP=${CLEANUP:-""}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  # While we do not use OSTCOUNT=1 setup anymore,
  # ost1failover_HOST is used
  #ostfailover_HOST=${ostfailover_HOST:-$ost_HOST}
  #failover= must be defined in OST_MKFS_OPTIONS if ostfailover_HOST != ost_HOST
  
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+require_dsh_ost || exit 0
  
  # Tests that fail on uml
  CPU=`awk '/model/ {print $4}' /proc/cpuinfo`
  [ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 6"
  
  # Skip these tests
-# BUG NUMBER: 
+# BUG NUMBER:
  ALWAYS_EXCEPT="$REPLAY_OST_SINGLE_EXCEPT"
  
  #                                      
@@ -34,7 +35,7 @@ assert_DIR
  rm -rf $DIR/[df][0-9]*
  
  TDIR=$DIR/d0.${TESTSUITE}
-mkdir -p $TDIR 
+mkdir -p $TDIR
  $LFS setstripe $TDIR -i 0 -c 1
  $LFS getstripe $TDIR
  
@@ -67,11 +68,11 @@ run_test 1 "touch"
  test_2() {
      for i in `seq 10`; do
          echo "tag-$i" > $TDIR/$tfile-$i
-    done 
+    done
      fail ost1
      for i in `seq 10`; do
        grep -q "tag-$i" $TDIR/$tfile-$i || error "f2-$i"
-    done 
+    done
      rm -f $TDIR/$tfile-*
  }
  run_test 2 "|x| 10 open(O_CREAT)s"
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh

index ad7b1e3..eacbecb 100644 (file)
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -13,10 +13,11 @@ CLEANUP=${CLEANUP:-}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  CHECK_GRANT=${CHECK_GRANT:-"yes"}
  GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""}
  
-remote_mds_nodsh && log "SKIP: remote MDS with nodsh" && exit 0
+require_dsh_mds || exit 0
  
  # Skip these tests
  # bug number:
@@ -906,7 +907,7 @@ test_45() {
      [ "$mdcdev" ] || return 2
      [ $(echo $mdcdev | wc -w) -eq 1 ] || { echo $mdcdev=$mdcdev && return 3; }
  
-    $LCTL --device $mdcdev recover || return 6 
+    $LCTL --device $mdcdev recover || return 6
  
      multiop_bg_pause $DIR/$tfile O_c || return 1
      pid=$!
@@ -2041,7 +2042,7 @@ test_80b() {
          { skip "sync journal is not implemeted" && return; }
  
      do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
-    
+
      replay_barrier ost1
      lfs setstripe -i 0 -c 1 $DIR/$tfile
      dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 || error "Cannot write"
@@ -2131,14 +2132,14 @@ test_85a() { #bug 16774
      createmany -o $DIR/$tfile- 100
      ls -l $DIR/ > /dev/null
  
-    lov_id=`lctl dl | grep "clilov"` 
+    lov_id=`lctl dl | grep "clilov"`
      addr=`echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}'`
      count=`lctl get_param -n ldlm.namespaces.*MDT0000*$addr.lock_unused_count`
      echo "before recovery: unused locks count = $count"
      [ $count -ne 0 ] || error "unused locks should not be zero before recovery"
  
      fail mds
-    
+
      count2=`lctl get_param -n ldlm.namespaces.*MDT0000*$addr.lock_unused_count`
      echo "after recovery: unused locks count = $count2"
  
@@ -2161,13 +2162,13 @@ test_85b() { #bug 16774
          dd if=$DIR/$tfile-$i of=/dev/null bs=4096 count=32 >/dev/null 2>&1
      done
  
-    lov_id=`lctl dl | grep "clilov"` 
+    lov_id=`lctl dl | grep "clilov"`
      addr=`echo $lov_id | awk '{print $4}' | awk -F '-' '{print $3}'`
      count=`lctl get_param -n ldlm.namespaces.*OST0000*$addr.lock_unused_count`
      echo "before recovery: unused locks count = $count"
  
      fail ost1
-    
+
      count2=`lctl get_param -n ldlm.namespaces.*OST0000*$addr.lock_unused_count`
      echo "after recovery: unused locks count = $count2"
  
@@ -2202,7 +2203,7 @@ test_87() { #bug 17485
      local mdtosc=$(get_mdtosc_proc_path $OST)
      local last_id=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_last_id)
      local next_id=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_next_id)
-    echo "before test: last_id = $last_id, next_id = $next_id" 
+    echo "before test: last_id = $last_id, next_id = $next_id"
  
      echo "Creating to objid $last_id on ost $OST..."
      createmany -o $DIR/$tdir/f-%d $next_id $((last_id - next_id + 2))
@@ -2213,7 +2214,7 @@ test_87() { #bug 17485
  
      last_id2=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_last_id)
      next_id2=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_next_id)
-    echo "before recovery: last_id = $last_id2, next_id = $next_id2" 
+    echo "before recovery: last_id = $last_id2, next_id = $next_id2"
  
      # if test uses shutdown_facet && reboot_facet instead of facet_failover ()
      # it has to take care about the affected facets, bug20407
@@ -2237,9 +2238,9 @@ test_87() { #bug 17485
  
      last_id2=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_last_id)
      next_id2=$(do_facet mds lctl get_param -n osc.$mdtosc.prealloc_next_id)
-    echo "after recovery: last_id = $last_id2, next_id = $next_id2" 
+    echo "after recovery: last_id = $last_id2, next_id = $next_id2"
  
-    # create new files, which should use new objids, and ensure the orphan 
+    # create new files, which should use new objids, and ensure the orphan
      # cleanup phase for ost1 is completed at the same time
      for i in `seq 8`; do
          file_id=$(($last_id + 10 + $i))
diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh

index 7a3c8f9..4c09fc6 100644 (file)
--- a/lustre/tests/replay-vbr.sh
+++ b/lustre/tests/replay-vbr.sh
@@ -13,13 +13,14 @@ CLEANUP=${CLEANUP:-""}
  . $LUSTRE/tests/test-framework.sh
  
  init_test_env $@
-
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  [ -n "$CLIENTS" ] || { skip_env "Need two or more clients" && exit 0; }
  [ $CLIENTCOUNT -ge 2 ] || \
      { skip_env "Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+
+require_dsh_mds || exit 0
  
  [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
  
diff --git a/lustre/tests/rpc.sh b/lustre/tests/rpc.sh

index 15e960a..79c1327 100755 (executable)
--- a/lustre/tests/rpc.sh
+++ b/lustre/tests/rpc.sh
@@ -3,12 +3,19 @@ export PATH=`dirname $0`/../utils:$PATH
  NAME=${NAME:-local}
  
  LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+
+if [ ! -f $LUSTRE/tests/rpc.sh ]; then
+    LUSTRE=$(cd $(dirname $(which $0))/..; echo $PWD)
+fi
+
  . $LUSTRE/tests/test-framework.sh
  init_test_env
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
  
-cmd=$1
-shift
-$cmd $@
+# Reset the trap on ERR set by the framework.  Noticing this failure is the
+# framework's job.
+trap - ERR
+
+# Execute the command
+"$@"
  
-exit $?
diff --git a/lustre/tests/runtests b/lustre/tests/runtests

index f99f69f..1416303 100755 (executable)
--- a/lustre/tests/runtests
+++ b/lustre/tests/runtests
@@ -13,6 +13,7 @@ export NAME=${NAME:-local}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  RUNTESTS_SRC=${RUNTESTS_SRC:-"/etc /bin"}
  [ "$COUNT" ] || COUNT=1000
diff --git a/lustre/tests/sanity-benchmark.sh b/lustre/tests/sanity-benchmark.sh

index 4c19a53..2ea5b3d 100644 (file)
--- a/lustre/tests/sanity-benchmark.sh
+++ b/lustre/tests/sanity-benchmark.sh
@@ -12,6 +12,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  # bug number:
  ALWAYS_EXCEPT="$SANITY_BENCHMARK_EXCEPT"
@@ -58,7 +59,7 @@ test_dbench() {
      local SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
      DB_THREADS=$((SPACE / 50000))
      [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
-    
+
      $DEBUG_OFF
      myUID=$RUNAS_ID
      myGID=$RUNAS_GID
@@ -113,7 +114,7 @@ test_iozone() {
      fi
  
      export O_DIRECT
-    
+
      local IOZDIR=$DIR/d0.iozone
      mkdir -p $IOZDIR
      $LFS setstripe -c -1 $IOZDIR
@@ -138,7 +139,7 @@ test_iozone() {
         { error "iozone (1) failed" && return 1; }
      rm -f $IOZLOG
      $DEBUG_ON
-    
+
      # check if O_DIRECT support is implemented in kernel
      if [ -z "$O_DIRECT" ]; then
         touch $DIR/f.iozone
@@ -245,7 +246,7 @@ space_check () {
      local num_runs=$(echo ${pios_THREADCOUNT//,/ } | wc -w)
      size=$(( size * $num_runs))
      space=$((space * 1024))
-    echo size=$size space=$space 
+    echo size=$size space=$space
      if [ $space -le $size ]; then
          local ratio=$(( size / space + 1 ))
          echo "Need free space atleast $size, available $space, ratio=$ratio"
@@ -260,7 +261,7 @@ space_check () {
      fi
  }
  
-pios_setup() { 
+pios_setup() {
      local testdir=$DIR/$tdir
      mkdir -p $testdir
  
@@ -285,8 +286,8 @@ run_pios () {
      local cmd="$PIOSBIN  -t $pios_THREADCOUNT -n $pios_REGIONCOUNT \
                           -c $pios_CHUNKSIZE -s $pios_REGIONSIZE    \
                           -o $pios_OFFSET $@ -p $testdir"
-    
-    if [ ! -d $testdir ]; then  
+
+    if [ ! -d $testdir ]; then
          error "No test directory created, setup_pios must have failed"
          return 20
      fi
@@ -314,7 +315,7 @@ test_pios_ssf() {
          return 0
      fi
      run_pios || return
-    run_pios  --verify || rc=$? 
+    run_pios  --verify || rc=$?
      pios_cleanup $rc
      return $rc
  }
diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh

index 23b3523..5d15058 100755 (executable)
--- a/lustre/tests/sanity-quota.sh
+++ b/lustre/tests/sanity-quota.sh
@@ -53,12 +53,13 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  DIRECTIO=${DIRECTIO:-$LUSTRE/tests/directio}
  
  unset ENABLE_QUOTA
  
-remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+require_dsh_mds || exit 0
+require_dsh_ost || exit 0
  
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="9 10 11 18b 21"
  
@@ -1089,7 +1090,7 @@ test_11() {
             echo ""
             PROCS=$(ps -ef | grep -v grep | grep "dd if /dev/zero of $TESTDIR" | wc -l)
             LAST_USED=0
-           while [ $PROCS -gt 0 ]; do 
+           while [ $PROCS -gt 0 ]; do
               sleep 20
               SECS=$((SECS + sleep))
               PROCS=$(ps -ef | grep -v grep | grep "dd if /dev/zero of $TESTDIR" | wc -l)
@@ -1867,7 +1868,6 @@ test_24() {
  
          set_blk_unitsz $((128 * 1024))
          set_blk_tunesz $((128 * 1024 / 2))
-        
  }
  run_test_with_stat 24 "test if lfs draws an asterix when limit is reached (16646) ==========="
  
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 637e0a6..17c87a1 100644 (file)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -65,7 +65,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
-
+init_logging
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 36h 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b"
  
  FAIL_ON_ERROR=${FAIL_ON_ERROR:-false}
@@ -6856,7 +6856,7 @@ test_201c() {
  
         do_facet mgs $LCTL pool_destroy $FSNAME.$POOL
         
-       sleep 2                        
+       sleep 2
      # striping on an empty/nonexistant pool should fall back to "pool of everything"
         touch ${POOL_DIR}/$tfile || error "failed to use fallback striping for missing pool"
         # setstripe on an empty pool should fail
@@ -6940,4 +6940,4 @@ check_and_cleanup_lustre
  if [ "$I_MOUNTED" != "yes" ]; then
         lctl set_param debug="$OLDDEBUG" 2> /dev/null || true
  fi
-exit_status 
+exit_status
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh

index f0521bb..9a909ac 100644 (file)
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -38,6 +38,7 @@ CLEANUP=${CLEANUP:-:}
  SETUP=${SETUP:-:}
  init_test_env $@
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
  
  [ "$SLOW" = "no" ] && EXCEPT_SLOW="12 16"
  
@@ -64,6 +65,9 @@ check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
  
  build_test_filter
  
+mkdir -p $MOUNT2
+mount_client $MOUNT2
+
  test_1a() {
         touch $DIR1/f1
         [ -f $DIR2/f1 ] || error
diff --git a/lustre/tests/sgpdd-survey.sh b/lustre/tests/sgpdd-survey.sh

index ca9b3d6..0f6d2e5 100644 (file)
--- a/lustre/tests/sgpdd-survey.sh
+++ b/lustre/tests/sgpdd-survey.sh
@@ -5,11 +5,12 @@ set -e
  LUSTRE=${LUSTRE:-`dirname $0`/..}
  . $LUSTRE/tests/test-framework.sh
  init_test_env $@
+init_logging
  
  # QE uses the following parameters:
  # size=128 crghi=16 thrhi=32
  crghi=${crghi:-2}
-thrhi=${thrhi:-16} 
+thrhi=${thrhi:-16}
  size=${size:-1024}
  
  . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh

index fae3a3a..ea93c40 100644 (file)
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -16,6 +16,7 @@ export CATASTROPHE=${CATASTROPHE:-/proc/sys/lnet/catastrophe}
  # function used by scripts run on remote nodes
  LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
  . $LUSTRE/tests/functions.sh
+. $LUSTRE/tests/yaml.sh
  
  LUSTRE_TESTS_CFG_DIR=${LUSTRE_TESTS_CFG_DIR:-${LUSTRE}/tests/cfg}
  
@@ -48,14 +49,15 @@ usage() {
  
  print_summary () {
      trap 0
-    [ "$TESTSUITE" == "lfscktest" ] && return 0
+    [ "$TESTSUITE" == "lfsck" ] && return 0
      [ -n "$ONLY" ] && echo "WARNING: ONLY is set to $(echo $ONLY)"
      local details
      local form="%-13s %-17s %-9s %s %s\n"
      printf "$form" "status" "script" "Total(sec)" "E(xcluded) S(low)"
      echo "------------------------------------------------------------------------------------"
-    for O in $TESTSUITE_LIST; do
+    for O in $DEFAULT_SUITES; do
          [ "${!O}" = "no" ] && continue || true
+        O=$(echo $O  | tr "-" "_" | tr "[:lower:]" "[:upper:]")
          local o=$(echo $O | tr "[:upper:]" "[:lower:]")
          o=${o//_/-}
          local log=${TMP}/${o}.log
@@ -82,23 +84,25 @@ print_summary () {
          printf "$form" "-" "-" "-" "S=$(echo $slow)"
      done
  
-    for O in $TESTSUITE_LIST; do
+    for O in $DEFAULT_SUITES; do
+        O=$(echo $O  | tr "-" "_" | tr "[:lower:]" "[:upper:]")
          if [ "${!O}" = "no" ]; then
              # FIXME.
              # only for those tests suits which are run directly from acc-sm script:
              # bonnie, iozone, etc.
              if [ -f "$TESTSUITELOG" ] && grep FAIL $TESTSUITELOG | grep -q ' '$O  ; then
-               printf "$form" "UNFINISHED" "$O" ""  
+               printf "$form" "UNFINISHED" "$O" ""
              else
                 printf "$form" "Skipped" "$O" ""
              fi
          fi
      done
  
-    # print the detailed tests durations if DDETAILS=true
-    if $DDETAILS; then
-        echo "$details"
-    fi
+    for O in $DEFAULT_SUITES; do
+        O=$(echo $O  | tr "-" "_" | tr "[:lower:]" "[:upper:]")
+        [ "${!O}" = "done" -o "${!O}" = "no" ] || \
+            printf "$form" "UNFINISHED" "$O" ""
+    done
  }
  
  init_test_env() {
@@ -134,12 +138,16 @@ init_test_env() {
      #[ -d /r ] && export ROOT=${ROOT:-/r}
      export TMP=${TMP:-$ROOT/tmp}
      export TESTSUITELOG=${TMP}/${TESTSUITE}.log
+    if [[ -z $LOGDIRSET ]]; then
+        export LOGDIR=${LOGDIR:-${TMP}/test_logs/}/$(date +%s)
+        export LOGDIRSET=true
+    fi
      export HOSTNAME=${HOSTNAME:-`hostname`}
      if ! echo $PATH | grep -q $LUSTRE/utils; then
-       export PATH=$PATH:$LUSTRE/utils
+        export PATH=$PATH:$LUSTRE/utils
      fi
      if ! echo $PATH | grep -q $LUSTRE/test; then
-       export PATH=$PATH:$LUSTRE/tests
+        export PATH=$PATH:$LUSTRE/tests
      fi
      if ! echo $PATH | grep -q $LUSTRE/../lustre-iokit/sgpdd-survey; then
          export PATH=$PATH:$LUSTRE/../lustre-iokit/sgpdd-survey
@@ -154,7 +162,7 @@ init_test_env() {
      export MDSRATE=${MDSRATE:-"$LUSTRE/tests/mpi/mdsrate"}
      [ ! -f "$MDSRATE" ] && export MDSRATE=$(which mdsrate 2> /dev/null)
      if ! echo $PATH | grep -q $LUSTRE/tests/racer; then
-        export PATH=$PATH:$LUSTRE/tests/racer
+        export PATH=$LUSTRE/tests/racer:$PATH:
      fi
      if ! echo $PATH | grep -q $LUSTRE/tests/mpi; then
          export PATH=$PATH:$LUSTRE/tests/mpi
@@ -353,7 +361,7 @@ load_modules () {
      if $LOAD_MODULES_REMOTE ; then
          local list=$(comma_list $(remote_nodes_list))
          echo loading modules on $list
-        do_rpc_nodes $list load_modules 
+        do_rpc_nodes $list load_modules
      fi
  }
  
@@ -534,7 +542,7 @@ quota_save_version() {
          $LFS quotaoff -ug $MOUNT # just in case
          [ -n "$ver" ] && quota_set_version $ver
      else
-        echo mds running $lustre_version 
+        echo mds running $lustre_version
          [ -n "$ver" -a "$ver" != "3" ] && error "wrong quota version specifier"
      fi
  
@@ -682,7 +690,7 @@ fi"
  }
  
  sanity_mount_check_servers () {
-    [ "$CLIENTONLY" ] && 
+    [ "$CLIENTONLY" ] &&
          { echo "CLIENTONLY mode, skip mount_check_servers"; return 0; } || true
      echo Checking servers environments
  
@@ -1575,12 +1583,12 @@ do_node() {
  
      if [ "$myPDSH" = "rsh" ]; then
  # we need this because rsh does not return exit code of an executed command
-       local command_status="$TMP/cs"
-       rsh $HOST ":> $command_status"
-       rsh $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;
-                   cd $RPWD; sh -c \"$@\") ||
-                   echo command failed >$command_status"
-       [ -n "$($myPDSH $HOST cat $command_status)" ] && return 1 || true
+        local command_status="$TMP/cs"
+        rsh $HOST ":> $command_status"
+        rsh $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;
+                    cd $RPWD; sh -c \"$@\") ||
+                    echo command failed >$command_status"
+        [ -n "$($myPDSH $HOST cat $command_status)" ] && return 1 || true
          return 0
      fi
  
@@ -1616,7 +1624,7 @@ do_nodes() {
      local rnodes=$1
      shift
  
-    if $(single_local_node $rnodes); then
+    if single_local_node $rnodes; then
          if $verbose; then
             do_nodev $rnodes "$@"
          else
@@ -1714,7 +1722,7 @@ stopall() {
          rm -f $TMP/ost${num}active
      done
      if ! combined_mgs_mds ; then
-        stop mgs 
+        stop mgs
      fi
  
      return 0
@@ -1753,12 +1761,12 @@ mkfs_opts () {
      [[ $facet = mgs ]] && echo $opt && return
  
      # 1.
-    # --failnode options 
+    # --failnode options
      local var=${facet}failover_HOST
      if [ x"${!var}" != x ] && [ x"${!var}" != x$(facet_host $facet) ] ; then
          local failnode=$(h2$NETTYPE ${!var})
          failnode="--failnode=$failnode"
-        # options does not contain 
+        # options does not contain
          # or contains wrong --failnode=
          if [[ $opt != *${failnode}* ]]; then
              opt=$(echo $opt | sed 's/--failnode=.* / /')
@@ -1824,8 +1832,8 @@ mount_client() {
  
  remount_client()
  {
-       zconf_umount `hostname` $1 || error "umount failed"
-       zconf_mount `hostname` $1 || error "mount failed"
+        zconf_umount `hostname` $1 || error "umount failed"
+        zconf_mount `hostname` $1 || error "mount failed"
  }
  
  writeconf_facet () {
@@ -1894,7 +1902,7 @@ setupall() {
  }
  
  mounted_lustre_filesystems() {
-       awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
+        awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts
  }
  
  init_facet_vars () {
@@ -1930,7 +1938,7 @@ init_facet_vars () {
      # get mount point of already mounted device
      # is facet_dev is already mounted then use the real
      #  mount point of this facet; otherwise use $(facet_mntpt $facet)
-    # i.e. ${facet}_MOUNT if specified by user or default 
+    # i.e. ${facet}_MOUNT if specified by user or default
      local mntpt=$(do_facet ${facet} cat /proc/mounts | \
              awk '"'${!dev}'" == $1 && $3 == "lustre" { print $2 }')
      if [ -z $mntpt ]; then
@@ -1981,7 +1989,7 @@ nfs_client_mode () {
          declare -a nfsexport=(`grep ' '$MOUNT' ' /proc/mounts | awk '{print $1}' | awk -F: '{print $1 " "  $2}'`)
          if [[ ${#nfsexport[@]} -eq 0 ]]; then
                  error_exit NFSCLIENT=$NFSCLIENT mode, but no NFS export found!
-        fi 
+        fi
          do_nodes ${nfsexport[0]} "echo \\\$(hostname); df -T  ${nfsexport[1]}"
          return
      fi
@@ -1999,7 +2007,7 @@ check_config_client () {
          # in theory someone could create a new,
          # client-only config file that assumed lustre was already
          # configured and didn't set the MGSNID. If MGSNID is not set,
-        # then we should use the mgs nid currently being used 
+        # then we should use the mgs nid currently being used
          # as the default value. bug 18021
          [[ x$MGSNID = x ]] &&
              MGSNID=${mgc//MGC/}
@@ -2109,7 +2117,7 @@ check_and_setup_lustre() {
                      restore_mount $MOUNT2
                      export I_MOUNTED2=yes
                  fi
-            fi 
+            fi
  
      # 5.
      # MOUNT is mounted MOUNT2 is not mounted
@@ -2145,7 +2153,7 @@ cleanup_mount () {
      local clients=${CLIENTS:-$HOSTNAME}
      local mntpt=$1
  
-    zconf_umount_clients $clients $mntpt    
+    zconf_umount_clients $clients $mntpt
  }
  
  cleanup_and_setup_lustre() {
@@ -2153,7 +2161,7 @@ cleanup_and_setup_lustre() {
          lctl set_param debug=0 || true
          cleanupall
          if [ "$ONLY" == "cleanup" ]; then
-           exit 0
+            exit 0
          fi
      fi
      check_and_setup_lustre
@@ -2219,7 +2227,7 @@ generate_db() {
      local dev
      local tmp_file
  
-    tmp_file=$(mktemp -p $SHARED_DIRECTORY || 
+    tmp_file=$(mktemp -p $SHARED_DIRECTORY ||
          error_exit "fail to create file in $SHARED_DIRECTORY")
  
      # make sure everything gets to the backing store
@@ -2299,7 +2307,6 @@ wait_for_function () {
      if [ "$1" = "--quiet" ]; then
          shift
          quiet=" > /dev/null 2>&1"
-        
      fi
  
      local fn=$1
@@ -2347,7 +2354,7 @@ comma_list() {
  list_member () {
      local list=$1
      local item=$2
-    echo $list | grep -qw $item  
+    echo $list | grep -qw $item
  }
  
  # list, excluded are the comma separated lists
@@ -2599,7 +2606,6 @@ debugrestore() {
  
  error_noexit() {
      local TYPE=${TYPE:-"FAIL"}
-    local ERRLOG
      local tmp=$TMP
      [ -d "$SHARED_DIR_LOGS" ] && tmp=$SHARED_DIR_LOGS
  
@@ -2612,17 +2618,14 @@ error_noexit() {
  
      log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ "
  
+    # We need to dump the logs on all nodes
      if $dump; then
-        ERRLOG=$tmp/lustre_${TESTSUITE}_${TESTNAME}.$(date +%s)
-        [[ $cntlog -eq 0 ]] || ERRLOG=$ERRLOG.$cntlog
-        (( cntlog+=1 )) 
-        echo "Dumping lctl log to $ERRLOG"
-        # We need to dump the logs on all nodes
-        do_nodes $(comma_list $(nodes_list)) $NODE $LCTL dk $ERRLOG
+        gather_logs $(comma_list $(nodes_list))
      fi
+
      debugrestore
      [ "$TESTSUITELOG" ] && echo "$0: ${TYPE}: $TESTNAME $@" >> $TESTSUITELOG
-    TEST_FAILED=true
+    echo "$@" > $LOGDIR/err
  }
  
  exit_status () {
@@ -2684,7 +2687,7 @@ build_test_filter() {
      done
      for G in $GRANT_CHECK_LIST; do
          eval GCHECK_ONLY_${G}=true
-       done
+    done
  }
  
  basetest() {
@@ -2705,13 +2708,13 @@ run_test() {
          testname=ONLY_$1
          if [ ${!testname}x != x ]; then
              [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED=
-            run_one $1 "$2"
+            run_one_logged $1 "$2"
              return $?
          fi
          testname=ONLY_$base
          if [ ${!testname}x != x ]; then
              [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED=
-            run_one $1 "$2"
+            run_one_logged $1 "$2"
              return $?
          fi
          LAST_SKIPPED="y"
@@ -2744,7 +2747,7 @@ run_test() {
      fi
  
      LAST_SKIPPED=
-    run_one $1 "$2"
+    run_one_logged $1 "$2"
  
      return $?
  }
@@ -2790,9 +2793,13 @@ complete () {
  }
  
  pass() {
-    local status=PASS
-    $TEST_FAILED && status=FAIL
-    echo "$status $testnum $@" 2>&1 | tee -a $TESTSUITELOG
+    # Set TEST_STATUS here; will be used for logging the result
+    if [ -f $LOGDIR/err ]; then
+        TEST_STATUS="FAIL"
+    else
+        TEST_STATUS="PASS"
+    fi
+    echo $TEST_STATUS " " $@
  }
  
  check_mds() {
@@ -2812,28 +2819,48 @@ run_one() {
      message=$2
      tfile=f${testnum}
      export tdir=d0.${TESTSUITE}/d${base}
-
+    export TESTNAME=test_$testnum
      local SAVE_UMASK=`umask`
      umask 0022
  
-    local BEFORE=`date +%s`
      echo
-    log "== test $testnum: $message == `date +%H:%M:%S` ($BEFORE)"
-    export TESTNAME=test_$testnum
-    TEST_FAILED=false
-    cntlog=0
+    log "== test $testnum: $message == `date +%H:%M:%S`"
      test_${testnum} || error "test_$testnum failed with $?"
      cd $SAVE_PWD
      reset_fail_loc
-    check_grant ${testnum} || $TEST_FAILED || error "check_grant $testnum failed"
-    check_catastrophe || $TEST_FAILED || error "LBUG/LASSERT detected"
-    ps auxww | grep -v grep | grep -q multiop && ($TEST_FAILED || error "multiop still running")
-    pass "($((`date +%s` - $BEFORE))s)"
-    TEST_FAILED=false
-    cntlog=0
+    check_grant ${testnum} || error "check_grant $testnum failed with $?"
+    check_catastrophe || error "LBUG/LASSERT detected"
+    ps auxww | grep -v grep | grep -q multiop && error "multiop still running"
      unset TESTNAME
      unset tdir
      umask $SAVE_UMASK
+    return 0
+}
+
+# Run a single sub-test through run_one, capture its output in a per-test
+# log file under $LOGDIR and record begin/end entries via
+# log_sub_test_begin / log_sub_test_end.
+#   $1 - test number (the function executed is test_$1)
+#   $2 - test description, passed through to run_one
+# $LOGDIR/err is the failure marker: it is removed before the test starts
+# and its presence afterwards marks the test as failed.
+# Returns 0, unless a failure was recorded and $FAIL_ON_ERROR runs
+# successfully, in which case the shell exits with the status of run_one.
+run_one_logged() {
+    local BEFORE=`date +%s`
+    local TEST_ERROR
+    local name=${TESTSUITE}.test_${1}.test_log.$(hostname).log
+    local test_log=$LOGDIR/$name
+    rm -rf $LOGDIR/err
+
+    log_sub_test_begin test_${1}
+    # run_one executes in a subshell so that its output can be piped to tee;
+    # variables it sets are therefore not visible here.
+    (run_one $1 "$2") 2>&1 | tee $test_log
+    # PIPESTATUS[0] is the status of the subshell, not of tee; it must be
+    # read immediately after the pipeline.
+    local RC=${PIPESTATUS[0]}
+
+    # Non-zero status but no failure marker yet: create the marker here so
+    # that the checks below see the failure.
+    [ $RC -ne 0 ] && [ ! -f $LOGDIR/err ] && \
+        echo "test_$1 returned $RC" | tee $LOGDIR/err
+
+    # duration is not declared local, so it stays set after this returns.
+    duration=$((`date +%s` - $BEFORE))
+    pass "(${duration}s)"
+    [ -f $LOGDIR/err ] && TEST_ERROR=$(cat $LOGDIR/err)
+    # TEST_STATUS is not assigned in this function; presumably pass sets it
+    # -- confirm against pass().
+    log_sub_test_end $TEST_STATUS $duration "$RC" "$TEST_ERROR"
+
+    if [ -f $LOGDIR/err ]; then
+        # NOTE(review): RC may still be 0 here if the marker was written
+        # while run_one itself returned 0, which would make this "exit 0"
+        # -- confirm that is intended.
+        # NOTE(review): FAIL_ON_ERROR is executed as a command; if it is
+        # unset or empty the "&& exit" branch is still taken -- confirm it
+        # always has a true/false default.
+        $FAIL_ON_ERROR && exit $RC
+    fi
+
+    return 0
+}
  
  canonical_path() {
@@ -2906,6 +2933,13 @@ remote_mds_nodsh()
      remote_mds && [ "$PDSH" = "no_dsh" -o -z "$PDSH" -o -z "$mds_HOST" ]
  }
  
+# Gate for code that needs remote shell access to the MDS.
+# When remote_mds_nodsh succeeds, print a SKIP line for $TESTSUITE, set
+# MSKIPPED=1 and return 1; in every other case return 0.
+require_dsh_mds()
+{
+        if remote_mds_nodsh &&
+           echo "SKIP: $TESTSUITE: remote MDS with nodsh"; then
+                MSKIPPED=1
+                return 1
+        fi
+        return 0
+}
+
  remote_ost ()
  {
      local node
@@ -2917,10 +2951,17 @@ remote_ost ()
  
  remote_ost_nodsh()
  {
-    [ "$CLIENTONLY" ] && return 0 || true 
+    [ "$CLIENTONLY" ] && return 0 || true
      remote_ost && [ "$PDSH" = "no_dsh" -o -z "$PDSH" -o -z "$ost_HOST" ]
  }
  
+# Gate for code that needs remote shell access to the OSTs.
+# When remote_ost_nodsh succeeds, print a SKIP line for $TESTSUITE, set
+# OSKIPPED=1 and return 1; in every other case return 0.
+require_dsh_ost()
+{
+        if remote_ost_nodsh &&
+           echo "SKIP: $TESTSUITE: remote OST with nodsh"; then
+                OSKIPPED=1
+                return 1
+        fi
+        return 0
+}
+
  remote_mgs_nodsh()
  {
      local MGS
@@ -3140,7 +3181,7 @@ do_and_time () {
  
      SECONDS=0
      eval '$cmd'
-    
+
      [ ${PIPESTATUS[0]} -eq 0 ] || rc=1
  
      echo $SECONDS
@@ -3210,19 +3251,19 @@ exit \\\$rc;"
  # $2 file
  # $3 $RUNAS
  get_stripe_info() {
-       local tmp_file
+        local tmp_file
  
-       stripe_size=0
-       stripe_count=0
-       stripe_index=0
-       tmp_file=$(mktemp)
+        stripe_size=0
+        stripe_count=0
+        stripe_index=0
+        tmp_file=$(mktemp)
  
-       do_facet $1 $3 lfs getstripe -v $2 > $tmp_file
+        do_facet $1 $3 lfs getstripe -v $2 > $tmp_file
  
-       stripe_size=`awk '$1 ~ /size/ {print $2}' $tmp_file`
-       stripe_count=`awk '$1 ~ /count/ {print $2}' $tmp_file`
-       stripe_index=`awk '$1 ~ /stripe_offset/ {print $2}' $tmp_file`
-       rm -f $tmp_file
+        stripe_size=`awk '$1 ~ /size/ {print $2}' $tmp_file`
+        stripe_count=`awk '$1 ~ /count/ {print $2}' $tmp_file`
+        stripe_index=`awk '$1 ~ /stripe_offset/ {print $2}' $tmp_file`
+        rm -f $tmp_file
  }
  
  mdsrate_cleanup () {
@@ -3341,7 +3382,7 @@ get_md_name () {
  
  ########################
  
-convert_facet2label() { 
+convert_facet2label() {
      local facet=$1
  
      if [ x$facet = xost ]; then
@@ -3352,7 +3393,7 @@ convert_facet2label() {
  
      if [ -n ${!varsvc} ]; then
          echo ${!varsvc}
-    else  
+    else
          error "No lablel for $facet!"
      fi
  }
@@ -3420,10 +3461,10 @@ wait_osc_import_state() {
      CONN_PROC="osc.${ost}.ost_server_uuid"
      CONN_STATE=$(do_facet $facet lctl get_param -n $CONN_PROC 2>/dev/null | cut -f2)
      while [ "${CONN_STATE}" != "${expected}" ]; do
-        if [ "${expected}" == "DISCONN" ]; then 
+        if [ "${expected}" == "DISCONN" ]; then
              # for disconn we can check after proc entry is removed
              [ "x${CONN_STATE}" == "x" ] && return 0
-            #  with AT enabled, we can have connect request timeout near of 
+            #  with AT enabled, we can have connect request timeout near of
              # reconnect timeout and test can't see real disconnect
              [ "${CONN_STATE}" == "CONNECTING" ] && return 0
          fi
@@ -3438,7 +3479,6 @@ wait_osc_import_state() {
      log "${ost_facet} now in ${CONN_STATE} state"
      return 0
  }
-
  get_clientmdc_proc_path() {
      echo "${1}-mdc-*"
  }
@@ -3447,7 +3487,8 @@ do_rpc_nodes () {
      local list=$1
      shift
  
-    local RPATH="PATH=$LUSTRE/tests/:$PATH"
+    # Add paths to lustre tests for 32 and 64 bit systems.
+    local RPATH="PATH=$RLUSTRE/tests:/usr/lib/lustre/tests:/usr/lib64/lustre/tests:$PATH"
      do_nodesv $list "${RPATH} NAME=${NAME} sh rpc.sh $@ "
  }
  
@@ -3545,27 +3586,30 @@ gather_logs () {
      # of writing the file to an NFS directory so it doesn't need to be copied.
      local tmp=$TMP
      local docp=true
-    [ -d "$SHARED_DIR_LOGS" ] && tmp=$SHARED_DIR_LOGS && docp=false
+    [ -f $LOGDIR/shared ] && docp=false
  
      # dump lustre logs, dmesg
-    do_nodes $list "log=$tmp/\\\$(hostname)-debug-$ts.log ;
-lctl dk \\\$log >/dev/null;
-log=$tmp/\\\$(hostname)-dmesg-$ts.log;
-dmesg > \\\$log; "
  
-    # FIXME: does it make sense to collect the logs for $ts only, but all
-    # TESTSUITE logs?
-    # rsync $TMP/*${TESTSUITE}* to gather the logs dumped by error fn
-    local logs=$TMP/'*'${TESTSUITE}'*'
-    if $docp; then
-        logs=$logs' '$tmp/'*'$ts'*'
+    prefix="$LOGDIR/${TESTSUITE}.${TESTNAME}"
+    suffix="$ts.log"
+    echo "Dumping lctl log to ${prefix}.*.${suffix}"
+
+    if [ "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then
+        echo "Dumping logs only on local client."
+        $LCTL dk > ${prefix}.debug_log.$(hostname).${suffix}
+        dmesg > ${prefix}.dmesg.$(hostname).${suffix}
+        return
      fi
-    for node in ${list//,/ }; do
-        rsync -az $node:"$logs" $TMP
-    done
  
-    local archive=$TMP/${TESTSUITE}-$ts.tar.bz2
-    tar -jcf $archive $tmp/*$ts* $TMP/*${TESTSUITE}*
+    do_nodes --verbose $list \
+        "$LCTL dk > ${prefix}.debug_log.\\\$(hostname).${suffix};
+         dmesg > ${prefix}.dmesg.\\\$(hostname).${suffix}"
+    if [ ! -f $LOGDIR/shared ]; then
+        do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR
+      fi
+
+    local archive=$LOGDIR/${TESTSUITE}-$ts.tar.bz2
+    tar -jcf $archive $LOGDIR/*$ts* $LOGDIR/*${TESTSUITE}*
  
      echo $archive
  }
@@ -3610,11 +3654,11 @@ do_ls () {
  
  max_recovery_time () {
      local init_connect_timeout=$(( TIMEOUT / 20 ))
-    [[ $init_connect_timeout > 5 ]] || init_connect_timeout=5 
+    [[ $init_connect_timeout > 5 ]] || init_connect_timeout=5
  
      local service_time=$(( $(at_max_get client) + $(( 2 * $(( 25 + 1  + init_connect_timeout)) )) ))
  
-    echo $service_time 
+    echo $service_time
  }
  
  remove_mdt_files() {
@@ -3708,3 +3752,65 @@ min_ost_size () {
      $LCTL get_param -n osc.*.kbytesavail | sort -n | head -n1
  }
  
+# Prepare the log directory on the node this runs on.
+#   $1 - log directory
+# If the directory does not exist here it is created; if it already exists
+# a marker file node.<hostname>.yml is touched inside it.
+# Always returns 0.
+check_logdir() {
+    local dir=$1
+    # Checking for shared logdir.  "$dir" is quoted so that an empty or
+    # space-containing argument cannot turn the test into "[ ! -d ]" (which
+    # is false) and end up touching /node.<hostname>.yml.
+    if [ ! -d "$dir" ]; then
+        # Not found. Create local logdir
+        mkdir -p "$dir"
+    else
+        touch "$dir/node.$(hostname).yml"
+    fi
+    return 0
+}
+
+# Check that every node reported by nodes_list has left its marker file
+# node.<node>.yml in the given directory.
+#   $1 - log directory
+# Returns 0 when all markers are present, 1 as soon as one is missing.
+# NOTE(review): the markers touched by check_logdir are named after
+# $(hostname) on each node; this assumes nodes_list yields the same names
+# -- confirm.
+check_write_access() {
+    local dir=$1
+    # Keep the loop variable local so the caller's $node is not clobbered.
+    local node
+    for node in $(nodes_list); do
+        if [ ! -f "$dir/node.${node}.yml" ]; then
+            # Logdir not accessible/writable from this node.
+            return 1
+        fi
+    done
+    return 0
+}
+
+# One-time logging setup for a test run.
+# Does nothing if YAML_LOG is already set.  Otherwise it exports
+# YAML_LOG=$LOGDIR/results.yml, creates $LOGDIR, runs check_logdir on every
+# node, creates the marker file $LOGDIR/shared when check_write_access
+# succeeds, and appends the output of yml_nodes_file and yml_results_file
+# to $YAML_LOG.
+init_logging() {
+    if [[ -n $YAML_LOG ]]; then
+        return
+    fi
+    export YAML_LOG=${LOGDIR}/results.yml
+    mkdir -p "$LOGDIR"
+    init_clients_lists
+
+    do_rpc_nodes $(comma_list $(nodes_list)) check_logdir $LOGDIR
+    if check_write_access "$LOGDIR"; then
+        touch "$LOGDIR/shared"
+        echo "Logging to shared log directory: $LOGDIR"
+    else
+        echo "Logging to local directory: $LOGDIR"
+    fi
+
+    yml_nodes_file "$LOGDIR" >> "$YAML_LOG"
+    # yml_results_file does "export logdir=$1"; pass the directory so the
+    # exported logdir is not reset to an empty string.
+    yml_results_file "$LOGDIR" >> "$YAML_LOG"
+}
+
+# Append the output of yml_log_test for $1 to $YAML_LOG.
+log_test() {
+    yml_log_test $1 >> $YAML_LOG
+}
+
+# Append the output of yml_log_sub_test to $YAML_LOG.
+# All arguments are forwarded; "$@" keeps each one intact (no re-splitting
+# on whitespace, no pathname expansion).
+log_sub_test() {
+    yml_log_sub_test "$@" >> $YAML_LOG
+}
+
+# Append the output of yml_log_test_status to $YAML_LOG.
+# All arguments are forwarded; "$@" keeps each one intact (no re-splitting
+# on whitespace, no pathname expansion).
+log_test_status() {
+    yml_log_test_status "$@" >> $YAML_LOG
+}
+
+# Append the output of yml_log_sub_test_begin to $YAML_LOG.
+# All arguments are forwarded; "$@" keeps each one intact (no re-splitting
+# on whitespace, no pathname expansion).
+log_sub_test_begin() {
+    yml_log_sub_test_begin "$@" >> $YAML_LOG
+}
+
+# Append the output of yml_log_sub_test_end to $YAML_LOG.
+# All arguments are forwarded with "$@": a multi-word error message passed
+# as one quoted argument stays one argument, and characters such as '*' in
+# it are not expanded to file names.
+log_sub_test_end() {
+    yml_log_sub_test_end "$@" >> $YAML_LOG
+}
diff --git a/lustre/tests/test-groups/regression b/lustre/tests/test-groups/regression

new file mode 100644 (file)

index 0000000..1c79bc8
--- /dev/null
+++ b/lustre/tests/test-groups/regression
@@ -0,0 +1,20 @@
+sanity
+metadata-updates
+sanity-benchmark
+sanityn
+lfsck
+liblustre
+racer
+replay-single
+conf-sanity
+recovery-small
+replay-ost-single
+replay-dual
+replay-vbr
+insanity
+sanity-quota
+ost-pools
+lnet-selftest
+mmp
+obdfilter-survey
+sgpdd-survey
diff --git a/lustre/tests/test-groups/regression-mpi b/lustre/tests/test-groups/regression-mpi

new file mode 100644 (file)

index 0000000..fd44302
--- /dev/null
+++ b/lustre/tests/test-groups/regression-mpi
@@ -0,0 +1,3 @@
+performance-sanity
+large-scale
+parallel-scale
diff --git a/lustre/tests/yaml.sh b/lustre/tests/yaml.sh

new file mode 100644 (file)

index 0000000..f5803e2
--- /dev/null
+++ b/lustre/tests/yaml.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4:
+
+#
+# Shell routines for logging results to a yaml file.
+#
+
+split_output() {
+    while read line; do
+        host=${line%%:*};
+        echo "$line" | sed "s/^${host}: //" | sed "s/^${host}://" \
+            >> $logdir/node.$host.yml;
+    done
+}
+
+yml_nodes_file() {
+    export logdir=$1
+
+    if [ -f $logdir/shared ]; then
+        do_rpc_nodes $(comma_list $(nodes_list)) \
+            "yml_node >> $logdir/node.\\\$(hostname).yml"
+    else
+        do_rpc_nodes $(comma_list $(nodes_list)) yml_node | split_output
+    fi
+    yml_entities
+}
+
+yml_results_file() {
+    export logdir=$1
+
+    #TestGroup
+    yml_test_group
+
+    # Tests
+    printf "Tests:\n"
+}
+
+# Called on the node for which the info is needed.
+yml_node() {
+    local node=$(hostname)
+    logdir=$1
+
+    printf "Build:\n"
+    yml_build_info
+    printf "\n"
+
+    printf "Node:\n"
+    yml_node_info
+    printf "\n"
+
+    printf "LustreEntities:\n"
+}
+
+yml_test_group() {
+    TEST_GROUP=${TEST_GROUP:-"acc-sm-$(hostname)"}
+    TEST_HOST=${TEST_HOST:-$(hostname)}
+    TEST_USER=${TEST_USER:-$USER}
+
+    # TestGroup information
+    cat <<EOF
+TestGroup:
+    test_group: $TEST_GROUP
+    testhost: $TEST_HOST
+    submission: $(date)
+    user_name: $TEST_USER
+
+EOF
+}
+
+release() {
+   if [ -r /etc/lsb-release ]; then
+      dist=$(grep 'DISTRIB_ID' /etc/lsb-release | sed 's/DISTRIB_ID=//' | head -1)
+   elif [ -r /etc/redhat-release ]; then
+       dist=$(awk '/release/ { printf("%s %s %s", $1, $2, $3)}' /etc/redhat-release)
+   elif [ -r /etc/*-release ]; then
+       dist=$(find /etc/ -maxdepth 1 -name '*release' 2> /dev/null | \
+           sed -e 's/\/etc\///' -e 's/-release//' | head -1)
+   else
+       dist="UNKNOWN"
+   fi
+
+   echo $dist
+}
+
+yml_build_info() {
+    TEST_DISTRO=$(release)
+    LUSTRE_VERSION=$(lctl lustre_build_version | awk '/Lustre version:/ {print $3}')
+    LUSTRE_BUILD=$(sed 's/-.*//' <<<$LUSTRE_VERSION)
+
+cat <<EOF
+    lbats_build_id: $LBATS_ID
+    lbats_build_name: $LBATS_NAME
+    architecture: $(uname -m)
+    os: $(uname -o)
+    os_distribution: $TEST_DISTRO
+    lustre_version: $LUSTRE_VERSION
+    lustre_build: $LUSTRE_BUILD
+    kernel_version: $(uname -r)
+EOF
+}
+
+yml_node_info()
+{
+    mem=$(awk '/MemTotal:/ {print $2 " " $3}' /proc/meminfo)
+cat <<EOF
+    node_name: $(hostname)
+    mem_size: $mem
+    architecture: $(uname -m)
+    networks:
+EOF
+    for nw in $(lctl list_nids | grep -v @lo | cut -f 2 -d '@' | uniq); do
+        printf "        - $nw\n"
+    done
+}
+
+yml_entity() {
+    cat<<EOF
+-
+    node_type: $1
+    node_name: $2
+EOF
+}
+
+yml_entities() {
+    local host
+    for num in $(seq $MDSCOUNT); do
+        host=$(facet_active_host mds$num)
+        yml_entity "MDS $num" $host >> $logdir/node.$host.yml
+    done
+
+    for num in $(seq $OSTCOUNT); do
+        host=$(facet_active_host ost$num)
+        yml_entity "OST $num" $host >> $logdir/node.$host.yml
+    done
+
+    i=1
+    for host in ${CLIENTS//,/ }; do
+        yml_entity "Client $i" $host >> $logdir/node.$host.yml
+        i=$((i+1))
+    done
+}
+
+yml_log_test() {
+    if [ $1 != "FINISHED" ]; then
+        cat <<EOF
+-
+        name: $1
+        description: $TESTSUITE $1
+        submission: $(date)
+        report_version: 2
+        SubTests:
+EOF
+    fi
+}
+
+yml_log_test_status() {
+    cat <<EOF
+        duration: $1
+        status: $2
+EOF
+}
+
+yml_log_sub_test_begin() {
+    cat <<EOF
+        -
+            name: $1
+EOF
+}
+
+yml_log_sub_test_end() {
+    cat <<EOF
+            status: $1
+            duration: $2
+            return_code: $3
+EOF
+    shift 3
+    if [ -z "$*" ]; then
+        printf '            error:\n'
+    else
+        printf '            error: "%q"\n' "$*"
+    fi
+}
+
+yml_log_sub_test_log() {
+    cat <<EOF
+        -
+            name: $1
+            type: $2
+            location: $3
+EOF
+}
author	root <root@murdoch.sodor>
	Tue, 21 Dec 2010 14:00:06 +0000 (14:00 +0000)
committer	Johann Lombardi <johann@whamcloud.com>
	Wed, 20 Apr 2011 13:36:09 +0000 (06:36 -0700)
lustre/tests/Makefile.am		patch \| blob \| history
lustre/tests/acceptance-small.sh		patch \| blob \| history
lustre/tests/auster.sh	[new file with mode: 0755]	patch \| blob
lustre/tests/conf-sanity.sh		patch \| blob \| history
lustre/tests/insanity.sh		patch \| blob \| history
lustre/tests/large-scale.sh		patch \| blob \| history
lustre/tests/lfsck.sh		patch \| blob \| history
lustre/tests/liblustre.sh		patch \| blob \| history
lustre/tests/lnet-selftest.sh		patch \| blob \| history
lustre/tests/maloo_upload.sh	[new file with mode: 0755]	patch \| blob
lustre/tests/metadata-updates.sh		patch \| blob \| history
lustre/tests/mmp.sh		patch \| blob \| history
lustre/tests/obdfilter-survey.sh		patch \| blob \| history
lustre/tests/ost-pools.sh		patch \| blob \| history
lustre/tests/parallel-scale.sh		patch \| blob \| history
lustre/tests/performance-sanity.sh		patch \| blob \| history
lustre/tests/racer.sh		patch \| blob \| history
lustre/tests/recovery-double-scale.sh		patch \| blob \| history
lustre/tests/recovery-mds-scale.sh		patch \| blob \| history
lustre/tests/recovery-random-scale.sh		patch \| blob \| history
lustre/tests/recovery-small.sh		patch \| blob \| history
lustre/tests/replay-dual.sh		patch \| blob \| history
lustre/tests/replay-ost-single.sh		patch \| blob \| history
lustre/tests/replay-single.sh		patch \| blob \| history
lustre/tests/replay-vbr.sh		patch \| blob \| history
lustre/tests/rpc.sh		patch \| blob \| history
lustre/tests/runtests		patch \| blob \| history
lustre/tests/sanity-benchmark.sh		patch \| blob \| history
lustre/tests/sanity-quota.sh		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/sanityn.sh		patch \| blob \| history
lustre/tests/sgpdd-survey.sh		patch \| blob \| history
lustre/tests/test-framework.sh		patch \| blob \| history
lustre/tests/test-groups/regression	[new file with mode: 0644]	patch \| blob
lustre/tests/test-groups/regression-mpi	[new file with mode: 0644]	patch \| blob
lustre/tests/yaml.sh	[new file with mode: 0644]	patch \| blob