X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fparallel-scale.sh;h=840a8b8cdc20b217c0f6aad7f4d06689cd536b2b;hb=7955e2c62e7c97c2e56e1bfc8d7598f2e80a4e52;hp=a7613e0d5843f005faa267b21389f5429288609c;hpb=548bd0e654ffd296e9df5b4f5af41d558a6fcefd;p=fs%2Flustre-release.git diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh index a7613e0..840a8b8 100644 --- a/lustre/tests/parallel-scale.sh +++ b/lustre/tests/parallel-scale.sh @@ -6,90 +6,66 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging -# bug 20670 -ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT" +# bug number for skipped test: LU-9429 + ALWAYS_EXCEPT=" parallel_grouplock $PARALLEL_SCALE_EXCEPT " -# -# compilbench -# -cbench_DIR=${cbench_DIR:-""} -cbench_IDIRS=${cbench_IDIRS:-4} -cbench_RUNS=${cbench_RUNS:-4} # FIXME: wiki page requirements is 30, do we really need 30 ? +if [ $(facet_fstype $SINGLEMDS) = zfs -o $(facet_fstype "ost1") = zfs ]; then + ZFSSLOW=$SLOW + SLOW=no + + cbench_IDIRS=${cbench_IDIRS:-1} + cbench_RUNS=${cbench_RUNS:-1} + + mdtest_nFiles=${mdtest_nFiles:-"10000"} + statahead_NUMFILES=${statahead_NUMFILES:-100000} +fi +# common setup +MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} +clients=${CLIENTS:-$HOSTNAME} +generate_machine_file $clients $MACHINEFILE || + error "Failed to generate machine file" +num_clients=$(get_node_count ${clients//,/ }) + +# compilbench if [ "$SLOW" = "no" ]; then - cbench_IDIRS=2 - cbench_RUNS=2 + cbench_IDIRS=${cbench_IDIRS:-2} + cbench_RUNS=${cbench_RUNS:-2} fi -# # metabench -# -METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)} -mbench_NFILES=${mbench_NFILES:-30400} -[ "$SLOW" = "no" ] && mbench_NFILES=10000 -MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} -# threads per client -mbench_THREADS=${mbench_THREADS:-4} +[ "$SLOW" = "no" ] && mbench_NFILES=${mbench_NFILES:-10000} -# # simul -# -SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)} -# threads per client -simul_THREADS=${simul_THREADS:-2} -simul_REP=${simul_REP:-20} -[ "$SLOW" = "no" ] && simul_REP=2 +[ "$SLOW" = "no" ] && simul_REP=${simul_REP:-2} -# # connectathon -# -cnt_DIR=${cnt_DIR:-""} -cnt_NRUN=${cnt_NRUN:-10} -[ "$SLOW" = "no" ] && cnt_NRUN=2 +[ "$SLOW" = "no" ] && cnt_NRUN=${cnt_NRUN:-2} -# # cascading rw -# -CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)} -# threads per client -casc_THREADS=${casc_THREADS:-2} -casc_REP=${casc_REP:-300} -[ "$SLOW" = "no" ] && casc_REP=10 +[ "$SLOW" = "no" ] && casc_REP=${casc_REP:-10} -# # IOR -# -IOR=${IOR:-$(which IOR 2> /dev/null || true)} -# threads per client -ior_THREADS=${ior_THREADS:-2} -ior_blockSize=${ior_blockSize:-6} # Gb -ior_DURATION=${ior_DURATION:-30} # minutes -[ "$SLOW" = "no" ] && ior_DURATION=5 +[ "$SLOW" = "no" ] && ior_DURATION=${ior_DURATION:-5} -# # write_append_truncate -# -# threads per client -write_THREADS=${write_THREADS:-8} -write_REP=${write_REP:-10000} -[ "$SLOW" = "no" ] && write_REP=100 +[ "$SLOW" = "no" ] && write_REP=${write_REP:-100} -# # write_disjoint -# -WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null || true)} -# threads per client -wdisjoint_THREADS=${wdisjoint_THREADS:-4} -wdisjoint_REP=${wdisjoint_REP:-10000} -[ "$SLOW" = "no" ] && wdisjoint_REP=100 +[ "$SLOW" = "no" ] && wdisjoint_REP=${wdisjoint_REP:-100} -# -# parallel_grouplock -# -# -PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 
2> /dev/null || true)} -parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5} +# fs_test +if [ "$SLOW" = "no" ]; then + fs_test_ndirs=${fs_test_ndirs:-10000} + fs_test_nobj=${fs_test_nobj:-2} +fi + +# xdd +[ "$SLOW" = "no" ] && xdd_passes=${xdd_passes:-15} + +. $LUSTRE/tests/functions.sh build_test_filter check_and_setup_lustre @@ -98,387 +74,116 @@ get_mpiuser_id $MPI_USER MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"} $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS -print_opts () { - local var - - echo OPTIONS: - - for i in $@; do - var=$i - echo "${var}=${!var}" - done - [ -e $MACHINEFILE ] && cat $MACHINEFILE -} - -# Takes: -# 5 min * cbench_RUNS -# SLOW=no 10 mins -# SLOW=yes 50 mins -# Space estimation: -# compile dir kernel-1 680MB -# required space 680MB * cbench_IDIRS = ~7 Gb - test_compilebench() { - print_opts cbench_DIR cbench_IDIRS cbench_RUNS - - [ x$cbench_DIR = x ] && - { skip_env "compilebench not found" && return; } - - [ -e $cbench_DIR/compilebench ] || \ - { skip_env "No compilebench build" && return; } - - local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') - if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then - cbench_IDIRS=$(( space / 680 / 1024)) - [ $cbench_IDIRS = 0 ] && \ - skip_env "Need free space atleast 680 Mb, have $space" && return - - log free space=$space, reducing initial dirs to $cbench_IDIRS - fi - # FIXME: - # t-f _base needs to be modifyed to set properly tdir - # for new "test_foo" functions names - # local testdir=$DIR/$tdir - local testdir=$DIR/d0.compilebench - mkdir -p $testdir - - local savePWD=$PWD - cd $cbench_DIR - local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej" - - log "$cmd" - - local rc=0 - eval $cmd - rc=$? - - cd $savePWD - [ $rc = 0 ] || error "compilebench failed: $rc" - rm -rf $testdir + run_compilebench } run_test compilebench "compilebench" test_metabench() { - [ x$METABENCH = x ] && - { skip_env "metabench not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts METABENCH clients mbench_NFILES mbench_THREADS - - local testdir=$DIR/d0.metabench - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # -C Run the file creation tests. - # -S Run the file stat tests. - # -c nfile Number of files to be used in each test. - # -k Cleanup. Remove the test directories. - local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k" - echo "+ $cmd" - mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd - local rc=$? - if [ $rc != 0 ] ; then - error "metabench failed! $rc" - fi - rm -rf $testdir + run_metabench } run_test metabench "metabench" test_simul() { - [ x$SIMUL = x ] && - { skip_env "simul not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. 
- - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts SIMUL clients simul_REP simul_THREADS - - local testdir=$DIR/d0.simul - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # -n # : repeat each test # times - # -N # : repeat the entire set of tests # times - - local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP" - - echo "+ $cmd" - mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "simul failed! $rc" - fi - rm -rf $testdir + run_simul } run_test simul "simul" -test_connectathon() { - print_opts cnt_DIR cnt_NRUN - - [ x$cnt_DIR = x ] && - { skip_env "connectathon dir not found" && return; } - - [ -e $cnt_DIR/runtests ] || \ - { skip_env "No connectathon runtests found" && return; } - - local testdir=$DIR/d0.connectathon - mkdir -p $testdir - - local savePWD=$PWD - cd $cnt_DIR - - # -f a quick functionality test - # -a run basic, general, special, and lock tests - # -N numpasses - will be passed to the runtests script. This argument - # is optional. It specifies the number of times to run - # through the tests. - - local cmd="./runtests -N $cnt_NRUN -a -f $testdir" - - log "$cmd" +test_mdtestssf() { + run_mdtest "ssf" +} +run_test mdtestssf "mdtestssf" - local rc=0 - eval $cmd - rc=$? +test_mdtestfpp() { + run_mdtest "fpp" +} +run_test mdtestfpp "mdtestfpp" - cd $savePWD - [ $rc = 0 ] || error "connectathon failed: $rc" - rm -rf $testdir +test_connectathon() { + run_connectathon } run_test connectathon "connectathon" -test_ior() { - [ x$IOR = x ] && - { skip_env "IOR not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') - echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS " - if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then - echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS" - ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS )) - [ $ior_blockSize = 0 ] && \ - skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return - - echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)" - fi - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts IOR ior_THREADS ior_DURATION MACHINEFILE - - local testdir=$DIR/d0.ior - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - $LFS setstripe $testdir -c -1 - - # - # -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)" - # -o S testFileName - # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)" - # -w writeFile -- write file" - # -r readFile -- read existing file" - # -T maxTimeDuration -- max time in minutes to run tests" - # -k keepFile -- keep testFile(s) on program exit - local cmd="$IOR -a POSIX -b ${ior_blockSize}g -o $testdir/iorData -t 2m -v -w -r -T $ior_DURATION -k" - - echo "+ $cmd" - mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "ior failed! 
$rc" - fi - rm -rf $testdir +test_iorssf() { + run_ior "ssf" } -run_test ior "ior" - -test_cascading_rw() { - [ x$CASC_RW = x ] && - { skip_env "cascading_rw not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) +run_test iorssf "iorssf" - num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE - - local testdir=$DIR/d0.cascading_rw - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir +test_iorfpp() { + run_ior "fpp" +} +run_test iorfpp "iorfpp" - # -g: debug mode - # -n: repeat test # times +test_ior_mdtest_parallel_ssf() { + ior_mdtest_parallel "ssf" +} +run_test ior_mdtest_parallel_ssf "iormdtestssf" - local cmd="$CASC_RW -g -d $testdir -n $casc_REP" +test_ior_mdtest_parallel_fpp() { + ior_mdtest_parallel "fpp" +} +run_test ior_mdtest_parallel_fpp "iormdtestfpp" - echo "+ $cmd" - mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd +test_mib() { + run_mib +} +run_test mib "mib" - local rc=$? - if [ $rc != 0 ] ; then - error "cascading_rw failed! $rc" - fi - rm -rf $testdir +test_cascading_rw() { + run_cascading_rw } run_test cascading_rw "cascading_rw" test_write_append_truncate() { - # location is lustre/tests dir - if ! which write_append_truncate > /dev/null 2>&1 ; then - skip_env "write_append_truncate not found" - return - fi - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - local testdir=$DIR/d0.write_append_truncate - local file=$testdir/f0.wat - - print_opts clients write_REP write_THREADS MACHINEFILE - - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - local cmd="write_append_truncate -n $write_REP $file" - - echo "+ $cmd" - mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "write_append_truncate failed! $rc" - return $rc - fi - rm -rf $testdir + run_write_append_truncate } run_test write_append_truncate "write_append_truncate" +# Argument is chunk size limit, the upper bound on write size test_write_disjoint() { - [ x$WRITE_DISJOINT = x ] && - { skip_env "write_disjoint not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) + run_write_disjoint 123456 +} +run_test write_disjoint "write_disjoint" - # FIXME - # Need space estimation here. 
+# Make sure to exercise the tiny write code +test_write_disjoint() { + run_write_disjoint 16384 +} +run_test write_disjoint "write_disjoint_tiny" - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" +test_parallel_grouplock() { + run_parallel_grouplock +} +run_test parallel_grouplock "parallel_grouplock" - print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE - local testdir=$DIR/d0.write_disjoint - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir +test_statahead () { + run_statahead +} +run_test statahead "statahead test, multiple clients" - local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP" +test_rr_alloc () { + run_rr_alloc +} +run_test rr_alloc "Checking even file distribution over OSTs in RR policy" - echo "+ $cmd" - mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd +test_fs_test () { + run_fs_test +} +run_test fs_test "fs_test" - local rc=$? - if [ $rc != 0 ] ; then - error "write_disjoint failed! $rc" - fi - rm -rf $testdir +test_fio () { + run_fio } -run_test write_disjoint "write_disjoint" +run_test fio "fio" -test_parallel_grouplock() { - [ x$PARALLEL_GROUPLOCK = x ] && - { skip "PARALLEL_GROUPLOCK not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts clients parallel_grouplock_MINTASKS MACHINEFILE - - local testdir=$DIR/d0.parallel_grouplock - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" || - error "set_param max_rw_chunk=0 failed " - - local cmd - local status=0 - local subtest - for i in $(seq 12); do - subtest="-t $i" - local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest" - echo "+ $cmd" - - mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd - local rc=$? - if [ $rc != 0 ] ; then - error_noexit "parallel_grouplock subtests $subtest failed! $rc" - else - echo "parallel_grouplock subtests $subtest PASS" - fi - let status=$((status + rc)) - # clear debug to collect one log per one test - do_nodes $(comma_list $(nodes_list)) lctl clear - done - [ $status -eq 0 ] || error "parallel_grouplock status: $status" - rm -rf $testdir +test_xdd () { + run_xdd } -run_test parallel_grouplock "parallel_grouplock" +run_test xdd "xdd" + +[ $(facet_fstype $SINGLEMDS) = zfs -o $(facet_fstype "ost1") = zfs ] && + SLOW=$ZFSSLOW -equals_msg `basename $0`: test complete, cleaning up +complete $SECONDS check_and_cleanup_lustre -[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true +exit_status
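
The bulk of this patch is a refactor: each long inline test body is replaced by a thin test_foo() { run_foo; } wrapper, with the shared run_* helpers pulled in through the newly sourced $LUSTRE/tests/functions.sh. As an illustrative sketch only, reconstructed from the removed test_compilebench body above (the real run_compilebench lives in functions.sh and is not part of this diff), the shared helper would look roughly like:

    # Sketch of a run_* helper, rebuilt from the removed inline body;
    # the actual implementation in lustre/tests/functions.sh may differ.
    run_compilebench() {
            print_opts cbench_DIR cbench_IDIRS cbench_RUNS

            [ x$cbench_DIR = x ] &&
                    { skip_env "compilebench not found" && return; }
            [ -e $cbench_DIR/compilebench ] ||
                    { skip_env "no compilebench build" && return; }

            local testdir=$DIR/d0.compilebench
            mkdir -p $testdir

            local savePWD=$PWD
            local rc=0
            cd $cbench_DIR
            ./compilebench -D $testdir -i $cbench_IDIRS \
                    -r $cbench_RUNS --makej || rc=$?
            cd $savePWD
            [ $rc -eq 0 ] || error "compilebench failed: $rc"
            rm -rf $testdir
    }

Keeping only the thin wrappers in parallel-scale.sh leaves the test names seen by run_test unchanged while letting other suites reuse the same helpers.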
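Two pieces of per-test boilerplate are also hoisted into the common setup at the top of the script: the machine file is generated once from $CLIENTS (failing the suite if generation fails) and num_clients is computed once, instead of being repeated inside every MPI-based test. The removed bodies above show the invocation pattern the shared helpers presumably retain; for metabench it was, roughly:

    # Invocation pattern taken from the removed test_metabench body;
    # thread count and metabench flags are the old inline values.
    generate_machine_file $clients $MACHINEFILE ||
            error "can not generate machinefile $MACHINEFILE"
    num_clients=$(get_node_count ${clients//,/ })

    testdir=$DIR/d0.metabench
    mkdir -p $testdir
    chmod 0777 $testdir     # mpi_run executes as mpiuser

    # -C create tests, -S stat tests, -c files per test, -k clean up
    mpi_run -np $((num_clients * mbench_THREADS)) -machinefile $MACHINEFILE \
            $METABENCH -w $testdir -c $mbench_NFILES -C -S -k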
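A small but deliberate detail in the SLOW handling: the unconditional reductions of the old script (cbench_IDIRS=2, mbench_NFILES=10000, ...) become default expansions (cbench_IDIRS=${cbench_IDIRS:-2}, ...), so a value assigned earlier, for example the ZFS-specific cbench_IDIRS=1 or a value exported by the caller, is no longer clobbered when SLOW=no. A minimal illustration of the ':-' semantics:

    # ':-' supplies the default only when the variable is unset or empty,
    # so an earlier (e.g. ZFS-branch) setting survives the SLOW=no block.
    cbench_IDIRS=1
    cbench_IDIRS=${cbench_IDIRS:-2}
    echo $cbench_IDIRS      # prints 1

    unset cbench_IDIRS
    cbench_IDIRS=${cbench_IDIRS:-2}
    echo $cbench_IDIRS      # prints 2

The same pattern is applied to mbench_NFILES, simul_REP, cnt_NRUN, casc_REP, ior_DURATION, write_REP, and wdisjoint_REP in the SLOW=no branch.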