X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fparallel-scale.sh;h=af4ad5d1bc2fecf1253be2bae6cf306f7c98356c;hb=a838fe8b43a8e3afb35d4854745b4876726f82c5;hp=246fb5e7e33ac4cb7af8dd01f5aba371b2c4006a;hpb=93b17568e18eb51bd6060869ffb022d225d67af9;p=fs%2Flustre-release.git diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh index 246fb5e..af4ad5d 100644 --- a/lustre/tests/parallel-scale.sh +++ b/lustre/tests/parallel-scale.sh @@ -2,431 +2,142 @@ # #set -vx -set -e - LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +init_logging -# -# compilbench -# -# Boulder cluster compilebench default location -cbench_DIR=${cbench_DIR:-/testsuite/tests/$(arch)/compilebench} -cbench_IDIRS=${cbench_IDIRS:-10} -cbench_RUNS=${cbench_RUNS:-10} # FIXME: wiki page requirements is 30, do we really need 30 ? +# bug 20670 +ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT" + +if [ $(facet_fstype $SINGLEMDS) = zfs -o $(facet_fstype "ost1") = zfs ]; then + ZFSSLOW=$SLOW + SLOW=no + + cbench_IDIRS=${cbench_IDIRS:-1} + cbench_RUNS=${cbench_RUNS:-1} + mdtest_nFiles=${mdtest_nFiles:-"10000"} + statahead_NUMFILES=${statahead_NUMFILES:-100000} +fi + +# common setup +MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} +clients=${CLIENTS:-$HOSTNAME} +generate_machine_file $clients $MACHINEFILE || + error "Failed to generate machine file" +num_clients=$(get_node_count ${clients//,/ }) + +# compilbench if [ "$SLOW" = "no" ]; then - cbench_IDIRS=2 - cbench_RUNS=2 + cbench_IDIRS=${cbench_IDIRS:-2} + cbench_RUNS=${cbench_RUNS:-2} fi -# # metabench -# -# Boulder cluster metabench default location -METABENCH=${METABENCH:-/testsuite/tests/$(arch)/METABENCH/src/metabench} -mbench_NFILES=${mbench_NFILES:-30400} -[ "$SLOW" = "no" ] && mbench_NFILES=10000 -MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} -# threads per client -mbench_THREADS=${mbench_THREADS:-4} +[ "$SLOW" = "no" ] && mbench_NFILES=${mbench_NFILES:-10000} -# # simul -# -# Boulder cluster default location -SIMUL=${SIMUL:-/testsuite/tests/$(arch)/simul/simul} -# threads per client -simul_THREADS=${simul_THREADS:-2} -simul_REP=${simul_REP:-20} -[ "$SLOW" = "no" ] && simul_REP=2 +[ "$SLOW" = "no" ] && simul_REP=${simul_REP:-2} -# # connectathon -# -# Boulder cluster default location -cnt_DIR=${cnt_DIR:-/testsuite/tests/$(arch)/connectathon} -cnt_NRUN=${cnt_NRUN:-10} -[ "$SLOW" = "no" ] && cnt_NRUN=2 +[ "$SLOW" = "no" ] && cnt_NRUN=${cnt_NRUN:-2} -# # cascading rw -# -# Boulder cluster default location -CASC_RW=${CASC_RW:-/testsuite/tests/$(arch)/parallel/cascading_rw} -# threads per client -casc_THREADS=${casc_THREADS:-2} -casc_REP=${casc_REP:-300} -[ "$SLOW" = "no" ] && casc_REP=10 +[ "$SLOW" = "no" ] && casc_REP=${casc_REP:-10} -# # IOR -# -# Boulder cluster default location -IOR=${IOR:-/testsuite/tests/$(arch)/IOR/src/C/IOR} -# threads per client -ior_THREADS=${ior_THREADS:-2} -ior_blockSize=${ior_blockSize:-6} # Gb -ior_DURATION=${ior_DURATION:-30} # minutes -[ "$SLOW" = "no" ] && ior_DURATION=5 +[ "$SLOW" = "no" ] && ior_DURATION=${ior_DURATION:-5} -# # write_append_truncate -# -# threads per client -write_THREADS=${write_THREADS:-8} -write_REP=${write_REP:-10000} -[ "$SLOW" = "no" ] && write_REP=100 +[ "$SLOW" = "no" ] && write_REP=${write_REP:-100} -# # write_disjoint -# -# Boulder cluster default location -WRITE_DISJOINT=${WRITE_DISJOINT:-/testsuite/tests/x86_64/lustre/lustre/tests/write_disjoint} -# threads per client -wdisjoint_THREADS=${wdisjoint_THREADS:-4} -wdisjoint_REP=${wdisjoint_REP:-10000} -[ "$SLOW" = "no" ] && wdisjoint_REP=100 +[ "$SLOW" = "no" ] && wdisjoint_REP=${wdisjoint_REP:-100} + +. $LUSTRE/tests/functions.sh build_test_filter check_and_setup_lustre -print_opts () { - local var - - echo OPTIONS: - - for i in $@; do - var=$i - echo "${var}=${!var}" - done - [ -e $MACHINEFILE ] && cat $MACHINEFILE -} - -# Takes: -# 5 min * cbench_RUNS -# SLOW=no 10 mins -# SLOW=yes 50 mins -# Space estimation: -# compile dir kernel-1 680MB -# required space 680MB * cbench_IDIRS = ~7 Gb +get_mpiuser_id $MPI_USER +MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"} +$GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS test_compilebench() { - print_opts cbench_DIR cbench_IDIRS cbench_RUNS - - [ -d $cbench_DIR ] || \ - { skip "No compilebench found" && return; } - - [ -e $cbench_DIR/compilebench ] || \ - { skip "No compilebench build" && return; } - - local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') - if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then - cbench_IDIRS=$(( space / 680 / 1024)) - [ $cbench_IDIRS = 0 ] && \ - skip "Need free space atleast 680 Mb, have $space" && return - - log free space=$space, reducing initial dirs to $cbench_IDIRS - fi - # FIXME: - # t-f _base needs to be modifyed to set properly tdir - # for new "test_foo" functions names - # local testdir=$DIR/$tdir - local testdir=$DIR/d0.compilebench - mkdir -p $testdir - - local savePWD=$PWD - cd $cbench_DIR - local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej" - - log "$cmd" - - local rc=0 - eval $cmd - rc=$? - - cd $savePWD - [ $rc = 0 ] || error "compilebench failed: $rc" - rm -rf $testdir + run_compilebench } run_test compilebench "compilebench" test_metabench() { - [ -e $METABENCH ] || \ - { skip "metabench not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts METABENCH clients mbench_NFILES mbench_THREADS - - local testdir=$DIR/d0.metabench - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # -C Run the file creation tests. - # -S Run the file stat tests. - # -c nfile Number of files to be used in each test. - # -k Cleanup. Remove the test directories. - local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k" - echo "+ $cmd" - mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd - local rc=$? - if [ $rc != 0 ] ; then - error "metabench failed! $rc" - fi - rm -rf $testdir + run_metabench } run_test metabench "metabench" test_simul() { - [ -e $SIMUL ] || \ - { skip "simul not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts SIMUL clients simul_REP simul_THREADS - - local testdir=$DIR/d0.simul - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # -n # : repeat each test # times - # -N # : repeat the entire set of tests # times - - local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP" - - echo "+ $cmd" - mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "simul failed! $rc" - fi - rm -rf $testdir + run_simul } run_test simul "simul" -test_connectathon() { - print_opts cnt_DIR cnt_NRUN - - [ -d $cnt_DIR ] || \ - { skip "No connectathon dir found" && return; } - - [ -e $cnt_DIR/runtests ] || \ - { skip "No connectathon runtests found" && return; } - - local testdir=$DIR/d0.connectathon - mkdir -p $testdir - - local savePWD=$PWD - cd $cnt_DIR - - # -f a quick functionality test - # -a run basic, general, special, and lock tests - # -N numpasses - will be passed to the runtests script. This argument - # is optional. It specifies the number of times to run - # through the tests. - - local cmd="./runtests -N $cnt_NRUN -a -f $testdir" - - log "$cmd" +test_mdtestssf() { + run_mdtest "ssf" +} +run_test mdtestssf "mdtestssf" - local rc=0 - eval $cmd - rc=$? +test_mdtestfpp() { + run_mdtest "fpp" +} +run_test mdtestfpp "mdtestfpp" - cd $savePWD - [ $rc = 0 ] || error "connectathon failed: $rc" - rm -rf $testdir +test_connectathon() { + run_connectathon } run_test connectathon "connectathon" -test_ior() { - [ -e $IOR ] || \ - { skip "IOR not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') - echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS " - if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then - echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS" - ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS )) - [ $ior_blockSize = 0 ] && \ - skip "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return - - echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)" - fi - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts IOR ior_THREADS ior_DURATION MACHINEFILE - - local testdir=$DIR/d0.ior - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # - # -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)" - # -o S testFileName - # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)" - # -w writeFile -- write file" - # -r readFile -- read existing file" - # -T maxTimeDuration -- max time in minutes to run tests" - # -k keepFile -- keep testFile(s) on program exit - local cmd="$IOR -a POSIX -b ${ior_blockSize}g -o $testdir/iorData -t 2m -v -w -r -T $ior_DURATION -k" - - echo "+ $cmd" - mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "ior failed! $rc" - fi - rm -rf $testdir +test_iorssf() { + run_ior "ssf" } -run_test ior "ior" - -test_cascading_rw() { - [ -e $CASC_RW ] || \ - { skip "cascading_rw not found" && return; } +run_test iorssf "iorssf" - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE - - local testdir=$DIR/d0.cascading_rw - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - # -g: debug mode - # -n: repeat test # times - - local cmd="$CASC_RW -g -d $testdir -n $casc_REP" +test_iorfpp() { + run_ior "fpp" +} +run_test iorfpp "iorfpp" - echo "+ $cmd" - mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd +test_mib() { + run_mib +} +run_test mib "mib" - local rc=$? - if [ $rc != 0 ] ; then - error "cascading_rw failed! $rc" - fi - rm -rf $testdir +test_cascading_rw() { + run_cascading_rw } run_test cascading_rw "cascading_rw" test_write_append_truncate() { - # location is lustre/tests dir - if ! which write_append_truncate > /dev/null 2>&1 ; then - skip "write_append_truncate not found" - return - fi - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - local testdir=$DIR/d0.write_append_truncate - local file=$testdir/f0.wat - - print_opts clients write_REP write_THREADS MACHINEFILE - - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - local cmd="write_append_truncate -n $write_REP $file" - - echo "+ $cmd" - mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd - - local rc=$? - if [ $rc != 0 ] ; then - error "write_append_truncate failed! $rc" - return $rc - fi - rm -rf $testdir + run_write_append_truncate } run_test write_append_truncate "write_append_truncate" test_write_disjoint() { - [ -e $WRITE_DISJOINT ] || \ - { skip "write_disjoint not found" && return; } - - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - # FIXME - # Need space estimation here. - - generate_machine_file $clients $MACHINEFILE || \ - error "can not generate machinefile $MACHINEFILE" - - print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE - local testdir=$DIR/d0.write_disjoint - mkdir -p $testdir - # mpi_run uses mpiuser - chmod 0777 $testdir - - local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP" + run_write_disjoint +} +run_test write_disjoint "write_disjoint" - echo "+ $cmd" - mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd +test_parallel_grouplock() { + run_parallel_grouplock +} +run_test parallel_grouplock "parallel_grouplock" - local rc=$? - if [ $rc != 0 ] ; then - error "write_disjoint failed! $rc" - fi - rm -rf $testdir +test_statahead () { + run_statahead } -run_test write_disjoint "write_disjoint" +run_test statahead "statahead test, multiple clients" + +[ $(facet_fstype $SINGLEMDS) = zfs -o $(facet_fstype "ost1") = zfs ] && + SLOW=$ZFSSLOW -equals_msg `basename $0`: test complete, cleaning up +complete $SECONDS check_and_cleanup_lustre -[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true +exit_status