From: Minh Diep Date: Mon, 7 Nov 2011 18:11:38 +0000 (-0800) Subject: LU-780 test: improve parallel-scale to support hyperion run X-Git-Tag: v1_8_7_81_WC1~37 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=446c186a0611280e89c9671ae6cd8500933b32ef;p=fs%2Flustre-release.git LU-780 test: improve parallel-scale to support hyperion run We need to add support for srun/slurm, and a few tests from the hyperion-sanity script that has been used for hyperion testing. Signed-off-by: Minh Diep Change-Id: I30d5c9c84fed6f6533ec766e6278a9c86046e4a9 Reviewed-on: http://review.whamcloud.com/1662 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Yu Jian Reviewed-by: Cliff White Reviewed-by: Johann Lombardi --- diff --git a/lustre/tests/cfg/ncli.sh b/lustre/tests/cfg/ncli.sh index 2249dd1..ca0b7bd 100644 --- a/lustre/tests/cfg/ncli.sh +++ b/lustre/tests/cfg/ncli.sh @@ -21,3 +21,9 @@ for i in $LOADS; do error "incorrect load: $i" done CLIENT_LOADS=($LOADS) + +# This is used when testing in a SLURM environment. +# Tests will use srun when SRUN_PARTITION is set. +SRUN=${SRUN:-$(which srun 2>/dev/null || true)} +SRUN_PARTITION=${SRUN_PARTITION:-""} +SRUN_OPTIONS=${SRUN_OPTIONS:-"-W 1800 -l -O"} diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh index 365684db..86a3b0d 100644 --- a/lustre/tests/parallel-scale.sh +++ b/lustre/tests/parallel-scale.sh @@ -11,12 +11,19 @@ init_logging # bug number: ALWAYS_EXCEPT="$PARALLEL_SCALE_EXCEPT" +# common setup +# +MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} +clients=${CLIENTS:-$HOSTNAME} +generate_machine_file $clients $MACHINEFILE || return $? +num_clients=$(get_node_count ${clients//,/ }) + # # compilbench # cbench_DIR=${cbench_DIR:-""} cbench_IDIRS=${cbench_IDIRS:-10} -cbench_RUNS=${cbench_RUNS:-10} # FIXME: wiki page requirements is 30, do we really need 30 ? +cbench_RUNS=${cbench_RUNS:-10} # FIXME: wiki page requirements is 30, do we really need 30 ? 
if [ "$SLOW" = "no" ]; then cbench_IDIRS=2 @@ -29,7 +36,6 @@ fi METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)} mbench_NFILES=${mbench_NFILES:-30400} [ "$SLOW" = "no" ] && mbench_NFILES=10000 -MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines} # threads per client mbench_THREADS=${mbench_THREADS:-4} @@ -43,6 +49,27 @@ simul_REP=${simul_REP:-20} [ "$SLOW" = "no" ] && simul_REP=2 # +# mib +# +MIB=${MIB:=$(which mib 2> /dev/null || true)} +# threads per client +mib_THREADS=${mib_THREADS:-2} +mib_xferSize=${mib_xferSize:-1m} +mib_xferLimit=${mib_xferLimit:-5000} +mib_timeLimit=${mib_timeLimit:-300} + +# +# MDTEST +# +MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)} +# threads per client +mdtest_THREADS=${mdtest_THREADS:-2} +mdtest_nFiles=${mdtest_nFiles:-"100000"} +# Divide the total file count by the number of tasks (threads per client * number of clients) +mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients)) +mdtest_iteration=${mdtest_iteration:-1} + +# # connectathon # cnt_DIR=${cnt_DIR:-""} @@ -64,7 +91,10 @@ casc_REP=${casc_REP:-300} IOR=${IOR:-$(which IOR 2> /dev/null || true)} # threads per client ior_THREADS=${ior_THREADS:-2} +ior_iteration=${ior_iteration:-1} ior_blockSize=${ior_blockSize:-6} # Gb +ior_xferSize=${ior_xferSize:-2m} +ior_type=${ior_type:-POSIX} ior_DURATION=${ior_DURATION:-30} # minutes [ "$SLOW" = "no" ] && ior_DURATION=5 @@ -159,16 +189,9 @@ test_metabench() { [ x$METABENCH = x ] && { skip_env "metabench not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - num_clients=$(get_node_count ${clients//,/ }) - # FIXME # Need space estimation here. - generate_machine_file $clients $MACHINEFILE || return $? - print_opts METABENCH clients mbench_NFILES mbench_THREADS local testdir=$DIR/d0.metabench @@ -182,7 +205,15 @@ test_metabench() { # -k Cleanup. Remove the test directories. 
local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k" echo "+ $cmd" - mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd + + # find out if we need to use srun by checking $SRUN_PARTITION + if [ "$SRUN_PARTITION" ]; then + $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \ + -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd + else + mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd + fi + local rc=$? if [ $rc != 0 ] ; then error "metabench failed! $rc" @@ -200,16 +231,9 @@ test_simul() { [ x$SIMUL = x ] && { skip_env "simul not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - # FIXME # Need space estimation here. - generate_machine_file $clients $MACHINEFILE || return $? - print_opts SIMUL clients simul_REP simul_THREADS local testdir=$DIR/d0.simul @@ -223,7 +247,14 @@ test_simul() { local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP" echo "+ $cmd" - mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd + + # find out if we need to use srun by checking $SRUN_PARTITION + if [ "$SRUN_PARTITION" ]; then + $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \ + -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd + else + mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd + fi local rc=$? if [ $rc != 0 ] ; then @@ -233,6 +264,61 @@ test_simul() { } run_test simul "simul" +test_mdtest() { + local type=${1:-"ssf"} + + if [ "$NFSCLIENT" ]; then + skip "skipped for NFSCLIENT mode" + return + fi + + [ x$MDTEST = x ] && + { skip_env "mdtest not found" && return; } + + # FIXME + # Need space estimation here. 
+ + print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles + + local testdir=$DIR/d0.mdtest + mkdir -p $testdir + # mpi_run uses mpiuser + chmod 0777 $testdir + + # -i # : repeat each test # times + # -d : test dir + # -n # : number of file/dir to create/stat/remove + # -u : each process create/stat/remove individually + + local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles" + [ $type = "fpp" ] && cmd="$cmd -u" + + echo "+ $cmd" + # find out if we need to use srun by checking $SRUN_PARTITION + if [ "$SRUN_PARTITION" ]; then + $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \ + -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd + else + mpi_run -np $((num_clients * mdtest_THREADS)) -machinefile ${MACHINEFILE} $cmd + fi + + local rc=$? + if [ $rc != 0 ] ; then + error "mdtest failed! $rc" + fi + rm -rf $testdir +} + +test_mdtestssf() { + test_mdtest "ssf" +} +run_test mdtestssf "mdtestssf" + +test_mdtestfpp() { + test_mdtest "fpp" +} +run_test mdtestfpp "mdtestfpp" + test_connectathon() { print_opts cnt_DIR cnt_NRUN @@ -270,17 +356,17 @@ test_connectathon() { local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}') echo "$testdir: $fstype" if [[ $fstype != "nfs4" ]]; then - tests="$tests -l" + tests="$tests -l" fi echo "tests: $tests" for test in $tests; do - local cmd="./runtests -N $cnt_NRUN $test -f $testdir" - local rc=0 + local cmd="./runtests -N $cnt_NRUN $test -f $testdir" + local rc=0 - log "$cmd" - eval $cmd - rc=$? - [ $rc = 0 ] || error "connectathon failed: $rc" + log "$cmd" + eval $cmd + rc=$? 
+ [ $rc = 0 ] || error "connectathon failed: $rc" done cd $savePWD @@ -289,14 +375,11 @@ test_connectathon() { run_test connectathon "connectathon" test_ior() { + local type=${1:="ssf"} + [ x$IOR = x ] && { skip_env "IOR not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS " if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then @@ -308,8 +391,6 @@ test_ior() { echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)" fi - generate_machine_file $clients $MACHINEFILE || return $? - print_opts IOR ior_THREADS ior_DURATION MACHINEFILE local testdir=$DIR/d0.ior @@ -332,9 +413,18 @@ test_ior() { # -T maxTimeDuration -- max time in minutes to run tests" # -k keepFile -- keep testFile(s) on program exit local cmd="$IOR -a POSIX -b ${ior_blockSize}g -o $testdir/iorData -t 2m -v -w -r -T $ior_DURATION -k" + local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k" + [ $type = "fpp" ] && cmd="$cmd -F" echo "+ $cmd" - mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd + + # find out if we need to use srun by checking $SRUN_PARTITION + if [ "$SRUN_PARTITION" ]; then + $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \ + -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd + else + mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd + fi local rc=$? 
if [ $rc != 0 ] ; then @@ -342,7 +432,59 @@ test_ior() { fi rm -rf $testdir } -run_test ior "ior" + +test_iorssf() { + test_ior "ssf" +} +run_test iorssf "iorssf" + +test_iorfpp() { + test_ior "fpp" +} +run_test iorfpp "iorfpp" + +test_mib() { + if [ "$NFSCLIENT" ]; then + skip "skipped for NFSCLIENT mode" + return + fi + + [ x$MIB = x ] && + { skip_env "MIB not found" && return; } + + print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit MACHINEFILE + + local testdir=$DIR/d0.mib + mkdir -p $testdir + # mpi_run uses mpiuser + chmod 0777 $testdir + $LFS setstripe $testdir -c -1 || + { error "setstripe failed" && return 2; } + # + # -I Show intermediate values in output + # -H Show headers in output + # -L Do not issue new system calls after this many seconds + # -s Use system calls of this size + # -t test dir + # -l Issue no more than this many system calls + local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)" + + echo "+ $cmd" + # find out if we need to use srun by checking $SRUN_PARTITION + if [ "$SRUN_PARTITION" ]; then + $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \ + -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd + else + mpi_run -np $((num_clients * mib_THREADS)) -machinefile ${MACHINEFILE} $cmd + fi + + local rc=$? + if [ $rc != 0 ] ; then + error "mib failed! $rc" + fi + rm -rf $testdir +} +run_test mib "mib" test_cascading_rw() { if [ "$NFSCLIENT" ]; then @@ -353,16 +495,9 @@ test_cascading_rw() { [ x$CASC_RW = x ] && { skip_env "cascading_rw not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - num_clients=$(get_node_count ${clients//,/ }) - # FIXME # Need space estimation here. - generate_machine_file $clients $MACHINEFILE || return $? 
- print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE local testdir=$DIR/d0.cascading_rw @@ -398,16 +533,9 @@ test_write_append_truncate() { return fi - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - # FIXME # Need space estimation here. - generate_machine_file $clients $MACHINEFILE || return $? - local testdir=$DIR/d0.write_append_truncate local file=$testdir/f0.wat @@ -440,16 +568,9 @@ test_write_disjoint() { [ x$WRITE_DISJOINT = x ] && { skip_env "write_disjoint not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - # FIXME # Need space estimation here. - generate_machine_file $clients $MACHINEFILE || return $? - print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE local testdir=$DIR/d0.write_disjoint mkdir -p $testdir @@ -478,13 +599,6 @@ test_parallel_grouplock() { [ x$PARALLEL_GROUPLOCK = x ] && { skip "PARALLEL_GROUPLOCK not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - generate_machine_file $clients $MACHINEFILE || return $? - print_opts clients parallel_grouplock_MINTASKS MACHINEFILE local testdir=$DIR/d0.parallel_grouplock @@ -544,13 +658,6 @@ test_statahead () { [ x$MDSRATE = x ] && { skip_env "mdsrate not found" && return; } - local clients=$CLIENTS - [ -z $clients ] && clients=$(hostname) - - local num_clients=$(get_node_count ${clients//,/ }) - - generate_machine_file $clients $MACHINEFILE || return $? - print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES # create large dir