From 1add4b9c0b22077d6fba02751eb03d709791f682 Mon Sep 17 00:00:00 2001
From: eeb
Date: Wed, 29 Sep 2004 13:48:26 +0000
Subject: [PATCH] * Cleaned up sgpdd-survey
 * Added a README for sgpdd-survey

---
 lustre-iokit/sgpdd-survey/README       |  64 +++++++++++++++++
 lustre-iokit/sgpdd-survey/sgpdd-survey | 122 +++++++++++++++++++++++++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 lustre-iokit/sgpdd-survey/README
 create mode 100755 lustre-iokit/sgpdd-survey/sgpdd-survey

diff --git a/lustre-iokit/sgpdd-survey/README b/lustre-iokit/sgpdd-survey/README
new file mode 100644
index 0000000..a6bea39
--- /dev/null
+++ b/lustre-iokit/sgpdd-survey/README
@@ -0,0 +1,64 @@
+
+Requirements
+------------
+
+. sg3_utils (for sgp_dd)
+. SCSI device
+
+
+Overview
+--------
+
+This survey may be used to characterise the performance of a SCSI device.
+It simulates an OST serving multiple stripe files.  The data gathered by it
+can help set expectations for the performance of a Lustre OST exporting the
+device.
+
+The script uses sgp_dd to do raw sequential disk I/O.  It runs with
+variable numbers of sgp_dd threads to show how performance varies with
+different request queue depths.
+
+The script spawns variable numbers of sgp_dd instances, each reading or
+writing a separate area of the disk, to show how performance varies with
+the number of concurrent stripe files.
+
+
+Running
+-------
+
+The script must be customised for the particular device under test and
+for where it should keep its working files.  The customisation variables
+are described at the start of the script.
+
+When the script runs, it creates a number of working files and a pair of
+result files.  All files start with the prefix given by ${rslt}.
+
+${rslt}_<date>.summary          same as stdout
+${rslt}_<date>_*                tmp files
+${rslt}_<date>.detail           collected tmp files for post-mortem
+
+The summary file and stdout contain lines like...
+
+total_size 8388608K rsz 1024 thr 1 crg 1 180.45 MB/s 1 x 180.50 = 180.50 MB/s
+
+The first number is the bandwidth computed by measuring the total data
+moved and the elapsed time.  The other numbers are a check on the
+bandwidths reported by the individual sgp_dd instances.
+
+If there are so many threads that sgp_dd is unlikely to be able to
+allocate I/O buffers, "ENOMEM" is printed.
+
+If not all of the sgp_dd instances report a bandwidth number successfully,
+"failed" is printed.
+
+
+Visualising Results
+-------------------
+
+I've found it most useful to import the summary data (it's fixed width)
+into Excel (or any graphing package) and graph bandwidth v. # threads for
+varying numbers of concurrent regions.  This shows how the device performs
+at different queue depths.  If the series (for varying numbers of
+concurrent regions) all land on top of each other, the device is not
+fazed by the seeks between regions at the given record size.
+
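Since the summary lines are fixed width, they can also be pulled apart with
awk before graphing.  A minimal sketch, assuming the field layout of the
example summary line above and the default ${rslt} prefix of
/tmp/sg_dd_rslt; it extracts (threads, MB/s) pairs for the crg=1 series:

    awk '$7 == "crg" && $8 == 1 && $10 == "MB/s" {print $6, $9}' \
        /tmp/sg_dd_rslt_*.summary

One such extraction per crg value gives the series to graph against the
number of threads.
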
diff --git a/lustre-iokit/sgpdd-survey/sgpdd-survey b/lustre-iokit/sgpdd-survey/sgpdd-survey
new file mode 100755
index 0000000..85a80b9
--- /dev/null
+++ b/lustre-iokit/sgpdd-survey/sgpdd-survey
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+######################################################################
+# customize per survey
+
+# the SG device to measure
+dev=/dev/sg6
+
+# result file prefix
+# NB ensure the path exists if it includes subdirs
+rslt=/tmp/sg_dd_rslt
+
+# what to do (read or write)
+action=write
+
+# total size (MBytes)
+# NB bigger than the device cache is good
+size=8192
+
+# record size (KBytes)
+rszlo=1024
+rszhi=1024
+
+# concurrent regions
+crglo=1
+crghi=4
+
+# total number of threads to share between the concurrent regions
+# NB the survey skips over #thr < #regions
+thrlo=1
+thrhi=1024
+
+#####################################################################
+
+# disk block size (Bytes)
+bs=512
+
+if [ $action = read ]; then
+    f1="if=$dev"
+    f2="of=/dev/null"
+    skip=skip
+else
+    f1="if=/dev/zero"
+    f2="of=$dev"
+    skip=seek
+fi
+
+start=`date +%F@%R`
+rsltf=${rslt}_${start}.summary
+echo -n > $rsltf
+workf=${rslt}_${start}.detail
+echo -n > $workf
+
+print_summary () {
+    if [ "$1" = "-n" ]; then
+        minusn=$1; shift
+    else
+        minusn=""
+    fi
+    echo $minusn "$*" >> $rsltf
+    echo $minusn "$*"
+}
+
+for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
+    for ((crg=$crglo;crg<=$crghi;crg*=2)); do
+        for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
+            if ((thr < crg)); then
+                continue
+            fi
+            # compute parameters
+            bpt=$((rsz*1024/bs))
+            blocks=$((size*((1024*1024)/bs)/crg))
+            count=$blocks
+            # show computed parameters
+            actual_rsz=$((bpt*bs/1024))
+            actual_size=$((bs*count*crg/1024))
+            str=`printf 'total_size %8dK rsz %4d thr %5d crg %3d ' \
+                 $actual_size $actual_rsz $thr $crg`
+            echo "==============> $str" >> $workf
+            print_summary -n "$str"
+            # skip combinations unlikely to fit in memory (MemTotal is in KB)
+            freemem=`awk < /proc/meminfo '/^MemTotal:/ {printf "%d\n", $2}'`
+            if (((actual_rsz * thr / crg + 64) * crg > freemem)); then
+                print_summary "ENOMEM"
+                continue
+            fi
+            # start test
+            t0=`date +%s.%N`
+            for ((i=0;i<crg;i++)); do
+                sgp_dd 2> ${rslt}_tmp${i} \
+                    $f1 $f2 ${skip}=$((1024+i*blocks)) \
+                    thr=$((thr/crg)) count=$count bs=$bs bpt=$bpt time=1&
+            done
+            wait
+            t1=`date +%s.%N`
+            # collect all results in 1 file
+            rfile=${rslt}_thr${thr}_crg${crg}_rsz${rsz}
+            echo > $rfile
+            ok=0
+            for ((i=0;i<crg;i++)); do
+                if grep 'time to transfer data' ${rslt}_tmp${i} > /dev/null 2>&1; then
+                    ok=$((ok + 1))
+                fi
+                cat ${rslt}_tmp${i} >> $rfile
+                cat ${rslt}_tmp${i} >> $workf
+                rm ${rslt}_tmp${i}
+            done
+            if [ $ok -ne $crg ]; then
+                print_summary `printf "failed %d" $((crg - ok))`
+            else
+                # compute MB/sec from total data moved and elapsed time
+                bw=`awk "BEGIN {printf \"%6.2f MB/s\", $actual_size / (( $t1 - $t0 ) * 1024); exit}"`
+                # cross-check: nregions x slowest reported instance
+                check=`awk < $rfile \
+                       '/time to transfer data/ {mb=$8/1.048576; if (n == 0 || mb < min) min = mb; n++}\
+                        END {printf "%3d x %6.2f = %6.2f MB/s", n, min, min * n}'`
+                print_summary "$bw $check"
+            fi
+            rm $rfile
+        done
+    done
+done
-- 
1.8.3.1
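
To make the mapping from the customisation variables to sgp_dd concrete:
with the defaults above (dev=/dev/sg6, action=write, size=8192 MB,
rsz=1024 KB, bs=512) and, for example, thr=4 with crg=1, each pass of the
inner loop backgrounds a single instance along the lines of the sketch
below.  This is reconstructed from the script's variables rather than
captured from a run, so treat the exact numbers as illustrative:

    sgp_dd if=/dev/zero of=/dev/sg6 seek=1024 thr=4 \
        count=16777216 bs=512 bpt=2048 time=1 2> /tmp/sg_dd_rslt_tmp0

Its stderr (including the "time to transfer data" line) is what the
collection pass appends to the detail file and what the awk check parses
for the per-instance bandwidth.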