X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fscripts%2Fbdev-io-survey.sh;h=084ca5a810fc21d1fb94b06e865896edd35f9ba6;hb=0607e01af74a81d0fe12ceec79bd22810a5dfe92;hp=229db614c43102d3f5edc3ab503791eab248bfd4;hpb=1632015b5b019f83154dbc54725be70771a1a3e2;p=fs%2Flustre-release.git diff --git a/lustre/scripts/bdev-io-survey.sh b/lustre/scripts/bdev-io-survey.sh index 229db61..084ca5a 100755 --- a/lustre/scripts/bdev-io-survey.sh +++ b/lustre/scripts/bdev-io-survey.sh @@ -12,6 +12,7 @@ # discard first vmstat line # +HOSTNAME=`hostname` # a temp dir that is setup and torn down for each script run tmpdir="" # so we can kill background processes as the test cleans up @@ -20,10 +21,18 @@ declare -a cleanup_pids declare -a cleanup_mounts # global for completing the table. XXX this is a wart that could go cur_y="0" +# a global which funcs use to get at the blocks[] array +last_block=-1 +# prefix to run oprofile or readprofile +oprofile="" +readprofile="" # defaults for some options: min_threads=1 max_threads=4 +possible_tests="sgp_dd ext2_iozone echo_filter" +run_tests="$possible_tests" +echo_module="" # optional output directory output_dir="" @@ -43,13 +52,17 @@ save_output() { [ ! -z "$output_dir" ] && mv -f $1 $output_dir/$2 } cleanup() { - for pid in ${cleanup_pids[*]}; do - kill $pid - done - cleanup_echo_filter - for a in ${cleanup_mounts[*]}; do - umount -f $a - done + # only cleanup test runs if we have block devices + if [ $last_block != -1 ]; then + for pid in ${cleanup_pids[*]}; do + kill $pid + done + cleanup_echo_filter + for a in ${cleanup_mounts[*]}; do + umount -f $a + done + fi + [ ${#tmpdir} == 18 ] && [ -d $tmpdir ] && rm -rf $tmpdir } trap cleanup EXIT @@ -66,8 +79,13 @@ pid_has_stopped() { commas() { echo $* | sed -e 's/ /,/g' } +do_bc_scale() { + local scale=$1 + shift + echo "scale=$scale; $*" | bc +} do_bc() { - echo "scale=2; $*" | bc + do_bc_scale 10 $* } mean_stddev() { local points=$* @@ -89,7 +107,8 @@ mean_stddev() { local dev=`do_bc \($p - $avg\) \^ 2` tmp=`do_bc $tmp + $dev` done - tmp=`do_bc sqrt \( $tmp / \($num - 1\) \)` + tmp=`do_bc_scale 1 sqrt \( $tmp / \($num - 1\) \)` + avg=`do_bc_scale 1 $avg / 1` echo "$avg:$tmp" } @@ -100,6 +119,7 @@ usage() { echo " -l " echo " -t " echo " -T " + echo " -r " exit; } @@ -210,7 +230,7 @@ sgp_dd_start() { local bdev=${blocks[$i]}; case "$wor" in - w) ifof="if=/dev/zero of=$bdev" ;; + [wo]) ifof="if=/dev/zero of=$bdev" ;; r) ifof="if=$bdev of=/dev/null" ;; *) die "asked to do io with $wor?" esac @@ -252,6 +272,10 @@ ext2_iozone_prepare() { echo "iozone binary not found in PATH" return 1 fi + if ! iozone -i 0 -w -+o -s 1k -r 1k -f /dev/null > /dev/null; then + echo "iozone doesn't support -+o" + return 1 + fi if ! which mke2fs; then echo "mke2fs binary not found in PATH" return 1 @@ -282,7 +306,7 @@ ext2_iozone_setup() { case "$wor" in w) rm -f $f ;; - r) ;; + [or]) ;; *) die "asked to do io with $wor?" esac } @@ -295,30 +319,36 @@ ext2_iozone_start() { local f="$tmpdir/mount_$id/iozone" case "$wor" in - w) args="-i 0 -w" ;; - r) args="-i 1 -w" ;; + [wo]) args="-i 0 -w" ;; + r) args="-i 1" ;; *) die "asked to do io with $wor?" esac - echo iozone "$args -r ${iosize}k -s ${io_len}k -I -f $f" + echo iozone "$args -r ${iosize}k -s $(($io_len / $threads))k \ + -t $threads -+o -x -I -f $f" } ext2_iozone_result() { local output=$1 - - kps=`awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \ - < $output` - do_bc "$kps / 1024" -} -ext2_iozone_cleanup() { - local id=$1 local wor=$2 - local f="$tmpdir/mount_$id/iozone" + local string + local field case "$wor" in - w) ;; - r) rm -f $f ;; + [wo]) string="writers" + field=7 + ;; + r) string="readers" + field=6 + ;; *) die "asked to do io with $wor?" esac + + do_bc_scale 1 `awk '($1 == "Parent" && $'$field' == "'$string'") \ + {print $'$(($field + 2))'}' $output` / 1024 +} +ext2_iozone_cleanup() { + # the final read w/o -w removed the file + local nothing=0 } ext2_iozone_finish() { local index=$1 @@ -336,8 +366,9 @@ ext2_iozone_teardown() { # the echo_client setup is nutty enough to warrant its own clenaup running_config="" -running_modules="" +running_module="" declare -a running_names +declare -a running_oids cleanup_echo_filter() { local i @@ -347,7 +378,7 @@ cleanup_echo_filter() { lctl --device "\$"echo_$i destroy ${running_oids[$i]} \ $running_threads done - running_oids="" + unset running_oids for n in ${running_names[*]}; do # I can't believe leading whitespace matters here. @@ -358,12 +389,12 @@ detach quit EOF done - running_names="" + unset running_names - for m in $running_modules; do + for m in $running_module; do rmmod $m done - running_modules="" + running_module="" [ ! -z "$running_config" ] && lconf --cleanup $running_config running_config="" @@ -391,14 +422,14 @@ echo_filter_config() { fi if [ $index = 0 ]; then - if ! lmc -m $config --add net \ - --node localhost --nid localhost --nettype tcp; then - echo "error adding localhost net node" + if ! lmc -m $config --add net \ + --node $HOSTNAME --nid $HOSTNAME --nettype tcp; then + echo "error adding $HOSTNAME net node" return 1 fi fi - if ! lmc -m $config --add ost --ost ost_$index --node localhost \ + if ! lmc -m $config --add ost --ost ost_$index --node $HOSTNAME \ --fstype ext3 --dev $bdev --journal_size 400; then echo "error adding $bdev to config with lmc" return 1 @@ -420,12 +451,28 @@ echo_filter_prepare() { return 1; fi running_config="$config" + + echo 0 > /proc/sys/lnet/debug + echo 0 > /proc/sys/lnet/subsystem_debug + if ! grep -q '^obdecho\>' /proc/modules; then + local m if ! modprobe obdecho; then - echo "error running modprobe obdecho" - return 1; + if [ ! -z "$echo_module" ]; then + if ! insmod $echo_module; then + echo "err: insmod $echo_module" + return 1; + else + m="$echo_module" + fi + else + echo "err: modprobe $obdecho" + return 1; + fi + else + m=obdecho fi - running_modules="obdecho" + running_module=`basename $m | cut -d'.' -f 1` fi fi @@ -450,13 +497,13 @@ echo_filter_setup() { case "$wor" in w) ;; - r) return ;; + [or]) return ;; *) die "asked to do io with $wor?" esac running_threads=$threads oid=`lctl --device "\$"$name create $threads | \ - awk '/1 is object id/ { print $6 }'` + awk '/ #1 is object id/ { print $6 }'` # XXX need to deal with errors running_oids[$id]=$oid } @@ -465,17 +512,20 @@ echo_filter_start() { local iosize=$2 local wor=$3 local id=$4 + local rw + local name="echo_$id" - local pages=$(($io_len / 4)) + local len_pages=$(($io_len / $(($page_size / 1024)) / $threads )) + local size_pages=$(($iosize / $(($page_size / 1024)) )) case "$wor" in - w) args="-i 0 -w" ;; - r) args="-i 1 -w" ;; + [wo]) rw="w" ;; + r) rw="r" ;; *) die "asked to do io with $wor?" esac echo lctl --threads $threads v "\$"$name \ - test_brw 1 w v $pages ${running_oids[$i]} p$iosize + test_brw 1 $rw v $len_pages t${running_oids[$id]} p$size_pages } echo_filter_result() { local output=$1 @@ -485,7 +535,7 @@ echo_filter_result() { for mbs in `awk '($8=="MB/s):"){print substr($7,2)}' < $output`; do total=$(do_bc $total + $mbs) done - echo $total + do_bc_scale 2 $total / 1 } echo_filter_cleanup() { local id=$1 @@ -494,13 +544,13 @@ echo_filter_cleanup() { local name="echo_$id" case "$wor" in - w) return ;; + [wo]) return ;; r) ;; *) die "asked to do io with $wor?" esac - lctl --device "\$"$name destroy ${running_oids[$i]} $threads - unset running_oids[$i] + lctl --device "\$"$name destroy ${running_oids[$id]} $threads + unset running_oids[$id] } echo_filter_finish() { local index=$1 @@ -516,13 +566,20 @@ echo_filter_teardown() { test_one() { local test=$1 local my_x=$2 - local my_y=$3 - local threads=$4 - local iosize=$5 - local wor=$6 + local threads=$3 + local iosize=$4 + local wor=$5 local vmstat_pid local vmstat_log="$tmpdir/vmstat.log" local opref="$test-$threads-$iosize-$wor" + local -a iostat_pids + # sigh. but this makes it easier to dump into the tables + local -a read_req_s + local -a mb_s + local -a write_req_s + local -a sects_req + local -a queued_reqs + local -a service_ms for i in `seq 0 $last_block`; do ${test}_setup $i $wor $threads @@ -530,17 +587,32 @@ test_one() { echo $test with $threads threads + $oprofile opcontrol --start + # start up vmstat and record its pid - echo starting `date` nice -19 vmstat 1 > $vmstat_log 2>&1 & [ $? = 0 ] || die "vmstat failed" vmstat_pid=$! pid_now_running $vmstat_pid + # start up each block device's iostat + for i in `seq 0 $last_block`; do + nice -19 iostat -x ${blocks[$i]} 1 | awk \ + '($1 == "'${blocks[$i]}'"){print $0; fflush()}' \ + > $tmpdir/iostat.$i & + local pid=$! + pid_now_running $pid + iostat_pids[$i]=$pid + done + + $oprofile opcontrol --reset + $readprofile -r + # start all the tests. each returns a pid to wait on pids="" for i in `seq 0 $last_block`; do - cmd=`${test}_start $threads $iosize $wor $i` + local cmd=`${test}_start $threads $iosize $wor $i` + echo "$cmd" >> $tmpdir/commands $cmd > $tmpdir/$i 2>&1 & local pid=$! pids="$pids $pid" @@ -553,29 +625,60 @@ test_one() { echo -n . pid_has_stopped $p done - echo - # stop vmstat and get cpu use from it + # stop vmstat and all the iostats kill $vmstat_pid - echo stopping `date` pid_has_stopped $vmstat_pid + for i in `seq 0 $last_block`; do + local pid=${iostat_pids[$i]} + [ -z "$pid" ] && continue + + kill $pid + unset iostat_pids[$i] + pid_has_stopped $pid + done + + $readprofile | sort -rn > $tmpdir/readprofile + + $oprofile opcontrol --shutdown + $oprofile opreport > $tmpdir/oprofile + echo >> $tmpdir/oprofile + $oprofile opreport -c -l | head -20 >> $tmpdir/oprofile + + save_output $tmpdir/oprofile $opref.oprofile + save_output $tmpdir/readprofile $opref.readprofile + + # collect the results of vmstat and iostat cpu=$(mean_stddev $(awk \ '(NR > 3 && NF == 16 && $16 != "id" ) \ {print 100 - $16}' < $vmstat_log) ) save_output $vmstat_log $opref.vmstat + for i in `seq 0 $last_block`; do + read_req_s[$i]=$(mean_stddev $(awk \ + '(NR > 1) {print $4}' < $tmpdir/iostat.$i) ) + write_req_s[$i]=$(mean_stddev $(awk \ + '(NR > 1) {print $5}' < $tmpdir/iostat.$i) ) + sects_req[$i]=$(mean_stddev $(awk \ + '(NR > 1) {print $10}' < $tmpdir/iostat.$i) ) + queued_reqs[$i]=$(mean_stddev $(awk \ + '(NR > 1) {print $11}' < $tmpdir/iostat.$i) ) + service_ms[$i]=$(mean_stddev $(awk \ + '(NR > 1) {print $13}' < $tmpdir/iostat.$i) ) + + save_output $tmpdir/iostat.$i $opref.iostat.$i + done + # record each index's test results and sum them thru=0 - line="" for i in `seq 0 $last_block`; do - local t=`${test}_result $tmpdir/$i` + local t=`${test}_result $tmpdir/$i $wor` save_output $tmpdir/$i $opref.$i echo test returned "$t" - line="$line $t" - # some tests return mean:stddev per thread, filter out stddev + mb_s[$i]="$t" + # some tests return mean:stddev per device, filter out stddev thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g')) done - line="("`commas $line`")" for i in `seq 0 $last_block`; do ${test}_cleanup $i $wor $threads @@ -583,9 +686,20 @@ test_one() { # tabulate the results echo $test did $thru mb/s with $cpu - table_set $test $my_x $my_y $thru - table_set $test $(($my_x + 1)) $my_y $cpu - table_set $test $(($my_x + 2)) $my_y $line + table_set $test $my_x $cur_y `do_bc_scale 2 $thru / 1` + table_set $test $(($my_x + 1)) $cur_y $cpu + + for i in `seq 0 $last_block`; do + cur_y=$(($cur_y + 1)) + table_set $test $(($my_x)) $cur_y ${mb_s[$i]} + table_set $test $(($my_x + 1)) $cur_y ${read_req_s[$i]} + table_set $test $(($my_x + 2)) $cur_y ${write_req_s[$i]} + table_set $test $(($my_x + 3)) $cur_y ${sects_req[$i]} + table_set $test $(($my_x + 4)) $cur_y ${queued_reqs[$i]} + table_set $test $(($my_x + 5)) $cur_y ${service_ms[$i]} + done + + cur_y=$(($cur_y + 1)) } test_iterator() { @@ -618,15 +732,13 @@ test_iterator() { done while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do - for iosize in 64 128; do + for iosize in 128 512; do table_set $test 0 $cur_y $thr table_set $test 1 $cur_y $iosize - table_set $test 2 $cur_y "|" - for wor in w r; do - table_set $test 3 $cur_y $wor - test_one $test 4 $cur_y $thr $iosize $wor - cur_y=$(($cur_y + 1)) + for wor in w o r; do + table_set $test 2 $cur_y $wor + test_one $test 3 $thr $iosize $wor done done thr=$(($thr + $thr)) @@ -645,37 +757,76 @@ test_iterator() { return $rc; } -while getopts ":d:b:l:t:T:" opt; do +while getopts ":d:b:l:t:T:r:e:" opt; do case $opt in + e) echo_module=$OPTARG ;; b) block=$OPTARG ;; d) output_dir=$OPTARG ;; l) io_len=$OPTARG ;; + r) run_tests=$OPTARG ;; t) min_threads=$OPTARG ;; T) max_threads=$OPTARG ;; \?) usage esac done +page_size=`getconf PAGE_SIZE` || die '"getconf PAGE_SIZE" failed' + +[ ! -z "$echo_module" -a ! -f "$echo_module" ] && \ + die "obdecho module $echo_module is not a file" + if [ -z "$io_len" ]; then io_len=`awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo` [ -z "$io_len" ] && die "couldn't determine the amount of memory" fi if [ ! -z "$output_dir" ]; then - [ ! -e "$output_dir" ] && "output dir $output_dir doesn't exist" - [ ! -d "$output_dir" ] && "output dir $output_dir isn't a directory" + if [ ! -e "$output_dir" ]; then + mkdir -p "$output_dir" || die "error creating $output_dir" + fi + [ ! -d "$output_dir" ] && die "$output_dir isn't a directory" fi block=`echo $block | sed -e 's/,/ /g'` [ -z "$block" ] && usage "need block devices" +run_tests=`echo $run_tests | sed -e 's/,/ /g'` +[ -z "$run_tests" ] && usage "need to specify tests to run with -r" +for t in $run_tests; do + if ! echo $possible_tests | grep -q $t ; then + die "$t isn't one of the possible tests: $possible_tests" + fi +done + +if which opcontrol; then + echo generating oprofile results + oprofile="" +else + echo not using oprofile + oprofile=": " +fi + +if which readprofile; then + map="/boot/System.map-`uname -r`" + if [ -f /proc/profile -a -f "$map" ]; then + echo generating profiles with 'readprofile' + readprofile="readprofile -m $map" + fi +fi +if [ -z "$readprofile" ]; then + echo not using readprofile + readprofile=": " +fi + [ $min_threads -gt $max_threads ] && \ die "min threads $min_threads must be <= min_threads $min_threads" -last_block=-1 for b in $block; do [ ! -e $b ] && die "block device file $b doesn't exist" [ ! -b $b ] && die "$b isn't a block device" + dd if=$b of=/dev/null bs=8192 count=1 || \ + die "couldn't read 8k from $b, is it alive?" + [ ! -b $b ] && die "$b isn't a block device" last_block=$(($last_block + 1)) blocks[$last_block]=$b done @@ -684,18 +835,22 @@ tmpdir=`mktemp -d /tmp/.surveyXXXXXX` || die "couldn't create tmp dir" echo each test will operate on $io_len"k" -tests="sgp_dd ext2_iozone echo_filter" test_results="" -for t in $tests; do +for t in $run_tests; do table_set $t 0 0 "T" table_set $t 1 0 "L" - table_set $t 2 0 "|" - table_set $t 3 0 "W" - table_set $t 5 0 "C:S" - table_set $t 6 0 "B" - cur_y=1; + table_set $t 2 0 "m" + table_set $t 3 0 "A" + table_set $t 4 0 "C" + table_set $t 3 1 "MB" + table_set $t 4 1 "rR" + table_set $t 5 1 "wR" + table_set $t 6 1 "SR" + table_set $t 7 1 "Q" + table_set $t 8 1 "ms" + cur_y=2; if ! test_iterator $t; then continue; @@ -703,15 +858,23 @@ for t in $tests; do test_results="$test_results $t" done +save_output $tmpdir/commands commands + [ ! -z "$test_results" ] && ( echo echo "T = number of concurrent threads per device" echo "L = base io operation length, in KB" - echo "W/O/R = write/overwrite/read throughput, in MB/s" + echo "m = IO method: read, write, or over-write" + echo "A = aggregate throughput from all devices" echo "C = percentage CPU used, both user and system" - echo "S = standard deviation in cpu use" - echo "B = per-block results: ("`echo ${blocks[*]} | sed -e 's/ /,/g'`")" + echo "MB/s = per-device throughput" + echo "rR = read requests issued to the device per second" + echo "wR = write requests issued to the device per second" + echo "SR = sectors per request; sectors tend to be 512 bytes" + echo "Q = the average number of requests queued on the device" + echo "ms = the average ms taken by the device to service a req" echo + echo "foo:bar represents a mean of foo with a stddev of bar" ) for t in $test_results; do