From 1632015b5b019f83154dbc54725be70771a1a3e2 Mon Sep 17 00:00:00 2001
From: zab <zab>
Date: Thu, 29 Jan 2004 02:16:18 +0000
Subject: [PATCH] a block device profiling script which is built to reliably
 run different tests by itself and give detailed results.

---
 lustre/scripts/bdev-io-survey.sh | 720 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 720 insertions(+)
 create mode 100755 lustre/scripts/bdev-io-survey.sh
diff --git a/lustre/scripts/bdev-io-survey.sh b/lustre/scripts/bdev-io-survey.sh
new file mode 100755
index 0000000..229db61
--- /dev/null
+++ b/lustre/scripts/bdev-io-survey.sh
@@ -0,0 +1,720 @@
+#!/bin/bash
+
+# for now all the units are in 'k', but we could introduce some helpers
+# would be nice to run tests in the background and trap signals and kill
+#
+#  todo:
+#	make sure devices aren't in use before going to town
+#	really use threads with iozone
+#	look into what sgp_dd is really doing, update arguments
+# 	rename config/prepare/setup/cleanup/finish/teardown
+# 	do something with sf and fpp iterating
+#	discard first vmstat line
+#
+
+# a temp dir that is setup and torn down for each script run
+tmpdir=""
+# so we can kill background processes as the test cleans up
+declare -a cleanup_pids
+# to unmount mounts in our tmpdir before removing it
+declare -a cleanup_mounts
+# global for completing the table.  XXX this is a wart that could go
+cur_y="0"
+
+# defaults for some options:
+min_threads=1
+max_threads=4
+
+# optional output directory
+output_dir=""
+ 
+die() {
+        echo $* 1>&2
+        exit 1
+}
+rm_or_die() {
+        for path in $*; do
+                [ -e $path ] || continue;
+                [ -f $path ] || die "needed to remove non-file $path"
+                rm -f $path || die "couldn't remove $path"
+        done
+}
+save_output() {
+	[ ! -z "$output_dir" ] && mv -f $1 $output_dir/$2
+}
+cleanup() {
+	for pid in ${cleanup_pids[*]}; do
+		kill $pid
+	done
+	cleanup_echo_filter
+	for a in ${cleanup_mounts[*]}; do
+		umount -f $a
+	done
+        [ ${#tmpdir} == 18 ] && [ -d $tmpdir ] && rm -rf $tmpdir
+}
+trap cleanup EXIT
+
+pid_now_running() {
+	local pid=$1
+	cleanup_pids[$pid]=$pid
+}
+pid_has_stopped() {
+	local pid=$1
+	unset cleanup_pids[$pid]
+}
+                                                                                
+commas() {
+	echo $* | sed -e 's/ /,/g'
+}
+do_bc() {
+        echo "scale=2; $*" | bc
+}
+mean_stddev() {
+	local points=$*
+
+	local avg=0
+	local num=0
+	for p in $points; do
+		avg=`do_bc $avg + $p`
+		num=`do_bc $num + 1`
+	done
+	case $num in
+		0) echo '??' ; return ;;
+		1) echo "$avg:0" ; return ;;
+	esac
+
+	avg=`do_bc $avg / $num`
+	local tmp=0
+	for p in $points; do
+	        local dev=`do_bc \($p - $avg\) \^ 2`
+	        tmp=`do_bc $tmp + $dev`
+	done
+	tmp=`do_bc sqrt \( $tmp / \($num - 1\) \)`
+	echo "$avg:$tmp"
+}
+
+usage() {
+        echo $*
+        echo "       -b <block device to profile>"
+        echo "       -d <summary output directory>"
+        echo "       -l <max io len>"
+        echo "       -t <minimum number of threads per device>"
+        echo "       -T <maximum number of threads per device>"
+        exit;
+}
+
+# some cute code for handling tables whose columns fit
+set_max() {
+        local target=$1
+        local val=$2
+                                                                                
+        if [ $val -gt ${!target:-0} ]; then
+                eval $target=$val
+        fi
+}
+table_set() {
+        local name="_table_$1"
+        local col=$2
+        local row=$3
+        local val=$4
+        local num
+                                                                                
+        eval ${name}_${row}_${col}="'$val'"
+                                                                                
+        set_max ${name}_${col}_longest ${#val}
+        set_max ${name}_num_col $(($col + 1))
+        set_max ${name}_num_row $(($row + 1))
+}
+                                                                                
+table_get() {
+        local name="_table_$1"
+        local col=$2
+        local row=$3
+        tmp="${name}_${row}_${col}"
+        echo ${!tmp}
+}
+                                                                                
+table_dump() {
+        local name="_table_$1"
+        local num_col;
+        local num_row;
+        local fmt="";
+        local tmp
+        local sep
+                                                                                
+        tmp="${name}_num_col"
+        num_col="${!tmp:-0}"
+        tmp="${name}_num_row"
+        num_row="${!tmp:-0}"
+                                                                                
+        # iterate through the columns to find the longest
+                                                                                
+        sep=" "
+        for x in `seq 0 $num_col`; do
+                tmp="${name}_${x}_longest"
+                tmp=${!tmp:-0}
+                [ $tmp -eq 0 ] && continue
+                                                                                
+                [ $x -eq $((num_col - 1)) ] && sep='\n'
+                                                                                
+                fmt="$fmt%-${tmp}s$sep"
+        done
+                                                                                
+        # nothing in the table to print
+        [ -z "$fmt" ] && return
+                                                                                
+        for y in `seq 0 $num_row`; do
+                local row=""
+                for x in `seq 0 $num_col`; do
+                                                                                
+                        # skip this element if the column is empty
+                        tmp="${name}_${x}_longest"
+                        [ ${!tmp:-0} -eq 0 ] && continue
+                                                                                
+                        # fill this cell with the value or '' for printf
+                        tmp="${name}_${y}_${x}"
+                        row="$row'${!tmp:-""}' "
+                done
+                eval printf "'$fmt'" $row
+        done
+}
+
+######################################################################
+# the sgp_dd tests
+sgp_dd_banner() {
+	echo sgp_dd using dio=1 and thr=
+}
+sgp_dd_config() {
+	# it could be making sure that the block dev
+	# isn't in use by something else
+	local nothing=0
+}
+sgp_dd_prepare() {
+	if ! which sgp_dd; then
+		echo "can't find sgp_dd binary"
+		return 1
+	fi
+	return 0
+}
+sgp_dd_setup() {
+	# it could be making sure that the block dev
+	# isn't in use by something else
+	local nothing=0
+}
+sgp_dd_start() {
+	local threads=$1
+	local iosize=$2
+	local wor=$3
+	local i=$4
+	local ifof;
+	local bdev=${blocks[$i]};
+
+	case "$wor" in
+		w) ifof="if=/dev/zero of=$bdev" ;;
+		r) ifof="if=$bdev of=/dev/null" ;;
+		*) die "asked to do io with $wor?"
+	esac
+	echo sgp_dd $ifof bs=$iosize"k" count=$(($io_len / $iosize)) time=1 \
+			dio=1 thr=$threads
+}
+sgp_dd_result() {
+	local output=$1
+
+	awk '($(NF) == "MB/sec") {print $(NF-1)}' < $output
+}
+sgp_dd_cleanup() {
+	# got me
+	local nothing=0
+}
+sgp_dd_finish() {
+	# got me
+	local nothing=0
+}
+sgp_dd_teardown() {
+	# got me
+	local nothing=0
+}
+
+######################################################################
+# the iozone tests
+ext2_iozone_banner() {
+	echo "iozone -I on a clean ext2 fs"
+}
+ext2_iozone_config() {
+	local nothing=0
+}
+ext2_iozone_prepare() {
+	local index=$1
+	local bdev=${blocks[$index]}
+	local mntpnt=$tmpdir/mount_$index
+
+	if ! which iozone; then
+		echo "iozone binary not found in PATH"
+		return 1
+	fi
+	if ! which mke2fs; then
+		echo "mke2fs binary not found in PATH"
+		return 1
+	fi
+
+	if ! mkdir -p $mntpnt ; then
+		echo "$mntpnt isn't a directory?"
+	fi
+
+	echo making ext2 filesystem on $bdev
+	if ! mke2fs -b 4096 $bdev; then
+		echo "mke2fs failed"
+		return 1;
+	fi
+
+	if ! mount -t ext2 $bdev $mntpnt; then 
+		echo "couldn't mount $bdev on $mntpnt"
+		return 1;
+	fi
+
+	cleanup_mounts[$index]="$mntpnt"
+	return 0
+}
+ext2_iozone_setup() {
+	local id=$1
+	local wor=$2
+	local f="$tmpdir/mount_$id/iozone"
+
+	case "$wor" in
+		w) rm -f $f ;;
+		r) ;;
+		*) die "asked to do io with $wor?"
+	esac
+}
+ext2_iozone_start() {
+	local threads=$1
+	local iosize=$2
+	local wor=$3
+	local id=$4
+	local args;
+	local f="$tmpdir/mount_$id/iozone"
+
+	case "$wor" in
+		w) args="-i 0 -w" ;;
+		r) args="-i 1 -w" ;;
+		*) die "asked to do io with $wor?"
+	esac
+
+	echo iozone "$args -r ${iosize}k -s ${io_len}k -I -f $f"
+}
+ext2_iozone_result() {
+	local output=$1
+
+	kps=`awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
+		< $output`
+	do_bc "$kps / 1024"
+}
+ext2_iozone_cleanup() {
+	local id=$1
+	local wor=$2
+	local f="$tmpdir/mount_$id/iozone"
+
+	case "$wor" in
+		w) ;;
+		r) rm -f $f ;;
+		*) die "asked to do io with $wor?"
+	esac
+}
+ext2_iozone_finish() {
+	local index=$1
+	local mntpnt=$tmpdir/mount_$index
+
+	umount -f $mntpnt
+	unset cleanup_mounts[$index]
+}
+ext2_iozone_teardown() {
+	local nothing=0
+}
+
+######################################################################
+# the lctl test_brw via the echo_client on top of the filter
+
+# the echo_client setup is nutty enough to warrant its own clenaup
+running_config=""
+running_modules=""
+declare -a running_names
+
+cleanup_echo_filter() {
+	local i
+
+	for i in `seq 0 $last_block`; do
+		[ -z "${running_oids[$i]}" ] && continue
+		lctl --device "\$"echo_$i destroy ${running_oids[$i]} \
+			$running_threads
+	done
+	running_oids=""
+
+	for n in ${running_names[*]}; do
+# I can't believe leading whitespace matters here.
+lctl << EOF
+cfg_device $n
+cleanup
+detach
+quit
+EOF
+	done
+	running_names=""
+
+	for m in $running_modules; do
+		rmmod $m
+	done
+	running_modules=""
+
+	[ ! -z "$running_config" ] && lconf --cleanup $running_config
+	running_config=""
+}
+
+echo_filter_banner() {
+	echo "test_brw on the echo_client on the filter" 
+}
+echo_filter_config() {
+	local index=$1
+	local bdev=${blocks[$index]}
+	local config="$tmpdir/config.xml"
+
+	if ! which lmc; then
+		echo "lmc binary not found in PATH"
+		return 1
+	fi
+	if ! which lconf; then
+		echo "lconf binary not found in PATH"
+		return 1
+	fi
+	if ! which lctl; then
+		echo "lctl binary not found in PATH"
+		return 1
+	fi
+
+	if [ $index = 0 ]; then
+		if ! lmc -m $config --add net  \
+			--node localhost --nid localhost --nettype tcp; then
+			echo "error adding localhost net node"
+			return 1
+		fi
+	fi
+
+	if ! lmc -m $config --add ost --ost ost_$index --node localhost \
+			--fstype ext3 --dev $bdev --journal_size 400; then
+		echo "error adding $bdev to config with lmc"
+		return 1
+	fi
+
+	# it would be nice to be able to ask lmc to setup an echo client
+	# to the filter here.  --add echo_client assumes osc
+}
+echo_filter_prepare() {
+	local index=$1
+	local bdev=${blocks[$index]}
+	local config="$tmpdir/config.xml"
+	local name="echo_$index"
+	local uuid="echo_$index_uuid"
+
+	if [ $index = 0 ]; then
+		if ! lconf --reformat $config; then
+			echo "error setting up with lconf"
+			return 1;
+		fi
+		running_config="$config"
+		if ! grep -q '^obdecho\>' /proc/modules; then
+			if ! modprobe obdecho; then
+				echo "error running modprobe obdecho"
+				return 1;
+			fi
+			running_modules="obdecho"
+		fi
+	fi
+
+lctl << EOF
+        newdev
+        attach echo_client $name $uuid
+        setup ost_$index
+        quit
+EOF
+	if [  $? != 0 ]; then
+		echo "error setting up echo_client $name against ost_$index"
+		return 1
+	fi
+	running_names[$index]=$name
+}
+echo_filter_setup() {
+	local id=$1
+	local wor=$2
+	local threads=$3
+	local name="echo_$id"
+	local oid
+
+	case "$wor" in
+		w) ;;
+		r) return ;;
+		*) die "asked to do io with $wor?"
+	esac
+
+	running_threads=$threads
+	oid=`lctl --device "\$"$name create $threads | \
+		awk '/1 is object id/ { print $6 }'`
+	# XXX need to deal with errors
+	running_oids[$id]=$oid
+}
+echo_filter_start() {
+	local threads=$1
+	local iosize=$2
+	local wor=$3
+	local id=$4
+	local name="echo_$id"
+	local pages=$(($io_len / 4))
+
+	case "$wor" in
+		w) args="-i 0 -w" ;;
+		r) args="-i 1 -w" ;;
+		*) die "asked to do io with $wor?"
+	esac
+
+	echo lctl --threads $threads v "\$"$name \
+		test_brw 1 w v $pages ${running_oids[$i]} p$iosize
+}
+echo_filter_result() {
+	local output=$1
+	local total=0
+	local mbs
+
+	for mbs in `awk '($8=="MB/s):"){print substr($7,2)}' < $output`; do
+		total=$(do_bc $total + $mbs)
+	done
+	echo $total
+}
+echo_filter_cleanup() {
+	local id=$1
+	local wor=$2
+	local threads=$3
+	local name="echo_$id"
+
+	case "$wor" in
+		w) return ;;
+		r) ;;
+		*) die "asked to do io with $wor?"
+	esac
+
+	lctl --device "\$"$name destroy ${running_oids[$i]} $threads
+	unset running_oids[$i]
+}
+echo_filter_finish() {
+	local index=$1
+	# leave real work for _teardown
+}
+echo_filter_teardown() {
+	cleanup_echo_filter
+}
+
+######################################################################
+# the iteration that drives the tests
+
+test_one() {
+	local test=$1
+	local my_x=$2
+	local my_y=$3
+	local threads=$4
+	local iosize=$5
+	local wor=$6
+	local vmstat_pid
+	local vmstat_log="$tmpdir/vmstat.log"
+	local opref="$test-$threads-$iosize-$wor"
+
+	for i in `seq 0 $last_block`; do
+		${test}_setup $i $wor $threads
+	done
+
+	echo $test with $threads threads
+
+	# start up vmstat and record its pid
+	echo starting `date`
+        nice -19 vmstat 1 > $vmstat_log 2>&1 &
+	[ $? = 0 ] || die "vmstat failed"
+	vmstat_pid=$!
+	pid_now_running $vmstat_pid
+
+	# start all the tests.  each returns a pid to wait on
+	pids=""
+	for i in `seq 0 $last_block`; do
+		cmd=`${test}_start $threads $iosize $wor $i`
+		$cmd > $tmpdir/$i 2>&1 &
+		local pid=$!
+		pids="$pids $pid"
+		pid_now_running $pid
+	done
+
+	echo -n waiting on pids $pids:
+	for p in $pids; do
+		wait $p
+		echo -n .
+		pid_has_stopped $p
+	done
+	echo
+
+	# stop vmstat and get cpu use from it
+	kill $vmstat_pid
+	echo stopping `date`
+	pid_has_stopped $vmstat_pid
+	cpu=$(mean_stddev $(awk \
+	      '(NR > 3 && NF == 16 && $16 != "id" )	\
+		{print 100 - $16}' < $vmstat_log) )
+	save_output $vmstat_log $opref.vmstat
+
+	# record each index's test results and sum them
+	thru=0
+	line=""
+	for i in `seq 0 $last_block`; do
+		local t=`${test}_result $tmpdir/$i`
+		save_output $tmpdir/$i $opref.$i
+		echo test returned "$t"
+		line="$line $t"
+		# some tests return mean:stddev per thread, filter out stddev
+		thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
+	done
+	line="("`commas $line`")"
+
+	for i in `seq 0 $last_block`; do
+		${test}_cleanup $i $wor $threads
+	done
+
+	# tabulate the results
+	echo $test did $thru mb/s with $cpu
+	table_set $test $my_x $my_y $thru
+	table_set $test $(($my_x + 1)) $my_y $cpu
+	table_set $test $(($my_x + 2)) $my_y $line
+}
+
+test_iterator() {
+	local test=$1
+	local thr=$min_threads
+	local cleanup=""
+	local rc=0
+	local i
+	
+	for i in `seq 0 $last_block`; do
+		if ! ${test}_config $i; then
+			echo "couldn't config $test for bdev ${blocks[$i]}"
+			echo "skipping $test for all block devices"
+			cleanup=$(($i - 1))
+			rc=1;
+			break
+		fi
+	done
+
+	for i in `seq 0 $last_block`; do
+		# don't prepare if _config already failed
+		[ ! -z "$cleanup" ] && break
+		if ! ${test}_prepare $i; then
+			echo "couldn't prepare $test for bdev ${blocks[$i]}"
+			echo "skipping $test for all block devices"
+			cleanup=$(($i - 1))
+			rc=1;
+			break
+		fi
+	done
+
+	while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
+		for iosize in 64 128; do
+			table_set $test 0 $cur_y $thr
+			table_set $test 1 $cur_y $iosize
+			table_set $test 2 $cur_y "|"
+
+			for wor in w r; do
+				table_set $test 3 $cur_y $wor
+				test_one $test 4 $cur_y $thr $iosize $wor
+				cur_y=$(($cur_y + 1))
+			done
+		done
+		thr=$(($thr + $thr))
+	done
+
+	[ -z "$cleanup" ] && cleanup=$last_block
+
+	if [ "$cleanup" != -1 ]; then
+		for i in `seq $cleanup 0`; do
+			${test}_finish $i
+		done
+	fi
+
+	${test}_teardown
+
+	return $rc;
+}
+
+while getopts ":d:b:l:t:T:" opt; do
+        case $opt in
+                b) block=$OPTARG                 ;;
+                d) output_dir=$OPTARG                 ;;
+                l) io_len=$OPTARG			;;
+                t) min_threads=$OPTARG			;;
+                T) max_threads=$OPTARG			;;
+                \?) usage
+        esac
+done
+
+if [ -z "$io_len" ]; then
+	io_len=`awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo`
+	[ -z "$io_len" ] && die "couldn't determine the amount of memory"
+fi
+
+if [ ! -z "$output_dir" ]; then
+	[ ! -e "$output_dir" ] && "output dir $output_dir doesn't exist"
+	[ ! -d "$output_dir" ] && "output dir $output_dir isn't a directory"
+fi
+
+block=`echo $block | sed -e 's/,/ /g'`
+[ -z "$block" ] && usage "need block devices"
+
+[ $min_threads -gt $max_threads ] && \
+	die "min threads $min_threads must be <= min_threads $min_threads"
+
+last_block=-1
+for b in $block; do
+	[ ! -e $b ] && die "block device file $b doesn't exist"
+	[ ! -b $b ] && die "$b isn't a block device"
+	last_block=$(($last_block + 1))
+	blocks[$last_block]=$b
+done	
+
+tmpdir=`mktemp -d /tmp/.surveyXXXXXX` || die "couldn't create tmp dir"
+
+echo each test will operate on $io_len"k"
+
+tests="sgp_dd ext2_iozone echo_filter"
+test_results=""
+
+for t in $tests; do
+
+	table_set $t 0 0 "T"
+	table_set $t 1 0 "L"
+	table_set $t 2 0 "|"
+	table_set $t 3 0 "W"
+	table_set $t 5 0 "C:S"
+	table_set $t 6 0 "B"
+	cur_y=1;
+
+	if ! test_iterator $t; then
+		continue;
+	fi
+	test_results="$test_results $t"
+done
+
+[ ! -z "$test_results" ] && (
+	echo
+	echo "T = number of concurrent threads per device"
+	echo "L = base io operation length, in KB"
+	echo "W/O/R = write/overwrite/read throughput, in MB/s"
+	echo "C = percentage CPU used, both user and system"
+	echo "S = standard deviation in cpu use"
+	echo "B = per-block results: ("`echo ${blocks[*]} | sed -e 's/ /,/g'`")"
+	echo
+)
+
+for t in $test_results; do
+	${t}_banner
+	table_dump $t
+done
-- 
1.8.3.1