Whamcloud - gitweb
a block device profiling script which is built to reliably run different
[fs/lustre-release.git] / lustre / scripts / bdev-io-survey.sh
1 #!/bin/bash
2
3 # for now all the units are in 'k', but we could introduce some helpers
4 # would be nice to run tests in the background and trap signals and kill
5 #
6 #  todo:
7 #       make sure devices aren't in use before going to town
8 #       really use threads with iozone
9 #       look into what sgp_dd is really doing, update arguments
10 #       rename config/prepare/setup/cleanup/finish/teardown
11 #       do something with sf and fpp iterating
12 #       discard first vmstat line
13 #
14
# scratch directory for one script run; removed again by cleanup()
tmpdir=
# background pids to kill, and mount points to unmount, while cleaning up
declare -a cleanup_pids
declare -a cleanup_mounts
# next table row to fill in.  XXX this is a wart that could go
cur_y=0

# default bounds for the number of threads per device
min_threads=1
max_threads=4

# optional directory that receives the saved per-run output files
output_dir=
30  
# Print all arguments as one message on stderr and exit with status 1.
# The message is emitted verbatim: it is not word-split or glob-expanded.
die() {
        printf '%s\n' "$*" 1>&2
        exit 1
}
# Remove each regular file given as an argument.
# Paths that don't exist are skipped; a path that exists but isn't a
# regular file, or that rm can't remove, ends the script through die.
rm_or_die() {
        local path

        for path in "$@"; do
                [ -e "$path" ] || continue
                [ -f "$path" ] || die "needed to remove non-file $path"
                rm -f -- "$path" || die "couldn't remove $path"
        done
}
# Move file $1 into $output_dir under the name $2.
# Does nothing (and succeeds) when no output directory was requested.
save_output() {
        [ -n "$output_dir" ] || return 0
        mv -f -- "$1" "$output_dir/$2"
}
# Exit handler: kill recorded background pids, tear down the echo_client
# state, unmount recorded mounts and finally remove the temp directory.
cleanup() {
        local pid mnt

        for pid in "${cleanup_pids[@]}"; do
                kill "$pid"
        done
        cleanup_echo_filter
        for mnt in "${cleanup_mounts[@]}"; do
                umount -f "$mnt"
        done
        # only remove something that has the length of the mktemp template
        # used for tmpdir (/tmp/.surveyXXXXXX is 18 characters) and that is
        # actually a directory
        [ ${#tmpdir} -eq 18 ] && [ -d "$tmpdir" ] && rm -rf -- "$tmpdir"
}
trap cleanup EXIT
56
# Record a background pid in cleanup_pids, using the pid itself as index.
pid_now_running() {
        cleanup_pids[$1]=$1
}
# Forget a pid that was recorded in cleanup_pids.
pid_has_stopped() {
        local pid=$1

        # quoted so the subscript can't be glob-expanded against file names
        unset "cleanup_pids[$pid]"
}
65                                                                                 
# Print the arguments joined into one line with every space turned into
# a comma.  The values are never glob-expanded.
commas() {
        local joined="$*"

        printf '%s\n' "${joined// /,}"
}
# Evaluate the arguments as a bc expression with two decimal places.
do_bc() {
        bc <<< "scale=2; $*"
}
# Print "mean:stddev" for the numbers given as arguments.
#   no arguments   -> prints "??"
#   one argument   -> prints "<value>:0"
#   otherwise      -> mean and the sample standard deviation (n - 1 divisor)
# All non-integer arithmetic goes through do_bc.
mean_stddev() {
        local points=$*
        local p dev
        local avg=0
        local num=0
        local tmp=0

        for p in $points; do
                avg=$(do_bc "$avg + $p")
                num=$((num + 1))
        done
        case $num in
                0) echo '??' ; return ;;
                1) echo "$avg:0" ; return ;;
        esac

        avg=$(do_bc "$avg / $num")
        for p in $points; do
                dev=$(do_bc "($p - $avg) ^ 2")
                tmp=$(do_bc "$tmp + $dev")
        done
        tmp=$(do_bc "sqrt ( $tmp / ($num - 1) )")
        echo "$avg:$tmp"
}
95
# Print the given message followed by the option summary, then exit.
# usage is only reached for bad invocations, so the exit status is 1.
usage() {
        printf '%s\n' "$*"
        echo "       -b <block device to profile>"
        echo "       -d <summary output directory>"
        echo "       -l <max io len>"
        echo "       -t <minimum number of threads per device>"
        echo "       -T <maximum number of threads per device>"
        exit 1
}
105
106 # some cute code for handling tables whose columns fit
# Raise the variable named by $1 to $2 if $2 is larger than its current
# value; an unset or empty variable counts as 0.
set_max() {
        local target=$1
        local val=$2

        if [ "$val" -gt "${!target:-0}" ]; then
                # assign by name without passing the value through eval
                printf -v "$target" '%s' "$val"
        fi
}
# Store a cell in the table named $1.
#   $2 - column, $3 - row, $4 - value
# The cell lives in the variable _table_<name>_<row>_<col>.  The widest
# value seen per column and the table's column/row counts are kept up to
# date through set_max.
table_set() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local val=$4

        # printf -v stores the value verbatim, quotes and all
        printf -v "${name}_${row}_${col}" '%s' "$val"

        set_max "${name}_${col}_longest" "${#val}"
        set_max "${name}_num_col" "$(($col + 1))"
        set_max "${name}_num_row" "$(($row + 1))"
}
128                                                                                 
# Print the cell at column $2, row $3 of the table named $1.
# An unset cell prints as an empty line.
table_get() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local ref="${name}_${row}_${col}"

        printf '%s\n' "${!ref}"
}
136                                                                                 
# Print the table named $1, one row per line, every column left-aligned
# and padded to the longest value that table_set recorded for it.
# Columns that never received a value are left out entirely.
table_dump() {
        local name="_table_$1"
        local num_col num_row
        local fmt=""
        local ref width sep
        local x y
        local -a cells

        ref="${name}_num_col"
        num_col="${!ref:-0}"
        ref="${name}_num_row"
        num_row="${!ref:-0}"

        # build the printf format from the widths of the non-empty columns;
        # the last column is followed by a newline instead of a space
        sep=" "
        for ((x = 0; x < num_col; x++)); do
                ref="${name}_${x}_longest"
                width=${!ref:-0}
                [ "$width" -eq 0 ] && continue

                [ "$x" -eq $((num_col - 1)) ] && sep='\n'

                fmt="$fmt%-${width}s$sep"
        done

        # nothing in the table to print
        [ -z "$fmt" ] && return

        # NOTE(review): the row loop includes index num_row, which is one
        # past the last row and so prints a whitespace-only line after the
        # table.  This may be an off-by-one; it is kept so that the output
        # is unchanged.
        for ((y = 0; y <= num_row; y++)); do
                cells=()
                for ((x = 0; x < num_col; x++)); do
                        # skip this element if the column is empty
                        ref="${name}_${x}_longest"
                        [ "${!ref:-0}" -eq 0 ] && continue

                        # fill this cell with the value or '' for printf
                        ref="${name}_${y}_${x}"
                        cells+=("${!ref:-}")
                done
                # cells are passed as real arguments, so any character
                # (including quotes) is printed as-is
                printf "$fmt" "${cells[@]}"
        done
}
181
182 ######################################################################
183 # the sgp_dd tests
# one-line description of the sgp_dd test
sgp_dd_banner() {
        printf '%s\n' "sgp_dd using dio=1 and thr="
}
# nothing to configure.  it could be making sure that the block dev
# isn't in use by something else
sgp_dd_config() {
        return 0
}
# Check that sgp_dd can be run.  Returns 0 if it was found, 1 otherwise.
# Uses the command -v builtin rather than the external `which`; like
# `which`, it prints what it found on stdout.
sgp_dd_prepare() {
        if ! command -v sgp_dd; then
                echo "can't find sgp_dd binary"
                return 1
        fi
        return 0
}
# nothing to set up per run.  it could be making sure that the block dev
# isn't in use by something else
sgp_dd_setup() {
        return 0
}
# Print the sgp_dd command line for one device.
#   $1 - thread count, $2 - io size in k, $3 - w (write) or r (read),
#   $4 - index into the blocks array
# Writes copy /dev/zero onto the device, reads copy the device to
# /dev/null; io_len / io size gives the block count.
sgp_dd_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local i=$4
        local bdev=${blocks[$i]}
        local src dst

        case "$wor" in
                w) src=/dev/zero ; dst=$bdev ;;
                r) src=$bdev ; dst=/dev/null ;;
                *) die "asked to do io with $wor?"
        esac
        echo sgp_dd if=$src of=$dst bs=$iosize"k" \
                count=$(($io_len / $iosize)) time=1 dio=1 thr=$threads
}
# Print the throughput found in the sgp_dd output file $1: for every
# line whose last field is "MB/sec", the field just before it.
sgp_dd_result() {
        local output=$1

        awk '($(NF) == "MB/sec") {print $(NF-1)}' < "$output"
}
# sgp_dd leaves nothing behind, so the remaining hooks are no-ops
sgp_dd_cleanup() {
        return 0
}
sgp_dd_finish() {
        return 0
}
sgp_dd_teardown() {
        return 0
}
237
238 ######################################################################
239 # the iozone tests
# one-line description of the iozone test
ext2_iozone_banner() {
        printf '%s\n' "iozone -I on a clean ext2 fs"
}
# nothing to configure
ext2_iozone_config() {
        return 0
}
# Make a fresh ext2 filesystem on block device number $1 and mount it on
# $tmpdir/mount_$1.  The mount point is recorded in cleanup_mounts.
# Returns 0 on success and 1 as soon as any step fails.
ext2_iozone_prepare() {
        local index=$1
        local bdev=${blocks[$index]}
        local mntpnt=$tmpdir/mount_$index

        if ! command -v iozone; then
                echo "iozone binary not found in PATH"
                return 1
        fi
        if ! command -v mke2fs; then
                echo "mke2fs binary not found in PATH"
                return 1
        fi

        if ! mkdir -p "$mntpnt" ; then
                echo "$mntpnt isn't a directory?"
                # without a mount point there is no use in going on
                return 1
        fi

        echo making ext2 filesystem on $bdev
        if ! mke2fs -b 4096 "$bdev"; then
                echo "mke2fs failed"
                return 1;
        fi

        if ! mount -t ext2 "$bdev" "$mntpnt"; then
                echo "couldn't mount $bdev on $mntpnt"
                return 1;
        fi

        cleanup_mounts[$index]="$mntpnt"
        return 0
}
# Per-run setup for device $1: before a write run (w) the iozone file
# of an earlier run is removed; a read run (r) needs the file kept.
ext2_iozone_setup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        case "$wor" in
                w) rm -f -- "$f" ;;
                r) ;;
                *) die "asked to do io with $wor?"
        esac
}
# Print the iozone command line for device $4.
#   $1 - thread count (not used yet), $2 - record size in k,
#   $3 - w (iozone test 0) or r (iozone test 1)
# The file size is io_len k and the file is $tmpdir/mount_$4/iozone.
ext2_iozone_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local target="$tmpdir/mount_$id/iozone"
        local mode

        case "$wor" in
                w) mode="-i 0 -w" ;;
                r) mode="-i 1 -w" ;;
                *) die "asked to do io with $wor?"
        esac

        echo iozone "$mode -r ${iosize}k -s ${io_len}k -I -f $target"
}
# Print the throughput from the iozone output file $1: the third field
# of the line that follows the "reclen" header line, divided by 1024
# with do_bc.
ext2_iozone_result() {
        local output=$1
        local kps

        kps=$(awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
                < "$output")
        do_bc "$kps / 1024"
}
# Per-run cleanup for device $1: the iozone file is kept after a write
# run (w) and removed after a read run (r).
ext2_iozone_cleanup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        case "$wor" in
                w) ;;
                r) rm -f -- "$f" ;;
                *) die "asked to do io with $wor?"
        esac
}
# Unmount the filesystem of device number $1 and drop its mount point
# from cleanup_mounts.
ext2_iozone_finish() {
        local index=$1
        local mntpnt=$tmpdir/mount_$index

        umount -f "$mntpnt"
        # quoted so the subscript can't be glob-expanded against file names
        unset "cleanup_mounts[$index]"
}
# nothing left to tear down once every device has been finished
ext2_iozone_teardown() {
        return 0
}
333
334 ######################################################################
335 # the lctl test_brw via the echo_client on top of the filter
336
# the echo_client setup is nutty enough to warrant its own cleanup state:
# the config handed to lconf, the modules we loaded, and the names of the
# echo_client devices that were attached
running_config=
running_modules=
declare -a running_names
341
# Undo everything the echo_filter test set up: destroy the objects that
# are still recorded in running_oids, clean up and detach the devices in
# running_names, unload the modules in running_modules and clean up the
# lconf config in running_config.  All of that state is emptied, so
# calling this again is a no-op.
cleanup_echo_filter() {
        local i n m

        # last_block may not be set yet when this runs from the exit trap
        for ((i = 0; i <= ${last_block:--1}; i++)); do
                [ -z "${running_oids[$i]}" ] && continue
                lctl --device "\$"echo_$i destroy ${running_oids[$i]} \
                        $running_threads
        done
        # empty the whole array; assigning "" would only blank element 0
        running_oids=()

        for n in "${running_names[@]}"; do
                [ -n "$n" ] || continue
# I can't believe leading whitespace matters here.
lctl << EOF
cfg_device $n
cleanup
detach
quit
EOF
        done
        running_names=()

        for m in $running_modules; do
                rmmod $m
        done
        running_modules=""

        if [ -n "$running_config" ]; then
                lconf --cleanup "$running_config"
        fi
        running_config=""
}
371
# one-line description of the echo_client test
echo_filter_banner() {
        printf '%s\n' "test_brw on the echo_client on the filter"
}
# Add block device number $1 as an ost to $tmpdir/config.xml with lmc.
# The call for device 0 also adds the localhost net node first.
# Returns 1 if lmc, lconf or lctl is missing or an lmc call fails,
# 0 otherwise.
echo_filter_config() {
        local index=$1
        local bdev=${blocks[$index]}
        local config="$tmpdir/config.xml"

        if ! command -v lmc; then
                echo "lmc binary not found in PATH"
                return 1
        fi
        if ! command -v lconf; then
                echo "lconf binary not found in PATH"
                return 1
        fi
        if ! command -v lctl; then
                echo "lctl binary not found in PATH"
                return 1
        fi

        if [ "$index" = 0 ]; then
                if ! lmc -m "$config" --add net  \
                        --node localhost --nid localhost --nettype tcp; then
                        echo "error adding localhost net node"
                        return 1
                fi
        fi

        if ! lmc -m "$config" --add ost --ost "ost_$index" --node localhost \
                        --fstype ext3 --dev "$bdev" --journal_size 400; then
                echo "error adding $bdev to config with lmc"
                return 1
        fi

        # it would be nice to be able to ask lmc to setup an echo client
        # to the filter here.  --add echo_client assumes osc
        return 0
}
# Attach an echo_client named echo_$1 (uuid echo_$1_uuid) on top of
# ost_$1.  The call for device 0 first brings the configuration up with
# lconf --reformat and loads the obdecho module if /proc/modules doesn't
# list it.  What was started is recorded in running_config,
# running_modules and running_names.
# Returns 1 on any failure.
echo_filter_prepare() {
        local index=$1
        local config="$tmpdir/config.xml"
        local name="echo_$index"
        # braces are needed: $index_uuid would be a different variable
        local uuid="echo_${index}_uuid"

        if [ "$index" = 0 ]; then
                if ! lconf --reformat "$config"; then
                        echo "error setting up with lconf"
                        return 1;
                fi
                running_config="$config"
                if ! grep -q '^obdecho\>' /proc/modules; then
                        if ! modprobe obdecho; then
                                echo "error running modprobe obdecho"
                                return 1;
                        fi
                        running_modules="obdecho"
                fi
        fi

lctl << EOF
        newdev
        attach echo_client $name $uuid
        setup ost_$index
        quit
EOF
        if [  $? != 0 ]; then
                echo "error setting up echo_client $name against ost_$index"
                return 1
        fi
        running_names[$index]=$name
}
# Per-run setup for device $1.
#   $2 - w or r, $3 - thread count
# Nothing is done before a read run.  Before a write run $3 objects are
# created on the echo_client; the object id parsed from lctl's output is
# stored in running_oids[$1] and the thread count in running_threads.
# Returns 1 if no object id could be found in lctl's output.
echo_filter_setup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"
        local oid

        case "$wor" in
                w) ;;
                r) return ;;
                *) die "asked to do io with $wor?"
        esac

        running_threads=$threads
        oid=$(lctl --device "\$"$name create $threads | \
                awk '/1 is object id/ { print $6 }')
        if [ -z "$oid" ]; then
                echo "couldn't create $threads objects on $name" 1>&2
                return 1
        fi
        running_oids[$id]=$oid
}
# Print the lctl test_brw command line for device $4.
#   $1 - thread count, $2 - io size, $3 - w (write) or r (read)
# io_len / 4 is passed as the page count and running_oids[$4] as the
# object to work on.
echo_filter_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local name="echo_$id"
        local pages=$(($io_len / 4))

        case "$wor" in
                w|r) ;;
                *) die "asked to do io with $wor?"
        esac

        # the direction is handed on to test_brw so that a read run reads.
        # NOTE(review): p$iosize passes the io size in k where test_brw
        # appears to take a page count - confirm the intended unit.
        echo lctl --threads $threads v "\$"$name \
                test_brw 1 $wor v $pages ${running_oids[$id]} p$iosize
}
# Print the sum of the throughput figures in the lctl output file $1.
# A line counts when its 8th field is "MB/s):"; the figure is its 7th
# field with the first character dropped.  The sum is built with do_bc.
echo_filter_result() {
        local output=$1
        local total=0
        local mbs

        for mbs in $(awk '($8=="MB/s):"){print substr($7,2)}' < "$output"); do
                total=$(do_bc $total + $mbs)
        done
        echo $total
}
# Per-run cleanup for device $1.
#   $2 - w or r, $3 - thread count
# The objects are kept after a write run; after a read run they are
# destroyed and running_oids[$1] is dropped.
echo_filter_cleanup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"

        case "$wor" in
                w) return ;;
                r) ;;
                *) die "asked to do io with $wor?"
        esac

        # index with this function's own device id, not the caller's $i
        lctl --device "\$"$name destroy ${running_oids[$id]} $threads
        unset "running_oids[$id]"
}
# per-device finish hook: a no-op, the real work is left for _teardown
echo_filter_finish() {
        return 0
}
# tear the whole echo_client setup down in one go
echo_filter_teardown() {
        cleanup_echo_filter
}
512
513 ######################################################################
514 # the iteration that drives the tests
515
# Run one measurement of a test on all block devices at once and put
# the results in the test's table.
#   $1 - test name (prefix of the _setup/_start/_result/_cleanup hooks)
#   $2 - first table column to fill, $3 - table row
#   $4 - threads per device, $5 - io size, $6 - w or r
# Columns $2, $2+1 and $2+2 receive the summed throughput, the cpu use
# as mean:stddev and the per-device results.
test_one() {
        local test=$1
        local my_x=$2
        local my_y=$3
        local threads=$4
        local iosize=$5
        local wor=$6
        local vmstat_pid
        local vmstat_log="$tmpdir/vmstat.log"
        local opref="$test-$threads-$iosize-$wor"
        local i p pid t
        local cmd pids cpu thru line

        for i in $(seq 0 $last_block); do
                ${test}_setup $i $wor $threads
        done

        echo $test with $threads threads

        # start up vmstat and record its pid.  ($? right after a command
        # put in the background is always 0, so there is nothing to check)
        echo starting $(date)
        nice -19 vmstat 1 > "$vmstat_log" 2>&1 &
        vmstat_pid=$!
        pid_now_running $vmstat_pid

        # start all the tests.  each _start hook prints the command to run
        pids=""
        for i in $(seq 0 $last_block); do
                cmd=$(${test}_start $threads $iosize $wor $i)
                # unquoted on purpose: the printed command is split into
                # words, nothing in it is evaluated a second time
                $cmd > "$tmpdir/$i" 2>&1 &
                pid=$!
                pids="$pids $pid"
                pid_now_running $pid
        done

        echo -n waiting on pids $pids:
        for p in $pids; do
                wait $p
                echo -n .
                pid_has_stopped $p
        done
        echo

        # stop vmstat and get cpu use from it
        kill $vmstat_pid
        echo stopping $(date)
        pid_has_stopped $vmstat_pid
        # cpu use is 100 minus the 16th (idle) field.
        # NOTE(review): only lines with exactly 16 fields are counted -
        # confirm this matches the installed vmstat's column layout.
        cpu=$(mean_stddev $(awk \
              '(NR > 3 && NF == 16 && $16 != "id") {print 100 - $16}' \
              < "$vmstat_log") )
        save_output "$vmstat_log" "$opref.vmstat"

        # record each index's test results and sum them
        thru=0
        line=""
        for i in $(seq 0 $last_block); do
                t=$(${test}_result "$tmpdir/$i")
                save_output "$tmpdir/$i" "$opref.$i"
                echo test returned "$t"
                line="$line $t"
                # some tests return mean:stddev per thread, filter out stddev
                thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
        done
        line="("$(commas $line)")"

        for i in $(seq 0 $last_block); do
                ${test}_cleanup $i $wor $threads
        done

        # tabulate the results
        echo $test did $thru mb/s with $cpu
        table_set $test $my_x $my_y "$thru"
        table_set $test $(($my_x + 1)) $my_y "$cpu"
        table_set $test $(($my_x + 2)) $my_y "$line"
}
590
# Drive one test over every thread count and io size.
#   $1 - test name (prefix of the _config/_prepare/_finish/_teardown hooks)
# Every device is configured and prepared first; if either step fails
# the measurements are skipped.  The thread count starts at min_threads
# and doubles while it is <= max_threads; each combination is measured
# for w and then r, one table row each (cur_y is advanced).
# Afterwards _finish runs for the devices from the last handled one down
# to 0, then _teardown.  Returns 0, or 1 if config/prepare failed.
test_iterator() {
        local test=$1
        local thr=$min_threads
        local cleanup=""
        local rc=0
        local i iosize wor

        for i in $(seq 0 $last_block); do
                if ! ${test}_config $i; then
                        echo "couldn't config $test for bdev ${blocks[$i]}"
                        echo "skipping $test for all block devices"
                        cleanup=$(($i - 1))
                        rc=1;
                        break
                fi
        done

        for i in $(seq 0 $last_block); do
                # don't prepare if _config already failed
                [ -n "$cleanup" ] && break
                if ! ${test}_prepare $i; then
                        echo "couldn't prepare $test for bdev ${blocks[$i]}"
                        echo "skipping $test for all block devices"
                        cleanup=$(($i - 1))
                        rc=1;
                        break
                fi
        done

        while [ -z "$cleanup" ] && [ "$thr" -le "$max_threads" ]; do
                for iosize in 64 128; do
                        table_set $test 0 $cur_y $thr
                        table_set $test 1 $cur_y $iosize
                        table_set $test 2 $cur_y "|"

                        for wor in w r; do
                                table_set $test 3 $cur_y $wor
                                test_one $test 4 $cur_y $thr $iosize $wor
                                cur_y=$(($cur_y + 1))
                        done
                done
                thr=$(($thr + $thr))
        done

        [ -z "$cleanup" ] && cleanup=$last_block

        # count down explicitly: seq with first > last prints nothing
        # unless it is given a negative increment.  a cleanup of -1 means
        # there is nothing to finish.
        for ((i = cleanup; i >= 0; i--)); do
                ${test}_finish $i
        done

        ${test}_teardown

        return $rc;
}
647
# parse the command line
while getopts ":d:b:l:t:T:" opt; do
        case $opt in
                b) block=$OPTARG                 ;;
                d) output_dir=$OPTARG                 ;;
                l) io_len=$OPTARG                       ;;
                t) min_threads=$OPTARG                  ;;
                T) max_threads=$OPTARG                  ;;
                # with a leading ':' in the option string getopts reports
                # a missing argument as ':' and an unknown option as '?'
                :) usage "option -$OPTARG needs an argument" ;;
                \?) usage "unknown option -$OPTARG" ;;
        esac
done

# without -l the io length defaults to the MemTotal figure of /proc/meminfo
if [ -z "$io_len" ]; then
        io_len=$(awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo)
        [ -z "$io_len" ] && die "couldn't determine the amount of memory"
fi

if [ -n "$output_dir" ]; then
        [ ! -e "$output_dir" ] && die "output dir $output_dir doesn't exist"
        [ ! -d "$output_dir" ] && die "output dir $output_dir isn't a directory"
fi

# -b takes a comma separated list of devices
block=${block//,/ }
[ -z "$block" ] && usage "need block devices"

# the thread count is doubled every round, so it has to start above zero
[ "$min_threads" -ge 1 ] 2>/dev/null || \
        die "min threads $min_threads must be a positive integer"
[ "$min_threads" -gt "$max_threads" ] && \
        die "min threads $min_threads must be <= max threads $max_threads"

# blocks[0..last_block] holds the devices to test
last_block=-1
for b in $block; do
        [ ! -e "$b" ] && die "block device file $b doesn't exist"
        [ ! -b "$b" ] && die "$b isn't a block device"
        last_block=$(($last_block + 1))
        blocks[$last_block]=$b
done
682
tmpdir=$(mktemp -d /tmp/.surveyXXXXXX) || die "couldn't create tmp dir"

echo each test will operate on $io_len"k"

tests="sgp_dd ext2_iozone echo_filter"
test_results=""

# run every test; only the ones that ran to completion are reported
for t in $tests; do

        # header row of this test's table
        table_set $t 0 0 "T"
        table_set $t 1 0 "L"
        table_set $t 2 0 "|"
        table_set $t 3 0 "W"
        table_set $t 5 0 "C:S"
        table_set $t 6 0 "B"
        cur_y=1;

        if test_iterator $t; then
                test_results="$test_results $t"
        fi
done
705
# print the legend once if there is at least one table to show
if [ -n "$test_results" ]; then
        echo
        echo "T = number of concurrent threads per device"
        echo "L = base io operation length, in KB"
        echo "W/O/R = write/overwrite/read throughput, in MB/s"
        echo "C = percentage CPU used, both user and system"
        echo "S = standard deviation in cpu use"
        echo "B = per-block results: ("$(echo ${blocks[*]} | sed -e 's/ /,/g')")"
        echo
fi

# then each test's banner followed by its table
for t in $test_results; do
        ${t}_banner
        table_dump $t
done