Whamcloud - gitweb
2d092ac99216d8947fd5e4227896345e81efd581
[fs/lustre-release.git] / lustre / scripts / bdev-io-survey.sh
1 #!/bin/bash
2
3 # for now all the units are in 'k', but we could introduce some helpers
4 # would be nice to run tests in the background and trap signals and kill
5 #
6 #  todo:
7 #       make sure devices aren't in use before going to town
8 #       really use threads with iozone
9 #       look into what sgp_dd is really doing, update arguments
10 #       rename config/prepare/setup/cleanup/finish/teardown
11 #       do something with sf and fpp iterating
12 #       discard first vmstat line
13 #
14
# scratch directory, created once per run and removed again by cleanup()
tmpdir=""
# pids of background jobs that cleanup() has to kill, and mount points
# inside tmpdir that it has to unmount before the directory is removed
declare -a cleanup_pids cleanup_mounts
# next table row to fill in.  XXX this is a wart that could go
cur_y=0

# option defaults
possible_tests="sgp_dd ext2_iozone echo_filter"
run_tests=$possible_tests
min_threads=1
max_threads=4

# optional output directory
output_dir=
32  
# Print the arguments as one message on stderr and exit with status 1.
# The message is printed verbatim: no word splitting, no glob expansion and
# no interpretation of a leading "-n"/"-e" by echo.
die() {
        printf '%s\n' "$*" 1>&2
        exit 1
}
# Remove every regular file named in the arguments.
# Paths that don't exist are skipped; anything that exists but isn't a
# regular file, or can't be removed, aborts the script through die().
rm_or_die() {
        local path

        for path in "$@"; do
                [ -e "$path" ] || continue
                [ -f "$path" ] || die "needed to remove non-file $path"
                rm -f -- "$path" || die "couldn't remove $path"
        done
}
# Keep a log file if the user asked for an output directory.
#   $1 - path of the file to keep
#   $2 - name to store it under inside $output_dir
# Does nothing, successfully, when no output directory was given.
save_output() {
        if [ -n "$output_dir" ]; then
                mv -f -- "$1" "$output_dir/$2"
        fi
}
# EXIT handler: kill the background jobs that are still registered, undo the
# echo_client setup, unmount whatever is still mounted under tmpdir and then
# remove tmpdir itself.
cleanup() {
        local pid mnt

        for pid in "${cleanup_pids[@]}"; do
                kill "$pid"
        done
        cleanup_echo_filter
        for mnt in "${cleanup_mounts[@]}"; do
                umount -f "$mnt"
        done
        # paranoia before rm -rf: only remove a directory whose name has the
        # length of our mktemp template (/tmp/.surveyXXXXXX, 18 characters)
        if [ ${#tmpdir} -eq 18 ] && [ -d "$tmpdir" ]; then
                rm -rf -- "$tmpdir"
        fi
}
trap cleanup EXIT
58
# Register a background pid so that cleanup() kills it if we exit early.
# The pid doubles as the array index, which makes removal trivial.
pid_now_running() {
        local pid=$1
        cleanup_pids[$pid]=$pid
}
# Forget a pid once it has been waited for or killed.
pid_has_stopped() {
        local pid=$1
        # the subscript must be quoted: unquoted, "cleanup_pids[7]" is a glob
        # pattern that can match a file called cleanup_pids7 in the cwd
        unset "cleanup_pids[$pid]"
}
67                                                                                 
# Join the whitespace separated words of the arguments with commas.
commas() {
        # the unquoted $* is deliberate: it squeezes runs of blanks and drops
        # leading/trailing ones before the remaining spaces become commas
        echo $* | tr ' ' ','
}
# Evaluate the arguments as a single bc expression with two decimal places
# and print the result.
do_bc() {
        bc <<< "scale=2; $*"
}
# Print "<mean>:<stddev>" for the numbers given as arguments.
#   no numbers  -> prints '??'
#   one number  -> prints "<number>:0"
#   otherwise   -> mean and sample standard deviation (divisor n - 1)
# All arithmetic goes through do_bc, so results carry its precision.
mean_stddev() {
        local points=$*
        local p dev avg
        local sum=0
        local num=0
        local squares=0

        for p in $points; do
                sum=$(do_bc "$sum + $p")
                # counting is integer work, no need to fork bc for it
                num=$((num + 1))
        done
        case $num in
                0) echo '??' ; return ;;
                1) echo "$sum:0" ; return ;;
        esac

        avg=$(do_bc "$sum / $num")
        for p in $points; do
                dev=$(do_bc "($p - $avg) ^ 2")
                squares=$(do_bc "$squares + $dev")
        done
        echo "$avg:$(do_bc "sqrt ( $squares / ($num - 1) )")"
}
97
# Print an optional message followed by the option summary and exit.
# Every caller invokes this because the command line was unusable, so the
# text goes to stderr and the exit status is non-zero.
usage() {
        {
                printf '%s\n' "$*"
                echo "       -b <block device to profile>"
                echo "       -d <summary output directory>"
                echo "       -l <max io len>"
                echo "       -t <minimum number of threads per device>"
                echo "       -T <maximum number of threads per device>"
                echo "       -r <tests to run>"
        } 1>&2
        exit 1
}
108
109 # some cute code for handling tables whose columns fit
# Raise the variable named by $1 to $2 if $2 is larger than its current
# value; an unset variable counts as 0.
set_max() {
        local target=$1
        local val=$2

        [ $val -gt ${!target:-0} ] || return 0
        printf -v "$target" '%s' "$val"
}
# Store a cell in a named table and keep the table's bookkeeping current.
#   $1 - table name     $2 - column     $3 - row     $4 - value
# Cells live in globals called _table_<name>_<row>_<col>.  Next to them the
# table tracks the longest value per column and the number of rows and
# columns, which table_dump uses to size its output.
table_set() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local val=$4

        # printf -v stores the value verbatim; going through eval would
        # re-parse it and break on quotes
        printf -v "${name}_${row}_${col}" '%s' "$val"

        set_max "${name}_${col}_longest" ${#val}
        set_max "${name}_num_col" $((col + 1))
        set_max "${name}_num_row" $((row + 1))
}
131                                                                                 
# Print the value of one table cell (empty if it was never set).
#   $1 - table name     $2 - column     $3 - row
table_get() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local cell="${name}_${row}_${col}"

        printf '%s\n' "${!cell}"
}
139                                                                                 
# Print a table built with table_set, one line per row, every column padded
# to the width of its longest value.  Columns that never received a
# non-empty value are left out; cells that were never set print as blanks.
#   $1 - table name
table_dump() {
        local name="_table_$1"
        local num_col num_row
        local fmt=""
        local ref width x y
        local -a cols=()
        local -a cells

        ref="${name}_num_col"
        num_col=${!ref:-0}
        ref="${name}_num_row"
        num_row=${!ref:-0}

        # find the columns that have something in them and build a printf
        # format with one left-justified field for each
        for ((x = 0; x < num_col; x++)); do
                ref="${name}_${x}_longest"
                width=${!ref:-0}
                [ "$width" -eq 0 ] && continue

                cols+=("$x")
                fmt="${fmt:+$fmt }%-${width}s"
        done

        # nothing in the table to print
        [ -z "$fmt" ] && return
        # terminate the line here rather than on a specific column, so the
        # newline can't get lost when the last column happens to be empty
        fmt="$fmt\n"

        # the walk deliberately runs one row past the last one, as it always
        # has: that prints a blank padded line which separates tables
        for ((y = 0; y <= num_row; y++)); do
                cells=()
                for x in "${cols[@]}"; do
                        ref="${name}_${y}_${x}"
                        cells+=("${!ref:-}")
                done
                # values are passed as arguments, never re-parsed by the shell
                printf "$fmt" "${cells[@]}"
        done
}
184
185 ######################################################################
186 # the sgp_dd tests
# one-line description of the sgp_dd test, printed ahead of its table
sgp_dd_banner() {
        echo "sgp_dd using dio=1 and thr="
}
# per-device configuration hook; sgp_dd has nothing to configure
sgp_dd_config() {
        # it could be making sure that the block dev
        # isn't in use by something else
        return 0
}
# Check that the sgp_dd binary can be found.
# Returns 0 if it is in PATH (its location is printed), 1 otherwise.
sgp_dd_prepare() {
        # command -v is a shell builtin, so the check doesn't itself depend
        # on an external "which" being installed and in PATH
        if ! command -v sgp_dd; then
                echo "can't find sgp_dd binary"
                return 1
        fi
        return 0
}
# per-pass setup hook; sgp_dd needs nothing done before a pass
sgp_dd_setup() {
        # it could be making sure that the block dev
        # isn't in use by something else
        :
}
# Print the sgp_dd command line for one pass over one block device.
#   $1 - threads   $2 - io size in k   $3 - w or r   $4 - index into blocks[]
# Writes copy /dev/zero onto the device, reads copy the device to /dev/null;
# the count is chosen so that io_len k are transferred in total.
sgp_dd_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local i=$4
        local bdev=${blocks[$i]}
        local src dst

        if [ "$wor" = w ]; then
                src=/dev/zero
                dst=$bdev
        elif [ "$wor" = r ]; then
                src=$bdev
                dst=/dev/null
        else
                die "asked to do io with $wor?"
        fi
        echo sgp_dd if=$src of=$dst bs=${iosize}k \
                count=$(($io_len / $iosize)) time=1 dio=1 thr=$threads
}
# Extract the throughput from a captured sgp_dd log ($1): the number that
# sits right in front of a trailing "MB/sec".
sgp_dd_result() {
        local log=$1

        awk '$NF == "MB/sec" { print $(NF - 1) }' < $log
}
# the remaining sgp_dd hooks exist only because the iteration calls them
# for every test; none of them has any work to do
sgp_dd_cleanup() {
        # got me
        :
}
sgp_dd_finish() {
        # got me
        :
}
sgp_dd_teardown() {
        # got me
        :
}
240
241 ######################################################################
242 # the iozone tests
# one-line description of the iozone test, printed ahead of its table
ext2_iozone_banner() {
        printf '%s\n' "iozone -I on a clean ext2 fs"
}
# per-device configuration hook; nothing to configure for iozone
ext2_iozone_config() {
        return 0
}
# Put a fresh ext2 filesystem on block device number $1 and mount it on
# $tmpdir/mount_$1.  The mount point is recorded in cleanup_mounts so that
# cleanup() can unmount it if the script dies before _finish runs.
# Returns 0 on success and 1, after printing a message, on any failure.
ext2_iozone_prepare() {
        local index=$1
        local bdev=${blocks[$index]}
        local mntpnt=$tmpdir/mount_$index
        local tool

        for tool in iozone mke2fs; do
                if ! command -v $tool; then
                        echo "$tool binary not found in PATH"
                        return 1
                fi
        done

        # without a mount point there is no use going on to mkfs and mount
        if ! mkdir -p "$mntpnt" ; then
                echo "$mntpnt isn't a directory?"
                return 1
        fi

        echo "making ext2 filesystem on $bdev"
        if ! mke2fs -b 4096 "$bdev"; then
                echo "mke2fs failed"
                return 1
        fi

        if ! mount -t ext2 "$bdev" "$mntpnt"; then
                echo "couldn't mount $bdev on $mntpnt"
                return 1
        fi

        cleanup_mounts[$index]="$mntpnt"
        return 0
}
# Get device $1 ready for a pass of kind $2: a write pass starts without an
# old iozone file, a read pass keeps the file the write pass left behind.
ext2_iozone_setup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        if [ "$wor" = w ]; then
                rm -f $f
        elif [ "$wor" != r ]; then
                die "asked to do io with $wor?"
        fi
}
# Print the iozone command line for one pass on mounted device $4.
#   $1 - threads (not passed on to iozone)   $2 - record size in k
#   $3 - w or r                              $4 - device index
# A write pass selects iozone test 0, a read pass test 1.
ext2_iozone_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local which_test
        local f="$tmpdir/mount_$id/iozone"

        if [ "$wor" = w ]; then
                which_test=0
        elif [ "$wor" = r ]; then
                which_test=1
        else
                die "asked to do io with $wor?"
        fi

        echo "iozone -i $which_test -w -r ${iosize}k -s ${io_len}k -I -f $f"
}
# Print the throughput in MB/s found in a captured iozone log ($1).
# The figure is the third field of the line that follows the header line
# whose second field is "reclen"; it is divided by 1024 before printing.
ext2_iozone_result() {
        local output=$1
        local kps

        kps=$(awk '($2 == "reclen") { results = NR + 1 }
                   (results == NR)  { print $3 }' < "$output")
        do_bc "$kps / 1024"
}
# Tidy up device $1 after a pass of kind $2: the iozone file survives a
# write pass, because the read pass needs it, and is removed after a read.
ext2_iozone_cleanup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        if [ "$wor" = r ]; then
                rm -f $f
        elif [ "$wor" != w ]; then
                die "asked to do io with $wor?"
        fi
}
# Unmount the filesystem of device $1 and drop it from the list of mounts
# that cleanup() would otherwise unmount at exit.
ext2_iozone_finish() {
        local index=$1
        local mntpnt=$tmpdir/mount_$index

        umount -f "$mntpnt"
        # quoted so the subscript can't be glob-expanded against the cwd
        unset "cleanup_mounts[$index]"
}
# nothing is shared between devices, so there is nothing left to tear down
ext2_iozone_teardown() {
        return 0
}
336
337 ######################################################################
338 # the lctl test_brw via the echo_client on top of the filter
339
# the echo_client setup is nutty enough to warrant its own cleanup:
# the lconf config that is up, the modules we loaded ourselves and the
# echo_client devices we attached are all remembered here
running_config=
running_modules=
declare -a running_names
344
# Undo everything the echo_filter test may have left behind, in reverse
# order of setup: destroy leftover objects, clean up and detach the
# echo_client devices, unload the modules we loaded and take down the lconf
# configuration.  Each piece of state is cleared once it has been handled,
# so calling this again (teardown first, the EXIT trap later) does nothing.
cleanup_echo_filter() {
        local i n m

        # last_block isn't set yet if we exit during option parsing
        for ((i = 0; i <= ${last_block:--1}; i++)); do
                [ -z "${running_oids[$i]}" ] && continue
                lctl --device "\$echo_$i" destroy "${running_oids[$i]}" \
                        $running_threads
        done
        # assigning "" would only blank element 0 and keep the rest
        running_oids=()

        for n in "${running_names[@]}"; do
                [ -z "$n" ] && continue
# I can't believe leading whitespace matters here.
lctl << EOF
cfg_device $n
cleanup
detach
quit
EOF
        done
        running_names=()

        for m in $running_modules; do
                rmmod $m
        done
        running_modules=""

        if [ -n "$running_config" ]; then
                lconf --cleanup "$running_config"
        fi
        running_config=""
}
374
# one-line description of the echo_filter test, printed ahead of its table
echo_filter_banner() {
        printf '%s\n' "test_brw on the echo_client on the filter"
}
# Add block device number $1 to the lmc config file $tmpdir/config.xml as
# ost_$1.  The call for device 0 also adds the localhost net node first.
# Returns 1, after printing a message, if a tool is missing or lmc fails.
echo_filter_config() {
        local index=$1
        local bdev=${blocks[$index]}
        local config="$tmpdir/config.xml"
        local tool

        # command -v is a builtin, so the check doesn't need "which"
        for tool in lmc lconf lctl; do
                if ! command -v $tool; then
                        echo "$tool binary not found in PATH"
                        return 1
                fi
        done

        if [ "$index" = 0 ]; then
                if ! lmc -m "$config" --add net  \
                        --node localhost --nid localhost --nettype tcp; then
                        echo "error adding localhost net node"
                        return 1
                fi
        fi

        if ! lmc -m "$config" --add ost --ost ost_$index --node localhost \
                        --fstype ext3 --dev "$bdev" --journal_size 400; then
                echo "error adding $bdev to config with lmc"
                return 1
        fi

        # it would be nice to be able to ask lmc to setup an echo client
        # to the filter here.  --add echo_client assumes osc
}
# Attach an echo_client named echo_$1 on top of ost_$1.
# The call for device 0 first brings the whole configuration up with
# lconf --reformat and loads the obdecho module if it isn't loaded yet.
# What was started is recorded in running_config, running_modules and
# running_names for cleanup_echo_filter.  Returns 1 on any failure.
echo_filter_prepare() {
        local index=$1
        local config="$tmpdir/config.xml"
        local name="echo_$index"
        # braces are required: "$index_uuid" would be read as a different,
        # unset variable and give every client the same uuid "echo_"
        local uuid="echo_${index}_uuid"

        if [ "$index" = 0 ]; then
                if ! lconf --reformat "$config"; then
                        echo "error setting up with lconf"
                        return 1
                fi
                running_config="$config"
                if ! grep -q '^obdecho\>' /proc/modules; then
                        if ! modprobe obdecho; then
                                echo "error running modprobe obdecho"
                                return 1
                        fi
                        # only unload at cleanup what we loaded ourselves
                        running_modules="obdecho"
                fi
        fi

        if ! lctl << EOF
        newdev
        attach echo_client $name $uuid
        setup ost_$index
        quit
EOF
        then
                echo "error setting up echo_client $name against ost_$index"
                return 1
        fi
        running_names[$index]=$name
}
# Create the objects a write pass on device $1 is going to use.
#   $1 - device index   $2 - w or r   $3 - threads (one object per thread)
# The id printed for object 1 is remembered in running_oids[$1] and the
# thread count in running_threads, so the objects can be destroyed later.
# A read pass reuses the objects of the preceding write pass.
# Returns 1 if lctl didn't report an object id.
echo_filter_setup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"
        local oid

        case "$wor" in
                w) ;;
                r) return ;;
                *) die "asked to do io with $wor?" ;;
        esac

        running_threads=$threads
        oid=$(lctl --device "\$$name" create "$threads" |
                awk '/1 is object id/ { print $6 }')
        if [ -z "$oid" ]; then
                echo "couldn't create $threads objects on $name" 1>&2
                return 1
        fi
        running_oids[$id]=$oid
}
# Print the lctl test_brw command line for one pass on device $4.
#   $1 - threads   $2 - io size   $3 - w or r   $4 - device index
# The page count assumes 4k pages: io_len is in k, so io_len / 4 pages.
echo_filter_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local name="echo_$id"
        local pages=$(($io_len / 4))

        case "$wor" in
                w|r) ;;
                *) die "asked to do io with $wor?" ;;
        esac

        # pass the requested direction on to test_brw, and look the object
        # up by our own index rather than by whatever $i the caller has
        echo lctl --threads $threads v "\$$name" \
                test_brw 1 $wor v $pages ${running_oids[$id]} p$iosize
}
# Add up the rates found in a captured test_brw log ($1) and print the sum.
# A rate is taken from every line whose eighth field is "MB/s):"; it is the
# seventh field with its first character stripped.
echo_filter_result() {
        local output=$1
        local sum=0
        local rate

        for rate in $(awk '$8 == "MB/s):" { print substr($7, 2) }' < $output)
        do
                sum=$(do_bc $sum + $rate)
        done
        echo $sum
}
# Destroy the objects of device $1 once its read pass is done.
#   $1 - device index   $2 - w or r   $3 - threads (number of objects)
# After a write pass nothing happens: the read pass still needs the objects.
echo_filter_cleanup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"

        case "$wor" in
                w) return ;;
                r) ;;
                *) die "asked to do io with $wor?" ;;
        esac

        # index with our own argument; $i only worked by accident because
        # the caller's loop variable happened to hold the same value
        lctl --device "\$$name" destroy "${running_oids[$id]}" "$threads"
        unset "running_oids[$id]"
}
# per-device finish hook
echo_filter_finish() {
        # leave real work for _teardown
        return 0
}
# takes the whole echo_client stack down once all devices are finished
echo_filter_teardown() {
        cleanup_echo_filter
}
515
516 ######################################################################
517 # the iteration that drives the tests
518
519 test_one() {
520         local test=$1
521         local my_x=$2
522         local my_y=$3
523         local threads=$4
524         local iosize=$5
525         local wor=$6
526         local vmstat_pid
527         local vmstat_log="$tmpdir/vmstat.log"
528         local opref="$test-$threads-$iosize-$wor"
529
530         for i in `seq 0 $last_block`; do
531                 ${test}_setup $i $wor $threads
532         done
533
534         echo $test with $threads threads
535
536         # start up vmstat and record its pid
537         echo starting `date`
538         nice -19 vmstat 1 > $vmstat_log 2>&1 &
539         [ $? = 0 ] || die "vmstat failed"
540         vmstat_pid=$!
541         pid_now_running $vmstat_pid
542
543         # start all the tests.  each returns a pid to wait on
544         pids=""
545         for i in `seq 0 $last_block`; do
546                 cmd=`${test}_start $threads $iosize $wor $i`
547                 $cmd > $tmpdir/$i 2>&1 &
548                 local pid=$!
549                 pids="$pids $pid"
550                 pid_now_running $pid
551         done
552
553         echo -n waiting on pids $pids:
554         for p in $pids; do
555                 wait $p
556                 echo -n .
557                 pid_has_stopped $p
558         done
559         echo
560
561         # stop vmstat and get cpu use from it
562         kill $vmstat_pid
563         echo stopping `date`
564         pid_has_stopped $vmstat_pid
565         cpu=$(mean_stddev $(awk \
566               '(NR > 3 && NF == 16 && $16 != "id" )     \
567                 {print 100 - $16}' < $vmstat_log) )
568         save_output $vmstat_log $opref.vmstat
569
570         # record each index's test results and sum them
571         thru=0
572         line=""
573         for i in `seq 0 $last_block`; do
574                 local t=`${test}_result $tmpdir/$i`
575                 save_output $tmpdir/$i $opref.$i
576                 echo test returned "$t"
577                 line="$line $t"
578                 # some tests return mean:stddev per thread, filter out stddev
579                 thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
580         done
581         line="("`commas $line`")"
582
583         for i in `seq 0 $last_block`; do
584                 ${test}_cleanup $i $wor $threads
585         done
586
587         # tabulate the results
588         echo $test did $thru mb/s with $cpu
589         table_set $test $my_x $my_y $thru
590         table_set $test $(($my_x + 1)) $my_y $cpu
591         table_set $test $(($my_x + 2)) $my_y $line
592 }
593
594 test_iterator() {
595         local test=$1
596         local thr=$min_threads
597         local cleanup=""
598         local rc=0
599         local i
600         
601         for i in `seq 0 $last_block`; do
602                 if ! ${test}_config $i; then
603                         echo "couldn't config $test for bdev ${blocks[$i]}"
604                         echo "skipping $test for all block devices"
605                         cleanup=$(($i - 1))
606                         rc=1;
607                         break
608                 fi
609         done
610
611         for i in `seq 0 $last_block`; do
612                 # don't prepare if _config already failed
613                 [ ! -z "$cleanup" ] && break
614                 if ! ${test}_prepare $i; then
615                         echo "couldn't prepare $test for bdev ${blocks[$i]}"
616                         echo "skipping $test for all block devices"
617                         cleanup=$(($i - 1))
618                         rc=1;
619                         break
620                 fi
621         done
622
623         while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
624                 for iosize in 64 128; do
625                         table_set $test 0 $cur_y $thr
626                         table_set $test 1 $cur_y $iosize
627                         table_set $test 2 $cur_y "|"
628
629                         for wor in w r; do
630                                 table_set $test 3 $cur_y $wor
631                                 test_one $test 4 $cur_y $thr $iosize $wor
632                                 cur_y=$(($cur_y + 1))
633                         done
634                 done
635                 thr=$(($thr + $thr))
636         done
637
638         [ -z "$cleanup" ] && cleanup=$last_block
639
640         if [ "$cleanup" != -1 ]; then
641                 for i in `seq $cleanup 0`; do
642                         ${test}_finish $i
643                 done
644         fi
645
646         ${test}_teardown
647
648         return $rc;
649 }
650
# command line parsing.  the leading ':' in the option string makes getopts
# silent, so missing arguments and unknown options are both reported here
while getopts ":d:b:l:t:T:r:" opt; do
        case "$opt" in
                b) block=$OPTARG ;;
                d) output_dir=$OPTARG ;;
                l) io_len=$OPTARG ;;
                r) run_tests=$OPTARG ;;
                t) min_threads=$OPTARG ;;
                T) max_threads=$OPTARG ;;
                :) usage "option -$OPTARG needs an argument" ;;
                \?) usage "unknown option -$OPTARG" ;;
        esac
done
662
# default the amount of io per test to the size of memory, in k
if [ -z "$io_len" ]; then
        io_len=$(awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo)
        [ -z "$io_len" ] && die "couldn't determine the amount of memory"
fi

if [ -n "$output_dir" ]; then
        [ -e "$output_dir" ] || die "output dir $output_dir doesn't exist"
        [ -d "$output_dir" ] || die "output dir $output_dir isn't a directory"
fi

# block devices and tests may be given as comma separated lists
block=${block//,/ }
[ -z "$block" ] && usage "need block devices"

run_tests=${run_tests//,/ }
[ -z "$run_tests" ] && usage "need to specify tests to run with -r"
for t in $run_tests; do
        # whole-word match, so that a fragment like "dd" isn't accepted
        case " $possible_tests " in
                *" $t "*) ;;
                *) die "$t isn't one of the possible tests: $possible_tests" ;;
        esac
done

# thread counts are doubled from min to max, so both have to be >= 1
for n in "$min_threads" "$max_threads"; do
        case "$n" in
                ''|*[!0-9]*) die "thread count '$n' isn't a positive number" ;;
        esac
        [ "$n" -ge 1 ] || die "thread count '$n' isn't a positive number"
done
[ "$min_threads" -gt "$max_threads" ] && \
        die "min threads $min_threads must be <= max threads $max_threads"

last_block=-1
for b in $block; do
        [ -e "$b" ] || die "block device file $b doesn't exist"
        [ -b "$b" ] || die "$b isn't a block device"
        last_block=$((last_block + 1))
        blocks[$last_block]=$b
done
694
# everything the tests write goes under a private scratch directory
tmpdir=$(mktemp -d /tmp/.surveyXXXXXX) || die "couldn't create tmp dir"

echo "each test will operate on ${io_len}k"

# run every requested test and remember the ones that completed
test_results=""

for t in $run_tests; do

        # header row of the test's table; the data rows start at row 1
        table_set $t 0 0 "T"
        table_set $t 1 0 "L"
        table_set $t 2 0 "|"
        table_set $t 3 0 "W"
        table_set $t 5 0 "C:S"
        table_set $t 6 0 "B"
        cur_y=1

        if test_iterator $t; then
                test_results="$test_results $t"
        fi
done

# the legend is only worth printing if at least one table follows
if [ -n "$test_results" ]; then
        echo
        echo "T = number of concurrent threads per device"
        echo "L = base io operation length, in KB"
        echo "W/O/R = write/overwrite/read throughput, in MB/s"
        echo "C = percentage CPU used, both user and system"
        echo "S = standard deviation in cpu use"
        echo "B = per-block results: ($(echo ${blocks[*]} | tr ' ' ','))"
        echo
fi

for t in $test_results; do
        ${t}_banner
        table_dump $t
done