Whamcloud - gitweb
- try to verify that a bdev is alive by reading from it
[fs/lustre-release.git] / lustre / scripts / bdev-io-survey.sh
1 #!/bin/bash
2
3 # for now all the units are in 'k', but we could introduce some helpers
4 # would be nice to run tests in the background and trap signals and kill
5 #
6 #  todo:
7 #       make sure devices aren't in use before going to town
8 #       really use threads with iozone
9 #       look into what sgp_dd is really doing, update arguments
10 #       rename config/prepare/setup/cleanup/finish/teardown
11 #       do something with sf and fpp iterating
12 #       discard first vmstat line
13 #
14
# Scratch directory; created once the arguments have been validated and
# removed again by cleanup() when the script exits.
tmpdir=
# Background pids that cleanup() has to kill, indexed by the pid itself.
declare -a cleanup_pids
# Mount points inside $tmpdir that cleanup() has to unmount, indexed by
# block device number.
declare -a cleanup_mounts
# Next free row of the result table.  XXX this is a wart that could go
cur_y=0

# Option defaults; -t, -T and -r override the thread bounds and test list.
min_threads=1
max_threads=4
possible_tests='sgp_dd ext2_iozone echo_filter'
run_tests=$possible_tests

# Optional directory that receives the raw per-run logs (-d).
output_dir=
32  
# Print all arguments as one message on stderr and terminate the script
# with exit status 1.
die() {
        # a quoted "$*" keeps the message intact; the unquoted echo used
        # before glob-expanded and re-split it
        printf '%s\n' "$*" 1>&2
        exit 1
}
# Remove each path given as an argument.  Paths that don't exist are
# skipped; anything that exists but isn't a regular file, or that can't
# be removed, aborts the script through die().
rm_or_die() {
        local path

        # "$@" keeps every argument as one word, so paths containing
        # whitespace are handled as given
        for path in "$@"; do
                [ -e "$path" ] || continue
                [ -f "$path" ] || die "needed to remove non-file $path"
                rm -f -- "$path" || die "couldn't remove $path"
        done
}
# Move file $1 into $output_dir under the name $2.  Does nothing (and
# succeeds) when no output directory was requested.
save_output() {
        if [ -n "$output_dir" ]; then
                mv -f -- "$1" "$output_dir/$2"
        fi
}
# EXIT handler: kill recorded background pids, tear down the echo_client
# setup, unmount recorded mounts and remove the scratch directory.
cleanup() {
        local pid mnt

        # only cleanup test runs if we have block devices.  last_block is
        # assigned late, so an early exit (bad option, usage error) gets
        # here with it unset; treat that like "no devices" instead of
        # handing [ a malformed expression
        if [ "${last_block:--1}" != -1 ]; then
                for pid in "${cleanup_pids[@]}"; do
                        kill "$pid"
                done
                cleanup_echo_filter
                for mnt in "${cleanup_mounts[@]}"; do
                        umount -f "$mnt"
                done
        fi

        # safety net before rm -rf: only a path of exactly 18 characters
        # (the length of /tmp/.surveyXXXXXX) that is a directory is removed
        [ ${#tmpdir} == 18 ] && [ -d "$tmpdir" ] && rm -rf "$tmpdir"
}
trap cleanup EXIT
62
# Record background pid $1 so cleanup() can kill it; the pid doubles as
# the array index.
pid_now_running() {
        cleanup_pids[$1]=$1
}
# Forget background pid $1 once it has been waited for or killed.
pid_has_stopped() {
        local pid=$1
        # quoted so the [..] subscript can't be taken for a glob pattern
        # and matched against files in the current directory
        unset "cleanup_pids[$pid]"
}
71                                                                                 
# Join the whitespace separated words of the arguments with commas.
commas() {
        echo $* | tr ' ' ','
}
# Evaluate the arguments as one bc expression with scale=2 and print
# whatever bc prints.
do_bc() {
        bc <<< "scale=2; $*"
}
# Print "mean:stddev" for the numbers given as arguments.
#   no numbers  -> prints '??'
#   one number  -> prints "<number>:0"
#   otherwise   -> mean and the sample standard deviation (n - 1
#                  divisor), both computed through do_bc
mean_stddev() {
        local points=$*
        local p dev
        local avg=0
        local num=0
        local tmp=0

        # first pass: sum the points (in avg) and count them
        for p in $points; do
                avg=$(do_bc $avg + $p)
                num=$((num + 1))
        done
        case $num in
                0) echo '??' ; return ;;
                1) echo "$avg:0" ; return ;;
        esac

        # second pass: sum of squared deviations from the mean
        avg=$(do_bc $avg / $num)
        for p in $points; do
                dev=$(do_bc \($p - $avg\) \^ 2)
                tmp=$(do_bc $tmp + $dev)
        done
        tmp=$(do_bc sqrt \( $tmp / \($num - 1\) \))
        echo "$avg:$tmp"
}
101
# Print the message given as arguments followed by the option summary,
# then exit.  Every caller reports a command line error, so the exit
# status is non-zero.
usage() {
        printf '%s\n' "$*"
        echo "       -b <block device to profile>"
        echo "       -d <summary output directory>"
        echo "       -l <max io len>"
        echo "       -t <minimum number of threads per device>"
        echo "       -T <maximum number of threads per device>"
        echo "       -r <tests to run>"
        exit 1
}
112
113 # some cute code for handling tables whose columns fit
# Raise the variable named by $1 to $2 if $2 is larger than its current
# value; an unset or empty variable counts as 0.
set_max() {
        local target=$1
        local val=$2

        [ $val -gt ${!target:-0} ] || return 0
        printf -v "$target" '%s' "$val"
}
# Store a cell in table $1.
#   $1 - table name
#   $2 - column
#   $3 - row
#   $4 - value
# Also keeps the table's widest-value-per-column and column/row counts
# up to date so table_dump can size its output.
table_set() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local val=$4

        # printf -v assigns without re-parsing the value, so quotes or
        # other shell syntax in $val can't break or inject anything
        printf -v "${name}_${row}_${col}" '%s' "$val"

        set_max "${name}_${col}_longest" ${#val}
        set_max "${name}_num_col" $((col + 1))
        set_max "${name}_num_row" $((row + 1))
}
135                                                                                 
# Print the cell at column $2, row $3 of table $1 (an empty line if the
# cell was never set).
table_get() {
        local name="_table_$1"
        local col=$2
        local row=$3
        local ref="${name}_${row}_${col}"

        # quoted so the stored value is printed exactly as stored
        printf '%s\n' "${!ref}"
}
143                                                                                 
# Print table $1 with every column left-aligned and padded to its widest
# value.  Columns that never received a non-empty value are left out;
# a table with no such column prints nothing.
table_dump() {
        local name="_table_$1"
        local num_col num_row
        local fmt=""
        local ref width x y
        local -a cols=()
        local -a cells

        ref="${name}_num_col"
        num_col="${!ref:-0}"
        ref="${name}_num_row"
        num_row="${!ref:-0}"

        # build one %-<width>s per non-empty column, separated by single
        # spaces, and remember which columns those are
        for ((x = 0; x < num_col; x++)); do
                ref="${name}_${x}_longest"
                width=${!ref:-0}
                [ "$width" -eq 0 ] && continue
                cols+=("$x")
                fmt="${fmt:+$fmt }%-${width}s"
        done

        # nothing in the table to print
        [ -z "$fmt" ] && return

        # terminate the row here rather than on "the last column", which
        # lost the newline whenever that column happened to be empty
        fmt="$fmt\n"

        # NOTE: the bound is inclusive, so one all-blank row follows the
        # data; kept because it has always been part of the output
        for ((y = 0; y <= num_row; y++)); do
                cells=()
                for x in "${cols[@]}"; do
                        ref="${name}_${y}_${x}"
                        cells+=("${!ref:-}")
                done
                # cells are passed as real arguments, never re-parsed
                printf "$fmt" "${cells[@]}"
        done
}
188
189 ######################################################################
190 # the sgp_dd tests
# Heading printed above the sgp_dd result table.
sgp_dd_banner() {
        printf '%s\n' 'sgp_dd using dio=1 and thr='
}
# Per-device config hook; sgp_dd needs none.
# it could be making sure that the block dev isn't in use by something
# else
sgp_dd_config() {
        return 0
}
# Per-device prepare hook: only checks that sgp_dd can be found.
# Returns 0 if it can, 1 (after printing a message) if it can't.
sgp_dd_prepare() {
        # command -v is a shell builtin, so the check doesn't itself
        # depend on an external "which" being installed
        if ! command -v sgp_dd > /dev/null; then
                echo "can't find sgp_dd binary"
                return 1
        fi
        return 0
}
# Per-run setup hook; sgp_dd needs none.
# it could be making sure that the block dev isn't in use by something
# else
sgp_dd_setup() {
        return 0
}
# Print the sgp_dd command line for one run.
#   $1 - thread count (thr=)
#   $2 - io size in k (bs=)
#   $3 - w to write zeros to the device, r to read it into /dev/null
#   $4 - index into the blocks array
# The block count is the global io_len divided by the io size.
sgp_dd_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local bdev=${blocks[$4]}
        local src dst

        if [ "$wor" = w ]; then
                src=/dev/zero
                dst=$bdev
        elif [ "$wor" = r ]; then
                src=$bdev
                dst=/dev/null
        else
                die "asked to do io with $wor?"
        fi

        echo sgp_dd if=$src of=$dst bs=${iosize}k \
                count=$((io_len / iosize)) time=1 dio=1 thr=$threads
}
# Print the throughput figure from sgp_dd output file $1: the field just
# before a trailing "MB/sec" on any line that ends that way.
sgp_dd_result() {
        awk '$NF == "MB/sec" { print $(NF - 1) }' < "$1"
}
# Per-run cleanup hook; nothing to undo for sgp_dd.
sgp_dd_cleanup() {
        return 0
}
# Per-device finish hook; nothing to undo for sgp_dd.
sgp_dd_finish() {
        return 0
}
# Final teardown hook; nothing to undo for sgp_dd.
sgp_dd_teardown() {
        return 0
}
244
245 ######################################################################
246 # the iozone tests
# Heading printed above the ext2_iozone result table.
ext2_iozone_banner() {
        printf '%s\n' 'iozone -I on a clean ext2 fs'
}
# Per-device config hook; the iozone test needs none.
ext2_iozone_config() {
        return 0
}
# Per-device prepare hook for the iozone test.
#   $1 - index into the blocks array
# Checks for iozone and mke2fs, makes a fresh ext2 filesystem on the
# device and mounts it on $tmpdir/mount_<index>.  The mount point is
# recorded in cleanup_mounts so it gets unmounted on exit.
# Returns 0 on success, 1 (after printing a message) on any failure.
ext2_iozone_prepare() {
        local index=$1
        local bdev=${blocks[$index]}
        local mntpnt=$tmpdir/mount_$index

        # command -v is a builtin, so the checks don't depend on an
        # external "which"
        if ! command -v iozone > /dev/null; then
                echo "iozone binary not found in PATH"
                return 1
        fi
        if ! command -v mke2fs > /dev/null; then
                echo "mke2fs binary not found in PATH"
                return 1
        fi

        # without a mount point there is no reason to go on and wipe
        # the device
        if ! mkdir -p "$mntpnt"; then
                echo "$mntpnt isn't a directory?"
                return 1
        fi

        echo making ext2 filesystem on $bdev
        if ! mke2fs -b 4096 "$bdev"; then
                echo "mke2fs failed"
                return 1
        fi

        if ! mount -t ext2 "$bdev" "$mntpnt"; then
                echo "couldn't mount $bdev on $mntpnt"
                return 1
        fi

        cleanup_mounts[$index]="$mntpnt"
        return 0
}
# Per-run setup hook for the iozone test.
#   $1 - device index, $2 - w or r
# A write pass starts from scratch, so the iozone file on the device's
# mount is removed first; a read pass keeps it.
ext2_iozone_setup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        if [ "$wor" = w ]; then
                rm -f "$f"
        elif [ "$wor" != r ]; then
                die "asked to do io with $wor?"
        fi
}
# Print the iozone command line for one run.
#   $1 - thread count (accepted but not used in the command)
#   $2 - record size in k
#   $3 - w selects iozone test 0, r selects test 1
#   $4 - device index; the file lives on that device's mount
# The file size is the global io_len, in k.
ext2_iozone_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local f="$tmpdir/mount_$id/iozone"
        local phase

        if [ "$wor" = w ]; then
                phase=0
        elif [ "$wor" = r ]; then
                phase=1
        else
                die "asked to do io with $wor?"
        fi

        echo "iozone -i $phase -w -r ${iosize}k -s ${io_len}k -I -f $f"
}
# Print the throughput from iozone output file $1: the third field of
# the line following the header whose second field is "reclen", divided
# by 1024 through do_bc.
ext2_iozone_result() {
        local output=$1
        local kps

        kps=$(awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
                < "$output")
        do_bc "$kps / 1024"
}
# Per-run cleanup hook for the iozone test.
#   $1 - device index, $2 - w or r
# The file written by a w pass is kept for the r pass that follows and
# removed once that r pass is done.
ext2_iozone_cleanup() {
        local id=$1
        local wor=$2
        local f="$tmpdir/mount_$id/iozone"

        if [ "$wor" = r ]; then
                rm -f "$f"
        elif [ "$wor" != w ]; then
                die "asked to do io with $wor?"
        fi
}
# Per-device finish hook: unmount the device's filesystem and drop the
# mount from the list cleanup() would otherwise unmount again.
#   $1 - device index
ext2_iozone_finish() {
        local index=$1
        local mntpnt=$tmpdir/mount_$index

        umount -f "$mntpnt"
        # quoted so the [..] subscript can't be taken for a glob pattern
        unset "cleanup_mounts[$index]"
}
# Final teardown hook; the per-device finish hook already did the work.
ext2_iozone_teardown() {
        return 0
}
340
341 ######################################################################
342 # the lctl test_brw via the echo_client on top of the filter
343
# the echo_client setup is nutty enough to warrant its own cleanup;
# these record what has been set up so far:
#   running_config  - config file handed to lconf, if lconf was run
#   running_modules - modules this script loaded itself
#   running_names   - echo_client device names, by block device index
#   running_oids    - object ids created for test_brw, by block device index
running_config=
running_modules=
declare -a running_names
declare -a running_oids
349
# Undo whatever the echo_filter test has set up so far: destroy the
# objects still recorded in running_oids, clean up and detach every
# echo_client in running_names, unload the modules this script loaded
# and run lconf --cleanup on the recorded config.  Each record is
# cleared once handled, so calling this a second time (teardown first,
# then the EXIT trap) does nothing.
cleanup_echo_filter() {
        local i n m

        for ((i = 0; i <= ${last_block:--1}; i++)); do
                [ -z "${running_oids[$i]}" ] && continue
                lctl --device "\$echo_$i" destroy ${running_oids[$i]} \
                        $running_threads
        done
        unset running_oids

        for n in "${running_names[@]}"; do
                [ -n "$n" ] || continue
                # lctl reads its commands from stdin
                printf 'cfg_device %s\ncleanup\ndetach\nquit\n' "$n" | lctl
        done
        # empty the whole array; assigning "" only blanked element 0 and
        # left the other names behind to be detached a second time
        running_names=()

        for m in $running_modules; do
                rmmod $m
        done
        running_modules=""

        [ -n "$running_config" ] && lconf --cleanup "$running_config"
        running_config=""
}
379
# Heading printed above the echo_filter result table.
echo_filter_banner() {
        printf '%s\n' 'test_brw on the echo_client on the filter'
}
# Per-device config hook for the echo_filter test.
#   $1 - index into the blocks array
# Checks for lmc, lconf and lctl, then uses lmc to add the device as
# ost_<index> to $tmpdir/config.xml.  The localhost net entry is added
# once, along with device 0.
# Returns 0 on success, 1 (after printing a message) on any failure.
echo_filter_config() {
        local index=$1
        local bdev=${blocks[$index]}
        local config="$tmpdir/config.xml"
        local tool

        # command -v is a builtin, so the checks don't depend on an
        # external "which"
        for tool in lmc lconf lctl; do
                if ! command -v "$tool" > /dev/null; then
                        echo "$tool binary not found in PATH"
                        return 1
                fi
        done

        if [ "$index" = 0 ]; then
                if ! lmc -m "$config" --add net  \
                        --node localhost --nid localhost --nettype tcp; then
                        echo "error adding localhost net node"
                        return 1
                fi
        fi

        if ! lmc -m "$config" --add ost --ost ost_$index --node localhost \
                        --fstype ext3 --dev "$bdev" --journal_size 400; then
                echo "error adding $bdev to config with lmc"
                return 1
        fi

        # it would be nice to be able to ask lmc to setup an echo client
        # to the filter here.  --add echo_client assumes osc
        return 0
}
# Per-device prepare hook for the echo_filter test.
#   $1 - index into the blocks array
# For device 0 this first runs lconf --reformat on the generated config
# and loads obdecho if /proc/modules doesn't list it.  For every device
# it then attaches an echo_client named echo_<index> and sets it up on
# ost_<index>.  What was set up is recorded for cleanup_echo_filter.
# Returns 1 (after printing a message) on failure.
echo_filter_prepare() {
        local index=$1
        local config="$tmpdir/config.xml"
        local name="echo_$index"
        # braces matter: "$index_uuid" would expand a variable called
        # index_uuid and give every client the same uuid, "echo_"
        local uuid="echo_${index}_uuid"

        if [ "$index" = 0 ]; then
                if ! lconf --reformat "$config"; then
                        echo "error setting up with lconf"
                        return 1;
                fi
                running_config="$config"
                if ! grep -q '^obdecho\>' /proc/modules; then
                        if ! modprobe obdecho; then
                                echo "error running modprobe obdecho"
                                return 1;
                        fi
                        # only unload at cleanup what we loaded ourselves
                        running_modules="obdecho"
                fi
        fi

lctl << EOF
        newdev
        attach echo_client $name $uuid
        setup ost_$index
        quit
EOF
        if [ $? != 0 ]; then
                echo "error setting up echo_client $name against ost_$index"
                return 1
        fi
        running_names[$index]=$name
}
# Per-run setup hook for the echo_filter test.
#   $1 - device index, $2 - w or r, $3 - thread count
# Before a write pass this asks the echo_client to create $3 objects and
# records the id reported on the "1 is object id" line, along with the
# thread count, for later destruction.  A read pass creates nothing.
echo_filter_setup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"
        local oid

        if [ "$wor" = r ]; then
                return
        elif [ "$wor" != w ]; then
                die "asked to do io with $wor?"
        fi

        running_threads=$threads
        oid=$(lctl --device "\$$name" create $threads |
                awk '/1 is object id/ { print $6 }')
        # XXX need to deal with errors
        running_oids[$id]=$oid
}
# Print the lctl test_brw command line for one run.
#   $1 - thread count
#   $2 - io size, passed on as p<size>
#   $3 - w or r; passed through to test_brw so that a read pass reads
#        (the mode used to be hard-coded to w)
#   $4 - device index; selects echo_<index> and its recorded object id
# The page count is the global io_len divided by 4 (presumably io_len
# is in k and pages are 4k -- confirm).
echo_filter_start() {
        local threads=$1
        local iosize=$2
        local wor=$3
        local id=$4
        local name="echo_$id"
        local pages=$((io_len / 4))

        case "$wor" in
                w|r) ;;
                *) die "asked to do io with $wor?" ;;
        esac

        # index by our own $id, not by whatever loop variable the
        # caller happens to have left in $i
        echo lctl --threads $threads v "\$$name" \
                test_brw 1 $wor v $pages ${running_oids[$id]} p$iosize
}
# Print the total throughput found in test_brw output file $1: on every
# line whose 8th field is "MB/s):", the 7th field minus its first
# character is added to the total through do_bc.
echo_filter_result() {
        local output=$1
        local total=0
        local mbs

        while read -r mbs; do
                [ -n "$mbs" ] || continue
                total=$(do_bc $total + $mbs)
        done < <(awk '$8 == "MB/s):" { print substr($7, 2) }' < "$output")

        echo $total
}
# Per-run cleanup hook for the echo_filter test.
#   $1 - device index, $2 - w or r, $3 - thread count
# Objects created for a write pass are kept for the read pass that
# follows; after that read pass they are destroyed and forgotten.
echo_filter_cleanup() {
        local id=$1
        local wor=$2
        local threads=$3
        local name="echo_$id"

        case "$wor" in
                w) return ;;
                r) ;;
                *) die "asked to do io with $wor?" ;;
        esac

        # index by our own $id, not by whatever loop variable the
        # caller happens to have left in $i
        lctl --device "\$$name" destroy ${running_oids[$id]} $threads
        unset "running_oids[$id]"
}
# Per-device finish hook; deliberately empty, the real work is left
# for echo_filter_teardown.
echo_filter_finish() {
        return 0
}
# Final teardown hook: hand everything to the shared echo_client
# cleanup routine.  No arguments are passed on.
echo_filter_teardown() {
        cleanup_echo_filter
}
520
521 ######################################################################
522 # the iteration that drives the tests
523
524 test_one() {
525         local test=$1
526         local my_x=$2
527         local my_y=$3
528         local threads=$4
529         local iosize=$5
530         local wor=$6
531         local vmstat_pid
532         local vmstat_log="$tmpdir/vmstat.log"
533         local opref="$test-$threads-$iosize-$wor"
534
535         for i in `seq 0 $last_block`; do
536                 ${test}_setup $i $wor $threads
537         done
538
539         echo $test with $threads threads
540
541         # start up vmstat and record its pid
542         echo starting `date`
543         nice -19 vmstat 1 > $vmstat_log 2>&1 &
544         [ $? = 0 ] || die "vmstat failed"
545         vmstat_pid=$!
546         pid_now_running $vmstat_pid
547
548         # start all the tests.  each returns a pid to wait on
549         pids=""
550         for i in `seq 0 $last_block`; do
551                 cmd=`${test}_start $threads $iosize $wor $i`
552                 $cmd > $tmpdir/$i 2>&1 &
553                 local pid=$!
554                 pids="$pids $pid"
555                 pid_now_running $pid
556         done
557
558         echo -n waiting on pids $pids:
559         for p in $pids; do
560                 wait $p
561                 echo -n .
562                 pid_has_stopped $p
563         done
564         echo
565
566         # stop vmstat and get cpu use from it
567         kill $vmstat_pid
568         echo stopping `date`
569         pid_has_stopped $vmstat_pid
570         cpu=$(mean_stddev $(awk \
571               '(NR > 3 && NF == 16 && $16 != "id" )     \
572                 {print 100 - $16}' < $vmstat_log) )
573         save_output $vmstat_log $opref.vmstat
574
575         # record each index's test results and sum them
576         thru=0
577         line=""
578         for i in `seq 0 $last_block`; do
579                 local t=`${test}_result $tmpdir/$i`
580                 save_output $tmpdir/$i $opref.$i
581                 echo test returned "$t"
582                 line="$line $t"
583                 # some tests return mean:stddev per thread, filter out stddev
584                 thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
585         done
586         line="("`commas $line`")"
587
588         for i in `seq 0 $last_block`; do
589                 ${test}_cleanup $i $wor $threads
590         done
591
592         # tabulate the results
593         echo $test did $thru mb/s with $cpu
594         table_set $test $my_x $my_y $thru
595         table_set $test $(($my_x + 1)) $my_y $cpu
596         table_set $test $(($my_x + 2)) $my_y $line
597 }
598
599 test_iterator() {
600         local test=$1
601         local thr=$min_threads
602         local cleanup=""
603         local rc=0
604         local i
605         
606         for i in `seq 0 $last_block`; do
607                 if ! ${test}_config $i; then
608                         echo "couldn't config $test for bdev ${blocks[$i]}"
609                         echo "skipping $test for all block devices"
610                         cleanup=$(($i - 1))
611                         rc=1;
612                         break
613                 fi
614         done
615
616         for i in `seq 0 $last_block`; do
617                 # don't prepare if _config already failed
618                 [ ! -z "$cleanup" ] && break
619                 if ! ${test}_prepare $i; then
620                         echo "couldn't prepare $test for bdev ${blocks[$i]}"
621                         echo "skipping $test for all block devices"
622                         cleanup=$(($i - 1))
623                         rc=1;
624                         break
625                 fi
626         done
627
628         while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
629                 for iosize in 64 128; do
630                         table_set $test 0 $cur_y $thr
631                         table_set $test 1 $cur_y $iosize
632                         table_set $test 2 $cur_y "|"
633
634                         for wor in w r; do
635                                 table_set $test 3 $cur_y $wor
636                                 test_one $test 4 $cur_y $thr $iosize $wor
637                                 cur_y=$(($cur_y + 1))
638                         done
639                 done
640                 thr=$(($thr + $thr))
641         done
642
643         [ -z "$cleanup" ] && cleanup=$last_block
644
645         if [ "$cleanup" != -1 ]; then
646                 for i in `seq $cleanup 0`; do
647                         ${test}_finish $i
648                 done
649         fi
650
651         ${test}_teardown
652
653         return $rc;
654 }
655
# Parse the command line.  The leading ':' in the option string puts
# getopts in silent mode: an unknown option arrives as '?' and an
# option that is missing its argument as ':', both with the offending
# letter in OPTARG.
while getopts ":d:b:l:t:T:r:" opt; do
        case $opt in
                b) block=$OPTARG ;;
                d) output_dir=$OPTARG ;;
                l) io_len=$OPTARG ;;
                r) run_tests=$OPTARG ;;
                t) min_threads=$OPTARG ;;
                T) max_threads=$OPTARG ;;
                :) usage "option -$OPTARG requires an argument" ;;
                \?) usage "unknown option -$OPTARG" ;;
        esac
done
667
# Validate the options and build the list of block devices.

# no -l given: default the io length to the MemTotal figure from
# /proc/meminfo
if [ -z "$io_len" ]; then
        io_len=$(awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo)
        [ -z "$io_len" ] && die "couldn't determine the amount of memory"
fi

if [ -n "$output_dir" ]; then
        [ -e "$output_dir" ] || die "output dir $output_dir doesn't exist"
        [ -d "$output_dir" ] || die "output dir $output_dir isn't a directory"
fi

# -b and -r both take comma separated lists
block=${block//,/ }
[ -z "$block" ] && usage "need block devices"

run_tests=${run_tests//,/ }
[ -z "$run_tests" ] && usage "need to specify tests to run with -r"
for t in $run_tests; do
        # match whole names only; a substring such as "dd" is not a test
        case " $possible_tests " in
                *" $t "*) ;;
                *) die "$t isn't one of the possible tests: $possible_tests" ;;
        esac
done

for thread_opt in "$min_threads" "$max_threads"; do
        case $thread_opt in
                ''|*[!0-9]*) die "thread counts must be whole numbers, got '$thread_opt'" ;;
        esac
done
# the thread count is doubled after every pass, so it has to start
# above zero for the iteration to ever reach max_threads
[ "$min_threads" -ge 1 ] || die "min threads must be at least 1"
[ "$min_threads" -gt "$max_threads" ] && \
        die "min threads $min_threads must be <= max threads $max_threads"

last_block=-1
for b in $block; do
        [ -e "$b" ] || die "block device file $b doesn't exist"
        [ -b "$b" ] || die "$b isn't a block device"
        # try to verify that the device is alive by reading from it
        dd if="$b" of=/dev/null bs=8192 count=1 || \
                die "couldn't read 8k from $b, is it alive?"
        last_block=$((last_block + 1))
        blocks[$last_block]=$b
done
[ "$last_block" -ge 0 ] || usage "need block devices"
702
# Create the scratch directory, then run every requested test.  A test
# whose iterator fails is left out of the final report.
tmpdir=$(mktemp -d /tmp/.surveyXXXXXX) || die "couldn't create tmp dir"

echo each test will operate on ${io_len}k

test_results=""

for t in $run_tests; do

        # header row; column 4 (throughput) is left without a heading
        table_set $t 0 0 T
        table_set $t 1 0 L
        table_set $t 2 0 '|'
        table_set $t 3 0 W
        table_set $t 5 0 C:S
        table_set $t 6 0 B
        cur_y=1

        test_iterator $t && test_results="$test_results $t"
done
724
# Print the legend (only if at least one test produced results),
# followed by each test's banner and result table.
if [ -n "$test_results" ]; then
        printf '\n'
        printf '%s\n' "T = number of concurrent threads per device"
        printf '%s\n' "L = base io operation length, in KB"
        printf '%s\n' "W/O/R = write/overwrite/read throughput, in MB/s"
        printf '%s\n' "C = percentage CPU used, both user and system"
        printf '%s\n' "S = standard deviation in cpu use"
        printf '%s\n' "B = per-block results: ($(echo ${blocks[*]} | tr ' ' ','))"
        printf '\n'
fi

for t in $test_results; do
        ${t}_banner
        table_dump $t
done
737         ${t}_banner
738         table_dump $t
739 done