lustre/scripts/bdev-io-survey.sh

   1 #!/bin/bash
   2
   3 # for now all the units are in 'k', but we could introduce some helpers
   4 # would be nice to run tests in the background and trap signals and kill
   5 #
   6 #  todo:
   7 #       make sure devices aren't in use before going to town
   8 #       really use threads with iozone
   9 #       look into what sgp_dd is really doing, update arguments
  10 #       rename config/prepare/setup/cleanup/finish/teardown
  11 #       do something with sf and fpp iterating
  12 #       discard first vmstat line
  13 #
  14
  15 # a temp dir that is setup and torn down for each script run
  16 tmpdir=""
  17 # so we can kill background processes as the test cleans up
  18 declare -a cleanup_pids
  19 # to unmount mounts in our tmpdir before removing it
  20 declare -a cleanup_mounts
  21 # global for completing the table.  XXX this is a wart that could go
  22 cur_y="0"
  23
  24 # defaults for some options:
  25 min_threads=1
  26 max_threads=4
  27 possible_tests="sgp_dd ext2_iozone echo_filter"
  28 run_tests="$possible_tests"
  29
  30 # optional output directory
  31 output_dir=""
  32
  33 die() {
  34         echo $* 1>&2
  35         exit 1
  36 }
  37 rm_or_die() {
  38         for path in $*; do
  39                 [ -e $path ] || continue;
  40                 [ -f $path ] || die "needed to remove non-file $path"
  41                 rm -f $path || die "couldn't remove $path"
  42         done
  43 }
  44 save_output() {
  45         [ ! -z "$output_dir" ] && mv -f $1 $output_dir/$2
  46 }
  47 cleanup() {
  48         # only cleanup test runs if we have block devices
  49         if [ $last_block != -1 ]; then
  50                 for pid in ${cleanup_pids[*]}; do
  51                         kill $pid
  52                 done
  53                 cleanup_echo_filter
  54                 for a in ${cleanup_mounts[*]}; do
  55                         umount -f $a
  56                 done
  57         fi
  58
  59         [ ${#tmpdir} == 18 ] && [ -d $tmpdir ] && rm -rf $tmpdir
  60 }
  61 trap cleanup EXIT
  62
  63 pid_now_running() {
  64         local pid=$1
  65         cleanup_pids[$pid]=$pid
  66 }
  67 pid_has_stopped() {
  68         local pid=$1
  69         unset cleanup_pids[$pid]
  70 }
  71
  72 commas() {
  73         echo $* | sed -e 's/ /,/g'
  74 }
  75 do_bc() {
  76         echo "scale=2; $*" | bc
  77 }
  78 mean_stddev() {
  79         local points=$*
  80
  81         local avg=0
  82         local num=0
  83         for p in $points; do
  84                 avg=`do_bc $avg + $p`
  85                 num=`do_bc $num + 1`
  86         done
  87         case $num in
  88                 0) echo '??' ; return ;;
  89                 1) echo "$avg:0" ; return ;;
  90         esac
  91
  92         avg=`do_bc $avg / $num`
  93         local tmp=0
  94         for p in $points; do
  95                 local dev=`do_bc \($p - $avg\) \^ 2`
  96                 tmp=`do_bc $tmp + $dev`
  97         done
  98         tmp=`do_bc sqrt \( $tmp / \($num - 1\) \)`
  99         echo "$avg:$tmp"
 100 }
 101
 102 usage() {
 103         echo $*
 104         echo "       -b <block device to profile>"
 105         echo "       -d <summary output directory>"
 106         echo "       -l <max io len>"
 107         echo "       -t <minimum number of threads per device>"
 108         echo "       -T <maximum number of threads per device>"
 109         echo "       -r <tests to run>"
 110         exit;
 111 }
 112
 113 # some cute code for handling tables whose columns fit
 114 set_max() {
 115         local target=$1
 116         local val=$2
 117
 118         if [ $val -gt ${!target:-0} ]; then
 119                 eval $target=$val
 120         fi
 121 }
 122 table_set() {
 123         local name="_table_$1"
 124         local col=$2
 125         local row=$3
 126         local val=$4
 127         local num
 128
 129         eval ${name}_${row}_${col}="'$val'"
 130
 131         set_max ${name}_${col}_longest ${#val}
 132         set_max ${name}_num_col $(($col + 1))
 133         set_max ${name}_num_row $(($row + 1))
 134 }
 135
 136 table_get() {
 137         local name="_table_$1"
 138         local col=$2
 139         local row=$3
 140         tmp="${name}_${row}_${col}"
 141         echo ${!tmp}
 142 }
 143
 144 table_dump() {
 145         local name="_table_$1"
 146         local num_col;
 147         local num_row;
 148         local fmt="";
 149         local tmp
 150         local sep
 151
 152         tmp="${name}_num_col"
 153         num_col="${!tmp:-0}"
 154         tmp="${name}_num_row"
 155         num_row="${!tmp:-0}"
 156
 157         # iterate through the columns to find the longest
 158
 159         sep=" "
 160         for x in `seq 0 $num_col`; do
 161                 tmp="${name}_${x}_longest"
 162                 tmp=${!tmp:-0}
 163                 [ $tmp -eq 0 ] && continue
 164
 165                 [ $x -eq $((num_col - 1)) ] && sep='\n'
 166
 167                 fmt="$fmt%-${tmp}s$sep"
 168         done
 169
 170         # nothing in the table to print
 171         [ -z "$fmt" ] && return
 172
 173         for y in `seq 0 $num_row`; do
 174                 local row=""
 175                 for x in `seq 0 $num_col`; do
 176
 177                         # skip this element if the column is empty
 178                         tmp="${name}_${x}_longest"
 179                         [ ${!tmp:-0} -eq 0 ] && continue
 180
 181                         # fill this cell with the value or '' for printf
 182                         tmp="${name}_${y}_${x}"
 183                         row="$row'${!tmp:-""}' "
 184                 done
 185                 eval printf "'$fmt'" $row
 186         done
 187 }
 188
 189 ######################################################################
 190 # the sgp_dd tests
 191 sgp_dd_banner() {
 192         echo sgp_dd using dio=1 and thr=
 193 }
 194 sgp_dd_config() {
 195         # it could be making sure that the block dev
 196         # isn't in use by something else
 197         local nothing=0
 198 }
 199 sgp_dd_prepare() {
 200         if ! which sgp_dd; then
 201                 echo "can't find sgp_dd binary"
 202                 return 1
 203         fi
 204         return 0
 205 }
 206 sgp_dd_setup() {
 207         # it could be making sure that the block dev
 208         # isn't in use by something else
 209         local nothing=0
 210 }
 211 sgp_dd_start() {
 212         local threads=$1
 213         local iosize=$2
 214         local wor=$3
 215         local i=$4
 216         local ifof;
 217         local bdev=${blocks[$i]};
 218
 219         case "$wor" in
 220                 w) ifof="if=/dev/zero of=$bdev" ;;
 221                 r) ifof="if=$bdev of=/dev/null" ;;
 222                 *) die "asked to do io with $wor?"
 223         esac
 224         echo sgp_dd $ifof bs=$iosize"k" count=$(($io_len / $iosize)) time=1 \
 225                         dio=1 thr=$threads
 226 }
 227 sgp_dd_result() {
 228         local output=$1
 229
 230         awk '($(NF) == "MB/sec") {print $(NF-1)}' < $output
 231 }
 232 sgp_dd_cleanup() {
 233         # got me
 234         local nothing=0
 235 }
 236 sgp_dd_finish() {
 237         # got me
 238         local nothing=0
 239 }
 240 sgp_dd_teardown() {
 241         # got me
 242         local nothing=0
 243 }
 244
 245 ######################################################################
 246 # the iozone tests
 247 ext2_iozone_banner() {
 248         echo "iozone -I on a clean ext2 fs"
 249 }
 250 ext2_iozone_config() {
 251         local nothing=0
 252 }
 253 ext2_iozone_prepare() {
 254         local index=$1
 255         local bdev=${blocks[$index]}
 256         local mntpnt=$tmpdir/mount_$index
 257
 258         if ! which iozone; then
 259                 echo "iozone binary not found in PATH"
 260                 return 1
 261         fi
 262         if ! which mke2fs; then
 263                 echo "mke2fs binary not found in PATH"
 264                 return 1
 265         fi
 266
 267         if ! mkdir -p $mntpnt ; then
 268                 echo "$mntpnt isn't a directory?"
 269         fi
 270
 271         echo making ext2 filesystem on $bdev
 272         if ! mke2fs -b 4096 $bdev; then
 273                 echo "mke2fs failed"
 274                 return 1;
 275         fi
 276
 277         if ! mount -t ext2 $bdev $mntpnt; then
 278                 echo "couldn't mount $bdev on $mntpnt"
 279                 return 1;
 280         fi
 281
 282         cleanup_mounts[$index]="$mntpnt"
 283         return 0
 284 }
 285 ext2_iozone_setup() {
 286         local id=$1
 287         local wor=$2
 288         local f="$tmpdir/mount_$id/iozone"
 289
 290         case "$wor" in
 291                 w) rm -f $f ;;
 292                 r) ;;
 293                 *) die "asked to do io with $wor?"
 294         esac
 295 }
 296 ext2_iozone_start() {
 297         local threads=$1
 298         local iosize=$2
 299         local wor=$3
 300         local id=$4
 301         local args;
 302         local f="$tmpdir/mount_$id/iozone"
 303
 304         case "$wor" in
 305                 w) args="-i 0 -w" ;;
 306                 r) args="-i 1 -w" ;;
 307                 *) die "asked to do io with $wor?"
 308         esac
 309
 310         echo iozone "$args -r ${iosize}k -s ${io_len}k -I -f $f"
 311 }
 312 ext2_iozone_result() {
 313         local output=$1
 314
 315         kps=`awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
 316                 < $output`
 317         do_bc "$kps / 1024"
 318 }
 319 ext2_iozone_cleanup() {
 320         local id=$1
 321         local wor=$2
 322         local f="$tmpdir/mount_$id/iozone"
 323
 324         case "$wor" in
 325                 w) ;;
 326                 r) rm -f $f ;;
 327                 *) die "asked to do io with $wor?"
 328         esac
 329 }
 330 ext2_iozone_finish() {
 331         local index=$1
 332         local mntpnt=$tmpdir/mount_$index
 333
 334         umount -f $mntpnt
 335         unset cleanup_mounts[$index]
 336 }
 337 ext2_iozone_teardown() {
 338         local nothing=0
 339 }
 340
 341 ######################################################################
 342 # the lctl test_brw via the echo_client on top of the filter
 343
 344 # the echo_client setup is nutty enough to warrant its own clenaup
 345 running_config=""
 346 running_modules=""
 347 declare -a running_names
 348 declare -a running_oids
 349
 350 cleanup_echo_filter() {
 351         local i
 352
 353         for i in `seq 0 $last_block`; do
 354                 [ -z "${running_oids[$i]}" ] && continue
 355                 lctl --device "\$"echo_$i destroy ${running_oids[$i]} \
 356                         $running_threads
 357         done
 358         unset running_oids
 359
 360         for n in ${running_names[*]}; do
 361 # I can't believe leading whitespace matters here.
 362 lctl << EOF
 363 cfg_device $n
 364 cleanup
 365 detach
 366 quit
 367 EOF
 368         done
 369         running_names=""
 370
 371         for m in $running_modules; do
 372                 rmmod $m
 373         done
 374         running_modules=""
 375
 376         [ ! -z "$running_config" ] && lconf --cleanup $running_config
 377         running_config=""
 378 }
 379
 380 echo_filter_banner() {
 381         echo "test_brw on the echo_client on the filter"
 382 }
 383 echo_filter_config() {
 384         local index=$1
 385         local bdev=${blocks[$index]}
 386         local config="$tmpdir/config.xml"
 387
 388         if ! which lmc; then
 389                 echo "lmc binary not found in PATH"
 390                 return 1
 391         fi
 392         if ! which lconf; then
 393                 echo "lconf binary not found in PATH"
 394                 return 1
 395         fi
 396         if ! which lctl; then
 397                 echo "lctl binary not found in PATH"
 398                 return 1
 399         fi
 400
 401         if [ $index = 0 ]; then
 402                 if ! lmc -m $config --add net  \
 403                         --node localhost --nid localhost --nettype tcp; then
 404                         echo "error adding localhost net node"
 405                         return 1
 406                 fi
 407         fi
 408
 409         if ! lmc -m $config --add ost --ost ost_$index --node localhost \
 410                         --fstype ext3 --dev $bdev --journal_size 400; then
 411                 echo "error adding $bdev to config with lmc"
 412                 return 1
 413         fi
 414
 415         # it would be nice to be able to ask lmc to setup an echo client
 416         # to the filter here.  --add echo_client assumes osc
 417 }
 418 echo_filter_prepare() {
 419         local index=$1
 420         local bdev=${blocks[$index]}
 421         local config="$tmpdir/config.xml"
 422         local name="echo_$index"
 423         local uuid="echo_$index_uuid"
 424
 425         if [ $index = 0 ]; then
 426                 if ! lconf --reformat $config; then
 427                         echo "error setting up with lconf"
 428                         return 1;
 429                 fi
 430                 running_config="$config"
 431                 if ! grep -q '^obdecho\>' /proc/modules; then
 432                         if ! modprobe obdecho; then
 433                                 echo "error running modprobe obdecho"
 434                                 return 1;
 435                         fi
 436                         running_modules="obdecho"
 437                 fi
 438         fi
 439
 440 lctl << EOF
 441         newdev
 442         attach echo_client $name $uuid
 443         setup ost_$index
 444         quit
 445 EOF
 446         if [  $? != 0 ]; then
 447                 echo "error setting up echo_client $name against ost_$index"
 448                 return 1
 449         fi
 450         running_names[$index]=$name
 451 }
 452 echo_filter_setup() {
 453         local id=$1
 454         local wor=$2
 455         local threads=$3
 456         local name="echo_$id"
 457         local oid
 458
 459         case "$wor" in
 460                 w) ;;
 461                 r) return ;;
 462                 *) die "asked to do io with $wor?"
 463         esac
 464
 465         running_threads=$threads
 466         oid=`lctl --device "\$"$name create $threads | \
 467                 awk '/1 is object id/ { print $6 }'`
 468         # XXX need to deal with errors
 469         running_oids[$id]=$oid
 470 }
 471 echo_filter_start() {
 472         local threads=$1
 473         local iosize=$2
 474         local wor=$3
 475         local id=$4
 476         local name="echo_$id"
 477         local pages=$(($io_len / 4))
 478
 479         case "$wor" in
 480                 w) args="-i 0 -w" ;;
 481                 r) args="-i 1 -w" ;;
 482                 *) die "asked to do io with $wor?"
 483         esac
 484
 485         echo lctl --threads $threads v "\$"$name \
 486                 test_brw 1 w v $pages ${running_oids[$i]} p$iosize
 487 }
 488 echo_filter_result() {
 489         local output=$1
 490         local total=0
 491         local mbs
 492
 493         for mbs in `awk '($8=="MB/s):"){print substr($7,2)}' < $output`; do
 494                 total=$(do_bc $total + $mbs)
 495         done
 496         echo $total
 497 }
 498 echo_filter_cleanup() {
 499         local id=$1
 500         local wor=$2
 501         local threads=$3
 502         local name="echo_$id"
 503
 504         case "$wor" in
 505                 w) return ;;
 506                 r) ;;
 507                 *) die "asked to do io with $wor?"
 508         esac
 509
 510         lctl --device "\$"$name destroy ${running_oids[$i]} $threads
 511         unset running_oids[$i]
 512 }
 513 echo_filter_finish() {
 514         local index=$1
 515         # leave real work for _teardown
 516 }
 517 echo_filter_teardown() {
 518         cleanup_echo_filter
 519 }
 520
 521 ######################################################################
 522 # the iteration that drives the tests
 523
 524 test_one() {
 525         local test=$1
 526         local my_x=$2
 527         local my_y=$3
 528         local threads=$4
 529         local iosize=$5
 530         local wor=$6
 531         local vmstat_pid
 532         local vmstat_log="$tmpdir/vmstat.log"
 533         local opref="$test-$threads-$iosize-$wor"
 534
 535         for i in `seq 0 $last_block`; do
 536                 ${test}_setup $i $wor $threads
 537         done
 538
 539         echo $test with $threads threads
 540
 541         # start up vmstat and record its pid
 542         echo starting `date`
 543         nice -19 vmstat 1 > $vmstat_log 2>&1 &
 544         [ $? = 0 ] || die "vmstat failed"
 545         vmstat_pid=$!
 546         pid_now_running $vmstat_pid
 547
 548         # start all the tests.  each returns a pid to wait on
 549         pids=""
 550         for i in `seq 0 $last_block`; do
 551                 cmd=`${test}_start $threads $iosize $wor $i`
 552                 $cmd > $tmpdir/$i 2>&1 &
 553                 local pid=$!
 554                 pids="$pids $pid"
 555                 pid_now_running $pid
 556         done
 557
 558         echo -n waiting on pids $pids:
 559         for p in $pids; do
 560                 wait $p
 561                 echo -n .
 562                 pid_has_stopped $p
 563         done
 564         echo
 565
 566         # stop vmstat and get cpu use from it
 567         kill $vmstat_pid
 568         echo stopping `date`
 569         pid_has_stopped $vmstat_pid
 570         cpu=$(mean_stddev $(awk \
 571               '(NR > 3 && NF == 16 && $16 != "id" )     \
 572                 {print 100 - $16}' < $vmstat_log) )
 573         save_output $vmstat_log $opref.vmstat
 574
 575         # record each index's test results and sum them
 576         thru=0
 577         line=""
 578         for i in `seq 0 $last_block`; do
 579                 local t=`${test}_result $tmpdir/$i`
 580                 save_output $tmpdir/$i $opref.$i
 581                 echo test returned "$t"
 582                 line="$line $t"
 583                 # some tests return mean:stddev per thread, filter out stddev
 584                 thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
 585         done
 586         line="("`commas $line`")"
 587
 588         for i in `seq 0 $last_block`; do
 589                 ${test}_cleanup $i $wor $threads
 590         done
 591
 592         # tabulate the results
 593         echo $test did $thru mb/s with $cpu
 594         table_set $test $my_x $my_y $thru
 595         table_set $test $(($my_x + 1)) $my_y $cpu
 596         table_set $test $(($my_x + 2)) $my_y $line
 597 }
 598
 599 test_iterator() {
 600         local test=$1
 601         local thr=$min_threads
 602         local cleanup=""
 603         local rc=0
 604         local i
 605
 606         for i in `seq 0 $last_block`; do
 607                 if ! ${test}_config $i; then
 608                         echo "couldn't config $test for bdev ${blocks[$i]}"
 609                         echo "skipping $test for all block devices"
 610                         cleanup=$(($i - 1))
 611                         rc=1;
 612                         break
 613                 fi
 614         done
 615
 616         for i in `seq 0 $last_block`; do
 617                 # don't prepare if _config already failed
 618                 [ ! -z "$cleanup" ] && break
 619                 if ! ${test}_prepare $i; then
 620                         echo "couldn't prepare $test for bdev ${blocks[$i]}"
 621                         echo "skipping $test for all block devices"
 622                         cleanup=$(($i - 1))
 623                         rc=1;
 624                         break
 625                 fi
 626         done
 627
 628         while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
 629                 for iosize in 64 128; do
 630                         table_set $test 0 $cur_y $thr
 631                         table_set $test 1 $cur_y $iosize
 632                         table_set $test 2 $cur_y "|"
 633
 634                         for wor in w r; do
 635                                 table_set $test 3 $cur_y $wor
 636                                 test_one $test 4 $cur_y $thr $iosize $wor
 637                                 cur_y=$(($cur_y + 1))
 638                         done
 639                 done
 640                 thr=$(($thr + $thr))
 641         done
 642
 643         [ -z "$cleanup" ] && cleanup=$last_block
 644
 645         if [ "$cleanup" != -1 ]; then
 646                 for i in `seq $cleanup 0`; do
 647                         ${test}_finish $i
 648                 done
 649         fi
 650
 651         ${test}_teardown
 652
 653         return $rc;
 654 }
 655
 656 while getopts ":d:b:l:t:T:r:" opt; do
 657         case $opt in
 658                 b) block=$OPTARG                 ;;
 659                 d) output_dir=$OPTARG                 ;;
 660                 l) io_len=$OPTARG                       ;;
 661                 r) run_tests=$OPTARG                    ;;
 662                 t) min_threads=$OPTARG                  ;;
 663                 T) max_threads=$OPTARG                  ;;
 664                 \?) usage
 665         esac
 666 done
 667
 668 if [ -z "$io_len" ]; then
 669         io_len=`awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo`
 670         [ -z "$io_len" ] && die "couldn't determine the amount of memory"
 671 fi
 672
 673 if [ ! -z "$output_dir" ]; then
 674         [ ! -e "$output_dir" ] && "output dir $output_dir doesn't exist"
 675         [ ! -d "$output_dir" ] && "output dir $output_dir isn't a directory"
 676 fi
 677
 678 block=`echo $block | sed -e 's/,/ /g'`
 679 [ -z "$block" ] && usage "need block devices"
 680
 681 run_tests=`echo $run_tests | sed -e 's/,/ /g'`
 682 [ -z "$run_tests" ] && usage "need to specify tests to run with -r"
 683 for t in $run_tests; do
 684         if ! echo $possible_tests | grep -q $t ; then
 685                 die "$t isn't one of the possible tests: $possible_tests"
 686         fi
 687 done
 688
 689 [ $min_threads -gt $max_threads ] && \
 690         die "min threads $min_threads must be <= min_threads $min_threads"
 691
 692 last_block=-1
 693 for b in $block; do
 694         [ ! -e $b ] && die "block device file $b doesn't exist"
 695         [ ! -b $b ] && die "$b isn't a block device"
 696         dd if=$b of=/dev/null bs=8192 count=1 || \
 697                 die "couldn't read 8k from $b, is it alive?"
 698         [ ! -b $b ] && die "$b isn't a block device"
 699         last_block=$(($last_block + 1))
 700         blocks[$last_block]=$b
 701 done
 702
 703 tmpdir=`mktemp -d /tmp/.surveyXXXXXX` || die "couldn't create tmp dir"
 704
 705 echo each test will operate on $io_len"k"
 706
 707 test_results=""
 708
 709 for t in $run_tests; do
 710
 711         table_set $t 0 0 "T"
 712         table_set $t 1 0 "L"
 713         table_set $t 2 0 "|"
 714         table_set $t 3 0 "W"
 715         table_set $t 5 0 "C:S"
 716         table_set $t 6 0 "B"
 717         cur_y=1;
 718
 719         if ! test_iterator $t; then
 720                 continue;
 721         fi
 722         test_results="$test_results $t"
 723 done
 724
 725 [ ! -z "$test_results" ] && (
 726         echo
 727         echo "T = number of concurrent threads per device"
 728         echo "L = base io operation length, in KB"
 729         echo "W/O/R = write/overwrite/read throughput, in MB/s"
 730         echo "C = percentage CPU used, both user and system"
 731         echo "S = standard deviation in cpu use"
 732         echo "B = per-block results: ("`echo ${blocks[*]} | sed -e 's/ /,/g'`")"
 733         echo
 734 )
 735
 736 for t in $test_results; do
 737         ${t}_banner
 738         table_dump $t
 739 done