lustre/scripts/bdev-io-survey.sh

   1 #!/bin/bash
   2
   3 # for now all the units are in 'k', but we could introduce some helpers
   4 # would be nice to run tests in the background and trap signals and kill
   5 #
   6 #  todo:
   7 #       make sure devices aren't in use before going to town
   8 #       really use threads with iozone
   9 #       look into what sgp_dd is really doing, update arguments
  10 #       rename config/prepare/setup/cleanup/finish/teardown
  11 #       do something with sf and fpp iterating
  12 #       discard first vmstat line
  13 #
  14
  15 # a temp dir that is setup and torn down for each script run
  16 tmpdir=""
  17 # so we can kill background processes as the test cleans up
  18 declare -a cleanup_pids
  19 # to unmount mounts in our tmpdir before removing it
  20 declare -a cleanup_mounts
  21 # global for completing the table.  XXX this is a wart that could go
  22 cur_y="0"
  23
  24 # defaults for some options:
  25 min_threads=1
  26 max_threads=4
  27
  28 # optional output directory
  29 output_dir=""
  30
  31 die() {
  32         echo $* 1>&2
  33         exit 1
  34 }
  35 rm_or_die() {
  36         for path in $*; do
  37                 [ -e $path ] || continue;
  38                 [ -f $path ] || die "needed to remove non-file $path"
  39                 rm -f $path || die "couldn't remove $path"
  40         done
  41 }
  42 save_output() {
  43         [ ! -z "$output_dir" ] && mv -f $1 $output_dir/$2
  44 }
  45 cleanup() {
  46         for pid in ${cleanup_pids[*]}; do
  47                 kill $pid
  48         done
  49         cleanup_echo_filter
  50         for a in ${cleanup_mounts[*]}; do
  51                 umount -f $a
  52         done
  53         [ ${#tmpdir} == 18 ] && [ -d $tmpdir ] && rm -rf $tmpdir
  54 }
  55 trap cleanup EXIT
  56
  57 pid_now_running() {
  58         local pid=$1
  59         cleanup_pids[$pid]=$pid
  60 }
  61 pid_has_stopped() {
  62         local pid=$1
  63         unset cleanup_pids[$pid]
  64 }
  65
  66 commas() {
  67         echo $* | sed -e 's/ /,/g'
  68 }
  69 do_bc() {
  70         echo "scale=2; $*" | bc
  71 }
  72 mean_stddev() {
  73         local points=$*
  74
  75         local avg=0
  76         local num=0
  77         for p in $points; do
  78                 avg=`do_bc $avg + $p`
  79                 num=`do_bc $num + 1`
  80         done
  81         case $num in
  82                 0) echo '??' ; return ;;
  83                 1) echo "$avg:0" ; return ;;
  84         esac
  85
  86         avg=`do_bc $avg / $num`
  87         local tmp=0
  88         for p in $points; do
  89                 local dev=`do_bc \($p - $avg\) \^ 2`
  90                 tmp=`do_bc $tmp + $dev`
  91         done
  92         tmp=`do_bc sqrt \( $tmp / \($num - 1\) \)`
  93         echo "$avg:$tmp"
  94 }
  95
  96 usage() {
  97         echo $*
  98         echo "       -b <block device to profile>"
  99         echo "       -d <summary output directory>"
 100         echo "       -l <max io len>"
 101         echo "       -t <minimum number of threads per device>"
 102         echo "       -T <maximum number of threads per device>"
 103         exit;
 104 }
 105
 106 # some cute code for handling tables whose columns fit
 107 set_max() {
 108         local target=$1
 109         local val=$2
 110
 111         if [ $val -gt ${!target:-0} ]; then
 112                 eval $target=$val
 113         fi
 114 }
 115 table_set() {
 116         local name="_table_$1"
 117         local col=$2
 118         local row=$3
 119         local val=$4
 120         local num
 121
 122         eval ${name}_${row}_${col}="'$val'"
 123
 124         set_max ${name}_${col}_longest ${#val}
 125         set_max ${name}_num_col $(($col + 1))
 126         set_max ${name}_num_row $(($row + 1))
 127 }
 128
 129 table_get() {
 130         local name="_table_$1"
 131         local col=$2
 132         local row=$3
 133         tmp="${name}_${row}_${col}"
 134         echo ${!tmp}
 135 }
 136
 137 table_dump() {
 138         local name="_table_$1"
 139         local num_col;
 140         local num_row;
 141         local fmt="";
 142         local tmp
 143         local sep
 144
 145         tmp="${name}_num_col"
 146         num_col="${!tmp:-0}"
 147         tmp="${name}_num_row"
 148         num_row="${!tmp:-0}"
 149
 150         # iterate through the columns to find the longest
 151
 152         sep=" "
 153         for x in `seq 0 $num_col`; do
 154                 tmp="${name}_${x}_longest"
 155                 tmp=${!tmp:-0}
 156                 [ $tmp -eq 0 ] && continue
 157
 158                 [ $x -eq $((num_col - 1)) ] && sep='\n'
 159
 160                 fmt="$fmt%-${tmp}s$sep"
 161         done
 162
 163         # nothing in the table to print
 164         [ -z "$fmt" ] && return
 165
 166         for y in `seq 0 $num_row`; do
 167                 local row=""
 168                 for x in `seq 0 $num_col`; do
 169
 170                         # skip this element if the column is empty
 171                         tmp="${name}_${x}_longest"
 172                         [ ${!tmp:-0} -eq 0 ] && continue
 173
 174                         # fill this cell with the value or '' for printf
 175                         tmp="${name}_${y}_${x}"
 176                         row="$row'${!tmp:-""}' "
 177                 done
 178                 eval printf "'$fmt'" $row
 179         done
 180 }
 181
 182 ######################################################################
 183 # the sgp_dd tests
 184 sgp_dd_banner() {
 185         echo sgp_dd using dio=1 and thr=
 186 }
 187 sgp_dd_config() {
 188         # it could be making sure that the block dev
 189         # isn't in use by something else
 190         local nothing=0
 191 }
 192 sgp_dd_prepare() {
 193         if ! which sgp_dd; then
 194                 echo "can't find sgp_dd binary"
 195                 return 1
 196         fi
 197         return 0
 198 }
 199 sgp_dd_setup() {
 200         # it could be making sure that the block dev
 201         # isn't in use by something else
 202         local nothing=0
 203 }
 204 sgp_dd_start() {
 205         local threads=$1
 206         local iosize=$2
 207         local wor=$3
 208         local i=$4
 209         local ifof;
 210         local bdev=${blocks[$i]};
 211
 212         case "$wor" in
 213                 w) ifof="if=/dev/zero of=$bdev" ;;
 214                 r) ifof="if=$bdev of=/dev/null" ;;
 215                 *) die "asked to do io with $wor?"
 216         esac
 217         echo sgp_dd $ifof bs=$iosize"k" count=$(($io_len / $iosize)) time=1 \
 218                         dio=1 thr=$threads
 219 }
 220 sgp_dd_result() {
 221         local output=$1
 222
 223         awk '($(NF) == "MB/sec") {print $(NF-1)}' < $output
 224 }
 225 sgp_dd_cleanup() {
 226         # got me
 227         local nothing=0
 228 }
 229 sgp_dd_finish() {
 230         # got me
 231         local nothing=0
 232 }
 233 sgp_dd_teardown() {
 234         # got me
 235         local nothing=0
 236 }
 237
 238 ######################################################################
 239 # the iozone tests
 240 ext2_iozone_banner() {
 241         echo "iozone -I on a clean ext2 fs"
 242 }
 243 ext2_iozone_config() {
 244         local nothing=0
 245 }
 246 ext2_iozone_prepare() {
 247         local index=$1
 248         local bdev=${blocks[$index]}
 249         local mntpnt=$tmpdir/mount_$index
 250
 251         if ! which iozone; then
 252                 echo "iozone binary not found in PATH"
 253                 return 1
 254         fi
 255         if ! which mke2fs; then
 256                 echo "mke2fs binary not found in PATH"
 257                 return 1
 258         fi
 259
 260         if ! mkdir -p $mntpnt ; then
 261                 echo "$mntpnt isn't a directory?"
 262         fi
 263
 264         echo making ext2 filesystem on $bdev
 265         if ! mke2fs -b 4096 $bdev; then
 266                 echo "mke2fs failed"
 267                 return 1;
 268         fi
 269
 270         if ! mount -t ext2 $bdev $mntpnt; then
 271                 echo "couldn't mount $bdev on $mntpnt"
 272                 return 1;
 273         fi
 274
 275         cleanup_mounts[$index]="$mntpnt"
 276         return 0
 277 }
 278 ext2_iozone_setup() {
 279         local id=$1
 280         local wor=$2
 281         local f="$tmpdir/mount_$id/iozone"
 282
 283         case "$wor" in
 284                 w) rm -f $f ;;
 285                 r) ;;
 286                 *) die "asked to do io with $wor?"
 287         esac
 288 }
 289 ext2_iozone_start() {
 290         local threads=$1
 291         local iosize=$2
 292         local wor=$3
 293         local id=$4
 294         local args;
 295         local f="$tmpdir/mount_$id/iozone"
 296
 297         case "$wor" in
 298                 w) args="-i 0 -w" ;;
 299                 r) args="-i 1 -w" ;;
 300                 *) die "asked to do io with $wor?"
 301         esac
 302
 303         echo iozone "$args -r ${iosize}k -s ${io_len}k -I -f $f"
 304 }
 305 ext2_iozone_result() {
 306         local output=$1
 307
 308         kps=`awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
 309                 < $output`
 310         do_bc "$kps / 1024"
 311 }
 312 ext2_iozone_cleanup() {
 313         local id=$1
 314         local wor=$2
 315         local f="$tmpdir/mount_$id/iozone"
 316
 317         case "$wor" in
 318                 w) ;;
 319                 r) rm -f $f ;;
 320                 *) die "asked to do io with $wor?"
 321         esac
 322 }
 323 ext2_iozone_finish() {
 324         local index=$1
 325         local mntpnt=$tmpdir/mount_$index
 326
 327         umount -f $mntpnt
 328         unset cleanup_mounts[$index]
 329 }
 330 ext2_iozone_teardown() {
 331         local nothing=0
 332 }
 333
 334 ######################################################################
 335 # the lctl test_brw via the echo_client on top of the filter
 336
 337 # the echo_client setup is nutty enough to warrant its own clenaup
 338 running_config=""
 339 running_modules=""
 340 declare -a running_names
 341
 342 cleanup_echo_filter() {
 343         local i
 344
 345         for i in `seq 0 $last_block`; do
 346                 [ -z "${running_oids[$i]}" ] && continue
 347                 lctl --device "\$"echo_$i destroy ${running_oids[$i]} \
 348                         $running_threads
 349         done
 350         running_oids=""
 351
 352         for n in ${running_names[*]}; do
 353 # I can't believe leading whitespace matters here.
 354 lctl << EOF
 355 cfg_device $n
 356 cleanup
 357 detach
 358 quit
 359 EOF
 360         done
 361         running_names=""
 362
 363         for m in $running_modules; do
 364                 rmmod $m
 365         done
 366         running_modules=""
 367
 368         [ ! -z "$running_config" ] && lconf --cleanup $running_config
 369         running_config=""
 370 }
 371
 372 echo_filter_banner() {
 373         echo "test_brw on the echo_client on the filter"
 374 }
 375 echo_filter_config() {
 376         local index=$1
 377         local bdev=${blocks[$index]}
 378         local config="$tmpdir/config.xml"
 379
 380         if ! which lmc; then
 381                 echo "lmc binary not found in PATH"
 382                 return 1
 383         fi
 384         if ! which lconf; then
 385                 echo "lconf binary not found in PATH"
 386                 return 1
 387         fi
 388         if ! which lctl; then
 389                 echo "lctl binary not found in PATH"
 390                 return 1
 391         fi
 392
 393         if [ $index = 0 ]; then
 394                 if ! lmc -m $config --add net  \
 395                         --node localhost --nid localhost --nettype tcp; then
 396                         echo "error adding localhost net node"
 397                         return 1
 398                 fi
 399         fi
 400
 401         if ! lmc -m $config --add ost --ost ost_$index --node localhost \
 402                         --fstype ext3 --dev $bdev --journal_size 400; then
 403                 echo "error adding $bdev to config with lmc"
 404                 return 1
 405         fi
 406
 407         # it would be nice to be able to ask lmc to setup an echo client
 408         # to the filter here.  --add echo_client assumes osc
 409 }
 410 echo_filter_prepare() {
 411         local index=$1
 412         local bdev=${blocks[$index]}
 413         local config="$tmpdir/config.xml"
 414         local name="echo_$index"
 415         local uuid="echo_$index_uuid"
 416
 417         if [ $index = 0 ]; then
 418                 if ! lconf --reformat $config; then
 419                         echo "error setting up with lconf"
 420                         return 1;
 421                 fi
 422                 running_config="$config"
 423                 if ! grep -q '^obdecho\>' /proc/modules; then
 424                         if ! modprobe obdecho; then
 425                                 echo "error running modprobe obdecho"
 426                                 return 1;
 427                         fi
 428                         running_modules="obdecho"
 429                 fi
 430         fi
 431
 432 lctl << EOF
 433         newdev
 434         attach echo_client $name $uuid
 435         setup ost_$index
 436         quit
 437 EOF
 438         if [  $? != 0 ]; then
 439                 echo "error setting up echo_client $name against ost_$index"
 440                 return 1
 441         fi
 442         running_names[$index]=$name
 443 }
 444 echo_filter_setup() {
 445         local id=$1
 446         local wor=$2
 447         local threads=$3
 448         local name="echo_$id"
 449         local oid
 450
 451         case "$wor" in
 452                 w) ;;
 453                 r) return ;;
 454                 *) die "asked to do io with $wor?"
 455         esac
 456
 457         running_threads=$threads
 458         oid=`lctl --device "\$"$name create $threads | \
 459                 awk '/1 is object id/ { print $6 }'`
 460         # XXX need to deal with errors
 461         running_oids[$id]=$oid
 462 }
 463 echo_filter_start() {
 464         local threads=$1
 465         local iosize=$2
 466         local wor=$3
 467         local id=$4
 468         local name="echo_$id"
 469         local pages=$(($io_len / 4))
 470
 471         case "$wor" in
 472                 w) args="-i 0 -w" ;;
 473                 r) args="-i 1 -w" ;;
 474                 *) die "asked to do io with $wor?"
 475         esac
 476
 477         echo lctl --threads $threads v "\$"$name \
 478                 test_brw 1 w v $pages ${running_oids[$i]} p$iosize
 479 }
 480 echo_filter_result() {
 481         local output=$1
 482         local total=0
 483         local mbs
 484
 485         for mbs in `awk '($8=="MB/s):"){print substr($7,2)}' < $output`; do
 486                 total=$(do_bc $total + $mbs)
 487         done
 488         echo $total
 489 }
 490 echo_filter_cleanup() {
 491         local id=$1
 492         local wor=$2
 493         local threads=$3
 494         local name="echo_$id"
 495
 496         case "$wor" in
 497                 w) return ;;
 498                 r) ;;
 499                 *) die "asked to do io with $wor?"
 500         esac
 501
 502         lctl --device "\$"$name destroy ${running_oids[$i]} $threads
 503         unset running_oids[$i]
 504 }
 505 echo_filter_finish() {
 506         local index=$1
 507         # leave real work for _teardown
 508 }
 509 echo_filter_teardown() {
 510         cleanup_echo_filter
 511 }
 512
 513 ######################################################################
 514 # the iteration that drives the tests
 515
 516 test_one() {
 517         local test=$1
 518         local my_x=$2
 519         local my_y=$3
 520         local threads=$4
 521         local iosize=$5
 522         local wor=$6
 523         local vmstat_pid
 524         local vmstat_log="$tmpdir/vmstat.log"
 525         local opref="$test-$threads-$iosize-$wor"
 526
 527         for i in `seq 0 $last_block`; do
 528                 ${test}_setup $i $wor $threads
 529         done
 530
 531         echo $test with $threads threads
 532
 533         # start up vmstat and record its pid
 534         echo starting `date`
 535         nice -19 vmstat 1 > $vmstat_log 2>&1 &
 536         [ $? = 0 ] || die "vmstat failed"
 537         vmstat_pid=$!
 538         pid_now_running $vmstat_pid
 539
 540         # start all the tests.  each returns a pid to wait on
 541         pids=""
 542         for i in `seq 0 $last_block`; do
 543                 cmd=`${test}_start $threads $iosize $wor $i`
 544                 $cmd > $tmpdir/$i 2>&1 &
 545                 local pid=$!
 546                 pids="$pids $pid"
 547                 pid_now_running $pid
 548         done
 549
 550         echo -n waiting on pids $pids:
 551         for p in $pids; do
 552                 wait $p
 553                 echo -n .
 554                 pid_has_stopped $p
 555         done
 556         echo
 557
 558         # stop vmstat and get cpu use from it
 559         kill $vmstat_pid
 560         echo stopping `date`
 561         pid_has_stopped $vmstat_pid
 562         cpu=$(mean_stddev $(awk \
 563               '(NR > 3 && NF == 16 && $16 != "id" )     \
 564                 {print 100 - $16}' < $vmstat_log) )
 565         save_output $vmstat_log $opref.vmstat
 566
 567         # record each index's test results and sum them
 568         thru=0
 569         line=""
 570         for i in `seq 0 $last_block`; do
 571                 local t=`${test}_result $tmpdir/$i`
 572                 save_output $tmpdir/$i $opref.$i
 573                 echo test returned "$t"
 574                 line="$line $t"
 575                 # some tests return mean:stddev per thread, filter out stddev
 576                 thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
 577         done
 578         line="("`commas $line`")"
 579
 580         for i in `seq 0 $last_block`; do
 581                 ${test}_cleanup $i $wor $threads
 582         done
 583
 584         # tabulate the results
 585         echo $test did $thru mb/s with $cpu
 586         table_set $test $my_x $my_y $thru
 587         table_set $test $(($my_x + 1)) $my_y $cpu
 588         table_set $test $(($my_x + 2)) $my_y $line
 589 }
 590
 591 test_iterator() {
 592         local test=$1
 593         local thr=$min_threads
 594         local cleanup=""
 595         local rc=0
 596         local i
 597
 598         for i in `seq 0 $last_block`; do
 599                 if ! ${test}_config $i; then
 600                         echo "couldn't config $test for bdev ${blocks[$i]}"
 601                         echo "skipping $test for all block devices"
 602                         cleanup=$(($i - 1))
 603                         rc=1;
 604                         break
 605                 fi
 606         done
 607
 608         for i in `seq 0 $last_block`; do
 609                 # don't prepare if _config already failed
 610                 [ ! -z "$cleanup" ] && break
 611                 if ! ${test}_prepare $i; then
 612                         echo "couldn't prepare $test for bdev ${blocks[$i]}"
 613                         echo "skipping $test for all block devices"
 614                         cleanup=$(($i - 1))
 615                         rc=1;
 616                         break
 617                 fi
 618         done
 619
 620         while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
 621                 for iosize in 64 128; do
 622                         table_set $test 0 $cur_y $thr
 623                         table_set $test 1 $cur_y $iosize
 624                         table_set $test 2 $cur_y "|"
 625
 626                         for wor in w r; do
 627                                 table_set $test 3 $cur_y $wor
 628                                 test_one $test 4 $cur_y $thr $iosize $wor
 629                                 cur_y=$(($cur_y + 1))
 630                         done
 631                 done
 632                 thr=$(($thr + $thr))
 633         done
 634
 635         [ -z "$cleanup" ] && cleanup=$last_block
 636
 637         if [ "$cleanup" != -1 ]; then
 638                 for i in `seq $cleanup 0`; do
 639                         ${test}_finish $i
 640                 done
 641         fi
 642
 643         ${test}_teardown
 644
 645         return $rc;
 646 }
 647
 648 while getopts ":d:b:l:t:T:" opt; do
 649         case $opt in
 650                 b) block=$OPTARG                 ;;
 651                 d) output_dir=$OPTARG                 ;;
 652                 l) io_len=$OPTARG                       ;;
 653                 t) min_threads=$OPTARG                  ;;
 654                 T) max_threads=$OPTARG                  ;;
 655                 \?) usage
 656         esac
 657 done
 658
 659 if [ -z "$io_len" ]; then
 660         io_len=`awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo`
 661         [ -z "$io_len" ] && die "couldn't determine the amount of memory"
 662 fi
 663
 664 if [ ! -z "$output_dir" ]; then
 665         [ ! -e "$output_dir" ] && "output dir $output_dir doesn't exist"
 666         [ ! -d "$output_dir" ] && "output dir $output_dir isn't a directory"
 667 fi
 668
 669 block=`echo $block | sed -e 's/,/ /g'`
 670 [ -z "$block" ] && usage "need block devices"
 671
 672 [ $min_threads -gt $max_threads ] && \
 673         die "min threads $min_threads must be <= min_threads $min_threads"
 674
 675 last_block=-1
 676 for b in $block; do
 677         [ ! -e $b ] && die "block device file $b doesn't exist"
 678         [ ! -b $b ] && die "$b isn't a block device"
 679         last_block=$(($last_block + 1))
 680         blocks[$last_block]=$b
 681 done
 682
 683 tmpdir=`mktemp -d /tmp/.surveyXXXXXX` || die "couldn't create tmp dir"
 684
 685 echo each test will operate on $io_len"k"
 686
 687 tests="sgp_dd ext2_iozone echo_filter"
 688 test_results=""
 689
 690 for t in $tests; do
 691
 692         table_set $t 0 0 "T"
 693         table_set $t 1 0 "L"
 694         table_set $t 2 0 "|"
 695         table_set $t 3 0 "W"
 696         table_set $t 5 0 "C:S"
 697         table_set $t 6 0 "B"
 698         cur_y=1;
 699
 700         if ! test_iterator $t; then
 701                 continue;
 702         fi
 703         test_results="$test_results $t"
 704 done
 705
 706 [ ! -z "$test_results" ] && (
 707         echo
 708         echo "T = number of concurrent threads per device"
 709         echo "L = base io operation length, in KB"
 710         echo "W/O/R = write/overwrite/read throughput, in MB/s"
 711         echo "C = percentage CPU used, both user and system"
 712         echo "S = standard deviation in cpu use"
 713         echo "B = per-block results: ("`echo ${blocks[*]} | sed -e 's/ /,/g'`")"
 714         echo
 715 )
 716
 717 for t in $test_results; do
 718         ${t}_banner
 719         table_dump $t
 720 done