lustre/scripts/bdev-io-survey.sh

   1 #!/bin/bash
   2
   3 # for now all the units are in 'k', but we could introduce some helpers
   4 # would be nice to run tests in the background and trap signals and kill
   5 #
   6 #  todo:
   7 #       make sure devices aren't in use before going to town
   8 #       really use threads with iozone
   9 #       look into what sgp_dd is really doing, update arguments
  10 #       rename config/prepare/setup/cleanup/finish/teardown
  11 #       do something with sf and fpp iterating
  12 #       discard first vmstat line
  13 #
  14
  15 # a temp dir that is setup and torn down for each script run
  16 tmpdir=""
  17 # so we can kill background processes as the test cleans up
  18 declare -a cleanup_pids
  19 # to unmount mounts in our tmpdir before removing it
  20 declare -a cleanup_mounts
  21 # global for completing the table.  XXX this is a wart that could go
  22 cur_y="0"
  23
  24 # defaults for some options:
  25 min_threads=1
  26 max_threads=4
  27 possible_tests="sgp_dd ext2_iozone echo_filter"
  28 run_tests="$possible_tests"
  29
  30 # optional output directory
  31 output_dir=""
  32
  33 die() {
  34         echo $* 1>&2
  35         exit 1
  36 }
  37 rm_or_die() {
  38         for path in $*; do
  39                 [ -e $path ] || continue;
  40                 [ -f $path ] || die "needed to remove non-file $path"
  41                 rm -f $path || die "couldn't remove $path"
  42         done
  43 }
  44 save_output() {
  45         [ ! -z "$output_dir" ] && mv -f $1 $output_dir/$2
  46 }
  47 cleanup() {
  48         for pid in ${cleanup_pids[*]}; do
  49                 kill $pid
  50         done
  51         cleanup_echo_filter
  52         for a in ${cleanup_mounts[*]}; do
  53                 umount -f $a
  54         done
  55         [ ${#tmpdir} == 18 ] && [ -d $tmpdir ] && rm -rf $tmpdir
  56 }
  57 trap cleanup EXIT
  58
  59 pid_now_running() {
  60         local pid=$1
  61         cleanup_pids[$pid]=$pid
  62 }
  63 pid_has_stopped() {
  64         local pid=$1
  65         unset cleanup_pids[$pid]
  66 }
  67
  68 commas() {
  69         echo $* | sed -e 's/ /,/g'
  70 }
  71 do_bc() {
  72         echo "scale=2; $*" | bc
  73 }
  74 mean_stddev() {
  75         local points=$*
  76
  77         local avg=0
  78         local num=0
  79         for p in $points; do
  80                 avg=`do_bc $avg + $p`
  81                 num=`do_bc $num + 1`
  82         done
  83         case $num in
  84                 0) echo '??' ; return ;;
  85                 1) echo "$avg:0" ; return ;;
  86         esac
  87
  88         avg=`do_bc $avg / $num`
  89         local tmp=0
  90         for p in $points; do
  91                 local dev=`do_bc \($p - $avg\) \^ 2`
  92                 tmp=`do_bc $tmp + $dev`
  93         done
  94         tmp=`do_bc sqrt \( $tmp / \($num - 1\) \)`
  95         echo "$avg:$tmp"
  96 }
  97
  98 usage() {
  99         echo $*
 100         echo "       -b <block device to profile>"
 101         echo "       -d <summary output directory>"
 102         echo "       -l <max io len>"
 103         echo "       -t <minimum number of threads per device>"
 104         echo "       -T <maximum number of threads per device>"
 105         echo "       -r <tests to run>"
 106         exit;
 107 }
 108
 109 # some cute code for handling tables whose columns fit
 110 set_max() {
 111         local target=$1
 112         local val=$2
 113
 114         if [ $val -gt ${!target:-0} ]; then
 115                 eval $target=$val
 116         fi
 117 }
 118 table_set() {
 119         local name="_table_$1"
 120         local col=$2
 121         local row=$3
 122         local val=$4
 123         local num
 124
 125         eval ${name}_${row}_${col}="'$val'"
 126
 127         set_max ${name}_${col}_longest ${#val}
 128         set_max ${name}_num_col $(($col + 1))
 129         set_max ${name}_num_row $(($row + 1))
 130 }
 131
 132 table_get() {
 133         local name="_table_$1"
 134         local col=$2
 135         local row=$3
 136         tmp="${name}_${row}_${col}"
 137         echo ${!tmp}
 138 }
 139
 140 table_dump() {
 141         local name="_table_$1"
 142         local num_col;
 143         local num_row;
 144         local fmt="";
 145         local tmp
 146         local sep
 147
 148         tmp="${name}_num_col"
 149         num_col="${!tmp:-0}"
 150         tmp="${name}_num_row"
 151         num_row="${!tmp:-0}"
 152
 153         # iterate through the columns to find the longest
 154
 155         sep=" "
 156         for x in `seq 0 $num_col`; do
 157                 tmp="${name}_${x}_longest"
 158                 tmp=${!tmp:-0}
 159                 [ $tmp -eq 0 ] && continue
 160
 161                 [ $x -eq $((num_col - 1)) ] && sep='\n'
 162
 163                 fmt="$fmt%-${tmp}s$sep"
 164         done
 165
 166         # nothing in the table to print
 167         [ -z "$fmt" ] && return
 168
 169         for y in `seq 0 $num_row`; do
 170                 local row=""
 171                 for x in `seq 0 $num_col`; do
 172
 173                         # skip this element if the column is empty
 174                         tmp="${name}_${x}_longest"
 175                         [ ${!tmp:-0} -eq 0 ] && continue
 176
 177                         # fill this cell with the value or '' for printf
 178                         tmp="${name}_${y}_${x}"
 179                         row="$row'${!tmp:-""}' "
 180                 done
 181                 eval printf "'$fmt'" $row
 182         done
 183 }
 184
 185 ######################################################################
 186 # the sgp_dd tests
 187 sgp_dd_banner() {
 188         echo sgp_dd using dio=1 and thr=
 189 }
 190 sgp_dd_config() {
 191         # it could be making sure that the block dev
 192         # isn't in use by something else
 193         local nothing=0
 194 }
 195 sgp_dd_prepare() {
 196         if ! which sgp_dd; then
 197                 echo "can't find sgp_dd binary"
 198                 return 1
 199         fi
 200         return 0
 201 }
 202 sgp_dd_setup() {
 203         # it could be making sure that the block dev
 204         # isn't in use by something else
 205         local nothing=0
 206 }
 207 sgp_dd_start() {
 208         local threads=$1
 209         local iosize=$2
 210         local wor=$3
 211         local i=$4
 212         local ifof;
 213         local bdev=${blocks[$i]};
 214
 215         case "$wor" in
 216                 w) ifof="if=/dev/zero of=$bdev" ;;
 217                 r) ifof="if=$bdev of=/dev/null" ;;
 218                 *) die "asked to do io with $wor?"
 219         esac
 220         echo sgp_dd $ifof bs=$iosize"k" count=$(($io_len / $iosize)) time=1 \
 221                         dio=1 thr=$threads
 222 }
 223 sgp_dd_result() {
 224         local output=$1
 225
 226         awk '($(NF) == "MB/sec") {print $(NF-1)}' < $output
 227 }
 228 sgp_dd_cleanup() {
 229         # got me
 230         local nothing=0
 231 }
 232 sgp_dd_finish() {
 233         # got me
 234         local nothing=0
 235 }
 236 sgp_dd_teardown() {
 237         # got me
 238         local nothing=0
 239 }
 240
 241 ######################################################################
 242 # the iozone tests
 243 ext2_iozone_banner() {
 244         echo "iozone -I on a clean ext2 fs"
 245 }
 246 ext2_iozone_config() {
 247         local nothing=0
 248 }
 249 ext2_iozone_prepare() {
 250         local index=$1
 251         local bdev=${blocks[$index]}
 252         local mntpnt=$tmpdir/mount_$index
 253
 254         if ! which iozone; then
 255                 echo "iozone binary not found in PATH"
 256                 return 1
 257         fi
 258         if ! which mke2fs; then
 259                 echo "mke2fs binary not found in PATH"
 260                 return 1
 261         fi
 262
 263         if ! mkdir -p $mntpnt ; then
 264                 echo "$mntpnt isn't a directory?"
 265         fi
 266
 267         echo making ext2 filesystem on $bdev
 268         if ! mke2fs -b 4096 $bdev; then
 269                 echo "mke2fs failed"
 270                 return 1;
 271         fi
 272
 273         if ! mount -t ext2 $bdev $mntpnt; then
 274                 echo "couldn't mount $bdev on $mntpnt"
 275                 return 1;
 276         fi
 277
 278         cleanup_mounts[$index]="$mntpnt"
 279         return 0
 280 }
 281 ext2_iozone_setup() {
 282         local id=$1
 283         local wor=$2
 284         local f="$tmpdir/mount_$id/iozone"
 285
 286         case "$wor" in
 287                 w) rm -f $f ;;
 288                 r) ;;
 289                 *) die "asked to do io with $wor?"
 290         esac
 291 }
 292 ext2_iozone_start() {
 293         local threads=$1
 294         local iosize=$2
 295         local wor=$3
 296         local id=$4
 297         local args;
 298         local f="$tmpdir/mount_$id/iozone"
 299
 300         case "$wor" in
 301                 w) args="-i 0 -w" ;;
 302                 r) args="-i 1 -w" ;;
 303                 *) die "asked to do io with $wor?"
 304         esac
 305
 306         echo iozone "$args -r ${iosize}k -s ${io_len}k -I -f $f"
 307 }
 308 ext2_iozone_result() {
 309         local output=$1
 310
 311         kps=`awk '($2 == "reclen"){results=NR+1}(results == NR){print $3}' \
 312                 < $output`
 313         do_bc "$kps / 1024"
 314 }
 315 ext2_iozone_cleanup() {
 316         local id=$1
 317         local wor=$2
 318         local f="$tmpdir/mount_$id/iozone"
 319
 320         case "$wor" in
 321                 w) ;;
 322                 r) rm -f $f ;;
 323                 *) die "asked to do io with $wor?"
 324         esac
 325 }
 326 ext2_iozone_finish() {
 327         local index=$1
 328         local mntpnt=$tmpdir/mount_$index
 329
 330         umount -f $mntpnt
 331         unset cleanup_mounts[$index]
 332 }
 333 ext2_iozone_teardown() {
 334         local nothing=0
 335 }
 336
 337 ######################################################################
 338 # the lctl test_brw via the echo_client on top of the filter
 339
 340 # the echo_client setup is nutty enough to warrant its own clenaup
 341 running_config=""
 342 running_modules=""
 343 declare -a running_names
 344
 345 cleanup_echo_filter() {
 346         local i
 347
 348         for i in `seq 0 $last_block`; do
 349                 [ -z "${running_oids[$i]}" ] && continue
 350                 lctl --device "\$"echo_$i destroy ${running_oids[$i]} \
 351                         $running_threads
 352         done
 353         running_oids=""
 354
 355         for n in ${running_names[*]}; do
 356 # I can't believe leading whitespace matters here.
 357 lctl << EOF
 358 cfg_device $n
 359 cleanup
 360 detach
 361 quit
 362 EOF
 363         done
 364         running_names=""
 365
 366         for m in $running_modules; do
 367                 rmmod $m
 368         done
 369         running_modules=""
 370
 371         [ ! -z "$running_config" ] && lconf --cleanup $running_config
 372         running_config=""
 373 }
 374
 375 echo_filter_banner() {
 376         echo "test_brw on the echo_client on the filter"
 377 }
 378 echo_filter_config() {
 379         local index=$1
 380         local bdev=${blocks[$index]}
 381         local config="$tmpdir/config.xml"
 382
 383         if ! which lmc; then
 384                 echo "lmc binary not found in PATH"
 385                 return 1
 386         fi
 387         if ! which lconf; then
 388                 echo "lconf binary not found in PATH"
 389                 return 1
 390         fi
 391         if ! which lctl; then
 392                 echo "lctl binary not found in PATH"
 393                 return 1
 394         fi
 395
 396         if [ $index = 0 ]; then
 397                 if ! lmc -m $config --add net  \
 398                         --node localhost --nid localhost --nettype tcp; then
 399                         echo "error adding localhost net node"
 400                         return 1
 401                 fi
 402         fi
 403
 404         if ! lmc -m $config --add ost --ost ost_$index --node localhost \
 405                         --fstype ext3 --dev $bdev --journal_size 400; then
 406                 echo "error adding $bdev to config with lmc"
 407                 return 1
 408         fi
 409
 410         # it would be nice to be able to ask lmc to setup an echo client
 411         # to the filter here.  --add echo_client assumes osc
 412 }
 413 echo_filter_prepare() {
 414         local index=$1
 415         local bdev=${blocks[$index]}
 416         local config="$tmpdir/config.xml"
 417         local name="echo_$index"
 418         local uuid="echo_$index_uuid"
 419
 420         if [ $index = 0 ]; then
 421                 if ! lconf --reformat $config; then
 422                         echo "error setting up with lconf"
 423                         return 1;
 424                 fi
 425                 running_config="$config"
 426                 if ! grep -q '^obdecho\>' /proc/modules; then
 427                         if ! modprobe obdecho; then
 428                                 echo "error running modprobe obdecho"
 429                                 return 1;
 430                         fi
 431                         running_modules="obdecho"
 432                 fi
 433         fi
 434
 435 lctl << EOF
 436         newdev
 437         attach echo_client $name $uuid
 438         setup ost_$index
 439         quit
 440 EOF
 441         if [  $? != 0 ]; then
 442                 echo "error setting up echo_client $name against ost_$index"
 443                 return 1
 444         fi
 445         running_names[$index]=$name
 446 }
 447 echo_filter_setup() {
 448         local id=$1
 449         local wor=$2
 450         local threads=$3
 451         local name="echo_$id"
 452         local oid
 453
 454         case "$wor" in
 455                 w) ;;
 456                 r) return ;;
 457                 *) die "asked to do io with $wor?"
 458         esac
 459
 460         running_threads=$threads
 461         oid=`lctl --device "\$"$name create $threads | \
 462                 awk '/1 is object id/ { print $6 }'`
 463         # XXX need to deal with errors
 464         running_oids[$id]=$oid
 465 }
 466 echo_filter_start() {
 467         local threads=$1
 468         local iosize=$2
 469         local wor=$3
 470         local id=$4
 471         local name="echo_$id"
 472         local pages=$(($io_len / 4))
 473
 474         case "$wor" in
 475                 w) args="-i 0 -w" ;;
 476                 r) args="-i 1 -w" ;;
 477                 *) die "asked to do io with $wor?"
 478         esac
 479
 480         echo lctl --threads $threads v "\$"$name \
 481                 test_brw 1 w v $pages ${running_oids[$i]} p$iosize
 482 }
 483 echo_filter_result() {
 484         local output=$1
 485         local total=0
 486         local mbs
 487
 488         for mbs in `awk '($8=="MB/s):"){print substr($7,2)}' < $output`; do
 489                 total=$(do_bc $total + $mbs)
 490         done
 491         echo $total
 492 }
 493 echo_filter_cleanup() {
 494         local id=$1
 495         local wor=$2
 496         local threads=$3
 497         local name="echo_$id"
 498
 499         case "$wor" in
 500                 w) return ;;
 501                 r) ;;
 502                 *) die "asked to do io with $wor?"
 503         esac
 504
 505         lctl --device "\$"$name destroy ${running_oids[$i]} $threads
 506         unset running_oids[$i]
 507 }
 508 echo_filter_finish() {
 509         local index=$1
 510         # leave real work for _teardown
 511 }
 512 echo_filter_teardown() {
 513         cleanup_echo_filter
 514 }
 515
 516 ######################################################################
 517 # the iteration that drives the tests
 518
 519 test_one() {
 520         local test=$1
 521         local my_x=$2
 522         local my_y=$3
 523         local threads=$4
 524         local iosize=$5
 525         local wor=$6
 526         local vmstat_pid
 527         local vmstat_log="$tmpdir/vmstat.log"
 528         local opref="$test-$threads-$iosize-$wor"
 529
 530         for i in `seq 0 $last_block`; do
 531                 ${test}_setup $i $wor $threads
 532         done
 533
 534         echo $test with $threads threads
 535
 536         # start up vmstat and record its pid
 537         echo starting `date`
 538         nice -19 vmstat 1 > $vmstat_log 2>&1 &
 539         [ $? = 0 ] || die "vmstat failed"
 540         vmstat_pid=$!
 541         pid_now_running $vmstat_pid
 542
 543         # start all the tests.  each returns a pid to wait on
 544         pids=""
 545         for i in `seq 0 $last_block`; do
 546                 cmd=`${test}_start $threads $iosize $wor $i`
 547                 $cmd > $tmpdir/$i 2>&1 &
 548                 local pid=$!
 549                 pids="$pids $pid"
 550                 pid_now_running $pid
 551         done
 552
 553         echo -n waiting on pids $pids:
 554         for p in $pids; do
 555                 wait $p
 556                 echo -n .
 557                 pid_has_stopped $p
 558         done
 559         echo
 560
 561         # stop vmstat and get cpu use from it
 562         kill $vmstat_pid
 563         echo stopping `date`
 564         pid_has_stopped $vmstat_pid
 565         cpu=$(mean_stddev $(awk \
 566               '(NR > 3 && NF == 16 && $16 != "id" )     \
 567                 {print 100 - $16}' < $vmstat_log) )
 568         save_output $vmstat_log $opref.vmstat
 569
 570         # record each index's test results and sum them
 571         thru=0
 572         line=""
 573         for i in `seq 0 $last_block`; do
 574                 local t=`${test}_result $tmpdir/$i`
 575                 save_output $tmpdir/$i $opref.$i
 576                 echo test returned "$t"
 577                 line="$line $t"
 578                 # some tests return mean:stddev per thread, filter out stddev
 579                 thru=$(do_bc $thru + $(echo $t | sed -e 's/:.*$//g'))
 580         done
 581         line="("`commas $line`")"
 582
 583         for i in `seq 0 $last_block`; do
 584                 ${test}_cleanup $i $wor $threads
 585         done
 586
 587         # tabulate the results
 588         echo $test did $thru mb/s with $cpu
 589         table_set $test $my_x $my_y $thru
 590         table_set $test $(($my_x + 1)) $my_y $cpu
 591         table_set $test $(($my_x + 2)) $my_y $line
 592 }
 593
 594 test_iterator() {
 595         local test=$1
 596         local thr=$min_threads
 597         local cleanup=""
 598         local rc=0
 599         local i
 600
 601         for i in `seq 0 $last_block`; do
 602                 if ! ${test}_config $i; then
 603                         echo "couldn't config $test for bdev ${blocks[$i]}"
 604                         echo "skipping $test for all block devices"
 605                         cleanup=$(($i - 1))
 606                         rc=1;
 607                         break
 608                 fi
 609         done
 610
 611         for i in `seq 0 $last_block`; do
 612                 # don't prepare if _config already failed
 613                 [ ! -z "$cleanup" ] && break
 614                 if ! ${test}_prepare $i; then
 615                         echo "couldn't prepare $test for bdev ${blocks[$i]}"
 616                         echo "skipping $test for all block devices"
 617                         cleanup=$(($i - 1))
 618                         rc=1;
 619                         break
 620                 fi
 621         done
 622
 623         while [ -z "$cleanup" -a $thr -lt $(($max_threads + 1)) ]; do
 624                 for iosize in 64 128; do
 625                         table_set $test 0 $cur_y $thr
 626                         table_set $test 1 $cur_y $iosize
 627                         table_set $test 2 $cur_y "|"
 628
 629                         for wor in w r; do
 630                                 table_set $test 3 $cur_y $wor
 631                                 test_one $test 4 $cur_y $thr $iosize $wor
 632                                 cur_y=$(($cur_y + 1))
 633                         done
 634                 done
 635                 thr=$(($thr + $thr))
 636         done
 637
 638         [ -z "$cleanup" ] && cleanup=$last_block
 639
 640         if [ "$cleanup" != -1 ]; then
 641                 for i in `seq $cleanup 0`; do
 642                         ${test}_finish $i
 643                 done
 644         fi
 645
 646         ${test}_teardown
 647
 648         return $rc;
 649 }
 650
 651 while getopts ":d:b:l:t:T:r:" opt; do
 652         case $opt in
 653                 b) block=$OPTARG                 ;;
 654                 d) output_dir=$OPTARG                 ;;
 655                 l) io_len=$OPTARG                       ;;
 656                 r) run_tests=$OPTARG                    ;;
 657                 t) min_threads=$OPTARG                  ;;
 658                 T) max_threads=$OPTARG                  ;;
 659                 \?) usage
 660         esac
 661 done
 662
 663 if [ -z "$io_len" ]; then
 664         io_len=`awk '($1 == "MemTotal:"){print $2}' < /proc/meminfo`
 665         [ -z "$io_len" ] && die "couldn't determine the amount of memory"
 666 fi
 667
 668 if [ ! -z "$output_dir" ]; then
 669         [ ! -e "$output_dir" ] && "output dir $output_dir doesn't exist"
 670         [ ! -d "$output_dir" ] && "output dir $output_dir isn't a directory"
 671 fi
 672
 673 block=`echo $block | sed -e 's/,/ /g'`
 674 [ -z "$block" ] && usage "need block devices"
 675
 676 run_tests=`echo $run_tests | sed -e 's/,/ /g'`
 677 [ -z "$run_tests" ] && usage "need to specify tests to run with -r"
 678 for t in $run_tests; do
 679         if ! echo $possible_tests | grep -q $t ; then
 680                 die "$t isn't one of the possible tests: $possible_tests"
 681         fi
 682 done
 683
 684 [ $min_threads -gt $max_threads ] && \
 685         die "min threads $min_threads must be <= min_threads $min_threads"
 686
 687 last_block=-1
 688 for b in $block; do
 689         [ ! -e $b ] && die "block device file $b doesn't exist"
 690         [ ! -b $b ] && die "$b isn't a block device"
 691         last_block=$(($last_block + 1))
 692         blocks[$last_block]=$b
 693 done
 694
 695 tmpdir=`mktemp -d /tmp/.surveyXXXXXX` || die "couldn't create tmp dir"
 696
 697 echo each test will operate on $io_len"k"
 698
 699 test_results=""
 700
 701 for t in $run_tests; do
 702
 703         table_set $t 0 0 "T"
 704         table_set $t 1 0 "L"
 705         table_set $t 2 0 "|"
 706         table_set $t 3 0 "W"
 707         table_set $t 5 0 "C:S"
 708         table_set $t 6 0 "B"
 709         cur_y=1;
 710
 711         if ! test_iterator $t; then
 712                 continue;
 713         fi
 714         test_results="$test_results $t"
 715 done
 716
 717 [ ! -z "$test_results" ] && (
 718         echo
 719         echo "T = number of concurrent threads per device"
 720         echo "L = base io operation length, in KB"
 721         echo "W/O/R = write/overwrite/read throughput, in MB/s"
 722         echo "C = percentage CPU used, both user and system"
 723         echo "S = standard deviation in cpu use"
 724         echo "B = per-block results: ("`echo ${blocks[*]} | sed -e 's/ /,/g'`")"
 725         echo
 726 )
 727
 728 for t in $test_results; do
 729         ${t}_banner
 730         table_dump $t
 731 done