From 90e1ec3035c82ae9cc1ad471c0c8cad7d448f23f Mon Sep 17 00:00:00 2001 From: Gregoire Pichon Date: Mon, 17 Jun 2013 16:32:10 +0200 Subject: [PATCH] LU-3478 iokit: fix sgpdd-survey scripts (output and plotting) The sgpdd-survey script does not provide minimal and maximal bandwidth of sgp_dd instances. This is important to be able to identify if slow LUNs are present. This patch updates the output to display this information, similarly to obdfilter-survey. The plot-sgpdd script is updated to handle this output format change. It is also reorganized to simplify operation handling. Signed-off-by: Gregoire Pichon Change-Id: Ie6ad57951dff8c8b2871457652a94df6dccbc491 Reviewed-on: http://review.whamcloud.com/6680 Reviewed-by: Minh Diep Reviewed-by: Bobbie Lind Reviewed-by: Cliff White Tested-by: Cliff White Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre-iokit/sgpdd-survey/README.sgpdd-survey | 9 +- lustre-iokit/sgpdd-survey/plot-sgpdd | 239 +++++++++----------------- lustre-iokit/sgpdd-survey/sgpdd-survey | 32 ++-- 3 files changed, 107 insertions(+), 173 deletions(-) diff --git a/lustre-iokit/sgpdd-survey/README.sgpdd-survey b/lustre-iokit/sgpdd-survey/README.sgpdd-survey index c1f3337..055bed3 100644 --- a/lustre-iokit/sgpdd-survey/README.sgpdd-survey +++ b/lustre-iokit/sgpdd-survey/README.sgpdd-survey @@ -58,11 +58,12 @@ ${rslt}_.detail collected tmp files for post-mortem The summary file and stdout contain lines like... -total_size 8388608K rsz 1024 thr 1 crg 1 180.45 MB/s 1 x 180.50 = 180.50 MB/s +dev 30 sz 125829120K rsz 1024K crg 60 thr 120 write 6036.79 [ 100.69, 106.49] read 6720.94 [ 112.64, 220.85] -The number immediately before the first MB/s is the bandwidth computed by -measuring total data and elapsed time. The other numbers are a check on -the bandwidths reported by the individual sgp_dd instances. +The number immediately after the write and the read word is the bandwidth +in MiB/s computed by measuring total data and elapsed time. 
The other numbers +are the minimal and maximal bandwidths reported by the individual sgp_dd +instances. If there are so many threads that sgp_dd is unlikely to be able to allocate I/O buffers, "ENOMEM" is printed. diff --git a/lustre-iokit/sgpdd-survey/plot-sgpdd b/lustre-iokit/sgpdd-survey/plot-sgpdd index f6aaa43..e3386ecd 100755 --- a/lustre-iokit/sgpdd-survey/plot-sgpdd +++ b/lustre-iokit/sgpdd-survey/plot-sgpdd @@ -41,29 +41,28 @@ my @GraphTitle; #Subroutine to write .scr file that further used by gnuplot to plot the graph. sub write_scr_file() { my $op = $_[0]; + my $rwlabel = $_[1]; print "generating plot $file-$rsz-$op.png\n"; open ( SCRFILE, "> $file-$rsz-$op.scr" ) or die "Can't open scr file for writing"; - if ($op eq "rd") { - $rwlabel = "Read"; - } - if ($op eq "wr") { - $rwlabel = "Write"; - } if ($opt_rdtitle || $opt_wrtitle) { if ($op eq "rd") { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz KBytes, $opt_rdtitle\"\n"; + print SCRFILE "set title \"@GraphTitle\\n$rwlabel, " . + "Rsize = $rsz, $lun LUNs, $opt_rdtitle\"\n"; } if ($op eq "wr") { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz KBytes, $opt_wrtitle\"\n"; + print SCRFILE "set title \"@GraphTitle\\n$rwlabel, " . + "Rsize = $rsz, $lun LUNs, $opt_wrtitle\"\n"; } } else { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz KBytes\"\n"; + print SCRFILE "set title \"@GraphTitle\\n$rwlabel, " . 
+ "Rsize = $rsz, $lun LUNs\"\n"; } - print SCRFILE "set xlabel \"Threads\"\n"; - print SCRFILE "set ylabel \"Speeds(MB/s)\"\n"; + print SCRFILE "set xlabel \"Threads/LUN\"\n"; + print SCRFILE "set ylabel \"Throughput (MiB/s)\"\n"; print SCRFILE "set logscale x\n"; print SCRFILE "set grid\n"; + print SCRFILE "set key right bottom\n"; print SCRFILE "set terminal png\n"; print SCRFILE "set output \"/dev/null\"\n"; if ($opt_y != 0) { @@ -74,19 +73,14 @@ sub write_scr_file() { my $plot = "plot"; $i = 2; - my @numrgs = split " ", $regions; $xrange = 1; # generate instructions for gnuplot, with adjusting X-axes ranges - - foreach my $j (sort numerically split " ", $threads) { - if ($op eq "wr") { - $using = ( $i < $#numrgs ) ? $i : $#numrgs; - printf SCRFILE "$plot \"$file-$rsz-$op.dat\" using 1:$using axes x%dy1 title \"write-obj$j\" with line\n", $xrange; - } - if ($op eq "rd") { - $using = ( $i < $#numrgs ) ? $i : $#numrgs; - printf SCRFILE "$plot \"$file-$rsz-$op.dat\" using 1:$using axes x%dy1 title \"read-obj$j\" with line\n", $xrange; - } + for ($j = $first_crg; $j <= $crg ; $j = $j + $j) { + $tmp=$j/$lun; + printf SCRFILE "$plot \"$file-$rsz-$op.dat\" " . + "using 1:$i:xticlabels(1) axes x%dy1 " . + "title \"$tmp crg/LUN\" " . + "with linespoints lw 2\n", $xrange; $i++; $plot = "replot"; } @@ -98,28 +92,6 @@ sub write_scr_file() { system ("rm $file-$rsz-$op.scr"); } -sub check_data_file () { - my $file=shift; - my @values; - my @tmp; - - open ( FILE, "< $file" ) or die "Can't open $file for reading"; - while ( ) { - @tmp = split; - push @values, [ @tmp ]; - } - close FILE; - - for ( $j = 0; $j <= $#tmp; $j++) { - my $sum=0; - for ($i = 2; $i <= $#values ; $i ++) { - $values [$i][$j] =~ "-" or $sum = $sum + $values [$i][$j]; - } - die "File: $file : $j column contains no data.\n" unless $sum != 0; - } -} - -sub numerically { $a <=> $b; } #Subroutine to write .dat file that further used by gnuplot to plot the graph. 
sub write_dat_file() { @@ -128,53 +100,59 @@ sub write_dat_file() { # Open .csv/.dat file for writing required columns from log file. my $datafile = "$file-$rsz-$op.dat"; open ( DATAFILE, "> $datafile" ) or die "Can't open csv $datafile for writing"; - printf DATAFILE "%-6s", "0"; - - foreach my $j (sort numerically split " ", $regions) { - printf DATAFILE "%-8s", "$op$j"; - } - - # threads, line [7], strings - foreach my $i (sort numerically split " ", $threads) { - printf DATAFILE "\n%-6s", $i; - - # regions, line [5], column - foreach my $j (sort numerically split " ", $regions) { - if (($op eq "rd" && $rdwr) || ($op eq "wr" && $wrrd) || ($readop) || ($writeop)) { - if ( $out{$i}{$j} ) { - printf DATAFILE "%-8s", $out{$i}{$j}; + printf DATAFILE "%-6s", "thrd"; + for ($j = $first_crg; $j <= $crg ; $j = $j + $j) { + $tmp = $j/$lun; + printf DATAFILE "%-10s", "$tmp-crg"; + } + for ( $i = $first_thread; $i <= $thread; $i = $i + $i ) { + printf DATAFILE "\n%-6s", $i/$lun; + for ($j = $first_crg; $j <= $crg ; $j = $j + $j) { + if ($op eq "rd") { + if ( $ard{$i}{$j} ) { + printf DATAFILE "%-10s", $ard{$i}{$j}; } else { - printf DATAFILE "%-8s", "-"; + printf DATAFILE "%-10s", "-"; } - } else { - if (($j <= 1 && $out{$i}{$j - 1})) { - printf DATAFILE "%-8s", $out{$i}{$j - 1}; - }elsif ($out{$i}{$j + 1} && $j > 1) { - printf DATAFILE "%-8s", $out{$i}{$j + 1}; + } elsif ($op eq "wr" ) { + if ( $awr{$i}{$j} ) { + printf DATAFILE "%-10s", $awr{$i}{$j}; } else { - printf DATAFILE "%-8s", "-"; + printf DATAFILE "%-10s", "-"; } } } } close DATAFILE; - &check_data_file ( $datafile ); +} + +#Subroutine to call .scr and .dat file write routines. 
+sub write_files() { + for ($cnt = 0; $cnt < @operations; $cnt = $cnt + 1) { + if($operations[$cnt] eq "read") { + &write_dat_file("rd"); + &write_scr_file("rd", "read"); + } elsif ($operations[$cnt] eq "write") { + &write_dat_file("wr"); + &write_scr_file("wr", "write"); + } + } } if ( !$ARGV[0] ) { usage(); } -$regions = ""; -$threads = ""; +$crg = 0; +$thread = 0; +$first_crg = 1; +$first_thread = 1; $count = 0; -$wrrd = 0; -$rdwr = 0; -$writeop = 0; -$readop = 0; $rsz = 0; $opt_rdtitle = ""; $opt_wrtitle = ""; $opt_y = 0; +$cnt = 0; +@operations = (); # Command line parameter parsing use Getopt::Long; GetOptions ('help' => \$opt_help, 'rt=s' => \$opt_rdtitle, 'wt=s' => \$opt_wrtitle, 'y=i' => \$opt_y) or usage(); @@ -186,105 +164,58 @@ $file = $ARGV[0]; open ( PFILE, "$file") or die "Can't open $file"; LABEL: while ( ) { chomp; + # replace error strings to ensure same ordering of line fields + s/failed/failed . . . . ./g; @line = split( /\s+/ ); - # Remove the 'K' after rsz - $frsz = substr($line[3], 0, -1); - if ($line[28] && $count != 0) { - print "invalid file format\n"; - exit 1; - } if ($count == 0) { - @GraphTitle = @line; + @GraphTitle = @line[0 .. 
6]; $count++; next LABEL; } - if ($line[8]) { - if ($line[8] eq "ENOMEM") { - next LABEL; - } - } - if (!$rsz && $frsz) { - $rsz = $frsz + # output format + # dev 1 sz 1048576K rsz 1024K crg 1 thr 8 write 604.55 [ 606.43, 606.43] read 754.02 [ 756.95, 756.95] + $linelen = @line; + if ($linelen < 11) { + print "invalid file format at line $count\n"; + exit 1; } - if ($rsz != $frsz) { - if($readop) { - &write_dat_file("rd"); - &write_scr_file("rd"); - } - if($writeop) { - &write_dat_file("wr"); - &write_scr_file("wr"); - } - if ($wrrd || $rdwr) { - &write_dat_file("rd"); - &write_scr_file("rd"); - &write_dat_file("wr"); - &write_scr_file("wr"); + if ($line[10]) { + if ($line[10] eq "ENOMEM") { + next LABEL; } - $rsz = substr($line[3],0,-1);; - $regions = ""; - $threads = ""; } - $rindex = 20; - print "DEBUG rg$line[7] th$line[9] w$line[11] r$line[$rindex]\n"; - if ($line[18]) { - if ($line[12] eq "failed") { - $rindex = 12; - } - if ($line[10] eq "write" && $line[19] eq "read") { - $wrrd = 1; - } - if ($line[10] eq "read" && $line[19] eq "write") { - $rdwr = 1; - } - } else { - if ($line[10] eq "write" && $line[11]) { - $writeop = 1; - } - if ($line[10] eq "read" && $line[11]) { - $readop = 1; + if (!$rsz || $rsz ne $line[5]) { + &write_files() unless !$rsz; + $cnt = 0; + $lun = $line[1]; + $rsz = $line[5]; + $first_crg = $line[7]; + $first_thread = $line[9]; + for ($i = 10; $i <= $linelen; $i = $i + 5) { + if ($line[$i]) { + $operations[$cnt] = $line[$i]; + $cnt++; + } } } - if ($wrrd || $rdwr) { - $out{$line[9]}{$line[7]} = $line[11]; - if ($line[$rindex+1]) { - if (!($line[$rindex+1] eq "failed")) { - goto LABEL2; - } - } else { -LABEL2: if ($line[7] <= 1 ) { - $out{$line[9]}{$line[7] - 1} = $line[$rindex]; - } else { - $out{$line[9]}{$line[7] + 1} = $line[$rindex]; - } + for ($i = 0; $i < @operations; $i++) { + if ($operations[$i] eq "read") { + $ard{$line[9]}{$line[7]} = $line[$i * 5 + 11]; + } elsif ($operations[$i] eq "write") { + $awr{$line[9]}{$line[7]} = 
$line[$i * 5 + 11]; } } - if ($writeop) { - $out{$line[9]}{$line[7]} = $line[11]; + if ( $crg < $line[7] ) { + $crg = $line[7]; } - if ($readop) { - $out{$line[9]}{$line[7]} = $line[11]; + if ( $thread < $line[9] ) { + $thread = $line[9]; } - $regions .= " $line[7]" unless $regions =~ $line[7]; - $threads .= " $line[9]" unless $threads =~ $line[9]; $count++; } close PFILE; if ($count > 1 && $rsz) { - if($readop) { - &write_dat_file("rd"); - &write_scr_file("rd"); - } - if($writeop) { - &write_dat_file("wr"); - &write_scr_file("wr"); - } - if ($wrrd || $rdwr) { - &write_dat_file("rd"); - &write_scr_file("rd"); - &write_dat_file("wr"); - &write_scr_file("wr"); - } + &write_files() } else { print "Invalid log file format\n"; } diff --git a/lustre-iokit/sgpdd-survey/sgpdd-survey b/lustre-iokit/sgpdd-survey/sgpdd-survey index 5c2af8a..4082720 100755 --- a/lustre-iokit/sgpdd-survey/sgpdd-survey +++ b/lustre-iokit/sgpdd-survey/sgpdd-survey @@ -44,7 +44,6 @@ thrhi=${thrhi:-4096} # and max # threads one instance will spawn SG_MAX_QUEUE=16 - unique () { echo "$@" | xargs -n1 echo | sort -u } @@ -138,27 +137,28 @@ if [ "$scsidevs" ]; then done for ((i=0; i < $ndevs; i++)); do - # resolv symbolic link if any + # resolve symbolic link if any devs[$i]=$(remote_shell ${hosts[$i]} readlink -f ${devs[$i]}) # retrieve associated sg device # we will test for a LUN, the test for a partition # if the partition number is > 9 this will fail - devs[$i]=$(remote_shell ${hosts[$i]} sg_map | \ - awk -v dev=${devs[$i]} '{if ($2 == dev) print $1}') - if [ -z "${devs[i]}" ]; then + tmp=$(remote_shell ${hosts[$i]} sg_map | \ + awk -v dev=${devs[$i]} '{if ($2 == dev) print $1}') + if [ -z "$tmp" ]; then echo "Can't find SG device for ${hosts[$i]}:${devs[$i]}, " \ "testing for partition" pt=`echo ${devs[$i]} | sed 's/[0-9]*$//'` # Try again - devs[$i]=$(remote_shell ${hosts[$i]} sg_map | \ - awk -v dev=$pt '{if ($2 == dev) print $1}') - if [ -z "${devs[i]}" ]; then + tmp=$(remote_shell 
${hosts[$i]} sg_map | \ + awk -v dev=$pt '{if ($2 == dev) print $1}') + if [ -z "$tmp" ]; then echo -e "Can't find SG device ${hosts[$i]}:$pt.\n" \ "Do you have the sg module configured for your kernel?" exit 1 fi fi + devs[$i]=$tmp done elif [ "$rawdevs" ]; then for ((i=0; i < $ndevs; i++)); do @@ -330,15 +330,17 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do if ((ok != ndevs*crg)); then print_summary -n "$((ndevs*crg - ok)) failed " else - # compute MB/sec from elapsed - bw=`awk "BEGIN {printf \"%7.2f MB/s\", \ + # compute bandwidth in MiB/s from total data / elapsed time + bw=`awk "BEGIN {printf \"%7.2f \", \ $total_size / (( $t1 - $t0 ) * 1024); exit}"` - # compute MB/sec from nregions*slowest - check=`awk < $tmpf \ + # compute global min/max stats + minmax=`awk < $tmpf \ '/time to transfer data/ {mb=$8/1.048576; \ - if (n == 0 || mb < min) min = mb; n++}\ - END {printf "%5d x %6.2f = %7.2f MB/s", n, min, min * n}'` - print_summary -n "$bw $check " + if (n == 0 || mb < min) min = mb; \ + if (n == 0 || mb > max) max = mb; \ + n++} \ + END {printf "[ %7.2f, %7.2f] ",min,max;}'` + print_summary -n "$bw $minmax " fi rm $tmpf done -- 1.8.3.1