Whamcloud - gitweb
b=11171
[fs/lustre-release.git] / lustre-iokit / sgpdd-survey / sgpdd-survey
#!/bin/bash

######################################################################
# customize per survey

# Device selection: provide EITHER scsidevs OR rawdevs (set the variable
# before running, or adapt one of the commented examples below).
# WARNING: the devices being measured will be erased.
#   raw devices to use:
# rawdevs=${rawdevs:-"/dev/raw/raw1"}
#   SCSI devices to measure (this example picks all of them, if you use udev):
# scsidevs=`ls /dev/sd[a-z] /dev/sd[a-z][a-z]`

# Prefix for the result files.  The default embeds date/time and hostname
# so that every run gets a unique name.
# NB the directory part must already exist if it includes subdirs.
: "${rslt_loc:=/tmp}"
: "${rslt:=$rslt_loc/sgpdd_survey_$(date +%F@%R)_$(uname -n)}"

# Which operations to run (any of: read, write), in this order.
: "${actions:=write read}"

# Total amount of data per device (MBytes).
# NB bigger than the device cache is good.
: "${size:=8192}"

# Record size sweep (KBytes): from rszlo up to rszhi.
: "${rszlo:=1024}"
: "${rszhi:=1024}"

# Concurrent regions per device: from crglo up to crghi.
: "${crglo:=1}"
: "${crghi:=256}"

# Threads to share between the concurrent regions of a device.
# Several threads per region simulates a deeper request queue.
# NB the survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE.
: "${thrlo:=1}"
: "${thrhi:=4096}"

#####################################################################
# leave the rest of this alone unless you know what you're doing...

# Upper limit on the number of threads a single instance will spawn.
SG_MAX_QUEUE=16
43
# Build devs[], the list of device nodes the survey will drive.
#   scsidevs - every name is translated to its SG device name by looking it
#              up in the second column of sg_map's output.
#   rawdevs  - every name is used as given once `raw -q` succeeds on it.
# Exits with status 1 when neither list is set or a device can't be resolved.

# Print the first-column entry of every sg_map line whose second column is
# exactly $1; prints nothing when no line matches.
sg_name_for () {
    sg_map | awk -v want="$1" '$2 == want {print $1}'
}

i=0
devs=()
if [ "$scsidevs" ]; then
    # We first look the name up as given (a LUN); if that fails we assume
    # it names a partition and retry with the trailing number removed.
    for d in $scsidevs; do
        devs[$i]=$(sg_name_for "$d")
        if [ -z "${devs[i]}" ]; then
            echo "Can't find SG device for $d, testing for partition"
            # Drop the whole trailing number, so multi-digit partitions
            # (e.g. /dev/sdb12 -> /dev/sdb) resolve as well.
            pt=$(printf '%s\n' "$d" | sed 's/[0-9][0-9]*$//')
            # Try again
            devs[$i]=$(sg_name_for "$pt")
            if [ -z "${devs[i]}" ]; then
                echo "Can't find SG device $pt"
                exit 1
            fi
        fi
        i=$((i+1))
    done
elif [ "$rawdevs" ]; then
    for r in $rawdevs; do
        # Only the exit status of the query matters; its output is discarded.
        if raw -q "$r" > /dev/null; then
            devs[$i]=$r
            i=$((i+1))
        else
            echo "Raw device $r not set up"
            exit 1
        fi
    done
else
    echo "Must specify scsidevs or rawdevs"
    exit 1
fi
79
ndevs=${#devs[@]}

# Determine the block size from the second field of `sg_readcap -b` on the
# first device.  This should also work for raw devices.
# If no usable value comes back, fall back to 512.
bs=0
raw_bs=$(sg_readcap -b "${devs[0]}" | awk '{print $2}')
# Arithmetic expansion also converts a hexadecimal (0x...) value; an empty
# result evaluates to 0, and if the expansion fails bs keeps its 0 preset.
bs=$(( ${raw_bs:-0} ))
if ! ((bs > 0)); then
    echo "sg_readcap failed, setting block size to 512"
    bs=512
fi

# Result files: a one-line-per-test summary and the detailed tool output.
rsltf=${rslt}.summary
workf=${rslt}.detail
# Start both files empty.  The targets are quoted so that a result prefix
# containing whitespace is not rejected as an ambiguous redirect.
echo -n > "$rsltf"
echo -n > "$workf"
93
# Write a message to stdout and append the same text to the summary file.
# Globals:   rsltf (read) - path of the summary file that is appended to
# Arguments: an optional leading -n suppresses the trailing newline;
#            all remaining arguments are joined with single spaces
# Outputs:   the message on stdout and at the end of $rsltf
print_summary () {
    local msg eol=$'\n'
    if [ "$1" = "-n" ]; then
        eol=""
        shift
    fi
    msg="$*"
    # printf '%s' emits the text verbatim (echo would treat a message that
    # looks like one of its options specially); quoting the target keeps
    # the redirect valid when the path contains whitespace.
    printf '%s%s' "$msg" "$eol" >> "$rsltf"
    printf '%s%s' "$msg" "$eol"
}
103
# Main survey: sweep record size (rsz), concurrent regions per device (crg)
# and threads per device (thr), each doubling from its lower to its upper
# bound.  For every combination and every action, one sgp_dd instance per
# region per device is started in the background; throughput is reported
# through print_summary and the raw sgp_dd output is appended to $workf.

# Every sweep doubles its variable, so a lower bound below 1 would never
# advance (and crglo=0 would divide by zero).  Refuse such settings.
for bound in rszlo crglo thrlo; do
    if ! ((${!bound} >= 1)); then
        echo "$bound must be a positive integer (got '${!bound}')"
        exit 1
    fi
done

print_summary "$(date) sgpdd-survey on $rawdevs$scsidevs from $(hostname)"

# Memory budget in KB.  Despite its name this holds the MemTotal value from
# /proc/meminfo; it is read once since it is the same for every combination.
freemem=$(awk '/^MemTotal:/ {printf "%d\n", $2}' < /proc/meminfo)

for ((rsz = rszlo; rsz <= rszhi; rsz *= 2)); do
    for ((crg = crglo; crg <= crghi; crg *= 2)); do
        for ((thr = thrlo; thr <= thrhi; thr *= 2)); do
            # skip fewer threads than regions, and more threads per
            # region than one instance may spawn
            if ((thr < crg || thr / crg > SG_MAX_QUEUE)); then
                continue
            fi
            # compute parameters
            bpt=$((rsz * 1024 / bs))                        # blocks per transfer
            blocks=$((size * ((1024 * 1024) / bs) / crg))   # blocks per region
            count=$blocks
            # show computed parameters (sizes in KB, after integer rounding)
            actual_rsz=$((bpt * bs / 1024))
            actual_size=$((bs * count * crg / 1024))
            str=$(printf 'total_size %8dK rsz %4d crg %5d thr %5d ' \
                "$((actual_size * ndevs))" "$actual_rsz" \
                "$((crg * ndevs))" "$((thr * ndevs))")
            echo "==============> $str" >> "$workf"
            print_summary -n "$str"
            # Skip combinations whose estimated buffer need (KB) exceeds
            # the memory budget.
            # NOTE(review): the +64 looks like a per-region allowance - confirm.
            if (((actual_rsz * thr / crg + 64) * crg * ndevs > freemem)); then
                print_summary "ENOMEM"
                continue
            fi
            # Check every device's capacity BEFORE anything is started, so
            # that bailing out never leaves sgp_dd jobs running in the
            # background and the check is not part of the timed interval.
            for ((i = 0; i < ndevs; i++)); do
                dev=${devs[i]}
                nblocks=$(sg_readcap -b "$dev" | awk '{print $1}')
                if [ -z "$nblocks" ]; then
                    echo "Can't read capacity of $dev, skipping size check" >&2
                    continue
                fi
                devsize=$((bs * nblocks / 1024))
                if ((devsize < actual_size)); then
                    # Report the name sg_map pairs with this exact sg node;
                    # fall back to the node itself when there is none.
                    devname=$(sg_map | awk -v sg="$dev" '$1 == sg {print $2}')
                    printf 'device %s not big enough: %s < %s.\nConsider reducing $size\n' \
                        "${devname:-$dev}" "$devsize" "$actual_size"
                    exit 1
                fi
            done
            # run tests
            for action in $actions; do
                print_summary -n "$action "
                echo "=====> $action" >> "$workf"
                tmpf=${workf}_tmp
                # start test
                t0=$(date +%s.%N)
                for ((i = 0; i < ndevs; i++)); do
                    dev=${devs[i]}
                    if [ "$action" = read ]; then
                        inf="if=$dev"
                        outf="of=/dev/null"
                        skip=skip
                    else
                        inf="if=/dev/zero"
                        outf="of=$dev"
                        skip=seek
                    fi
                    # one background instance per region; region j starts
                    # 1024 blocks plus j regions into the device, and each
                    # instance's stderr goes to its own temporary file
                    for ((j = 0; j < crg; j++)); do
                        sgp_dd 2> "${tmpf}_${i}_${j}" \
                            "$inf" "$outf" "${skip}=$((1024 + j * blocks))" \
                            "thr=$((thr / crg))" "count=$count" "bs=$bs" \
                            "bpt=$bpt" time=1 &
                    done
                done
                wait
                t1=$(date +%s.%N)
                # collect/check individual stats
                echo > "$tmpf"
                ok=0
                for ((i = 0; i < ndevs; i++)); do
                    for ((j = 0; j < crg; j++)); do
                        rtmp=${tmpf}_${i}_${j}
                        if grep 'error' "$rtmp" > /dev/null 2>&1; then
                            echo "Error found in $rtmp"
                        elif grep 'time to transfer data' "$rtmp" > /dev/null 2>&1; then
                            ok=$((ok + 1))
                        fi
                        cat "$rtmp" >> "$tmpf"
                        cat "$rtmp" >> "$workf"
                        rm "$rtmp"
                    done
                done
                if ((ok != ndevs * crg)); then
                    print_summary -n "$((ndevs * crg - ok)) failed "
                else
                    # compute MB/sec from elapsed
                    # (the values spliced into the awk program are numbers
                    # produced by this script itself)
                    bw=$(awk "BEGIN {printf \"%7.2f MB/s\", $actual_size * $ndevs / (( $t1 - $t0 ) * 1024); exit}")
                    # compute MB/sec from nregions*slowest
                    check=$(awk < "$tmpf" \
                        '/time to transfer data/ {mb=$8/1.048576; if (n == 0 || mb < min) min = mb; n++}\
                        END {printf "%5d x %6.2f = %7.2f MB/s", n, min, min * n}')
                    print_summary -n "$bw $check "
                fi
                rm "$tmpf"
            done
            print_summary ""
        done
    done
done