Whamcloud - gitweb
4082720ab3e476c48824ce8b7c3fc9e9d609263c
[fs/lustre-release.git] / lustre-iokit / sgpdd-survey / sgpdd-survey
#!/bin/bash

######################################################################
# customize per survey
#
# Each setting below keeps a value that is already present (and non-empty)
# in the environment; otherwise the default shown here is assigned.

# CHOOSE EITHER scsidevs or rawdevs
# scsidevs: the SCSI devices to measure - WARNING: will be erased.
# rawdevs:  the raw devices to use
# rawdevs=${rawdevs:-"/dev/raw/raw1"}
# scsidevs=`ls /dev/sd[a-z] /dev/sd[a-z][a-z]` # all devices, if you use udev

# result file prefix.
# NB ensure the path exists on all servers if it includes subdirs
: "${rslt_loc:=/tmp}"
: "${rslt:=$rslt_loc/sgpdd_survey_$(date +%F@%R)}"

# what to do (read or write)
: "${actions:=write read}"

# total size per device (MBytes)
# NB bigger than device cache is good
: "${size:=8192}"

# record size (KBytes): lowest and highest value surveyed
: "${rszlo:=1024}"
: "${rszhi:=1024}"

# Concurrent regions per device: lowest and highest value surveyed
: "${crglo:=1}"
: "${crghi:=256}"

# boundary blocks between concurrent regions per device
: "${boundary:=1024}"

# threads to share between concurrent regions per device
# multiple threads per region simulates a deeper request queue
# NB survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE
: "${thrlo:=1}"
: "${thrhi:=4096}"

#####################################################################
# leave the rest of this alone unless you know what you're doing...

# and max # threads one instance will spawn
SG_MAX_QUEUE=16
46
# Print the distinct whitespace-separated words found in the arguments,
# one per line, in sort(1) order.
# Arguments: any number of words; an argument may itself hold several
#            whitespace-separated words
# Outputs:   one word per line on stdout; nothing at all when no word
#            was given
unique () {
    local IFS=$' \t\n'
    local -a words=()

    # Split in-process instead of "echo | xargs -n1 echo": echo swallows
    # words such as -n, xargs interprets quotes and backslashes, and xargs
    # would still run echo once (printing a blank line) on empty input.
    # read returns non-zero at end of input, which is expected here.
    read -r -d '' -a words <<< "$*" || true

    if (( ${#words[@]} == 0 )); then
        return 0
    fi
    printf '%s\n' "${words[@]}" | sort -u
}
50
# Split a "[host:]device" specification into its two parts.
# Arguments: $1 - device specification; everything before the first ':'
#                 is taken as the host, everything after it as the device
# Outputs:   "<host> <device>" on stdout; the host is "localhost" when
#            the specification contains no ':'
split_hostname () {
    local spec=$1
    # local, so that a caller's own $host is left untouched
    local host=localhost

    if [[ $spec == *:* ]]; then
        host=${spec%%:*}
        spec=${spec#*:}
    fi
    printf '%s\n' "$host $spec"
}
62
# Remote shell client used by dsh: "ssh" (default) or "rsh".
DSH=${DSH:-"ssh"}

# Run a command line on another node through $DSH.
# Globals:   DSH (read)
# Arguments: $1    - node name
#            $2    - remote user name, may be empty
#            $3... - command; the words are joined with single spaces and
#                    passed to the client as one string, prefixed with a
#                    PATH export that adds /sbin and /usr/sbin
# Returns:   exit status of the ssh/rsh client, or 1 (with a message on
#            stderr) when $DSH is neither "ssh" nor "rsh"
dsh () {
    local node=$1
    local user=$2
    shift 2
    # \$PATH stays literal so that it is expanded on the remote side
    local command="export PATH=/sbin:/usr/sbin:\$PATH; $*"

    case $DSH in
        ssh)
            # "user@" is prepended only when a user was given
            "$DSH" "${user:+$user@}$node" "$command"
            ;;
        rsh)
            if [ -n "$user" ]; then
                "$DSH" -l "$user" "$node" "$command"
            else
                "$DSH" "$node" "$command"
            fi
            ;;
        *)
            # previously an unknown client ran nothing and reported success
            echo "dsh: unsupported remote shell '$DSH' (expected ssh or rsh)" >&2
            return 1
            ;;
    esac
}
88
# how to run commands on other nodes
#
# Arguments: $1    - target host, optionally written as "user@host"
#            $2... - command; the words are joined with single spaces and
#                    parsed again by a shell, so anything that must stay
#                    one word needs its own quoting inside the string
# Commands for "localhost", or for this machine's own node name as printed
# by "uname -n", are eval'ed in the current shell; everything else is
# handed to dsh.
# Returns:   status of the eval'ed command or of dsh
remote_shell () {
    local host=$1
    shift
    local cmds="$*"
    local user=""

    if [[ $host == localhost || $host == "$(uname -n)" ]]; then
        eval "$cmds"
        return
    fi

    # split $host into $host and $user
    if [[ $host == *@* ]]; then
        user=${host%@*}
        host=${host#*@}
    fi
    dsh "$host" "$user" "$cmds"
}
106
107
# check either scsidevs or rawdevs is specified
# but only one of them
if [[ ( -n $scsidevs && -n $rawdevs ) || -z $scsidevs$rawdevs ]]; then
    echo "Must either specify scsidevs or rawdevs" >&2
    exit 1
fi

# retrieve host and device if specified as "hostname:device"
# hosts[i] / devs[i] describe the i-th device, ndevs is their number
ndevs=0
hosts=()
devs=()
# the lists are expanded unquoted on purpose: entries are whitespace-separated
for d in $scsidevs $rawdevs; do
    # split_hostname prints "<host> <device>"
    read -r -a str <<< "$(split_hostname "$d")"
    hosts[ndevs]=${str[0]}
    devs[ndevs]=${str[1]}
    ndevs=$((ndevs+1))
done
# every host is listed once, however many of its devices are surveyed
unique_hosts=($(unique "${hosts[@]}"))
125
# map given device names into SG device names
if [ "$scsidevs" ]; then
    # make sure sg kernel module is loaded
    for host in "${unique_hosts[@]}"; do
        # The pattern travels pre-quoted inside a single word because
        # remote_shell re-parses its arguments; passed as separate words
        # the trailing blank is lost and every module whose name merely
        # starts with "sg" would count as the sg module.
        if ! remote_shell "$host" "grep -q '^sg ' /proc/modules"; then
            echo "loading the sg kernel module on $host"
            if remote_shell "$host" modprobe sg; then
                # remembered so that the module can be unloaded at the end
                sg_was_loaded_on="$sg_was_loaded_on $host"
            else
                echo "failed to load the sg kernel module on $host" >&2
            fi
        fi
    done

    for ((i=0; i < ndevs; i++)); do
        # resolve symbolic link if any
        devs[i]=$(remote_shell "${hosts[i]}" readlink -f "${devs[i]}")

        # retrieve associated sg device: print column 1 of the sg_map line
        # whose column 2 equals the device name.
        # we will test for a LUN, then test for a partition
        tmp=$(remote_shell "${hosts[i]}" sg_map |
              awk -v dev="${devs[i]}" '$2 == dev {print $1}')
        if [ -z "$tmp" ]; then
            echo "Can't find SG device for ${hosts[$i]}:${devs[$i]}, " \
                 "testing for partition"
            # strip all trailing digits from the name and look that up
            pt=$(printf '%s\n' "${devs[i]}" | sed 's/[0-9]*$//')
            # Try again
            tmp=$(remote_shell "${hosts[i]}" sg_map |
                  awk -v dev="$pt" '$2 == dev {print $1}')
            if [ -z "$tmp" ]; then
                echo -e "Can't find SG device ${hosts[$i]}:$pt.\n" \
                        "Do you have the sg module configured for your kernel?" >&2
                exit 1
            fi
        fi
        devs[i]=$tmp
    done
elif [ "$rawdevs" ]; then
    for ((i=0; i < ndevs; i++)); do
        # only the exit status of "raw -q" matters, its output is dropped
        if ! remote_shell "${hosts[i]}" raw -q "${devs[i]}" > /dev/null; then
            echo "Raw device ${hosts[$i]}:${devs[$i]} not set up" >&2
            exit 1
        fi
    done
fi
172
# determine block size of each device. This should also work for raw devices
# If it fails, set to 512
for ((i=0; i < ndevs; i++)); do
    # The first two words printed by "sg_readcap -lb" are used as block
    # count and block size in bytes; arithmetic evaluation turns missing
    # words into 0.
    tmp=($(remote_shell "${hosts[i]}" sg_readcap -lb "${devs[i]}"))
    bs[i]=$((tmp[1]))
    if [ "${bs[i]}" == 0 ]; then
        echo "sg_readcap on device ${hosts[$i]}:${devs[$i]} failed, " \
             "setting block size to 512"
        bs[i]=512
    fi

    # check record size is a multiple of block size
    if [ $((rszlo*1024%bs[i])) -ne 0 ]; then
        echo "Record size is not a multiple of block size (${bs[$i]} bytes) " \
             "for device ${hosts[$i]}:${devs[$i]}" >&2
        exit 1
    fi

    # check device size (in kbytes).  With no block count from sg_readcap
    # the size is unknown rather than zero: comparing it would reject the
    # device every time and make the 512-byte fallback above pointless,
    # so the check is skipped in that case.
    devsize=$((tmp[0]*bs[i]/1024))
    if ((tmp[0] == 0)); then
        echo "size of device ${hosts[$i]}:${devs[$i]} unknown, " \
             "skipping size check"
    elif [ "$devsize" -lt $((size*1024)) ]; then
        echo -e "device ${hosts[$i]}:${devs[$i]} not big enough: " \
                "$devsize < $((size*1024)).\nConsider reducing \$size" >&2
        exit 1
    fi
done
200
# output files, all derived from the $rslt prefix
rsltf=${rslt}.summary
workf=${rslt}.detail
cmdsf=${rslt}.script
# create the summary and detail files, or empty them if they exist;
# quoted so that a prefix containing blanks is not an ambiguous redirect
: > "$rsltf"
: > "$workf"
206
# Append a message to the summary file and print it on stdout as well.
# Globals:   rsltf (read) - path of the summary file
# Arguments: -n (optional, must come first) - omit the trailing newline
#            remaining words - the message, joined with single spaces
print_summary () {
    local fmt='%s\n'

    if [ "$1" = "-n" ]; then
        fmt='%s'
        shift
    fi
    # printf rather than echo, so that a message which looks like an echo
    # option (for example "-e") is printed instead of being swallowed
    printf "$fmt" "$*" >> "$rsltf"
    printf "$fmt" "$*"
}
216
print_summary "$(date) sgpdd-survey on $rawdevs$scsidevs from $(hostname)"

# The sweeps below double their counter on every step, so a lower bound
# that is not positive would never advance (and a region count of 0 would
# divide by zero).  Reject such settings before starting.
for bound in rszlo crglo thrlo; do
    if (( ${!bound:-0} <= 0 )); then
        echo "$bound must be a positive integer (got '${!bound}')" >&2
        exit 1
    fi
done

# Survey every combination of record size (rsz), concurrent regions per
# device (crg) and threads per device (thr), each doubling from its low
# to its high limit.
for ((rsz = rszlo; rsz <= rszhi; rsz *= 2)); do
    for ((crg = crglo; crg <= crghi; crg *= 2)); do
        for ((thr = thrlo; thr <= thrhi; thr *= 2)); do
            # skip fewer threads than regions, and more threads per region
            # than one sgp_dd instance is allowed to spawn
            if ((thr < crg || thr / crg > SG_MAX_QUEUE)); then
                continue
            fi

            # compute total size (in kbytes); per device the block count
            # is rounded down to a multiple of crg
            total_size=0
            for ((i = 0; i < ndevs; i++)); do
                tsize=$((size * 1024 * 1024 / bs[i] / crg * crg * bs[i] / 1024))
                total_size=$((total_size + tsize))
            done

            # show test parameters
            str=$(printf 'dev %2d sz %8dK rsz %4dK crg %5d thr %5d ' \
                         "$ndevs" "$total_size" "$rsz" \
                         $((crg * ndevs)) $((thr * ndevs)))
            echo "==============> $str" >> "$workf"
            print_summary -n "$str"

            # check memory for each host
            for host in "${unique_hosts[@]}"; do
                numdevs=0
                for ((i = 0; i < ndevs; i++)); do
                    if [[ ${hosts[i]} == "$host" ]]; then
                        numdevs=$((numdevs + 1))
                    fi
                done
                # NB despite its name this holds MemTotal from /proc/meminfo
                freemem=$(remote_shell "$host" cat /proc/meminfo |
                          awk '/^MemTotal:/ {printf "%d\n", $2}')
                if (((rsz * thr / crg + 64) * crg * numdevs > freemem)); then
                    echo "ENOMEM on $host" >> "$workf"
                    print_summary "ENOMEM"
                    # give up on this combination, move on to the next thr
                    continue 2
                fi
            done

            # run tests
            for action in $actions; do
                pidarray=()
                print_summary -n "$action "
                echo "=====> $action" >> "$workf"
                tmpf=${workf}_tmp

                # create per-host script files
                for host in "${unique_hosts[@]}"; do
                    : > "${cmdsf}_${host}"
                done
                for ((i = 0; i < ndevs; i++)); do
                    bpt=$((rsz * 1024 / bs[i]))
                    blocks=$((size * ((1024 * 1024) / bs[i]) / crg))
                    count=$blocks
                    host=${hosts[i]}
                    dev=${devs[i]}
                    if [[ $action == read ]]; then
                        inf="if=$dev"
                        outf="of=/dev/null"
                        skip=skip
                    else
                        inf="if=/dev/zero"
                        outf="of=$dev"
                        skip=seek
                    fi
                    # one background sgp_dd per region; region j starts
                    # at block boundary + j * blocks and its stderr goes
                    # to a per-device, per-region file on the target host
                    for ((j = 0; j < crg; j++)); do
                        echo >> "${cmdsf}_${host}" \
                                "sgp_dd 2> ${tmpf}_${i}_${j} $inf $outf " \
                                "${skip}=$((boundary + j * blocks)) " \
                                "thr=$((thr / crg)) count=$count bs=${bs[i]} " \
                                "bpt=$bpt time=1&"
                    done
                done
                for host in "${unique_hosts[@]}"; do
                    echo "wait" >> "${cmdsf}_${host}"
                done

                # run all the per-host script files in parallel and wait
                # for every one of them; t0/t1 bracket the whole run
                t0=$(date +%s.%N)
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                    remote_shell "$host" bash < "${cmdsf}_${host}" &
                    pidarray[pidcount]=$!
                    pidcount=$((pidcount + 1))
                done
                for pid in "${pidarray[@]}"; do
                    wait "$pid"
                done
                t1=$(date +%s.%N)

                # clean up per-host script files
                for host in "${unique_hosts[@]}"; do
                    rm "${cmdsf}_${host}"
                done

                # collect/check individual stats: fetch each region's
                # sgp_dd output, count those reporting a transfer time
                # and no error, and append all of it to the detail file
                echo > "$tmpf"
                ok=0
                for ((i = 0; i < ndevs; i++)); do
                    for ((j = 0; j < crg; j++)); do
                        rtmp=${tmpf}_${i}_${j}_local
                        remote_shell "${hosts[i]}" cat "${tmpf}_${i}_${j}" > "$rtmp"
                        if grep -q 'error' "$rtmp" 2> /dev/null; then
                            echo "Error found in $rtmp"
                        elif grep -q 'time to transfer data' "$rtmp" 2> /dev/null; then
                            ok=$((ok + 1))
                        fi
                        cat "$rtmp" >> "$tmpf"
                        cat "$rtmp" >> "$workf"
                        rm "$rtmp"
                        remote_shell "${hosts[i]}" rm "${tmpf}_${i}_${j}"
                    done
                done

                if ((ok != ndevs * crg)); then
                    print_summary -n "$((ndevs * crg - ok)) failed "
                else
                    # compute bandwidth in MiB/s from total data / elapsed
                    # time.  Values are handed to awk with -v instead of
                    # being pasted into the program text; the C locale
                    # keeps "." as the decimal separator, which is what
                    # date prints.
                    bw=$(LC_ALL=C awk -v kb="$total_size" \
                                      -v t0="$t0" -v t1="$t1" \
                             'BEGIN {printf "%7.2f ", kb / ((t1 - t0) * 1024); exit}')
                    # compute global min/max stats over the 8th field of
                    # every "time to transfer data" line
                    minmax=$(LC_ALL=C awk '
                        /time to transfer data/ {
                            mb = $8 / 1.048576
                            if (n == 0 || mb < min) min = mb
                            if (n == 0 || mb > max) max = mb
                            n++
                        }
                        END { printf "[ %7.2f, %7.2f] ", min, max }
                    ' "$tmpf")
                    print_summary -n "$bw $minmax "
                fi
                rm "$tmpf"
            done
            print_summary ""
        done
    done
done

# unload the sg module again on the hosts where this script loaded it
# (the list is whitespace-separated, hence the unquoted expansion)
for host in $sg_was_loaded_on; do
    echo "unloading sg module on $host"
    remote_shell "$host" rmmod sg
done