2 # SPDX-License-Identifier: GPL-2.0
5 # This file is part of Lustre, http://www.lustre.org/
8 ######################################################################
11 # CHOOSE EITHER scsidevs or rawdevs
12 # the SCSI devices to measure - WARNING: will be erased.
13 # The raw devices to use
14 # rawdevs=${rawdevs:-"/dev/raw/raw1"}
15 # scsidevs=`ls /dev/sd[a-z] /dev/sd[a-z][a-z]` # all devices, if you use udev
# Every assignment in this section has the form var=${var:-default}: a
# non-empty value already present in the environment is kept, otherwise
# the default shown here is used.
18 # NB ensure the path exists on all servers if it includes subdirs
19 rslt_loc=${rslt_loc:-"/tmp"}
# prefix for result files; the default appends the output of
# "date +%F@%R" to "$rslt_loc/sgpdd_survey_"
20 rslt=${rslt:-"$rslt_loc/sgpdd_survey_`date +%F@%R`"}
22 # what to do (read or write)
# space-separated list; the survey iterates over it word by word
23 actions=${actions:-"write read"}
25 # total size per device (MBytes)
26 # NB bigger than device cache is good
29 # record size (KBytes)
33 # Concurrent regions per device
37 # boundary blocks between concurrent regions per device
# this value is added to the start offset of every region when the
# sgp_dd command lines are generated
38 boundary=${boundary:-1024}
40 # threads to share between concurrent regions per device
41 # multiple threads per region simulates a deeper request queue
42 # NB survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE
47 # User provided script that returns a cpu list from a specified device.
48 # Implementation depends on the type of device (scsi/raw, with/without
49 # multipath, technology fc/sas/ib)
54 # pci=$(readlink -f /sys/class/block/$dev | cut -d/ -f1-5)
55 # cat ${pci}/local_cpulist
# empty by default; the per-device cpu list is only collected when this
# is non-empty
56 dev2cpus=${dev2cpus:-""}
58 #####################################################################
59 # leave the rest of this alone unless you know what you're doing...
61 # and max # threads one instance will spawn
# path of the numactl binary; it is only used for a device when the file
# is executable and a cpu list was obtained for that device
65 NUMACTL=${NUMACTL:-"/usr/bin/numactl"}
# print the arguments one per line, sorted, with duplicates removed.
# The arguments are joined by echo and re-split by xargs, so an argument
# containing whitespace is treated as several separate items.
68 echo "$@" | xargs -n1 echo | sort -u
# case arm for a name of the form "host:device":
#   host = everything before the first ':'
#   name = everything after the first ':'
74 *:*) host=`echo $name | sed 's/:.*$//'`
75 name=`echo $name | sed 's/[^:]*://'`
# prefix the command with a PATH export so tools in /sbin and /usr/sbin
# are found; \$PATH is escaped so it is expanded when the command string
# is executed by $DSH, not when this assignment runs
91 command="export PATH=/sbin:/usr/sbin:\$PATH; $command"
# two invocation shapes follow, each preceded by a check for a non-empty
# $user: in the first $user is glued directly onto $node, in the second it
# is passed as separate word(s) before $node.
# NOTE(review): presumably $user is rewritten into the form each remote
# shell expects inside these checks - confirm against the full function.
95 if [ -n "$user" ]; then
98 $DSH $user$node "$command"
101 if [ -n "$user" ]; then
104 $DSH $user $node "$command"
109 # how to run commands on other nodes
# true when $host is the literal "localhost" or equals the output of
# "uname -n" on the machine running this script
114 if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then
117 # split $host into $host and $user
# pattern match: only hosts containing an '@' are split
119 if [[ $host == *@* ]]; then
# hand the command string to dsh as (node, user, command)
123 dsh $host "$user" "$cmds"
128 # check either scsidevs or rawdevs is specified
129 # but only one of them
# the test is true when both variables are non-empty, or when both are
# empty (-a binds tighter than -o)
130 if [ -n "$scsidevs" -a -n "$rawdevs" -o -z "$scsidevs$rawdevs" ]; then
131 echo "Must either specify scsidevs or rawdevs"
135 # retrieve host and device if specified as "hostname:device"
# the output of split_hostname is word-split into an array: word 0 is
# stored as the host and word 1 as the device, both at index $ndevs
138 for d in $scsidevs $rawdevs; do
139 str=(`split_hostname $d`)
140 hosts[$ndevs]=${str[0]}
141 devs[$ndevs]=${str[1]}
# array of the distinct host names found in hosts[]
144 unique_hosts=(`unique ${hosts[@]}`)
146 # get device cpu list
# only done when a dev2cpus script was configured: it is run through
# remote_shell on the device's host with the device name as its argument,
# and whatever it prints is stored in devcpus[] at the device's index
148 if [ -n "$dev2cpus" ]; then
149 for ((i=0; i < $ndevs; i++)); do
150 devcpus[$i]=$(remote_shell ${hosts[$i]} $dev2cpus ${devs[$i]})
154 # map given device names into SG device names
155 if [ "$scsidevs" ]; then
156 # make sure sg kernel module is loaded
157 for host in ${unique_hosts[@]}; do
# sg_is_loaded receives the literal word "true" or "false", depending on
# whether a line starting with "sg " is found in /proc/modules; that word
# is then executed as a command by the "if !" test below
158 sg_is_loaded=$(remote_shell $host grep -q "^sg " /proc/modules \
159 && echo true || echo false)
160 if ! $sg_is_loaded; then
161 echo "loading the sg kernel module on $host"
162 remote_shell $host modprobe sg
# remember the hosts where this script loaded the module; the list is
# walked at the end of the script to unload it again
163 sg_was_loaded_on="$sg_was_loaded_on $host"
167 for ((i=0; i < $ndevs; i++)); do
168 # resolve symbolic link if any
169 devs[$i]=$(remote_shell ${hosts[$i]} readlink -f ${devs[$i]})
171 # retrieve associated sg device
172 # we will test for a LUN, then test for a partition
173 # if the partition number is > 9 this will fail
# NOTE(review): the sed expression used for the partition fallback strips
# every trailing digit, so multi-digit partition numbers look handled;
# confirm whether the "> 9" caveat above still applies.
# first attempt: look the device name up in column 2 of the sg_map output
# and keep column 1 of the matching line
174 tmp=$(remote_shell ${hosts[$i]} sg_map | \
175 awk -v dev=${devs[$i]} '{if ($2 == dev) print $1}')
176 if [ -z "$tmp" ]; then
177 echo "Can't find SG device for ${hosts[$i]}:${devs[$i]}, " \
178 "testing for partition"
# drop the trailing digits from the device name and repeat the lookup
179 pt=`echo ${devs[$i]} | sed 's/[0-9]*$//'`
181 tmp=$(remote_shell ${hosts[$i]} sg_map | \
182 awk -v dev=$pt '{if ($2 == dev) print $1}')
183 if [ -z "$tmp" ]; then
184 echo -e "Can't find SG device ${hosts[$i]}:$pt.\n" \
185 "Do you have the sg module configured for your kernel?"
# raw device case: query every device with "raw -q" on its own host and
# keep the output in RES
191 elif [ "$rawdevs" ]; then
192 for ((i=0; i < $ndevs; i++)); do
193 RES=$(remote_shell ${hosts[$i]} raw -q ${devs[$i]})
# NOTE(review): the condition guarding this message is not part of the
# visible lines - presumably a failed or empty query; confirm.
195 echo "Raw device ${hosts[$i]}:${devs[$i]} not set up"
201 # determine block size of each device. This should also work for raw devices
202 # If it fails, set to 512
203 for ((i=0; i < $ndevs; i++)); do
204 # retrieve device size (in blocks) and block size (in bytes)
# the words printed by "sg_readcap -lb" are split into the array tmp;
# tmp[0] is used below as the number of blocks on the device
205 tmp=( `remote_shell ${hosts[$i]} sg_readcap -lb ${devs[$i]}` )
# a block size of 0 is treated as a failed sg_readcap
207 if [ ${bs[$i]} == 0 ]; then
208 echo "sg_readcap on device ${hosts[$i]}:${devs[$i]} failed, " \
209 "setting block size to 512"
# device size in KBytes: block count * block size in bytes / 1024
212 devsize=$((tmp[0]*bs[$i]/1024))
214 # check record size is a multiple of block size
# only the smallest record size (rszlo, in KBytes) is checked here
215 if [ $((rszlo*1024%bs[$i])) -ne 0 ]; then
216 echo "Record size is not a multiple of block size (${bs[$i]} bytes) " \
217 "for device ${hosts[$i]}:${devs[$i]}"
# size is in MBytes, so size*1024 is the amount to transfer in KBytes,
# the same unit as devsize
222 if [ $devsize -lt $((size*1024)) ]; then
223 echo -e "device ${hosts[$i]}:${devs[$i]} not big enough: " \
224 "$devsize < $((size*1024)).\nConsider reducing \$size"
# summary file: the result prefix with ".summary" appended
229 rsltf=${rslt}.summary
# a leading "-n" argument is recognised here
# NOTE(review): presumably it sets $minusn and is shifted away in lines
# that are not visible - confirm against the full function.
236 if [ "$1" = "-n" ]; then
# append the arguments, joined into one string, to the summary file;
# $minusn is expanded unquoted so it can contribute an echo option
241 echo $minusn "$*" >> $rsltf
# first summary line: date, the device list and the local host name
245 print_summary "$(date) sgpdd-survey on $rawdevs$scsidevs from $(hostname)"
# three nested sweeps, each doubling its variable from the low to the high
# bound: record size (rsz), concurrent regions (crg), threads (thr)
247 for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
248 for ((crg=$crglo;crg<=$crghi;crg*=2)); do
249 for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
# combinations with fewer threads than regions, or with more than
# SG_MAX_QUEUE threads per region, are singled out here
250 if ((thr < crg || thr/crg > SG_MAX_QUEUE)); then
253 # compute total size (in kbytes)
255 for ((i=0; i < $ndevs; i++)); do
# per device: convert size (MBytes) to blocks, round down to a multiple of
# crg via the integer divide/multiply pair, then convert back to KBytes
256 tsize=$((size*1024*1024/bs[$i]/crg*crg*bs[$i]/1024))
257 total_size=$((total_size+tsize))
259 # show test parameters
# crg and thr are per-device values, so both are multiplied by the number
# of devices for display
260 str=`printf 'dev %2d sz %8dK rsz %4dK crg %5d thr %5d ' \
261 $ndevs $total_size $rsz $((crg*ndevs)) $((thr*ndevs))`
262 echo "==============> $str" >> $workf
263 print_summary -n "$str"
265 # check memory for each host
266 for host in ${unique_hosts[@]}; do
# count the devices that live on this host
268 for ((i=0; i < $ndevs; i++)); do
269 if [ ${hosts[$i]} == $host ]; then
270 numdevs=$((numdevs+1))
# despite its name, freemem holds the MemTotal value from /proc/meminfo
# of the host, not the currently free memory
273 freemem=$(remote_shell $host cat /proc/meminfo | \
274 awk '/^MemTotal:/ {printf "%d\n", $2}')
# estimated need: (rsz * threads-per-region + 64) for each region of each
# device on this host; the run is reported as ENOMEM when that exceeds
# freemem
275 if (((rsz*thr/crg + 64)*crg*numdevs > freemem)); then
276 echo "ENOMEM on $host" >> $workf
277 print_summary "ENOMEM"
# one pass per configured action
283 for action in $actions; do
285 print_summary -n "$action "
286 echo "=====> $action" >> $workf
289 # create per-host script files
# start from an empty script file for every host
290 for host in ${unique_hosts[@]}; do
291 echo -n > ${cmdsf}_${host}
293 for ((i=0; i < $ndevs; i++)); do
# bpt: record size expressed in device blocks
# blocks: number of device blocks in each of the crg regions
294 bpt=$((rsz*1024/bs[$i]))
295 blocks=$((size*((1024*1024)/bs[$i])/crg))
# NOTE(review): the branches that set $inf, $outf and $skip for read and
# write are not visible here - confirm against the full script.
299 if [ $action = read ]; then
# bind sgp_dd to the device's cpus only when a cpu list was collected for
# it and the numactl binary is executable
308 if [ -n "${devcpus[$i]}" -a -x "$NUMACTL" ]; then
309 numacmd="$NUMACTL --physcpubind=${devcpus[$i]} --localalloc"
# append one sgp_dd command line per region to the host's script; region j
# starts at block boundary + j*blocks, gets thr/crg threads, and writes its
# stderr to ${tmpf}_<device index>_<region index>
313 for ((j=0;j<crg;j++)); do
314 echo >> ${cmdsf}_${host} \
316 "sgp_dd 2> ${tmpf}_${i}_${j} $inf $outf " \
317 "${skip}=$((boundary+j*blocks)) " \
318 "thr=$((thr/crg)) count=$count bs=${bs[$i]} " \
322 for host in ${unique_hosts[@]}; do
323 echo "wait" >> ${cmdsf}_${host}
326 # run all the per-host script files
# each script is fed on stdin to a bash started through remote_shell, in
# the background; the pid of every background job is recorded in pidarray
329 for host in ${unique_hosts[@]}; do
330 remote_shell $host bash < ${cmdsf}_${host} &
331 pidarray[$pidcount]=$!
332 pidcount=$((pidcount+1))
# wait for the recorded jobs one by one, indexed by pidcount
# NOTE(review): presumably pidcount is reset to 0 between the two loops in
# a line that is not visible - confirm.
335 for host in ${unique_hosts[@]}; do
336 wait ${pidarray[$pidcount]}
337 pidcount=$((pidcount+1))
341 # clean up per-host script files
342 for host in ${unique_hosts[@]}; do
346 # collect/check individual stats
# one sgp_dd output file exists per (device, region) pair
349 for ((i=0;i<ndevs;i++)); do
350 for ((j=0;j<crg;j++)); do
# fetch the remote output file into a local copy named with a _local suffix
351 rtmp=${tmpf}_${i}_${j}_local
352 remote_shell ${hosts[$i]} cat ${tmpf}_${i}_${j} > $rtmp
# a copy containing "error" is reported; otherwise a copy containing
# "time to transfer data" takes the second branch
353 if grep 'error' $rtmp > /dev/null 2>&1; then
354 echo "Error found in $rtmp"
355 elif grep 'time to transfer data' $rtmp > /dev/null 2>&1; then
# keep the raw sgp_dd output in the work file, then remove the remote file
359 cat ${rtmp} >> $workf
361 remote_shell ${hosts[$i]} rm ${tmpf}_${i}_${j}
# ok is compared with the expected number of runs (devices * regions); any
# shortfall is reported as that many failures
364 if ((ok != ndevs*crg)); then
365 print_summary -n "$((ndevs*crg - ok)) failed "
367 # compute bandwidth in MiB/s from total data / elapsed time
# total_size is in KBytes and t1 - t0 is the elapsed time, so dividing by
# (elapsed * 1024) gives MiB/s; awk is used for the floating point maths
368 bw=`awk "BEGIN {printf \"%7.2f \", \
369 $total_size / (( $t1 - $t0 ) * 1024); exit}"`
370 # compute global min/max stats
# scans $tmpf for "time to transfer data" lines, divides field 8 by
# 1.048576 and tracks the smallest and largest result
# NOTE(review): presumably field 8 is sgp_dd's rate in MB/s (10^6 bytes)
# and the division converts it to MiB/s - confirm against sgp_dd output.
371 minmax=`awk < $tmpf \
372 '/time to transfer data/ {mb=$8/1.048576; \
373 if (n == 0 || mb < min) min = mb; \
374 if (n == 0 || mb > max) max = mb; \
376 END {printf "[ %7.2f, %7.2f] ",min,max;}'`
377 print_summary -n "$bw $minmax "
386 for host in $sg_was_loaded_on; do
387 echo "unloading sg module on $host"
388 remote_shell $host rmmod sg