Whamcloud - gitweb
b75d3e9bc58a010b21b7f8020c977bd702302557
[fs/lustre-release.git] / lustre-iokit / mds-survey / mds-survey
1 #!/bin/bash
2 # SPDX-License-Identifier: GPL-2.0
3
4 #
5 # This file is part of Lustre, http://www.lustre.org/
6 #
7
8 ######################################################################
9 # customize per survey
10
11 # Prerequisite: For "stripe_count > 0" you need to have ost setup and mounted.
12 #
# How to run test (the script needs bash, so invoke it with bash, not sh):
# case 1 (stripe_count=0 default):
#  $ thrhi=8 dir_count=4 bash mds-survey
#  one can also run test with user defined targets as follows,
#  $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" bash mds-survey
# case 2 (stripe_count > 0, must have ost mounted):
#  $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2 \
#    targets="lustre-MDT0000" bash mds-survey
21 # [ NOTE: It is advised to have automated login (passwordless entry) on server ]
22
# include library
# The path is quoted so the helper library is still found when the script
# lives in a directory whose name contains whitespace.
source "$(dirname "$0")/iokit-libecho"

# Customisation variables
#####################################################################
# One can change variable values in this section as per requirements
# The following variables can be set in the environment, or on the
# command line
# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
rslt_loc=${rslt_loc:-"/tmp"}
rslt=${rslt:-"$rslt_loc/mds_survey_$(date +%F@%R)_$(uname -n)"}

# min and max thread count (the thread count is doubled on every pass)
thrlo=${thrlo:-4}
thrhi=${thrhi:-32}

# number of directories to test
dir_count=${dir_count:-$thrlo}
# number of files per test run; it is divided evenly among the threads
file_count=${file_count:-100000}

# space-separated list of MDT targets; empty means auto-detect
targets=${targets:-""}
# stripe count passed to the create test (values > 0 need OSTs set up)
stripe_count=${stripe_count:-0}
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
tests_str=${tests_str:-"create lookup md_getattr setxattr destroy"}

# start number for each thread
start_number=${start_number:-2}

# layer to be tested
layer=${layer:-"mdd"}
# Customisation variables ends here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...
export LC_ALL=POSIX
# base name of the per-MDT test directories
basedir="tests"
# printf format turning an MDT index into the directory name prefix
mdtbasedir="MDT%04x-"
62
#######################################
# Create the test directories for one MDT through the echo client.
# Globals:   mdtbasedir, basedir, LCTL (read)
# Arguments: $1 - host to run lctl on
#            $2 - echo client device number
#            $3 - number of directories to create
#            $4 - local file receiving the output of each test_mkdir call
#                 (overwritten on every call, so it ends up holding the
#                 output of the last one)
#            $5 - MDT index (used in the directory name and --stripe_index)
#            $6 - directory stripe count (passed to -c)
# Outputs:   "ERROR" on stdout if lctl reported a test_mkdir error (the
#            captured output and a message go to stderr), otherwise the
#            base directory name
#######################################
create_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local dir_stripes=$6
        local prefix
        local dirname
        local idx

        # The name prefix is the same for every directory; build it once.
        prefix="$(printf "${mdtbasedir}" ${mdtidx:-0})${basedir}"

        for ((idx = 0; idx < ndir; idx++)); do
                # The first directory has no numeric suffix.
                if (( idx == 0 )); then
                        dirname="$prefix"
                else
                        dirname="${prefix}${idx}"
                fi
                # The words handed to remote_shell are left unquoted, as
                # before, so it receives exactly the same argument list.
                remote_shell $host $LCTL --device $devno test_mkdir /$dirname \
                        -c $dir_stripes --stripe_index $mdtidx > "$rfile" 2>&1
                if grep -q 'error: test_mkdir' "$rfile"; then
                        cat "$rfile" >&2
                        echo "ERROR: fail test_mkdir" >&2
                        echo "ERROR"
                        return
                fi
        done
        echo "$basedir"
}
92
#######################################
# Remove the test directories of one MDT through the echo client.
# Failures of test_rmdir are not checked.
# Globals:   mdtbasedir, basedir, LCTL (read)
# Arguments: $1 - host to run lctl on
#            $2 - echo client device number
#            $3 - number of directories to remove
#            $4 - local file receiving the output of each test_rmdir call
#                 (overwritten on every call)
#            $5 - MDT index used in the directory name
#######################################
destroy_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local prefix
        local dirname
        local idx

        # The name prefix is the same for every directory; build it once.
        prefix="$(printf "${mdtbasedir}" ${mdtidx:-0})${basedir}"

        for ((idx = 0; idx < ndir; idx++)); do
                # The first directory has no numeric suffix.
                if (( idx == 0 )); then
                        dirname="$prefix"
                else
                        dirname="${prefix}${idx}"
                fi
                # The words handed to remote_shell are left unquoted, as
                # before, so it receives exactly the same argument list.
                remote_shell $host $LCTL --device $devno test_rmdir /$dirname > "$rfile" 2>&1
        done
}
110
#######################################
# Summarise the output of one test run.
# Arguments: $1 - file holding the captured output of the run
# Outputs:   one line "n ave min max" on stdout, where n is the number of
#            rate lines counted since the last line containing "starting",
#            or -1 if the output contained "error" or an unrecognised line
#            after counting had begun
# Requires:  gawk (strtonum)
#######################################
get_stats () {
        local rfile=$1

        gawk '
        # A new run begins: restart the count.
        /starting/ {
                n = 0; next
        }
        # Any mention of an error invalidates the run.
        /error/ {
                n = -1
                exit
        }
        # Overall line: field 8 carries the aggregate rate.
        /^Total: total [0-9]+ threads [0-9]+ sec [0-9\.]+ [0-9]+\.[0-9]+\/second$/ {
                ave = strtonum($8)
                n++
                next
        }
        # Per-thread line: track the slowest and fastest rate seen.
        /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
                n++; v = strtonum($3)
                if (n == 1 || v < min) min = v
                if (n == 1 || v > max) max = v
                next
        }
        # Anything else is ignored until counting has begun; after that
        # it marks the output as invalid.
        {
                if (n != 0) {
                        n = -1
                        exit
                }
        }
        END {
                # Only one line was counted: report ave for min and max.
                if (n == 1) {
                        min = ave
                        max = ave
                }
                printf "%d %f %f %f\n", n, ave, min, max
        }' < "$rfile"
}
147
#######################################
# Combine per-device "n ave min max" lines into one global line.
# Arguments: $1 - file with one "n ave min max" line per device
# Outputs:   "err ave min max" on stdout: the smallest first column, the
#            mean of the second, the smallest third and the largest
#            fourth. Empty input yields all zeroes (the first column 0 is
#            what the caller treats as an error) instead of dividing by
#            zero.
#######################################
get_global_stats () {
        local rfile=$1

        awk '
        # The first record seeds every accumulator.
        NR == 1 {
                err = $1
                sum = $2
                min = $3
                max = $4
                next
        }
        {
                if ($1 < err)
                        err = $1
                sum += $2
                if ($3 < min)
                        min = $3
                if ($4 > max)
                        max = $4
        }
        END {
                if (NR == 0)
                        printf "%d %f %f %f\n", 0, 0, 0, 0
                else
                        printf "%d %f %f %f\n", err, sum / NR, min, max
        }' < "$rfile"
}
178
#######################################
# Write a message both to the summary file and to stdout.
# Globals:   rsltf (read) - summary file, appended to
# Arguments: optional leading "-n" suppresses the trailing newline;
#            the remaining arguments are joined with spaces
# Outputs:   the message on stdout
#######################################
print_summary () {
        local msg

        if [ "$1" = "-n" ]; then
                shift
                msg="$*"
        else
                msg="$*"$'\n'
        fi
        # printf is used so a message such as "-e" is printed literally
        # rather than being taken as an echo option.
        printf '%s' "$msg" >> "$rsltf"
        printf '%s' "$msg"
}
188
# Split the space-separated test list into the tests array;
# count ends up holding the number of tests.
declare -a tests
count=0
for name in $tests_str; do
        tests[count++]=$name
done
195
# hide a little trick to unset this from the command line:
# lustre_root=" " (a single space) means "behave as if it were not set"
if [ "$lustre_root" == " " ]; then
        unset lustre_root
fi

# find where 'lctl' binary is installed on this system, in order of
# preference: an executable $LCTL, $lustre_root/utils, $LUSTRE/utils, PATH
if [[ -x "$LCTL" ]]; then       # full pathname specified
        : # echo "LCTL=$LCTL"
elif [[ -n "$lustre_root" && -x "$lustre_root/utils/lctl" ]]; then
        LCTL=${lustre_root}/utils/lctl
elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
        LCTL=$LUSTRE/utils/lctl
else                            # hope that it is in the PATH
        LCTL=${LCTL:-lctl}
fi
# The command builtin replaces the external which(1), which is not
# installed everywhere. $LCTL stays unquoted, as before, so a value made
# of several words is still looked up word by word.
[[ -n "$(command -v $LCTL)" ]] || { echo "error: lctl not found" >&2; exit 99; }
212
declare -a client_names
declare -a host_names
declare -a client_indexes
# No targets given: use every local device that lctl lists as an UP mdt.
if [ -z "$targets" ]; then
        targets=$($LCTL device_list |
                  awk '$2 == "UP" && $3 == "mdt" { print $4 }')
        [ -n "$targets" ] || {
                echo "Can't find any MDT to test.  Please set targets=..."
                exit 1
        }
fi

# split out hostnames from mdt names
# client_indexes holds "0x" plus the four hex digits following "MDT" in
# the device name.
ndevs=0
for trgt in $targets; do
        str=($(split_hostname $trgt))
        host_names[$ndevs]=${str[0]}
        client_names[$ndevs]=${str[1]}
        client_indexes[$ndevs]=0x$(echo ${str[1]} |
                sed 's/.*MDT\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/')
        ndevs=$((ndevs + 1))
done
235
# check for ost
# A positive stripe_count needs at least one UP osc or osp device on
# every host that serves a target.
if (( $stripe_count > 0 )); then
        for ((i = 0; i < $ndevs; i++)); do
                host=${host_names[$i]}
                obd=$(remote_shell $host $LCTL device_list |
                      awk '$2 == "UP" && ($3 == "osc" || $3 == "osp") { print $4 }')
                [ -n "$obd" ] && continue
                echo "Need obdfilter to test stripe_count"
                exit 1
        done
fi
250
# check and insert obdecho module
if ! lsmod | grep -q obdecho; then
        modprobe obdecho
fi

# The test list must start with create and end with destroy.
# [[ ]] short-circuits, so the last element is not looked up when the
# list is empty.
count=${#tests[@]}
if [[ $count -eq 0 || "${tests[0]}" != "create" ||
      "${tests[count - 1]}" != "destroy" ]]; then
        echo "tests: ${tests[*]}"
        echo "First test must be 'create', and last test must be 'destroy'" 1>&2
        exit 1
fi
261
# Result files, all derived from the $rslt prefix.
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
# Start from empty summary and detail files.
: > "$rsltf"
: > "$workf"

# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=($(unique ${host_names[@]}))
load_obdechos
pidcount=0
for host in "${unique_hosts[@]}"; do
        host_vmstatf=${vmstatf}_${host}
        : > "$host_vmstatf"
        # One background vmstat per host; its pid is kept in vmstatpids.
        remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
        pid=$!
        vmstatpids[$pidcount]=$pid
        pidcount=$((pidcount + 1))
done
# get all the echo_client device numbers and names
# get_ec_devno must print exactly three words per target: they are stored
# in devnos, client_names and do_teardown_ec respectively.
# NOTE(review): the bare exit below skips cleanup although vmstat has
# already been started in the background - confirm this is intended.
i=0
while (( i < $ndevs )); do
        host=${host_names[$i]}
        devno=($(get_ec_devno $host "${client_names[$i]}" "${client_names[$i]}" "mdt" $layer))
        (( ${#devno[@]} == 3 )) || exit 1
        devnos[$i]=${devno[0]}
        client_names[$i]=${devno[1]}
        do_teardown_ec[$i]=${devno[2]}
        i=$((i + 1))
done
if (( $ndevs <= 0 || ${#host_names[@]} <= 0 )); then
        echo "no devices or hosts specified"
        cleanup 0
fi
print_summary "$(date) $0 from $(hostname)"
# create directories
# For every target: remove leftovers of an earlier run, then create
# $dir_count directories. $ndevs is passed as the directory stripe count.
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[$idx]}
        devno=${devnos[$idx]}
        client_name="${host}:${client_names[$idx]}"
        mdtidx=${client_indexes[$idx]}
        echo "=======> Create $dir_count directories on $client_name" >> "$workf"
        destroy_directories $host $devno $dir_count "$tmpf" $mdtidx
        ret=$(create_directories $host $devno $dir_count "$tmpf" $mdtidx $ndevs)
        cat "$tmpf" >> "$workf"
        rm "$tmpf"
        # create_directories prints ERROR on failure.
        if [[ "$ret" == "ERROR" ]]; then
                print_summary "created directories on $client_name failed"
                cleanup 1
        fi
done
315
# The sweep below doubles the thread count on every pass and divides by
# dir_count. A non-positive thrlo would never advance and a zero dir_count
# would divide by zero, so reject both up front.
if (( thrlo < 1 || dir_count < 1 )); then
        echo "error: thrlo and dir_count must be positive integers" >&2
        cleanup 1
        exit 1
fi

snap=1
status=0
for ((thr = thrlo; thr <= thrhi; thr *= 2)); do
        thr_per_dir=$((thr / dir_count))
        # skip if there are fewer threads than directories
        if (( thr_per_dir <= 0 )); then
                continue
        fi
        file_count_per_thread=$((file_count / thr))
        str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
              "$ndevs" "$file_count" "$dir_count" "$thr")
        echo "=======> $str" >> "$workf"
        print_summary -n "$str"
        # run tests
        for test in "${tests[@]}"; do
                declare -a pidarray
                # The create test also takes the stripe count. Keep it in a
                # separate variable so the loop variable is not modified;
                # the detail log still shows the full command name.
                test_cmd=$test
                if [ "$test" = "create" ]; then
                        test_cmd="create -c $stripe_count"
                fi
                # Mark the start of this run in every vmstat log.
                # NOTE(review): $config is not set anywhere in this script.
                for host in "${unique_hosts[@]}"; do
                        echo "starting run for config: $config test: $test " \
                             "file: $file_count threads: $thr " \
                             "directories: $dir_count" >> "${vmstatf}_${host}"
                done
                print_summary -n "$test "
                # create per-host script files
                for host in "${unique_hosts[@]}"; do
                        : > "${cmdsf}_${host}"
                done
                # One lctl command line per target, appended to the script
                # of the host serving it.
                # NOTE(review): the lines are not backgrounded, so targets
                # sharing a host appear to run one after another and the
                # trailing "wait" has nothing to wait for - confirm.
                for ((idx = 0; idx < ndevs; idx++)); do
                        host=${host_names[$idx]}
                        devno=${devnos[$idx]}
                        dirname="$(printf "${mdtbasedir}" ${client_indexes[$idx]})$basedir"
                        tmpfi="${tmpf}_$idx"
                        printf '%s\n' "$LCTL > $tmpfi 2>&1 --threads $thr -$snap $devno test_$test_cmd -d /$dirname -D $dir_count -b $start_number -n $file_count_per_thread" \
                                >> "${cmdsf}_${host}"
                done
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                        echo "wait" >> "${cmdsf}_${host}"
                        pidarray[$pidcount]=0
                        pidcount=$((pidcount + 1))
                done
                # Run every host script in parallel ...
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                        remote_shell $host bash < "${cmdsf}_${host}" &
                        pidarray[$pidcount]=$!
                        pidcount=$((pidcount + 1))
                done
                # ... and wait for all of them.
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                        wait "${pidarray[$pidcount]}"
                        pidcount=$((pidcount + 1))
                done
                # clean up per-host script files
                for host in "${unique_hosts[@]}"; do
                        rm "${cmdsf}_${host}"
                done

                # collect/check individual MDT stats
                : > "$tmpf"
                for ((idx = 0; idx < ndevs; idx++)); do
                        host="${host_names[$idx]}"
                        client_name="${host}:${client_names[$idx]}"
                        tmpfi="${tmpf}_$idx"
                        echo "=============> $test_cmd $client_name" >> "$workf"
                        remote_shell $host cat $tmpfi > "${tmpfi}_local"
                        cat "${tmpfi}_local" >> "$workf"
                        get_stats "${tmpfi}_local" >> "$tmpf"
                        rm -f "$tmpfi" "${tmpfi}_local"
                done
                # compute/display global min/max stats
                echo "=============> $test_cmd global" >> "$workf"
                cat "$tmpf" >> "$workf"
                stats=($(get_global_stats "$tmpf"))
                rm "$tmpf"
                if ((stats[0] <= 0)); then
                        str=$(printf "%17s " ERROR)
                        status=1
                else
                        # Pass the numbers as awk variables instead of
                        # pasting them into the program text.
                        str=$(awk -v ave="${stats[1]}" -v lo="${stats[2]}" \
                                  -v hi="${stats[3]}" \
                              'BEGIN { printf "%7.2f [ %7.2f, %7.2f] ", ave, lo, hi }')
                fi
                print_summary -n "$str"
        done
        print_summary ""
done
405
# destroy directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[$idx]}
        devno=${devnos[$idx]}
        mdtidx=${client_indexes[$idx]}
        client_name="${host}:${client_names[$idx]}"
        echo "====> Destroy $dir_count directories on $client_name" >> "$workf"
        destroy_directories $host $devno $dir_count "$tmpf" $mdtidx
        # As in the create phase, keep the captured output in the detail
        # log and do not leave the temporary file behind.
        if [ -f "$tmpf" ]; then
                cat "$tmpf" >> "$workf"
                rm -f "$tmpf"
        fi
done

cleanup $status
exit $status