Whamcloud - gitweb
LU-17873 test: ignore WIFSIGNALED if rc is 0
[fs/lustre-release.git] / lustre-iokit / mds-survey / mds-survey
1 #!/bin/bash
2 # SPDX-License-Identifier: GPL-2.0
3
4 #
5 # This file is part of Lustre, http://www.lustre.org/
6 #
7
8 ######################################################################
9 # customize per survey
10
11 # Prerequisite: For "stripe_count > 0" you need to have ost setup and mounted.
12 #
13 # How to run test:
14 # case 1 (stripe_count=0 default):
15 #  $ thrhi=8 dir_count=4 sh mds-survey
16 #  one can also run test with user defined targets as follows,
17 #  $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" sh mds-survey
18 # case 2 (stripe_count > 0, must have ost mounted):
19 #  $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2
20 #  targets="lustre-MDT0000" sh mds-survey
21 # [ NOTE: It is advised to have automated login (passwordless entry) on server ]
22
# include library
# iokit-libecho is expected to sit next to this script.  Helpers that are
# called below but not defined in this file (remote_shell, split_hostname,
# unique, load_obdechos, get_ec_devno, cleanup) presumably come from it.
# Both expansions are quoted so that an install path containing whitespace
# still resolves to a single file name.
source "$(dirname "$0")/iokit-libecho"
25
26 # Customisation variables
27 #####################################################################
28 # One can change variable values in this section as per requirements
29 # The following variables can be set in the environment, or on the
30 # command line
31 # result file prefix (date/time + hostname makes unique)
32 # NB ensure path to it exists
# Every setting below keeps a value inherited from the environment or the
# command line and only falls back to the default when it is unset or empty.
: "${rslt_loc:=/tmp}"
: "${rslt:=${rslt_loc}/mds_survey_$(date +%F@%R)_$(uname -n)}"

# lowest and highest thread count; the survey doubles the count each round
: "${thrlo:=4}"
: "${thrhi:=32}"

# number of directories to test (defaults to the lowest thread count)
: "${dir_count:=$thrlo}"
# number of files per test; the main loop divides this by the thread count
: "${file_count:=100000}"

# MDTs to survey; an empty list triggers auto-detection further down
: "${targets:=}"
# stripe count handed to the create test ("create -c <stripe_count>")
: "${stripe_count:=0}"
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
: "${tests_str:=create lookup md_getattr setxattr destroy}"

# start number for each thread
: "${start_number:=2}"

# layer to be tested
: "${layer:=mdd}"
# Customisation variables ends here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...
LC_ALL=POSIX
export LC_ALL
# base name of the test directories and printf template for the MDT prefix
basedir=tests
mdtbasedir=MDT%04x-
62
#
# create_directories HOST DEVNO NDIR RFILE MDTIDX DIR_STRIPES
#
# Run "lctl test_mkdir" NDIR times on HOST through device DEVNO.  The
# directories are named <MDT prefix><basedir>, <MDT prefix><basedir>1, ...
# where the prefix is $mdtbasedir formatted with MDTIDX.
#
# RFILE is overwritten with the output of every test_mkdir call, so after
# the function returns it holds the output of the last call only.
#
# Globals:  mdtbasedir, basedir, LCTL (read)
# Outputs:  "$basedir" on stdout when every mkdir succeeded, "ERROR"
#           otherwise; on failure RFILE and a message are copied to stderr.
# Returns:  always 0 - callers have to look at the printed word.
#
create_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local dir_stripes=$6
        local idx
        local dirname

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory carries no numeric suffix
                dirname="$(printf "${mdtbasedir}" "$mdtidx")${basedir}"
                if (( idx != 0 )); then
                        dirname="${dirname}${idx}"
                fi
                # command words stay unquoted: remote_shell receives them
                # exactly as it did before
                remote_shell $host $LCTL --device $devno test_mkdir /$dirname \
                        -c $dir_stripes --stripe_index $mdtidx > "$rfile" 2>&1
                # scan the whole output at once; unlike a "while read" loop
                # this also sees a last line that lacks a trailing newline
                if grep -q 'error: test_mkdir' "$rfile"; then
                        cat "$rfile" >&2
                        echo "ERROR: fail test_mkdir" >&2
                        echo "ERROR"
                        return
                fi
        done
        echo $basedir
}
92
#
# destroy_directories HOST DEVNO NDIR RFILE MDTIDX
#
# Run "lctl test_rmdir" on HOST through device DEVNO for each of the NDIR
# directories named <MDT prefix><basedir>, <MDT prefix><basedir>1, ...
# where the prefix is $mdtbasedir formatted with MDTIDX.
#
# RFILE is overwritten by every call, so it ends up holding the output of
# the last test_rmdir only.  Failures of test_rmdir are not examined.
#
# Globals:  mdtbasedir, basedir, LCTL (read)
#
destroy_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local idx
        local dirname

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory carries no numeric suffix
                dirname="$(printf "${mdtbasedir}" "$mdtidx")${basedir}"
                if (( idx != 0 )); then
                        dirname="${dirname}${idx}"
                fi
                remote_shell $host $LCTL --device $devno test_rmdir /$dirname > "$rfile" 2>&1
        done
}
110
#
# get_stats RFILE
#
# Condense the lctl output of one test run (RFILE) into a single line
#       "<n> <ave> <min> <max>"
# n   - number of rate lines recognised, or -1 when the output contains
#       "error" or an unrecognised line after the first rate line
# ave - rate taken from the "Total: total ..." line
# min/max - lowest/highest rate of the "<a>/<b> Total: ..." lines; when
#       the "Total: total" line is the only rate line both equal ave
#
# Requires gawk (strtonum).  The redirect target is quoted so that a result
# path containing whitespace is read correctly.
#
get_stats () {
        local rfile=$1

        gawk '
        /starting/ {
                n = 0; next;
        }
        /error/ {
                n = -1;
                exit;
        }
        /PID [0-9]+ had rc=0/ {
                next;
        }
        /^Total: total [0-9]+ threads [0-9]+ sec [0-9\.]+ [0-9]+\.[0-9]+\/second$/ {
                ave = strtonum($8);
                n++;
                next;
        }
        /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
                n++; v = strtonum($3);
                if (n == 1 || v < min) min = v;
                if (n == 1 || v > max) max = v;
                next;
        }
        {
                # anything unexpected once rate lines have started is fatal
                if (n != 0) {
                        n = -1;
                        exit;
                }
        }
        END {
                if (n == 1) {
                        min = ave;
                        max = ave;
                }
                printf "%d %f %f %f\n", n, ave, min, max
        }' < "$rfile"
}
150
#
# get_global_stats RFILE
#
# Combine the per-MDT lines "<n> <ave> <min> <max>" found in RFILE into
# one line of the same shape:
#       lowest n, mean of the ave column, lowest min, highest max
#
# An empty RFILE yields "0 0.000000 0.000000 0.000000" instead of an awk
# division-by-zero failure.
#
get_global_stats () {
        local rfile=$1

        awk '
        BEGIN {
                n = 0;
        }
        {
                n++;
                if (n == 1) {
                        err = $1;
                        ave = $2;
                        min = $3;
                        max = $4;
                } else {
                        if ($1 < err)
                                err = $1;
                        ave += $2;
                        if ($3 < min)
                                min = $3;
                        if ($4 > max)
                                max = $4;
                }
        }
        END {
                if (n == 0) {
                        # nothing to average: report "no results"
                        printf "%d %f %f %f\n", 0, 0, 0, 0;
                        exit;
                }
                printf "%d %f %f %f\n", err, ave/n, min, max
        }' < "$rfile"
}
181
#
# print_summary [-n] TEXT...
#
# Append TEXT (all arguments joined by blanks) to the summary file $rsltf
# and print the same text on stdout.  A leading "-n" suppresses the
# trailing newline in both places.
#
# printf is used instead of echo so that text which looks like an echo
# option (e.g. "-e") is printed verbatim.
#
# Globals:  rsltf (read)
#
print_summary () {
        local eol=$'\n'

        if [ "$1" = "-n" ]; then
                eol=""
                shift
        fi
        printf '%s%s' "$*" "$eol" >> "$rsltf"
        printf '%s%s' "$*" "$eol"
}
191
# Break the blank-separated $tests_str into the indexed array "tests";
# "count" ends up holding the number of entries.
declare -a tests
count=0
for name in $tests_str; do
        tests[count++]=$name
done
198
# hide a little trick to unset this from the command line:
# lustre_root=" " (a single blank) is treated as "not set"
if [[ "$lustre_root" == " " ]]; then
        unset lustre_root
fi

# find where 'lctl' binary is installed on this system
if [[ -x "$LCTL" ]]; then       # full pathname specified
        : # echo "LCTL=$LCTL"
elif [[ -n "$lustre_root" && -x "$lustre_root/utils/lctl" ]]; then
        LCTL=${lustre_root}/utils/lctl
elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
        LCTL=$LUSTRE/utils/lctl
else                            # hope that it is in the PATH
        LCTL=${LCTL:-lctl}
fi
# "command -v" is a shell builtin, so the check no longer depends on the
# external "which" being installed.  $LCTL is left unquoted on purpose so
# that a multi-word value is looked up word by word, as "which" did.
[[ -n "$(command -v $LCTL 2> /dev/null)" ]] ||
        { echo "error: lctl not found" >&2; exit 99; }
215
# parallel arrays, one slot per surveyed MDT
declare -a client_names
declare -a host_names
declare -a client_indexes

# Without an explicit target list, take the name column of every device
# that "lctl device_list" shows as an "UP" mdt.
if [[ -z "$targets" ]]; then
        targets=$($LCTL device_list |
                  awk '$2 == "UP" && $3 == "mdt" { print $4 }')
        if [[ -z "$targets" ]]; then
                echo "Can't find any MDT to test.  Please set targets=..."
                exit 1
        fi
fi
227
# Separate every target into host and MDT name.  The MDT index is the four
# lower-case hex digits following "MDT" in the name, stored with a "0x"
# prefix.
ndevs=0
for trgt in $targets; do
        str=($(split_hostname $trgt))
        host_names[ndevs]=${str[0]}
        client_names[ndevs]=${str[1]}
        client_indexes[ndevs]=0x$(sed 's/.*MDT\([0-9a-f]\{4\}\).*/\1/' \
                <<< "${str[1]}")
        ndevs=$(( ndevs + 1 ))
done
238
# A striped create needs objects on OSTs: every host under test must show
# at least one "UP" osc or osp device in "lctl device_list".
if (( stripe_count > 0 )); then
        for ((i = 0; i < ndevs; i++)); do
                host=${host_names[i]}
                obd=$(remote_shell $host $LCTL device_list |
                      awk '$2 == "UP" && ($3 == "osc" || $3 == "osp") { print $4 }')
                if [[ -z "$obd" ]]; then
                        echo "Need obdfilter to test stripe_count"
                        exit 1
                fi
        done
fi
253
# check and insert obdecho module
if ! lsmod | grep obdecho > /dev/null; then
        modprobe obdecho
fi

# The test list must not be empty, must start with "create" and end with
# "destroy".  The conditions short-circuit, so an empty list is rejected
# before ${tests[count - 1]} (index -1) is ever expanded, and the whole
# diagnostic goes to stderr.
count=${#tests[@]}
if (( count == 0 )) || [[ "${tests[0]}" != "create" ]] ||
   [[ "${tests[count - 1]}" != "destroy" ]]; then
        echo "tests: ${tests[*]}" 1>&2
        echo "First test must be 'create', and last test must be 'destroy'" 1>&2
        exit 1
fi
264
# output files, all derived from the common result prefix
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
# start with empty summary and detail files
: > $rsltf
: > $workf

# get vmstat started
# load_obdechos is defined outside this file; the original note here reads
# "disable portals debug and get obdecho loaded on all relevant hosts"
unique_hosts=($(unique ${host_names[@]}))
load_obdechos
pidcount=0
for host in ${unique_hosts[@]}; do
        host_vmstatf=${vmstatf}_${host}
        : > $host_vmstatf
        # one background vmstat sampler per host; its pid is recorded in
        # vmstatpids (not read again in this file - presumably by cleanup)
        remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
        pid=$!
        vmstatpids[pidcount]=$pid
        pidcount=$(( pidcount + 1 ))
done
# get all the echo_client device numbers and names
for ((i = 0; i < ndevs; i++)); do
        host=${host_names[i]}
        devno=($(get_ec_devno $host "${client_names[i]}" "${client_names[i]}" "mdt" $layer))
        # exactly three words are expected back:
        # device number, client name, teardown flag
        (( ${#devno[@]} == 3 )) || exit 1
        devnos[i]=${devno[0]}
        client_names[i]=${devno[1]}
        do_teardown_ec[i]=${devno[2]}
done
if (( ndevs <= 0 || ${#host_names[@]} <= 0 )); then
        echo "no devices or hosts specified"
        cleanup 0
fi
print_summary "$(date) $0 from $(hostname)"
# create directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[idx]}
        devno=${devnos[idx]}
        mdtidx=${client_indexes[idx]}
        client_name="${host}:${client_names[idx]}"
        echo "=======> Create $dir_count directories on $client_name" >> $workf
        # test_rmdir runs first, presumably to clear directories left behind
        # by an earlier run; its output is overwritten by the mkdir output
        destroy_directories $host $devno $dir_count $tmpf $mdtidx
        # the number of MDTs is passed as the directory stripe count
        ret=$(create_directories $host $devno $dir_count $tmpf $mdtidx $ndevs)
        cat $tmpf >> $workf
        rm $tmpf
        if [[ "$ret" == "ERROR" ]]; then
                print_summary "created directories on $client_name failed"
                cleanup 1
        fi
done
318
# Main survey loop: for every thread count thrlo, 2*thrlo, 4*thrlo, ... up
# to thrhi, run each entry of the tests array on all MDTs, collect the
# per-MDT rates and print one summary row per thread count.
# "status" becomes 1 as soon as any test yields no usable statistics.
#
# snap is handed to lctl as "-$snap"; its meaning is not visible in this
# file.
# NOTE(review): with a non-positive thrlo, "thr*=2" never exceeds thrhi and
# the "continue" below is taken forever - confirm thrlo is validated
# elsewhere.
snap=1
status=0
for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do
        thr_per_dir=$((${thr}/${dir_count}))
        # skip thread counts that leave less than one thread per directory
        if (( thr_per_dir <= 0 )); then
                continue
        fi
        # file_count is shared out evenly among the threads
        file_count_per_thread=$((${file_count}/${thr}))
        # row header for the summary and detail files
        str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
              $ndevs $file_count $dir_count $thr)
        echo "=======> $str" >> $workf
        print_summary -n "$str"
        # run tests
        for test in ${tests[@]}; do
                declare -a pidarray
                # mark the start of this run in every host's vmstat log
                # NOTE(review): $config is never assigned in this file, so
                # the "config:" field looks like it is always empty - confirm
                for host in ${unique_hosts[@]}; do
                        echo "starting run for config: $config test: $test " \
                             "file: $file_count threads: $thr " \
                             "directories: $dir_count" >> ${vmstatf}_${host}
                done
                print_summary -n "$test "
                # create per-host script files
                for host in ${unique_hosts[@]}; do
                        echo -n > ${cmdsf}_${host}
                done
                # add one lctl command line per MDT to its host's script;
                # the command's output goes to $tmpfi on the executing host
                for ((idx = 0; idx < $ndevs; idx++)); do
                        host=${host_names[$idx]}
                        devno=${devnos[$idx]}
                        dirname="$(printf "${mdtbasedir}" ${client_indexes[$idx]})$basedir"
                        tmpfi="${tmpf}_$idx"
                        # the create test additionally takes the stripe count;
                        # the loop variable itself is rewritten, so later
                        # iterations and the log headers below see
                        # "create -c <stripe_count>"
                        [ "$test" = "create" ] && test="create -c $stripe_count"
                        echo >> ${cmdsf}_${host}                        \
                                "$LCTL > $tmpfi 2>&1                    \
                                --threads $thr -$snap $devno test_$test \
                                -d /$dirname -D $dir_count              \
                                -b $start_number -n $file_count_per_thread"
                done
                # finish every script with "wait" and reset the pid slots
                # NOTE(review): the lctl lines above are not put in the
                # background, so this "wait" appears to have nothing to wait
                # for - confirm whether MDTs on one host are meant to run in
                # parallel
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        echo "wait" >> ${cmdsf}_${host}
                        pidarray[$pidcount]=0
                        pidcount=$((pidcount+1))
                done
                # feed each script to a bash on its host, all hosts at once
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        remote_shell $host bash < ${cmdsf}_${host} &
                        pidarray[$pidcount]=$!
                        pidcount=$((pidcount+1))
                done
                # wait for every host to finish; exit codes are not checked
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        wait ${pidarray[$pidcount]}
                        pidcount=$((pidcount+1))
                done
                #wait
                # clean up per-host script files
                for host in ${unique_hosts[@]}; do
                        rm ${cmdsf}_${host}
                done

                # collect/check individual MDT stats
                echo -n > $tmpf
                for ((idx = 0; idx < $ndevs; idx++)); do
                        client_name="${host_names[$idx]}:${client_names[$idx]}"
                        tmpfi="${tmpf}_$idx"
                        echo "=============> $test $client_name" >> $workf
                        host="${host_names[$idx]}"
                        # fetch the lctl output, log it and reduce it to one
                        # "<n> <ave> <min> <max>" line in $tmpf
                        remote_shell $host cat $tmpfi > ${tmpfi}_local
                        cat ${tmpfi}_local >> $workf
                        get_stats ${tmpfi}_local >> $tmpf
                        # NOTE(review): this rm runs on the local machine; if
                        # $host is a remote machine its copy of $tmpfi seems
                        # to stay behind - confirm
                        rm -f $tmpfi ${tmpfi}_local
                done
                # compute/display global min/max stats
                echo "=============> $test global" >> $workf
                cat $tmpf >> $workf
                stats=($(get_global_stats $tmpf))
                rm $tmpf
                # stats[0] is the lowest per-MDT line count; zero or negative
                # means at least one MDT produced no result or an error
                if ((stats[0] <= 0)); then
                        str=$(printf "%17s " ERROR)
                        status=1
                else
                        # "average [ minimum, maximum]"
                        str=$(awk "BEGIN {printf \"%7.2f [ %7.2f, %7.2f] \", \
                              ${stats[1]}, ${stats[2]}, ${stats[3]}; exit}")
                fi
                print_summary -n "$str"
        done
        # end of the summary row for this thread count
        print_summary ""
done
408
# destroy directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < $ndevs; idx++)); do
        host=${host_names[$idx]}
        devno=${devnos[$idx]}
        mdtidx=${client_indexes[$idx]}
        client_name="${host}:${client_names[$idx]}"
        echo "====> Destroy $dir_count directories on $client_name" >> $workf
        destroy_directories $host $devno $dir_count $tmpf $mdtidx
        # Keep the test_rmdir output under its header in the detail file and
        # do not leave the scratch file behind, mirroring the creation step.
        # destroy_directories rewrites the file on every test_rmdir, so only
        # the last command's output is available; the file does not exist at
        # all when dir_count is 0.
        if [ -f "$tmpf" ]; then
                cat "$tmpf" >> "$workf"
                rm -f "$tmpf"
        fi
done

# cleanup is defined outside this file; the explicit exit below guarantees
# the exit status even if cleanup returns instead of exiting
cleanup $status
exit $status