Whamcloud - gitweb
LU-8078 iokit: correct obdfilter-survey output data format
[fs/lustre-release.git] / lustre-iokit / mds-survey / mds-survey
#!/bin/bash

######################################################################
# customize per survey

# Prerequisite: for "stripe_count > 0" the OSTs must be set up and mounted.
#
# How to run the test (run it with bash: the script uses arrays and "(( ))"):
# case 1 (stripe_count=0, the default):
#  $ thrhi=8 dir_count=4 bash mds-survey
#  one can also run the test with user-defined targets as follows,
#  $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" bash mds-survey
# case 2 (stripe_count > 0, must have ost mounted):
#  $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2 \
#    targets="lustre-MDT0000" bash mds-survey
# [ NOTE: It is advised to have automated login (passwordless entry) on server ]

# include library (looked up next to this script; quoted so that a script
# path containing whitespace still resolves)
source "$(dirname "$0")/iokit-libecho"
20
# Customisation variables
#####################################################################
# One can change variable values in this section as per requirements.
# The following variables can be set in the environment, or on the
# command line; each assignment below only supplies a default.

# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
rslt_loc=${rslt_loc:-"/tmp"}
rslt=${rslt:-"$rslt_loc/mds_survey_$(date +%F@%R)_$(uname -n)"}

# min and max thread count
thrlo=${thrlo:-4}
thrhi=${thrhi:-32}

# number of directories to test (defaults to the minimum thread count)
dir_count=${dir_count:-$thrlo}
# number of files per thread
file_count=${file_count:-100000}

# targets to test; when empty the script looks for MDT devices itself
targets=${targets:-""}
stripe_count=${stripe_count:-0}
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
tests_str=${tests_str:-"create lookup md_getattr setxattr destroy"}

# start number for each thread
start_number=${start_number:-2}

# layer to be tested
layer=${layer:-"mdd"}
# Customisation variables ends here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...
export LC_ALL=POSIX
# name of the first test directory; further ones get a numeric suffix
basedir="tests"
# printf format producing the per-MDT directory prefix from the MDT index
mdtbasedir="MDT%04x-"
57
# Create the per-MDT test directories with "lctl test_mkdir".
#
# Arguments:
#   $1 - host the command is run on (handed to remote_shell)
#   $2 - device number passed to "lctl --device"
#   $3 - number of directories to create
#   $4 - scratch file receiving the output of each test_mkdir call; it is
#        overwritten per call, so afterwards it holds the last call's output
#   $5 - MDT index, formatted into the directory prefix through $mdtbasedir
# Globals read: lctl, basedir, mdtbasedir
# Outputs: "$basedir" on stdout when every directory was created; "ERROR"
#          on stdout (plus the lctl output and a message on stderr) as soon
#          as one call reports 'error: test_mkdir'.
create_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local idx
        local prefix
        local dirname

        # the MDT prefix does not depend on the loop index; format it once
        prefix=$(printf "${mdtbasedir}" "$mdtidx")

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory carries no numeric suffix
                if (( idx == 0 )); then
                        dirname="${prefix}${basedir}"
                else
                        dirname="${prefix}${basedir}${idx}"
                fi
                remote_shell "$host" "$lctl" --device "$devno" \
                        test_mkdir "/$dirname" > "$rfile" 2>&1
                # grep scans the whole file, including a final line that
                # lacks a trailing newline
                if grep -q 'error: test_mkdir' "$rfile"; then
                        cat "$rfile" >&2
                        echo "ERROR: fail test_mkdir" >&2
                        echo "ERROR"
                        return
                fi
        done
        echo "$basedir"
}
85
# Remove the per-MDT test directories with "lctl test_rmdir".
#
# Arguments:
#   $1 - host the command is run on (handed to remote_shell)
#   $2 - device number passed to "lctl --device"
#   $3 - number of directories to remove
#   $4 - scratch file receiving the output of each test_rmdir call
#        (overwritten per call)
#   $5 - MDT index, formatted into the directory prefix through $mdtbasedir
# Globals read: lctl, basedir, mdtbasedir
# The lctl output is not inspected here: failures are not reported.
destroy_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local idx
        local prefix
        local dirname

        # the MDT prefix does not depend on the loop index; format it once
        prefix=$(printf "${mdtbasedir}" "$mdtidx")

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory carries no numeric suffix
                if (( idx == 0 )); then
                        dirname="${prefix}${basedir}"
                else
                        dirname="${prefix}${basedir}${idx}"
                fi
                remote_shell "$host" "$lctl" --device "$devno" \
                        test_rmdir "/$dirname" > "$rfile" 2>&1
        done
}
103
# Summarise the lctl output of one device.
#
# Arguments:
#   $1 - file holding the output to parse
# Outputs: one line "n ave min max" on stdout, where
#   n   - number of statistics lines seen since the last "starting" line,
#         or -1 when an "error" line or an unexpected line was met
#   ave - rate taken from the aggregate "Total: total ..." line
#   min/max - lowest/highest rate among the per-thread "i/N Total:" lines;
#         both equal ave when only a single statistics line was seen
# Rates are read with "+ 0", which takes the leading number of a field such
# as "123.45/second", so any awk can run this program.
get_stats () {
        local rfile=$1

        awk '
        /starting/ {
                # a new run begins: forget what was counted so far
                n = 0
                samples = 0
                next
        }
        /error/ {
                n = -1
                exit
        }
        /^Total: total [0-9]+ threads [0-9]+ sec [0-9.]+ [0-9]+\.[0-9]+\/second$/ {
                ave = $8 + 0
                n++
                next
        }
        /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
                n++
                v = $3 + 0
                # seed min/max from the first per-thread line, wherever the
                # aggregate line happens to be in the output
                if (samples == 0 || v < min) min = v
                if (samples == 0 || v > max) max = v
                samples++
                next
        }
        {
                # any other line is only tolerated before the statistics
                if (n != 0) {
                        n = -1
                        exit
                }
        }
        END {
                if (n == 1) {
                        min = ave
                        max = ave
                }
                printf "%d %f %f %f\n", n, ave, min, max
        }' < "$rfile"
}
140
# Combine the per-device lines written by get_stats into one global line.
#
# Arguments:
#   $1 - file with one "n ave min max" line per device
# Outputs: one line "err ave min max" on stdout, where err is the smallest
#          first field (so a single -1 marks the whole result as failed),
#          ave is the mean of the second fields, and min/max are the
#          extremes of the third/fourth fields.
#          For an empty file the line is all zeros instead of dividing by
#          zero; the caller treats a first field <= 0 as an error.
get_global_stats () {
        local rfile=$1

        awk '
        NR == 1 {
                err = $1
                ave = $2
                min = $3
                max = $4
                next
        }
        {
                if ($1 < err)
                        err = $1
                ave += $2
                if ($3 < min)
                        min = $3
                if ($4 > max)
                        max = $4
        }
        END {
                if (NR == 0)
                        printf "%d %f %f %f\n", 0, 0, 0, 0
                else
                        printf "%d %f %f %f\n", err, ave / NR, min, max
        }' < "$rfile"
}
171
# Write a message both to the summary file and to stdout.
#
# Usage: print_summary [-n] words...
#   -n  (only as first argument) suppress the trailing newline
# The remaining arguments are joined with single spaces ("$*").
# Globals read: rsltf (summary file, appended to)
print_summary () {
        local fmt='%s\n'

        if [ "$1" = "-n" ]; then
                fmt='%s'
                shift
        fi
        # printf prints the text verbatim, even when it looks like an
        # echo option
        printf "$fmt" "$*" >> "$rsltf"
        printf "$fmt" "$*"
}
181
# Split the whitespace-separated test names of $tests_str into the
# "tests" array; "count" ends up as the number of tests.
declare -a tests
for name in $tests_str; do
        tests+=("$name")
done
count=${#tests[@]}
188
# a little trick: lustre_root set to a single space on the command line
# means "treat it as unset"
case "$lustre_root" in
" ")
        unset lustre_root
        ;;
esac

# take lctl from the Lustre tree when lustre_root is given, otherwise rely
# on the command search path
lctl=lctl
if [ -n "$lustre_root" ]; then
        lctl=${lustre_root}/utils/lctl
fi
199
declare -a client_names
declare -a host_names
declare -a client_indexes

# No targets given: default to every device that "lctl device_list" shows
# as UP with type "mdt" (fourth column is the device name).
if [ -z "$targets" ]; then
        targets=$($lctl device_list |
                  awk '$2 == "UP" && $3 == "mdt" { print $4 }')
        if [ -z "$targets" ]; then
                # fatal diagnostics go to stderr
                echo "Can't find any MDT to test.  Please set targets=..." >&2
                exit 1
        fi
fi

# split out hostnames from mdt names
ndevs=0
for trgt in $targets; do
        # split_hostname comes from the sourced library; its two output
        # words are used as host name and device name
        str=($(split_hostname $trgt))
        host_names[$ndevs]=${str[0]}
        client_names[$ndevs]=${str[1]}
        # MDT index: the four hex digits following "MDT" in the device
        # name, kept with a 0x prefix so printf %x reads it as hexadecimal
        client_indexes[$ndevs]=0x$(echo ${str[1]} |
                sed 's/.*MDT\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/')
        ndevs=$((ndevs+1))
done
222
# check for ost
# With stripe_count > 0 every target host must show at least one device of
# type "osc" or "osp" in state UP, otherwise the survey is aborted.
if (( stripe_count > 0 )); then
        for ((i = 0; i < ndevs; i++)); do
                host=${host_names[$i]}
                obd=$(remote_shell $host $lctl device_list |
                      awk '$2 == "UP" && ($3 == "osc" || $3 == "osp") { print $4 }')
                if [ -z "$obd" ]; then
                        # fatal diagnostics go to stderr
                        echo "Need obdfilter to test stripe_count" >&2
                        exit 1
                fi
        done
fi
237
# check and insert obdecho module
if ! lsmod | grep -q obdecho; then
        modprobe obdecho
fi

# The test list must be non-empty, start with "create" and end with
# "destroy".  The checks are chained with "||" so that the last element is
# only looked at when the list is known to be non-empty.
count=${#tests[@]}
if (( count == 0 )) || [ "${tests[0]}" != "create" ] ||
   [ "${tests[count-1]}" != "destroy" ]; then
        echo "tests: ${tests[@]}"
        echo "First test must be 'create', and last test must be 'destroy'" 1>&2
        exit 1
fi
248
# files produced by this run, all sharing the $rslt prefix
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
# start from empty summary and detail files
: > $rsltf
: > $workf

# disable portals debug and get obdecho loaded on all relevant hosts
# (unique and load_obdechos come from the sourced library)
unique_hosts=($(unique ${host_names[@]}))
load_obdechos

# get vmstat started: one background vmstat per host, appending to a
# per-host file; the background job pids are kept in vmstatpids
pidcount=0
for host in ${unique_hosts[@]}; do
        host_vmstatf=${vmstatf}_${host}
        : > $host_vmstatf
        remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
        pid=$!
        vmstatpids[$pidcount]=$pid
        pidcount=$((pidcount + 1))
done
# get all the echo_client device numbers and names
# get_ec_devno (sourced library) must print exactly three words; they are
# stored as device number, client name and teardown flag of the device.
i=0
while (( i < ndevs )); do
        host=${host_names[$i]}
        devno=($(get_ec_devno $host "${client_names[$i]}" "${client_names[$i]}" "mdt" $layer))
        (( ${#devno[@]} == 3 )) || exit 1
        devnos[$i]=${devno[0]}
        client_names[$i]=${devno[1]}
        do_teardown_ec[$i]=${devno[2]}
        i=$((i + 1))
done
# nothing to survey without at least one device and one host
if ! (( ndevs > 0 && ${#host_names[@]} > 0 )); then
        echo "no devices or hosts specified"
        cleanup 0
fi
print_summary "$(date) $0 from $(hostname)"
# create directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[$idx]}
        devno=${devnos[$idx]}
        client_name="${host}:${client_names[$idx]}"
        mdtidx=${client_indexes[$idx]}
        echo "=======> Create $dir_count directories on $client_name" >> $workf
        # remove directories first, then create them again
        destroy_directories $host $devno $dir_count $tmpf $mdtidx
        ret=$(create_directories $host $devno $dir_count $tmpf $mdtidx)
        # keep the lctl output left in the scratch file in the detail log
        cat $tmpf >> $workf
        rm $tmpf
        # quoted: an empty $ret must not turn the test into a syntax error
        if [ "$ret" = "ERROR" ]; then
                print_summary "created directories on $client_name failed"
                cleanup 1
        fi
done
302
snap=1
status=0
# The thread count doubles each round, so a start value below 1 would never
# advance (0 * 2 = 0) and the loop would spin forever; a dir_count below 1
# would divide by zero.  Report that and skip the survey rounds instead.
if (( thrlo < 1 || dir_count < 1 )); then
        echo "thrlo ($thrlo) and dir_count ($dir_count) must both be >= 1" >&2
        status=1
fi
for ((thr = thrlo; thr > 0 && dir_count > 0 && thr <= thrhi; thr *= 2)); do
        thr_per_dir=$((thr / dir_count))
        # skip this round when there are fewer threads than directories
        if (( thr_per_dir <= 0 )); then
                continue
        fi
        file_count_per_thread=$((file_count / thr))
        str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
              $ndevs $file_count $dir_count $thr)
        echo "=======> $str" >> $workf
        print_summary -n "$str"
        # run tests
        for test in ${tests[@]}; do
                declare -a pidarray
                # mark the start of this run in every host's vmstat log
                # NOTE(review): $config is not assigned anywhere in this
                # script, so it presumably expands to nothing -- confirm
                for host in ${unique_hosts[@]}; do
                        echo "starting run for config: $config test: $test " \
                             "file: $file_count threads: $thr " \
                             "directories: $dir_count" >> ${vmstatf}_${host}
                done
                print_summary -n "$test "
                # create per-host script files
                for host in ${unique_hosts[@]}; do
                        echo -n > ${cmdsf}_${host}
                done
                # one lctl command line per device, appended to the script
                # of the host the device lives on; each command writes its
                # output to a per-device file
                for ((idx = 0; idx < ndevs; idx++)); do
                        host=${host_names[$idx]}
                        devno=${devnos[$idx]}
                        dirname="$(printf "${mdtbasedir}" ${client_indexes[$idx]})$basedir"
                        tmpfi="${tmpf}_$idx"
                        # the create test takes the stripe count as option;
                        # $test keeps the extended form from here on
                        [ "$test" = "create" ] && test="create -c $stripe_count"
                        echo >> ${cmdsf}_${host}                        \
                                "$lctl > $tmpfi 2>&1                    \
                                --threads $thr -$snap $devno test_$test \
                                -d /$dirname -D $dir_count              \
                                -b $start_number -n $file_count_per_thread"
                done
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        echo "wait" >> ${cmdsf}_${host}
                        pidarray[$pidcount]=0
                        pidcount=$((pidcount+1))
                done
                # run the per-host scripts in parallel ...
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        remote_shell $host bash < ${cmdsf}_${host} &
                        pidarray[$pidcount]=$!
                        pidcount=$((pidcount+1))
                done
                # ... and wait for each of them
                pidcount=0
                for host in ${unique_hosts[@]}; do
                        wait ${pidarray[$pidcount]}
                        pidcount=$((pidcount+1))
                done
                #wait
                # clean up per-host script files
                for host in ${unique_hosts[@]}; do
                        rm ${cmdsf}_${host}
                done

                # collect/check individual MDT stats
                echo -n > $tmpf
                for ((idx = 0; idx < ndevs; idx++)); do
                        client_name="${host_names[$idx]}:${client_names[$idx]}"
                        tmpfi="${tmpf}_$idx"
                        echo "=============> $test $client_name" >> $workf
                        host="${host_names[$idx]}"
                        remote_shell $host cat $tmpfi > ${tmpfi}_local
                        cat ${tmpfi}_local >> $workf
                        get_stats ${tmpfi}_local >> $tmpf
                        # NOTE(review): this rm runs locally; whether the
                        # copy of $tmpfi on a remote host gets removed
                        # depends on remote_shell -- confirm
                        rm -f $tmpfi ${tmpfi}_local
                done
                # compute/display global min/max stats
                echo "=============> $test global" >> $workf
                cat $tmpf >> $workf
                stats=($(get_global_stats $tmpf))
                rm $tmpf
                # a first field <= 0 means at least one device failed
                if ((stats[0] <= 0)); then
                        str=$(printf "%17s " ERROR)
                        status=1
                else
                        str=$(awk "BEGIN {printf \"%7.2f [ %7.2f, %7.2f] \", \
                              ${stats[1]}, ${stats[2]}, ${stats[3]}; exit}")
                fi
                print_summary -n "$str"
        done
        # end the summary line of this thread count
        print_summary ""
done
392
# destroy directories
# Walk over all devices once more and remove the test directories; the
# scratch file only receives the lctl output and is not looked at.
tmpf="${workf}_tmp"
idx=0
while (( idx < ndevs )); do
        host=${host_names[$idx]}
        devno=${devnos[$idx]}
        mdtidx=${client_indexes[$idx]}
        client_name="${host}:${client_names[$idx]}"
        echo "====> Destroy $dir_count directories on $client_name" >> $workf
        destroy_directories $host $devno $dir_count $tmpf $mdtidx
        idx=$((idx + 1))
done

# hand the overall status to the library's cleanup and to the caller
cleanup $status
exit $status