Whamcloud - gitweb
0d6bc30f8e11dd6a8719c6ec7f533fb7d6d8b92e
[fs/lustre-release.git] / lustre-iokit / mds-survey / mds-survey
#!/bin/bash

######################################################################
# customize per survey

# Prerequisite: for "stripe_count > 0" the OSTs must be set up and mounted.
#
# How to run the test (the script uses bash arrays and arithmetic, so run
# it with bash):
# case 1 (stripe_count=0, the default):
#  $ thrhi=8 dir_count=4 bash mds-survey
#  The test can also be run against user-defined targets:
#  $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" bash mds-survey
# case 2 (stripe_count > 0, the OSTs must be mounted):
#  $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2 \
#    targets="lustre-MDT0000" bash mds-survey
# [ NOTE: passwordless login to the servers is advised ]

# include library
# The path is quoted so that the script also works when it is installed in
# a directory whose name contains spaces.
source "$(dirname "$0")/iokit-libecho"
20
# Customisation variables
#####################################################################
# Change the values in this section as required.
# Every variable below can also be set in the environment or on the
# command line.

# result file prefix (date/time + hostname makes it unique)
# NB ensure the path to it exists
rslt_loc=${rslt_loc:-"/tmp"}
rslt=${rslt:-"$rslt_loc/mds_survey_$(date +%F@%R)_$(uname -n)"}

# min and max thread count
thrlo=${thrlo:-4}
thrhi=${thrhi:-32}

# number of directories to test (defaults to the minimum thread count)
dir_count=${dir_count:-$thrlo}
# number of files handled per target in each run; the main loop divides
# this evenly among the threads
file_count=${file_count:-100000}

# MDT targets to test; when empty, every MDT that lctl lists as UP is used
targets=${targets:-""}
# stripe count handed to the create test (-c)
stripe_count=${stripe_count:-0}
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
tests_str=${tests_str:-"create lookup md_getattr setxattr destroy"}

# start number for each thread
start_number=${start_number:-2}

# layer to be tested
layer=${layer:-"mdd"}
# Customisation variables end here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...
export LC_ALL=POSIX
# directory names are built as <mdtbasedir formatted with the MDT index><basedir>[N]
basedir="tests"
mdtbasedir="MDT%04x-"
57
# Create the test directories on one MDT through its echo-client device.
# Globals:   lctl, mdtbasedir, basedir (read)
# Arguments: $1 - host to run lctl on
#            $2 - echo-client device number
#            $3 - number of directories to create
#            $4 - local file that receives the output of the last lctl call
#            $5 - MDT index (used for the name prefix and --stripe_index)
#            $6 - directory stripe count (-c)
# Outputs:   "$basedir" on stdout on success, "ERROR" on stdout on failure;
#            on failure the lctl output and a message also go to stderr
create_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local dir_stripes=$6
        local idx
        local prefix
        local dir_name

        # the per-MDT name prefix does not change inside the loop
        prefix=$(printf "${mdtbasedir}" "$mdtidx")

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory has no numeric suffix
                if (( idx == 0 )); then
                        dir_name="${prefix}${basedir}"
                else
                        dir_name="${prefix}${basedir}${idx}"
                fi
                remote_shell "$host" $lctl --device $devno test_mkdir "/$dir_name" \
                        -c $dir_stripes --stripe_index $mdtidx > "$rfile" 2>&1
                # Scan the whole output at once; this also catches an error
                # reported on a last line that has no trailing newline.
                if grep -q 'error: test_mkdir' "$rfile"; then
                        cat "$rfile" >&2
                        echo "ERROR: fail test_mkdir" >&2
                        echo "ERROR"
                        return
                fi
        done
        echo "$basedir"
}
87
# Remove the test directories of one MDT through its echo-client device.
# Globals:   lctl, mdtbasedir, basedir (read)
# Arguments: $1 - host to run lctl on
#            $2 - echo-client device number
#            $3 - number of directories to remove
#            $4 - local file that receives the output of the last lctl call
#            $5 - MDT index (used for the name prefix)
# The lctl result is not checked; removal is best effort.
destroy_directories () {
        local host=$1
        local devno=$2
        local ndir=$3
        local rfile=$4
        local mdtidx=$5
        local idx
        local prefix
        local dir_name

        # the per-MDT name prefix does not change inside the loop
        prefix=$(printf "${mdtbasedir}" "$mdtidx")

        for ((idx = 0; idx < ndir; idx++)); do
                # the first directory has no numeric suffix
                if (( idx == 0 )); then
                        dir_name="${prefix}${basedir}"
                else
                        dir_name="${prefix}${basedir}${idx}"
                fi
                remote_shell "$host" $lctl --device $devno test_rmdir "/$dir_name" > "$rfile" 2>&1
        done
}
105
# Summarise the lctl output of one test run on one device.
# Arguments: $1 - file holding the lctl output
# Outputs:   one line "n ave min max" on stdout, where n is the number of
#            result lines seen after the last "starting" line, or -1 when an
#            error or an unexpected line was found.  ave comes from the
#            "Total: total ..." line, min/max from the "i/n Total: ..." lines;
#            with a single result line min and max are set to ave.
# Requires gawk (strtonum).
get_stats () {
        local rfile=$1

        gawk '
        # a new run begins: forget what was counted so far
        /starting/ {
                n = 0; next;
        }
        /error/ {
                n = -1;
                exit;
        }
        # overall rate line
        /^Total: total [0-9]+ threads [0-9]+ sec [0-9\.]+ [0-9]+\.[0-9]+\/second$/ {
                ave = strtonum($8);
                n++;
                next;
        }
        # per-thread rate line
        /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
                n++; v = strtonum($3);
                if (n == 1 || v < min) min = v;
                if (n == 1 || v > max) max = v;
                next;
        }
        # anything else is tolerated only before the first result line
        {
                if (n != 0) {
                        n = -1;
                        exit;
                }
        }
        END {
                if (n == 1) {
                        min = ave;
                        max = ave;
                }
                printf "%d %f %f %f\n", n, ave, min, max
        }' < "$rfile"
}
142
# Combine per-device "n ave min max" lines into one global line.
# Arguments: $1 - file with one "n ave min max" line per device
# Outputs:   "err ave min max" on stdout, where err is the smallest first
#            column, ave the mean of the second column, min the smallest
#            third column and max the largest fourth column.  An empty
#            input yields "0 0.000000 0.000000 0.000000".
get_global_stats () {
        local rfile=$1

        awk '
        BEGIN {
                n = 0;
        }
        {
                n++;
                if (n == 1) {
                        err = $1;
                        ave = $2;
                        min = $3;
                        max = $4;
                } else {
                        if ($1 < err)
                                err = $1;
                        ave += $2;
                        if ($3 < min)
                                min = $3;
                        if ($4 > max)
                                max = $4;
                }
        }
        END {
                # no input lines: print zeros rather than dividing by zero
                if (n == 0) {
                        printf "%d %f %f %f\n", 0, 0, 0, 0
                        exit
                }
                printf "%d %f %f %f\n", err, ave/n, min, max
        }' < "$rfile"
}
173
# Write a message both to the summary file and to stdout.
# Globals:   rsltf (read) - path of the summary file
# Arguments: [-n] - do not end the message with a newline
#            rest - message words, joined with single spaces
print_summary () {
        local eol='\n'

        if [ "$1" = "-n" ]; then
                eol=''
                shift
        fi
        # printf prints the text verbatim, whatever it starts with
        printf "%s${eol}" "$*" >> "$rsltf"
        printf "%s${eol}" "$*"
}
183
# Split the whitespace-separated test list into the tests array.
declare -a tests
count=0
for name in $tests_str; do
        tests[count]=$name
        (( count += 1 ))
done

# Small trick: a lustre_root consisting of one blank means "not set", so
# that it can be cleared from the command line.
[[ "$lustre_root" == " " ]] && unset lustre_root

# Take lctl from the given Lustre tree, or from PATH when none was given.
lctl=${lustre_root:+${lustre_root}/utils/}lctl
201
declare -a client_names
declare -a host_names
declare -a client_indexes

# Without explicit targets, use every device that lctl lists as an "mdt"
# in state "UP".
if [ -z "$targets" ]; then
        targets=$($lctl device_list |
                  awk '$2 == "UP" && $3 == "mdt" { print $4 }')
        if [ -z "$targets" ]; then
                echo "Can't find any MDT to test.  Please set targets=..."
                exit 1
        fi
fi

# split out hostnames from mdt names
# split_hostname (sourced library) yields two words per target; the first
# is stored as the host, the second as the device name.  The four hex
# digits following "MDT" in the name become the MDT index.
ndevs=0
for trgt in $targets; do
        str=($(split_hostname $trgt))
        host_names[ndevs]=${str[0]}
        client_names[ndevs]=${str[1]}
        client_indexes[ndevs]=0x$(echo ${str[1]} |
                sed 's/.*MDT\([0-9a-f]\{4\}\).*/\1/')
        (( ndevs += 1 ))
done
224
# check for ost
# Striped files need object storage: every host under test must list at
# least one "osc" or "osp" device in state "UP".
if (( stripe_count > 0 )); then
        for ((i = 0; i < ndevs; i++)); do
                host=${host_names[i]}
                obd=$(remote_shell $host $lctl device_list |
                      awk '$2 == "UP" && ($3 == "osc" || $3 == "osp") { print $4 }')
                if [ -z "$obd" ]; then
                        echo "Need obdfilter to test stripe_count"
                        exit 1
                fi
        done
fi
239
# check and insert obdecho module
if ! lsmod | grep -q obdecho; then
        modprobe obdecho
fi

# The run has to begin by creating the files and end by destroying them.
# The checks short-circuit, so an empty list is rejected without indexing
# the array with -1.
count=${#tests[@]}
if (( count == 0 )) || [[ "${tests[0]}" != "create" ]] ||
   [[ "${tests[count - 1]}" != "destroy" ]]; then
        echo "tests: ${tests[*]}"
        echo "First test must be 'create', and last test must be 'destroy'" 1>&2
        exit 1
fi
250
# Output files, all derived from the result prefix:
#   .summary - what print_summary writes
#   .detail  - raw output of every step
#   .script  - prefix of the per-host command scripts
#   .vmstat  - prefix of the per-host vmstat logs
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
# start with empty summary and detail files; the names are quoted so that
# a result prefix containing spaces works
: > "$rsltf"
: > "$workf"
257
# Prepare the hosts and get vmstat started.
# load_obdechos comes from the sourced library; per the original note it
# disables portals debug and gets obdecho loaded on all relevant hosts.
unique_hosts=($(unique ${host_names[@]}))
load_obdechos

# One background vmstat per distinct host; the pids are kept in vmstatpids.
for ((pidcount = 0; pidcount < ${#unique_hosts[@]}; pidcount++)); do
        host=${unique_hosts[pidcount]}
        host_vmstatf=${vmstatf}_${host}
        echo -n > $host_vmstatf
        remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
        pid=$!
        vmstatpids[pidcount]=$pid
done
# get all the echo_client device numbers and names
# get_ec_devno (sourced library) must answer with exactly three words:
# device number, client name and a teardown flag.
for ((i = 0; i < ndevs; i++)); do
        host=${host_names[i]}
        devno=($(get_ec_devno $host "${client_names[i]}" "${client_names[i]}" "mdt" $layer))
        (( ${#devno[@]} == 3 )) || exit 1
        devnos[i]=${devno[0]}
        client_names[i]=${devno[1]}
        do_teardown_ec[i]=${devno[2]}
done
if ! (( ndevs > 0 && ${#host_names[@]} > 0 )); then
        echo "no devices or hosts specified"
        cleanup 0
fi
print_summary "$(date) $0 from $(hostname)"
# create directories
# tmpf is a scratch file for the lctl output of each step
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[idx]}
        devno=${devnos[idx]}
        client_name="${host}:${client_names[idx]}"
        mdtidx=${client_indexes[idx]}
        echo "=======> Create $dir_count directories on $client_name" >> "$workf"
        # clear out directories with the same names before creating them
        destroy_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx"
        # the number of devices is passed as the directory stripe count
        ret=$(create_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx" "$ndevs")
        cat "$tmpf" >> "$workf"
        rm -f "$tmpf"
        # quoted so that an empty or multi-word result cannot break the test
        if [ "$ret" = "ERROR" ]; then
                print_summary "created directories on $client_name failed"
                cleanup 1
        fi
done
304
snap=1
status=0

# The thread count doubles every round, so a thrlo below 1 would never
# reach thrhi, and dir_count is used as a divisor below.  Refuse such
# values here; the statements after the loop still run.
params_ok=1
if (( thrlo < 1 || dir_count < 1 )); then
        echo "thrlo and dir_count must be >= 1" \
             "(thrlo=$thrlo dir_count=$dir_count)" >&2
        params_ok=0
        status=1
fi

for ((thr = thrlo; params_ok && thr <= thrhi; thr *= 2)); do
        thr_per_dir=$((thr / dir_count))
        # skip thread counts that are smaller than the directory count
        if (( thr_per_dir <= 0 )); then
                continue
        fi
        # file_count is shared out evenly among the threads
        file_count_per_thread=$((file_count / thr))
        str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
              "$ndevs" "$file_count" "$dir_count" "$thr")
        echo "=======> $str" >> "$workf"
        print_summary -n "$str"
        # run tests
        for test in "${tests[@]}"; do
                declare -a pidarray
                # mark the start of this run in the vmstat log of each host
                # NOTE(review): $config is not assigned anywhere in this
                # script; unless the sourced library sets it, it expands
                # to nothing here.
                for host in "${unique_hosts[@]}"; do
                        echo "starting run for config: $config test: $test " \
                             "file: $file_count threads: $thr " \
                             "directories: $dir_count" >> "${vmstatf}_${host}"
                done
                print_summary -n "$test "
                # create per-host script files
                for host in "${unique_hosts[@]}"; do
                        : > "${cmdsf}_${host}"
                done
                for ((idx = 0; idx < ndevs; idx++)); do
                        host=${host_names[idx]}
                        devno=${devnos[idx]}
                        dirname="$(printf "${mdtbasedir}" "${client_indexes[idx]}")$basedir"
                        tmpfi="${tmpf}_$idx"
                        # From here on $test carries the stripe option; the
                        # detail-log headings written below show it as well.
                        [ "$test" = "create" ] && test="create -c $stripe_count"
                        echo >> "${cmdsf}_${host}"                      \
                                "$lctl > $tmpfi 2>&1                    \
                                --threads $thr -$snap $devno test_$test \
                                -d /$dirname -D $dir_count              \
                                -b $start_number -n $file_count_per_thread"
                done
                # finish each host script with "wait", then start it in the
                # background through remote_shell
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                        echo "wait" >> "${cmdsf}_${host}"
                        remote_shell "$host" bash < "${cmdsf}_${host}" &
                        pidarray[pidcount]=$!
                        pidcount=$((pidcount + 1))
                done
                # wait for every host script to finish
                pidcount=0
                for host in "${unique_hosts[@]}"; do
                        wait "${pidarray[pidcount]}"
                        pidcount=$((pidcount + 1))
                done
                # clean up per-host script files
                for host in "${unique_hosts[@]}"; do
                        rm "${cmdsf}_${host}"
                done

                # collect/check individual MDT stats
                : > "$tmpf"
                for ((idx = 0; idx < ndevs; idx++)); do
                        client_name="${host_names[idx]}:${client_names[idx]}"
                        tmpfi="${tmpf}_$idx"
                        echo "=============> $test $client_name" >> "$workf"
                        host="${host_names[idx]}"
                        # fetch the output file, log it, and reduce it to
                        # one "n ave min max" line
                        remote_shell "$host" cat "$tmpfi" > "${tmpfi}_local"
                        cat "${tmpfi}_local" >> "$workf"
                        get_stats "${tmpfi}_local" >> "$tmpf"
                        rm -f "$tmpfi" "${tmpfi}_local"
                done
                # compute/display global min/max stats
                echo "=============> $test global" >> "$workf"
                cat "$tmpf" >> "$workf"
                stats=($(get_global_stats "$tmpf"))
                rm "$tmpf"
                # a first field of 0 or less means no usable result
                if ((stats[0] <= 0)); then
                        str=$(printf "%17s " ERROR)
                        status=1
                else
                        str=$(awk "BEGIN {printf \"%7.2f [ %7.2f, %7.2f] \", \
                              ${stats[1]}, ${stats[2]}, ${stats[3]}; exit}")
                fi
                print_summary -n "$str"
        done
        print_summary ""
done
394
# destroy directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
        host=${host_names[idx]}
        devno=${devnos[idx]}
        mdtidx=${client_indexes[idx]}
        client_name="${host}:${client_names[idx]}"
        echo "====> Destroy $dir_count directories on $client_name" >> "$workf"
        destroy_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx"
        # As in the create phase: keep the lctl output in the detail log
        # and do not leave the scratch file behind.
        if [ -f "$tmpf" ]; then
                cat "$tmpf" >> "$workf"
                rm -f "$tmpf"
        fi
done

# cleanup comes from the sourced library; status is 1 if any test failed
cleanup $status
exit $status