Whamcloud - gitweb
6111e1a915ccda461e17de1826c681f863faf748
[fs/lustre-release.git] / lustre-iokit / mds-survey / mds-survey
1 #!/bin/bash
2
3 ######################################################################
4 # customize per survey
5
6 # Prerequisite: For "stripe_count > 0" you need to have ost setup and mounted.
7 #
8 # How to run test:
9 # case 1 (stripe_count=0 default):
10 #  $ thrhi=8 dir_count=4 sh mds-survey
11 #  one can also run test with user defined targets as follows,
12 #  $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" sh mds-survey
13 # case 2 (stripe_count > 0, must have ost mounted):
14 #  $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2
15 #  targets="lustre-MDT0000" sh mds-survey
16 # [ NOTE: It is advised to have automated login (passwordless entry) on server ]
17
18 # include library
19 source $(dirname $0)/iokit-libecho
20
# Customisation variables
#####################################################################
# Every setting in this section is only a fallback: a value already
# present in the environment (or given on the command line) wins.

# Prefix of all result files (date/time + hostname makes it unique).
# NB the directory named by rslt_loc must already exist.
: "${rslt_loc:=/tmp}"
: "${rslt:=$rslt_loc/mds_survey_$(date +%F@%R)_$(uname -n)}"

# lowest and highest thread count of the sweep
: "${thrlo:=4}"
: "${thrhi:=32}"

# number of directories to test (defaults to the lowest thread count)
: "${dir_count:=$thrlo}"
# number of files per thread
: "${file_count:=100000}"

: "${targets:=}"
: "${stripe_count:=0}"
# what tests to run (first must be create, and last must be destroy)
# default=(create lookup md_getattr setxattr destroy)
: "${tests_str:=create lookup md_getattr setxattr destroy}"

# start number for each thread
: "${start_number:=2}"

# layer to be tested
: "${layer:=mdd}"
# Customisation variables ends here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...
LC_ALL=POSIX
export LC_ALL
basedir=tests
mdtbasedir='MDT%04x-'
57
#######################################
# Create the test directories of one MDT by issuing "test_mkdir" through
# remote_shell. The first directory is "<mdtbasedir><basedir>" (mdtbasedir
# is a printf format fed with the MDT index); every further directory has
# its loop index appended.
# Globals:   LCTL, mdtbasedir, basedir (read)
# Arguments: $1 - host the command is sent to
#            $2 - device number passed to "--device"
#            $3 - number of directories to create
#            $4 - file receiving the output of each test_mkdir call
#                 (overwritten on every iteration)
#            $5 - MDT index (also used as --stripe_index)
#            $6 - value for the "-c" option of test_mkdir
# Outputs:   "$basedir" on stdout when all directories were created,
#            "ERROR" on stdout otherwise; diagnostics go to stderr
# Returns:   0 on success, 1 if a test_mkdir call reported an error
#######################################
create_directories () {
	local host=$1
	local devno=$2
	local ndir=$3
	local rfile=$4
	local mdtidx=$5
	local dir_stripes=$6
	local idx
	local prefix
	local dirname

	# the MDT specific prefix does not change inside the loop
	prefix=$(printf "${mdtbasedir}" "$mdtidx")

	for ((idx = 0; idx < ndir; idx++)); do
		dirname="${prefix}${basedir}"
		if (( idx > 0 )); then
			dirname+="$idx"
		fi
		# $LCTL stays unquoted so that a multi-word value keeps
		# working exactly as before
		remote_shell "$host" $LCTL --device "$devno" test_mkdir "/$dirname" \
			-c "$dir_stripes" --stripe_index "$mdtidx" > "$rfile" 2>&1
		# one grep over the file instead of one grep per line
		if grep -q 'error: test_mkdir' "$rfile"; then
			cat "$rfile" >&2
			echo "ERROR: fail test_mkdir" >&2
			echo "ERROR"
			return 1
		fi
	done
	echo "$basedir"
}
87
#######################################
# Remove the test directories of one MDT by issuing "test_rmdir" through
# remote_shell. Directory names are built the same way as at creation:
# "<mdtbasedir><basedir>" for the first one, with the loop index appended
# for all following ones.
# Globals:   LCTL, mdtbasedir, basedir (read)
# Arguments: $1 - host the command is sent to
#            $2 - device number passed to "--device"
#            $3 - number of directories to remove
#            $4 - file receiving the output of each test_rmdir call
#                 (overwritten on every iteration)
#            $5 - MDT index
# Returns:   status of the last remote_shell call (0 if $3 is 0)
#######################################
destroy_directories () {
	local host=$1
	local devno=$2
	local ndir=$3
	local rfile=$4
	local mdtidx=$5
	local idx
	local prefix
	# local, so the caller's global of the same name is left alone
	local dirname

	prefix=$(printf "${mdtbasedir}" "$mdtidx")

	for ((idx = 0; idx < ndir; idx++)); do
		dirname="${prefix}${basedir}"
		if (( idx > 0 )); then
			dirname+="$idx"
		fi
		# $LCTL stays unquoted so that a multi-word value keeps
		# working exactly as before
		remote_shell "$host" $LCTL --device "$devno" test_rmdir "/$dirname" > "$rfile" 2>&1
	done
}
105
#######################################
# Condense the output of one test run into a single line
#   "<n> <ave> <min> <max>"
# where n is the number of rate lines seen after the last "starting"
# line, or -1 if an "error" line or any unrecognised line showed up once
# rate lines had been counted.
#   - "Total: total T threads N sec S R/second" supplies the average
#   - "i/N Total: R/second" lines supply minimum and maximum
#   - if the average line was the only rate line, min and max equal it
# Plain awk is enough: the rate is obtained with "+ 0", which converts
# the leading number of "123.45/second", so no gawk-only strtonum() is
# needed.
# Arguments: $1 - file holding the output to analyse
# Outputs:   the summary line on stdout
#######################################
get_stats () {
	local rfile=$1

	awk '
	/starting/ {
		n = 0
		next
	}
	/error/ {
		n = -1
		exit
	}
	/^Total: total [0-9]+ threads [0-9]+ sec [0-9.]+ [0-9]+\.[0-9]+\/second$/ {
		ave = $8 + 0
		n++
		next
	}
	/^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
		n++
		v = $3 + 0
		if (n == 1 || v < min) min = v
		if (n == 1 || v > max) max = v
		next
	}
	{
		# anything else is tolerated only before the first rate line
		if (n != 0) {
			n = -1
			exit
		}
	}
	END {
		if (n == 1) {
			min = ave
			max = ave
		}
		printf "%d %f %f %f\n", n, ave, min, max
	}' < "$rfile"
}
142
#######################################
# Merge per-target lines of the form "<n> <ave> <min> <max>" into one
# global line of the same form:
#   - first field:  smallest first field of all lines
#   - ave:          arithmetic mean of the per-line averages
#   - min / max:    smallest minimum / largest maximum
# An empty input yields "0 0.000000 0.000000 0.000000" instead of
# dividing by zero.
# Arguments: $1 - file with one summary line per target
# Outputs:   the merged line on stdout
#######################################
get_global_stats () {
	local rfile=$1

	awk '
	NR == 1 {
		err = $1
		ave = $2
		min = $3
		max = $4
		next
	}
	{
		if ($1 < err) err = $1
		ave += $2
		if ($3 < min) min = $3
		if ($4 > max) max = $4
	}
	END {
		if (NR == 0) {
			# nothing was collected: report zeros, do not divide
			printf "%d %f %f %f\n", 0, 0, 0, 0
			exit
		}
		printf "%d %f %f %f\n", err, ave / NR, min, max
	}' < "$rfile"
}
173
#######################################
# Write a message both to the summary file and to stdout.
# Globals:   rsltf (read) - path of the summary file, appended to
# Arguments: an optional leading "-n" suppresses the trailing newline;
#            all remaining arguments are joined with single spaces
# Outputs:   the message on stdout
#######################################
print_summary () {
	local fmt='%s\n'

	if [ "$1" = "-n" ]; then
		fmt='%s'
		shift
	fi
	# printf prints the text verbatim, even if it looks like an
	# echo option such as "-e"
	printf "$fmt" "$*" >> "$rsltf"
	printf "$fmt" "$*"
}
183
# Split the whitespace-separated test list into the 'tests' array;
# 'count' ends up holding the number of entries stored.
declare -a tests
count=0
for name in $tests_str; do
	tests[count++]=$name
done
190
# hide a little trick to unset this from the command line:
# a lustre_root consisting of a single blank counts as "not set"
if [[ "$lustre_root" == " " ]]; then
	unset lustre_root
fi

# Find where the 'lctl' binary is installed on this system.
# Order of preference:
#   1. $LCTL if it already names an executable file
#   2. $lustre_root/utils/lctl
#   3. $LUSTRE/utils/lctl
#   4. whatever $LCTL (default "lctl") resolves to via PATH
if [[ -x "$LCTL" ]]; then	# full pathname specified
	: # echo "LCTL=$LCTL"
elif [[ -n "$lustre_root" && -x "$lustre_root/utils/lctl" ]]; then
	LCTL=${lustre_root}/utils/lctl
elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
	LCTL=$LUSTRE/utils/lctl
else				# hope that it is in the PATH
	LCTL=${LCTL:-lctl}
fi
# 'command -v' is a shell builtin, so the check does not depend on the
# external 'which' program being installed. $LCTL is expanded unquoted,
# as it is everywhere else in this script.
if ! command -v $LCTL > /dev/null 2>&1; then
	echo "error: lctl not found" >&2
	exit 99
fi
207
# Per-target bookkeeping; all three arrays share the index 0..ndevs-1
declare -a client_names
declare -a host_names
declare -a client_indexes

# Without an explicit target list, take every device that the local
# device list reports as an "mdt" in state "UP"
[ -n "$targets" ] ||
	targets=$($LCTL device_list |
		  awk '{if ($2 == "UP" && $3 == "mdt") {print $4} }')
if [ -z "$targets" ]; then
	echo "Can't find any MDT to test.  Please set targets=..."
	exit 1
fi

# split out hostnames from mdt names; the MDT index is the four hex
# digits following "MDT" in the name, stored with a "0x" prefix
ndevs=0
for trgt in $targets; do
	str=($(split_hostname $trgt))
	host_names[ndevs]=${str[0]}
	client_names[ndevs]=${str[1]}
	client_indexes[ndevs]=0x$(echo ${str[1]} |
		sed 's/.*MDT\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\).*/\1/')
	ndevs=$((ndevs + 1))
done
230
# check for ost: striped files can only be created when every host under
# test lists at least one "osc" or "osp" device in state "UP"
if (( $stripe_count > 0 )); then
	for ((i = 0; i < $ndevs; i++)); do
		host=${host_names[$i]}
		obd=$(remote_shell $host $LCTL device_list |
		      awk '{ if ($2 == "UP" && ($3 == "osc" || $3 == "osp")) { print $4 } }')
		[ -n "$obd" ] && continue
		echo "Need obdfilter to test stripe_count"
		exit 1
	done
fi
245
# check and insert obdecho module
if ! lsmod | grep -q obdecho; then
	modprobe obdecho
fi

# The test list must start with 'create' and end with 'destroy'.
# The checks are chained with '||', so the last element is only looked
# at when the array is not empty.
count=${#tests[@]}
if (( count == 0 )) || [[ "${tests[0]}" != "create" ]] ||
   [[ "${tests[count - 1]}" != "destroy" ]]; then
	echo "tests: ${tests[*]}"
	echo "First test must be 'create', and last test must be 'destroy'" 1>&2
	exit 1
fi
256
# Result files, all derived from the common prefix $rslt:
#   .summary - what print_summary writes
#   .detail  - raw output of every step
#   .script  - prefix of the per-host command scripts
#   .vmstat  - prefix of the per-host vmstat logs
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
# start with empty summary and detail files
: > "$rsltf"
: > "$workf"

# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=($(unique "${host_names[@]}"))
load_obdechos
pidcount=0
for host in "${unique_hosts[@]}"; do
	host_vmstatf=${vmstatf}_${host}
	: > "$host_vmstatf"
	# one background vmstat per host; its pid is kept in vmstatpids
	remote_shell "$host" "vmstat 5 >> $host_vmstatf" &> /dev/null &
	pid=$!
	vmstatpids[pidcount]=$pid
	pidcount=$((pidcount + 1))
done
# get all the echo_client device numbers and names
for ((i = 0; i < ndevs; i++)); do
	host=${host_names[i]}
	# get_ec_devno is expected to print exactly three words
	devno=($(get_ec_devno "$host" "${client_names[i]}" "${client_names[i]}" "mdt" "$layer"))
	if (( ${#devno[@]} != 3 )); then
		# NOTE(review): this exits without calling cleanup although
		# vmstat has already been started above - confirm intended
		exit 1
	fi
	devnos[i]=${devno[0]}
	client_names[i]=${devno[1]}
	do_teardown_ec[i]=${devno[2]}
done
if (( ndevs <= 0 || ${#host_names[@]} <= 0 )); then
	echo "no devices or hosts specified"
	cleanup 0
fi
print_summary "$(date) $0 from $(hostname)"
# create directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
	host=${host_names[idx]}
	devno=${devnos[idx]}
	client_name="${host}:${client_names[idx]}"
	mdtidx=${client_indexes[idx]}
	echo "=======> Create $dir_count directories on $client_name" >> "$workf"
	# destroy first, then create - presumably to clear directories left
	# behind by an earlier run
	destroy_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx"
	# the number of targets is handed over as the directory stripe count
	ret=$(create_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx" "$ndevs")
	cat "$tmpf" >> "$workf"
	rm -f "$tmpf"
	# quoted, so an empty result cannot break the test expression
	if [ "$ret" = "ERROR" ]; then
		print_summary "created directories on $client_name failed"
		cleanup 1
	fi
done
310
# Main sweep: the thread count doubles from thrlo up to thrhi; for every
# thread count each test of the 'tests' array is run against all targets
# and one summary line "ave [ min, max]" per test is printed.
snap=1
status=0
# A starting value below 1 never grows under "*= 2", so the sweep would
# not terminate; begin with one thread in that case.
if (( thrlo < 1 )); then
	echo "thrlo=$thrlo is not positive, starting the sweep at 1 thread" >&2
fi
for ((thr = (thrlo > 0 ? thrlo : 1); thr <= thrhi; thr *= 2)); do
	thr_per_dir=$((thr / dir_count))
	# skip thread counts that are smaller than the directory count
	if (( thr_per_dir <= 0 )); then
		continue
	fi
	file_count_per_thread=$((file_count / thr))
	str=$(printf 'mdt %1d file %7d dir %4d thr %4d ' \
	      "$ndevs" "$file_count" "$dir_count" "$thr")
	echo "=======> $str" >> "$workf"
	print_summary -n "$str"
	# run tests
	for test in "${tests[@]}"; do
		declare -a pidarray
		# mark the start of this run in every vmstat log
		# NOTE(review): $config is not set anywhere in this file
		for host in "${unique_hosts[@]}"; do
			echo "starting run for config: $config test: $test " \
			     "file: $file_count threads: $thr " \
			     "directories: $dir_count" >> "${vmstatf}_${host}"
		done
		print_summary -n "$test "
		# create per-host script files
		for host in "${unique_hosts[@]}"; do
			: > "${cmdsf}_${host}"
		done
		for ((idx = 0; idx < ndevs; idx++)); do
			host=${host_names[idx]}
			devno=${devnos[idx]}
			dirname="$(printf "${mdtbasedir}" "${client_indexes[idx]}")$basedir"
			tmpfi="${tmpf}_$idx"
			# from here on $test carries the stripe count option
			[ "$test" = "create" ] && test="create -c $stripe_count"
			# NOTE(review): the command is not put in the
			# background, so several targets on one host run one
			# after another although "wait" is appended below -
			# confirm this is intended
			echo >> "${cmdsf}_${host}" \
				"$LCTL > $tmpfi 2>&1 \
				--threads $thr -$snap $devno test_$test \
				-d /$dirname -D $dir_count \
				-b $start_number -n $file_count_per_thread"
		done
		pidcount=0
		for host in "${unique_hosts[@]}"; do
			echo "wait" >> "${cmdsf}_${host}"
			pidarray[pidcount]=0
			pidcount=$((pidcount + 1))
		done
		# start the script of every host in parallel ...
		pidcount=0
		for host in "${unique_hosts[@]}"; do
			remote_shell "$host" bash < "${cmdsf}_${host}" &
			pidarray[pidcount]=$!
			pidcount=$((pidcount + 1))
		done
		# ... and wait for all of them
		pidcount=0
		for host in "${unique_hosts[@]}"; do
			wait "${pidarray[pidcount]}"
			pidcount=$((pidcount + 1))
		done
		#wait
		# clean up per-host script files
		for host in "${unique_hosts[@]}"; do
			rm "${cmdsf}_${host}"
		done

		# collect/check individual MDT stats
		: > "$tmpf"
		for ((idx = 0; idx < ndevs; idx++)); do
			client_name="${host_names[idx]}:${client_names[idx]}"
			tmpfi="${tmpf}_$idx"
			echo "=============> $test $client_name" >> "$workf"
			host="${host_names[idx]}"
			# fetch the output written on the target's host
			remote_shell "$host" cat "$tmpfi" > "${tmpfi}_local"
			cat "${tmpfi}_local" >> "$workf"
			get_stats "${tmpfi}_local" >> "$tmpf"
			rm -f "$tmpfi" "${tmpfi}_local"
		done
		# compute/display global min/max stats
		echo "=============> $test global" >> "$workf"
		cat "$tmpf" >> "$workf"
		stats=($(get_global_stats "$tmpf"))
		rm "$tmpf"
		# a first field <= 0 means at least one target failed
		if (( stats[0] <= 0 )); then
			str=$(printf "%17s " ERROR)
			status=1
		else
			str=$(awk -v ave="${stats[1]}" -v min="${stats[2]}" \
				  -v max="${stats[3]}" \
				  'BEGIN { printf "%7.2f [ %7.2f, %7.2f] ", ave, min, max }')
		fi
		print_summary -n "$str"
	done
	print_summary ""
done
400
# destroy directories
tmpf="${workf}_tmp"
for ((idx = 0; idx < ndevs; idx++)); do
	host=${host_names[idx]}
	devno=${devnos[idx]}
	mdtidx=${client_indexes[idx]}
	client_name="${host}:${client_names[idx]}"
	echo "====> Destroy $dir_count directories on $client_name" >> "$workf"
	destroy_directories "$host" "$devno" "$dir_count" "$tmpf" "$mdtidx"
	# keep the test_rmdir output in the detail log and do not leave the
	# scratch file behind
	if [ -f "$tmpf" ]; then
		cat "$tmpf" >> "$workf"
		rm -f "$tmpf"
	fi
done

cleanup $status
exit $status