Whamcloud - gitweb
LU-10664 tests: fix MPI tests in dom-performance.sh
[fs/lustre-release.git] / lustre / tests / dom-performance.sh
1 #!/bin/bash
2 #
3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
5 #
6
7 set -e
8
9 ONLY=${ONLY:-"$*"}
10 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$DOM_PERFORMANCE_EXCEPT"}
11 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
12
13 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
14
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16
17 . $LUSTRE/tests/test-framework.sh
18
19 init_test_env $@
20 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 init_logging
22
23 SAVED_FAIL_ON_ERROR=$FAIL_ON_ERROR
24 FAIL_ON_ERROR=false
25
26 SAVED_DEBUG=$($LCTL get_param -n debug 2> /dev/null)
27
28
29 . $LUSTRE/tests/functions.sh
30 build_test_filter
31 check_and_setup_lustre
32
33 # if MACHINEFILE set and exists -- use it
34 MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh)-$(hostname).machines}
35 clients=${CLIENTS:-$HOSTNAME}
36 generate_machine_file $clients $MACHINEFILE ||
37         error "Failed to generate machine file"
38
39 DP_DIO=${DP_DIO:-"no"}
40
41 DOM_SIZE=${DOM_SIZE:-"1M"}
42 DP_OSC="mdc"
43
44 rm -rf $DIR/*
45
46 DP_NORM=$DIR/dp_norm
47 DP_DOM=$DIR/dp_dom
48 DP_DOM_DNE=$DIR/dp_dne
49 DP_STATS=${DP_STATS:-"no"}
50
51 # total number of files
52 DP_FNUM=${DP_FNUM:-16384}
53 # number of threads
54 DP_NUM=${DP_NUM:-4}
55
56 # 1 stripe for normal files
57 mkdir -p $DP_NORM
58 $LFS setstripe -c 2 $DP_NORM ||
59         error "Cannot create test directory for ordinary files"
60
61 if [[ $MDSCOUNT -gt 1 ]] ; then
62         $LFS setdirstripe -i 0 -c $MDSCOUNT $DP_DOM_DNE ||
63                 error_noexit "Cannot create striped directory"
64         $LFS setstripe -E ${DOM_SIZE} -L mdt -E EOF $DP_DOM_DNE ||
65                 error_noexit "Cannot create test directory for dom files"
66 fi
67
68 mkdir -p $DP_DOM
69 $LFS setstripe -E ${DOM_SIZE} -L mdt -E EOF $DP_DOM ||
70         error "Cannot create test directory for dom files"
71
72 dp_clear_stats() {
73         local cli=$1
74
75         $LCTL set_param -n osc.*.stats=0
76         $LCTL set_param -n mdc.*.stats=0
77         $LCTL set_param -n ${cli}.*.${cli}_stats=0
78         $LCTL set_param -n ${cli}.*.rpc_stats=0
79         $LCTL set_param -n llite.*.read_ahead_stats=0
80         $LCTL set_param -n llite.*.unstable_stats=0
81 }
82
83 dp_collect_stats() {
84         local cli=$1
85
86         sync;sync
87         echo ----- MDC RPCs: $(calc_stats mdc.*.stats req_active)
88         echo ----- OSC RPCs: $(calc_stats osc.*.stats req_active)
89
90         if [ "x$DP_STATS" != "xyes" ] ; then
91                 return 0
92         fi
93
94         $LCTL get_param ${cli}.*.${cli}_stats
95         $LCTL get_param ${cli}.*.rpc_stats
96         # for OSC get both OSC and MDC stats
97         if [ $cli == "osc" ] ; then
98                 $LCTL get_param mdc.*.stats
99         fi
100         $LCTL get_param ${cli}.*.stats
101         $LCTL get_param ${cli}.*.unstable_stats
102         $LCTL get_param ${cli}.*.${cli}_cached_mb
103         $LCTL get_param llite.*.read_ahead_stats
104 }
105
106 dp_setup_test() {
107         local cli=$1
108
109         cancel_lru_locks $cli
110         ### drop all debug except critical
111         $LCTL set_param -n debug="error warning console emerg"
112         dp_clear_stats $cli
113 }
114
115 dp_run_cmd() {
116         local cmd=$1
117         local cmdlog=$TMP/dp_cmd.log
118         local rc
119
120         dp_setup_test $DP_OSC
121         if ! grep -qw "$MOUNT" /proc/mounts ; then
122                 echo "!!!!! Lustre is not mounted !!!!!, aborting"
123                 return 0
124         fi
125
126         echo "## $cmd" | awk '{ if (NR==1) {gsub(/[ \t\r\n]+/, " "); \
127                                 gsub(/\|.*$/, ""); print }}'
128         echo "## $(date +'%F %H:%M:%S'): START"
129         eval $cmd 2>&1 | tee $cmdlog || true
130
131         rc=${PIPESTATUS[0]}
132         if [ $rc -eq 0 ] && grep -q "p4_error:" $cmdlog ; then
133                 rc=1
134         fi
135
136         dp_collect_stats $DP_OSC
137         remount_client $DIR > /dev/null
138         return $rc
139 }
140
141 run_MDtest() {
142         if ! which mdtest > /dev/null 2>&1 ; then
143                 echo "Mdtest is not installed, skipping"
144                 return 0
145         fi
146
147         local mdtest=$(which mdtest)
148
149         local TDIR=${1:-$MOUNT}
150         local th_num=$((DP_FNUM * 2 / DP_NUM))
151         local bsizes="8192"
152
153         chmod 0777 $TDIR
154
155         [ "$SLOW" = "yes" ] && bsizes="4096 32768"
156
157         for bsize in $bsizes ; do
158                 dp_run_cmd "mpi_run -np $DP_NUM $mdtest -i 3 -I $th_num -F \
159                         -z 1 -b 1 -L -u -w $bsize -R -d $TDIR"
160                 if [ ${PIPESTATUS[0]} != 0 ]; then
161                         error "MDtest failed, aborting"
162                 fi
163         done
164
165         rm -rf $TDIR/*
166         return 0
167 }
168
169 run_SmallIO() {
170         if [ ! -f createmany ] ; then
171                 echo "Createmany is not installed, skipping"
172                 return 0
173         fi
174
175         if [ ! -f smalliomany ] ; then
176                 echo "Smalliomany is not installed, skipping"
177                 return 0
178         fi
179
180         local TDIR=${1:-$DIR}
181         local count=$DP_FNUM
182
183         local MIN=$((count * 16))
184         [ $MDSSIZE -le $MIN ] && count=$((MDSSIZE / 16))
185
186         dp_run_cmd "./createmany -o $TDIR/file- $count | grep 'total:'"
187         if [ ${PIPESTATUS[0]} != 0 ]; then
188                 error "File creation failed, aborting"
189         fi
190
191         if [ -f statmany ]; then
192                 dp_run_cmd "./statmany -s $TDIR/file- $count $((count * 5)) |
193                         grep 'total:'"
194                 if [ ${PIPESTATUS[0]} != 0 ]; then
195                         error "File stat failed, aborting"
196                 fi
197
198         fi
199
200         for opc in w a r ; do
201                 dp_run_cmd "./smalliomany -${opc} $TDIR/file- $count 300 |
202                         grep 'total:'"
203                 if [ ${PIPESTATUS[0]} != 0 ]; then
204                         error "SmallIO -${opc} failed, aborting"
205                 fi
206
207         done
208
209         dp_run_cmd "./unlinkmany $TDIR/file- $count | grep 'total:'"
210         if [ ${PIPESTATUS[0]} != 0 ]; then
211                 error "SmallIO failed, aborting"
212         fi
213
214         return 0
215 }
216
217 run_IOR() {
218         if ! which IOR > /dev/null 2>&1 ; then
219                 echo "IOR is not installed, skipping"
220                 return 0
221         fi
222
223         local IOR=$(which IOR)
224         local iter=$((DP_FNUM / DP_NUM))
225         local direct=""
226
227         if [ "x$DP_DIO" == "xyes" ] ; then
228                 direct="-B"
229         fi
230
231         local TDIR=${1:-$MOUNT}
232
233         chmod 0777 $TDIR
234
235         local bsizes="8"
236         [ "$SLOW" = "yes" ] && bsizes="4 32"
237
238         for bsize in $bsizes ; do
239                 segments=$((128 / bsize))
240
241                 dp_run_cmd "mpi_run -np $DP_NUM $IOR \
242                         -a POSIX -b ${bsize}K -t ${bsize}K -o $TDIR/ -k \
243                         -s $segments -w -r -i $iter -F -E -z -m -Z $direct" |
244                         awk '($1 !~ /^(write|read|access)$/) || NF>12 {print}'
245                 if [ ${PIPESTATUS[0]} != 0 ]; then
246                         error "IOR write test for ${bsize}K failed, aborting"
247                 fi
248
249                 # check READ performance only (no cache)
250                 dp_run_cmd "mpi_run -np $DP_NUM $IOR \
251                         -a POSIX -b ${bsize}K -t ${bsize}K -o $TDIR/ -X 42\
252                         -s $segments -r -i $iter -F -E -z -m -Z $direct" |
253                         awk '($1 !~ /^(read|access|remove)$/) || NF>12 {print}'
254                 if [ ${PIPESTATUS[0]} != 0 ]; then
255                         error "IOR read test for ${bsize}K failed, aborting"
256                 fi
257
258         done
259         rm -rf $TDIR/*
260         return 0
261 }
262
263 run_Dbench() {
264         if ! which dbench > /dev/null 2>&1 ; then
265                 echo "Dbench is not installed, skipping"
266                 return 0
267         fi
268
269         local TDIR=${1:-$MOUNT}
270
271         if [ "x$DP_DOM_DNE" == "x$TDIR" ] ; then
272                 echo "dbench uses subdirs, skipping for DNE dir"
273                 return 0
274         fi
275
276         dp_run_cmd "dbench -D $TDIR $DP_NUM | egrep -v 'warmup|execute'"
277         if [ ${PIPESTATUS[0]} != 0 ]; then
278                 error "Dbench failed, aborting"
279         fi
280
281         rm -rf $TDIR/*
282         return 0
283 }
284
285 run_FIO() {
286         # https://github.com/axboe/fio/archive/fio-2.8.zip
287         if ! which fio > /dev/null 2>&1 ; then
288                 echo "No FIO installed, skipping"
289                 return 0
290         fi
291
292         local fnum=128 # per thread
293         local total=$((fnum * DP_NUM)) # files in all threads
294         local loops=$((DP_FNUM / total)) # number of loops
295         local direct=""
296         local output=""
297
298         if [ $loops -eq 0 ] ; then
299                 loops=1
300         fi
301
302         if [ "x$DP_DIO" == "xyes" ] ; then
303                 direct="--direct=1"
304         else
305                 direct="--buffered=1 --bs_unaligned=1"
306         fi
307
308         if [ "x$DP_STATS" != "xyes" ] ; then
309                 output="--minimal"
310         fi
311
312         local TDIR=${1:-$MOUNT}
313         base_cmd="fio --name=smallio --ioengine=posixaio $output \
314                   --iodepth=$((DP_NUM * 4)) --directory=$TDIR \
315                   --nrfiles=$fnum --openfiles=10000 \
316                   --numjobs=$DP_NUM --filesize=64k --lockfile=readwrite"
317
318         dp_run_cmd "$base_cmd --create_only=1" > /dev/null
319         if [ ${PIPESTATUS[0]} != 0 ]; then
320                 error "FIO file creation failed, aborting"
321         fi
322
323         local bsizes="8"
324         [ "$SLOW" = "yes" ] && bsizes="4 32"
325
326         for bsize in $bsizes ; do
327                 local write_cmd="$base_cmd --bs=${bsize}k --rw=randwrite \
328                         $direct --file_service_type=random --randrepeat=1 \
329                          --norandommap --group_reporting=1 --loops=$loops"
330                 if [ "x$DP_STATS" != "xyes" ] ; then
331                         dp_run_cmd "$write_cmd | awk -F\; '{printf \"WRITE: \
332                                 BW %dKiB/sec, IOPS %d, lat (%d/%d/%d)usec\n\", \
333                                 \$48, \$49, \$53, \$57, \$81}'"
334                 else
335                         dp_run_cmd "$write_cmd"
336                 fi
337                 if [ ${PIPESTATUS[0]} != 0 ]; then
338                         error "FIO write test with ${bsize}k failed, aborting"
339                 fi
340
341                 local read_cmd="$base_cmd --bs=${bsize}k --rw=randread \
342                         $direct --file_service_type=random --randrepeat=1 \
343                          --norandommap --group_reporting=1 --loops=$loops"
344                 if [ "x$DP_STATS" != "xyes" ] ; then
345                         dp_run_cmd "$read_cmd | awk -F\; '{printf \"READ : \
346                                 BW %dKiB/sec, IOPS %d, lat (%d/%d/%d)usec\n\", \
347                                 \$7, \$8, \$12, \$16, \$40}'"
348                 else
349                         dp_run_cmd "$read_cmd"
350                 fi
351                 if [ ${PIPESTATUS[0]} != 0 ]; then
352                         error "FIO read test with ${bsize}k failed, aborting"
353                 fi
354         done
355         rm -rf $TDIR/*
356         return 0
357 }
358
359 run_compbench() {
360         if ! which compilebench > /dev/null 2>&1 ; then
361                 echo "Compilebench is not installed, skipping"
362                 return 0
363         fi
364
365         local TDIR=${1:-$MOUNT}
366
367         dp_run_cmd "compilebench -D $TDIR -i 2 -r 2 --makej"
368         if [ ${PIPESTATUS[0]} != 0 ]; then
369                 error "Compilebench failed, aborting"
370         fi
371
372         rm -rf $TDIR/*
373 }
374
375 dp_test_run() {
376         local test=$1
377         local facets=$(get_facets MDS)
378         local nodes=$(comma_list $(mdts_nodes))
379         local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
380
381         save_lustre_params $facets "mdt.*.dom_lock" >> $p
382
383         printf "\n##### $test: DoM files\n"
384         do_nodes $nodes "lctl set_param -n mdt.*.dom_lock=1"
385         DP_OSC="mdc"
386         run_${test} $DP_DOM
387
388         if [ -d $DP_DOM_DNE ] ; then
389                 printf "\n##### $test: DoM files + DNE\n"
390                 DP_OSC="mdc"
391                 run_${test} $DP_DOM_DNE
392         fi
393
394         printf "\n##### $test: OST files\n"
395         DP_OSC="osc"
396         run_${test} $DP_NORM
397
398         restore_lustre_params < $p
399         rm -f $p
400 }
401
402 test_smallio() {
403         dp_test_run SmallIO
404 }
405 run_test smallio "Performance comparision: smallio"
406
407 test_mdtest() {
408         dp_test_run MDtest
409 }
410 run_test mdtest "Performance comparision: mdtest"
411
412 test_IOR() {
413         dp_test_run IOR
414 }
415 run_test IOR "Performance comparision: IOR"
416
417 test_dbench() {
418         dp_test_run Dbench
419 }
420 run_test dbench "Performance comparision: dbench"
421
422 test_fio() {
423         dp_test_run FIO
424 }
425 run_test fio "Performance comparision: FIO"
426
427 test_compbench() {
428         dp_test_run compbench
429 }
430 run_test compbench "Performance comparision: compilebench"
431
432 FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR
433 $LCTL set_param -n debug="$SAVED_DEBUG"
434
435 complete $SECONDS
436 check_and_cleanup_lustre
437 exit_status