# Locate the Lustre tree, source the shared test framework and the per-site
# config, then build the MPI machine file from the client list.
# NOTE(review): this chunk is an excerpt — each line carries a stray original
# line-number prefix and the numbering has gaps, so lines are missing between
# visible statements. Confirm against the full script before running.
5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
6 . $LUSTRE/tests/test-framework.sh
8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# parallel_grouplock is excluded from the default run; $PARALLEL_SCALE_EXCEPT
# lets the site config add further exclusions.
12 ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
16 MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
17 clients=${CLIENTS:-$HOSTNAME}
18 generate_machine_file $clients $MACHINEFILE || error "Failed to generate machine file"
# get_node_count expects a space-separated list, hence the ',' -> ' ' expansion.
19 num_clients=$(get_node_count ${clients//,/ })
# ---------------------------------------------------------------------------
# Per-benchmark tunables. Each variable honors a pre-set environment value and
# falls back to a default; SLOW=no shrinks workloads for quick runs.
# NOTE(review): gaps in the embedded numbering mean some tunable lines (and at
# least one if/fi body, below) are missing from this excerpt.
# ---------------------------------------------------------------------------
# compilebench
24 cbench_DIR=${cbench_DIR:-""}
25 cbench_IDIRS=${cbench_IDIRS:-4}
26 cbench_RUNS=${cbench_RUNS:-4} # FIXME: the wiki page requires 30 — do we really need 30?
# NOTE(review): dangling 'if' — its body and 'fi' are missing from this excerpt.
28 if [ "$SLOW" = "no" ]; then
# metabench
36 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
37 mbench_NFILES=${mbench_NFILES:-30400}
38 [ "$SLOW" = "no" ] && mbench_NFILES=10000
40 mbench_THREADS=${mbench_THREADS:-4}
# simul
45 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
47 simul_THREADS=${simul_THREADS:-2}
48 simul_REP=${simul_REP:-20}
49 [ "$SLOW" = "no" ] && simul_REP=2
# mib
54 MIB=${MIB:=$(which mib 2> /dev/null || true)}
56 mib_THREADS=${mib_THREADS:-2}
57 mib_xferSize=${mib_xferSize:-1m}
58 mib_xferLimit=${mib_xferLimit:-5000}
59 mib_timeLimit=${mib_timeLimit:-300}
# mdtest
64 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
66 mdtest_THREADS=${mdtest_THREADS:-2}
67 mdtest_nFiles=${mdtest_nFiles:-"100000"}
68 # We divide the files by the number of cores
69 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
70 mdtest_iteration=${mdtest_iteration:-1}
# connectathon
75 cnt_DIR=${cnt_DIR:-""}
76 cnt_NRUN=${cnt_NRUN:-10}
77 [ "$SLOW" = "no" ] && cnt_NRUN=2
# cascading_rw
82 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
84 casc_THREADS=${casc_THREADS:-2}
85 casc_REP=${casc_REP:-300}
86 [ "$SLOW" = "no" ] && casc_REP=10
# IOR
91 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
93 ior_THREADS=${ior_THREADS:-2}
94 ior_iteration=${ior_iteration:-1}
95 ior_blockSize=${ior_blockSize:-6} # Gb
96 ior_xferSize=${ior_xferSize:-2m}
97 ior_type=${ior_type:-POSIX}
98 ior_DURATION=${ior_DURATION:-30} # minutes
99 [ "$SLOW" = "no" ] && ior_DURATION=5
102 # write_append_truncate
105 write_THREADS=${write_THREADS:-8}
106 write_REP=${write_REP:-10000}
107 [ "$SLOW" = "no" ] && write_REP=100
# write_disjoint
112 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null || true)}
114 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
115 wdisjoint_REP=${wdisjoint_REP:-10000}
# parallel_grouplock
116 [ "$SLOW" = "no" ] && wdisjoint_REP=100
122 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 2> /dev/null || true)}
123 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
# Mount/verify the Lustre filesystem and set up the unprivileged MPI user
# (runas + optional Kerberos ticket refresh).
126 check_and_setup_lustre
128 get_mpiuser_id $MPI_USER
129 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
130 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
# NOTE(review): the two lines below appear to be the interior of a print_opts
# helper (indirect ${!var} expansion of each option name) — the surrounding
# function definition is missing from this excerpt; confirm against the full file.
140 echo "${var}=${!var}"
142 [ -e $MACHINEFILE ] && cat $MACHINEFILE
# test_compilebench: run compilebench against $DIR, shrinking the number of
# initial dirs if free space is short. Expected runtime/space notes below.
# NOTE(review): interior lines (fi, the run command, rc capture) are missing
# from this excerpt — the visible body is incomplete.
146 # 5 min * cbench_RUNS
150 # compile dir kernel-1 680MB
151 # required space 680MB * cbench_IDIRS = ~7 Gb
153 test_compilebench() {
154 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
156 [ x$cbench_DIR = x ] &&
157 { skip_env "compilebench not found" && return; }
159 [ -e $cbench_DIR/compilebench ] || \
160 { skip_env "No compilebench build" && return; }
# Available space in KB on $DIR; each initial dir needs ~680 MB.
162 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
163 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
164 cbench_IDIRS=$(( space / 680 / 1024))
165 [ $cbench_IDIRS = 0 ] && \
166 skip_env "Need free space atleast 680 Mb, have $space" && return
168 log free space=$space, reducing initial dirs to $cbench_IDIRS
171 # t-f _base needs to be modified to set tdir properly
172 # for the new "test_foo" function names
173 # local testdir=$DIR/$tdir
174 local testdir=$DIR/d0.compilebench
179 local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
188 [ $rc = 0 ] || error "compilebench failed: $rc"
191 run_test compilebench "compilebench"
# test_metabench: metadata benchmark (create + stat + cleanup) via mpi_run,
# or srun when $SRUN_PARTITION is set.
# NOTE(review): the "test_metabench() {" header, the else/fi of the srun
# branch, and the closing brace are missing from this excerpt.
194 [ x$METABENCH = x ] &&
195 { skip_env "metabench not found" && return; }
198 # Need space estimation here.
200 print_opts METABENCH clients mbench_NFILES mbench_THREADS
202 local testdir=$DIR/d0.metabench
204 # mpi_run uses mpiuser
207 # -C Run the file creation tests.
208 # -S Run the file stat tests.
209 # -c nfile Number of files to be used in each test.
210 # -k Cleanup. Remove the test directories.
211 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
214 # find out if we need to use srun by checking $SRUN_PARTITION
215 if [ "$SRUN_PARTITION" ]; then
216 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
217 -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
219 mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
223 if [ $rc != 0 ] ; then
224 error "metabench failed! $rc"
228 run_test metabench "metabench"
# test_simul: simultaneous-ops POSIX consistency test, skipped on NFS clients.
# NOTE(review): function header, several fi/else lines, and the closing brace
# are missing from this excerpt.
231 if [ "$NFSCLIENT" ]; then
232 skip "skipped for NFSCLIENT mode"
237 { skip_env "simul not found" && return; }
240 # Need space estimation here.
242 print_opts SIMUL clients simul_REP simul_THREADS
244 local testdir=$DIR/d0.simul
246 # mpi_run uses mpiuser
249 # -n # : repeat each test # times
250 # -N # : repeat the entire set of tests # times
# NOTE(review): both -n and -N are fed from $simul_REP — verify that is the
# intended behavior and not a copy/paste slip.
252 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
255 # find out if we need to use srun by checking $SRUN_PARTITION
256 if [ "$SRUN_PARTITION" ]; then
257 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
258 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
260 mpi_run -np $((num_clients * simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
264 if [ $rc != 0 ] ; then
265 error "simul failed! $rc"
269 run_test simul "simul"
# mdtest runner shared by the ssf (single shared file) and fpp (file per
# process, adds -u) variants; skipped on NFS clients.
# NOTE(review): the enclosing function header, the test_mdtestssf/fpp wrapper
# definitions, and several fi/closing lines are missing from this excerpt.
272 local type=${1:-"ssf"}
274 if [ "$NFSCLIENT" ]; then
275 skip "skipped for NFSCLIENT mode"
280 { skip_env "mdtest not found" && return; }
283 # Need space estimation here.
285 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
287 local testdir=$DIR/d0.mdtest
289 # mpi_run uses mpiuser
292 # -i # : repeat each test # times
294 # -n # : number of file/dir to create/stat/remove
295 # -u : each process create/stat/remove individually
297 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
298 [ $type = "fpp" ] && cmd="$cmd -u"
301 # find out if we need to use srun by checking $SRUN_PARTITION
302 if [ "$SRUN_PARTITION" ]; then
303 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
304 -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
306 mpi_run -np $((num_clients * mdtest_THREADS)) -machinefile ${MACHINEFILE} $cmd
310 if [ $rc != 0 ] ; then
311 error "mdtest failed! $rc"
319 run_test mdtestssf "mdtestssf"
324 run_test mdtestfpp "mdtestfpp"
# test_connectathon: run the cthon04 NFS test suite against $testdir, once per
# selected test type, skipping lock tests on nfsv4 mounts.
# NOTE(review): the $tests list assembly, loop body (run + rc capture), done,
# and closing brace are missing from this excerpt.
326 test_connectathon() {
327 print_opts cnt_DIR cnt_NRUN
330 { skip_env "connectathon dir not found" && return; }
332 [ -e $cnt_DIR/runtests ] || \
333 { skip_env "No connectathon runtests found" && return; }
335 local testdir=$DIR/d0.connectathon
342 # cthon options (must be in this order)
344 # -N numpasses - will be passed to the runtests script. This argument
345 # is optional. It specifies the number of times to run
348 # One of these test types
353 # -a all of the above
355 # -f a quick functionality test
359 # Include lock tests unless we're running on nfsv4
# df -TP prints the fs type in column 2 of the second output line.
360 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
361 echo "$testdir: $fstype"
362 if [[ $fstype != "nfs4" ]]; then
366 for test in $tests; do
367 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
373 [ $rc = 0 ] || error "connectathon failed: $rc"
379 run_test connectathon "connectathon"
# IOR runner shared by the iorssf (single shared file) and iorfpp (file per
# process, adds -F) variants. Shrinks the per-task block size when the free
# space on $DIR cannot hold clients * threads * blockSize GB.
# NOTE(review): the enclosing function header, several fi/else lines, the
# test_iorssf/iorfpp wrappers, and the closing brace are missing from this
# excerpt.
382 local type=${1:="ssf"}
385 { skip_env "IOR not found" && return; }
# Free KB on $DIR; compare half of it against the aggregate write size in KB
# (blockSize GB * 1024 * 1024 per task).
387 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
388 echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS "
389 if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then
390 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
391 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
392 [ $ior_blockSize = 0 ] && \
393 skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
395 echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
398 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
400 local testdir=$DIR/d0.ior
402 # mpi_run uses mpiuser
# Stripe across all OSTs (-c -1); on NFS clients the stripe must be set on the
# server side.
404 if [ "$NFSCLIENT" ]; then
405 setstripe_nfsserver $testdir -c -1 ||
406 { error "setstripe on nfsserver failed" && return 1; }
408 $LFS setstripe $testdir -c -1 ||
409 { error "setstripe failed" && return 2; }
412 # -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
414 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
415 # -w writeFile -- write file"
416 # -r readFile -- read existing file"
417 # -T maxTimeDuration -- max time in minutes to run tests"
418 # -k keepFile -- keep testFile(s) on program exit
420 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
421 [ $type = "fpp" ] && cmd="$cmd -F"
424 # find out if we need to use srun by checking $SRUN_PARTITION
425 if [ "$SRUN_PARTITION" ]; then
426 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
427 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
429 mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
433 if [ $rc != 0 ] ; then
434 error "ior failed! $rc"
442 run_test iorssf "iorssf"
447 run_test iorfpp "iorfpp"
# test_mib: MPI I/O bandwidth benchmark, skipped on NFS clients. Stripes the
# test dir across all OSTs before the run.
# NOTE(review): the function header, fi/else lines, run_test registration, and
# closing brace are missing from this excerpt.
450 if [ "$NFSCLIENT" ]; then
451 skip "skipped for NFSCLIENT mode"
456 { skip_env "MIB not found" && return; }
458 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit MACHINEFILE
460 local testdir=$DIR/d0.mib
462 # mpi_run uses mpiuser
464 $LFS setstripe $testdir -c -1 ||
465 { error "setstripe failed" && return 2; }
467 # -I Show intermediate values in output
468 # -H Show headers in output
469 # -L Do not issue new system calls after this many seconds
470 # -s Use system calls of this size
472 # -l Issue no more than this many system calls
473 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
476 # find out if we need to use srun by checking $SRUN_PARTITION
477 if [ "$SRUN_PARTITION" ]; then
478 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
479 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
481 mpi_run -np $((num_clients * mib_THREADS)) -machinefile ${MACHINEFILE} $cmd
485 if [ $rc != 0 ] ; then
486 error "mib failed! $rc"
# test_cascading_rw: overlapping read/write consistency test via mpi_run,
# skipped on NFS clients.
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
492 test_cascading_rw() {
493 if [ "$NFSCLIENT" ]; then
494 skip "skipped for NFSCLIENT mode"
499 { skip_env "cascading_rw not found" && return; }
502 # Need space estimation here.
504 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
506 local testdir=$DIR/d0.cascading_rw
508 # mpi_run uses mpiuser
512 # -n: repeat test # times
514 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
517 mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd
520 if [ $rc != 0 ] ; then
521 error "cascading_rw failed! $rc"
525 run_test cascading_rw "cascading_rw"
# test_write_append_truncate: concurrent write/append/truncate consistency
# test on a single shared file; skipped on NFS clients. The binary is expected
# on PATH (built in lustre/tests).
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
527 test_write_append_truncate() {
528 if [ "$NFSCLIENT" ]; then
529 skip "skipped for NFSCLIENT mode"
533 # location is lustre/tests dir
534 if ! which write_append_truncate > /dev/null 2>&1 ; then
535 skip_env "write_append_truncate not found"
540 # Need space estimation here.
542 local testdir=$DIR/d0.write_append_truncate
543 local file=$testdir/f0.wat
545 print_opts clients write_REP write_THREADS MACHINEFILE
548 # mpi_run uses mpiuser
551 local cmd="write_append_truncate -n $write_REP $file"
554 mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd
557 if [ $rc != 0 ] ; then
558 error "write_append_truncate failed! $rc"
563 run_test write_append_truncate "write_append_truncate"
# test_write_disjoint: each task writes a disjoint region of one shared file;
# skipped on NFS clients.
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
565 test_write_disjoint() {
566 if [ "$NFSCLIENT" ]; then
567 skip "skipped for NFSCLIENT mode"
571 [ x$WRITE_DISJOINT = x ] &&
572 { skip_env "write_disjoint not found" && return; }
575 # Need space estimation here.
577 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
578 local testdir=$DIR/d0.write_disjoint
580 # mpi_run uses mpiuser
583 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
586 mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd
589 if [ $rc != 0 ] ; then
590 error "write_disjoint failed! $rc"
594 run_test write_disjoint "write_disjoint"
# test_parallel_grouplock: run the 12 group-lock subtests, accumulating a
# status so every subtest is attempted before reporting; disables max_rw_chunk
# on all clients first. Skipped on NFS clients.
# NOTE(review): interior lines (subtest/status setup, rc capture, fi, done,
# closing brace) are missing from this excerpt.
596 test_parallel_grouplock() {
597 if [ "$NFSCLIENT" ]; then
598 skip "skipped for NFSCLIENT mode"
602 [ x$PARALLEL_GROUPLOCK = x ] &&
603 { skip "PARALLEL_GROUPLOCK not found" && return; }
605 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
607 local testdir=$DIR/d0.parallel_grouplock
609 # mpi_run uses mpiuser
612 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
613 error "set_param max_rw_chunk=0 failed "
618 for i in $(seq 12); do
620 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
623 mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd
625 if [ $rc != 0 ] ; then
# error_noexit keeps iterating so all subtests run before the final check.
626 error_noexit "parallel_grouplock subtests $subtest failed! $rc"
628 echo "parallel_grouplock subtests $subtest PASS"
630 let status=$((status + rc))
631 # clear debug to collect one log per one test
632 do_nodes $(comma_list $(nodes_list)) lctl clear
634 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
637 run_test parallel_grouplock "parallel_grouplock"
# statahead test: create many files with mdsrate, then ls them through several
# extra client mount points to exercise statahead; cleanup_statahead unmounts
# the extra mounts on exit/error.
# NOTE(review): the cleanup_statahead/test_statahead function headers, 'done'
# lines, fi lines, and closing braces are partly missing from this excerpt.
639 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
640 statahead_NUMFILES=${statahead_NUMFILES:-500000}
642 cleanup_statahead () {
649 for i in $(seq 0 $num_mntpts);do
650 zconf_umount_clients $clients ${mntpt_root}$i ||
651 error_exit "Failed to umount lustre on ${mntpt_root}$i"
656 if [[ -n $NFSCLIENT ]]; then
657 skip "Statahead testing is not supported on NFS clients."
662 { skip_env "mdsrate not found" && return; }
664 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
668 # do not use default "d[0-9]*" dir name
669 # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
671 local testdir=$DIR/$dir
673 # cleanup only if dir exists
674 # cleanup only $statahead_NUMFILES number of files
675 # ignore the other files created by someone else
677 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
680 # mpi_run uses mpiuser
683 local num_files=$statahead_NUMFILES
685 local IFree=$(inodes_available)
686 if [ $IFree -lt $num_files ]; then
692 local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
695 mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
698 if [ $rc != 0 ] ; then
699 error "mdsrate failed to create $rc"
703 local num_mntpts=$statahead_NUMMNTPTS
704 local mntpt_root=$TMP/mntpt/lustre
705 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
707 echo "Mounting $num_mntpts lustre clients starts on $clients"
# Ensure the extra mounts are torn down on any exit path.
708 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
709 for i in $(seq 0 $num_mntpts); do
710 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
711 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
# Drop cached MDC locks so the ls actually triggers statahead RPCs.
714 do_rpc_nodes $clients cancel_lru_locks mdc
716 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
718 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
720 # use rm instead of rmdir because of
721 # testdir could contain the files created by someone else,
722 # or by previous run where is num_files prev > num_files current
724 cleanup_statahead $clients $mntpt_root $num_mntpts
727 run_test statahead "statahead test, multiple clients"
# Record completion time and clean up the Lustre setup.
729 complete $(basename $0) $SECONDS
730 check_and_cleanup_lustre