3 # Simple function used by run_*.sh scripts
8 if [ -z "${!name}" ]; then
9 echo "$0: $name must be set"
13 [ $failed ] && exit 1 || true
16 # lrepl - Lustre test Read-Eval-Print Loop.
18 # This function implements a REPL for the Lustre test framework. It
19 # doesn't exec an actual shell because the user may want to inspect
20 # variables and use functions from the test framework.
27 This is an interactive read-eval-print loop interactive shell
28 simulation that you can use to debug failing tests. You can
29 enter most bash command lines (see notes below).
31 Use this REPL to inspect variables, set them, call test
32 framework shell functions, etcetera.
34 'exit' or EOF to exit this shell.
36 set \$retcode to 0 to cause the assertion failure that
37 triggered this REPL to be ignored.
40 do_facet ost1 lctl get_param ost.*.ost.threads_*
41 do_rpc_nodes \$OSTNODES unload_modules
44 All but the last line of multi-line statements or blocks
45 must end in a backslash.
47 "Here documents" are not supported.
49 History is not supported, but command-line editing is.
53 # Prompt escapes don't work in read -p, sadly.
54 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
56 # We use read -r to get close to a shell experience
57 while read -e -r -p "$prompt" rawline; do
60 # Don't want to exit-exit, just exit the REPL
62 # We need to handle continuations, and read -r doesn't do
63 # that for us. Yet we need read -r.
65 # We also use case/esac to compare lines read to "*\\"
66 # because [ "$line" = *\\ ] and variants of that don't work.
68 while read -e -r -p '> ' rawline
70 line="$line"$'\n'"$rawline"
72 # We could check for here documents by matching
73 # against *<<*, but who cares.
86 # Finally! Time to eval.
90 echo $'\n\tExiting interactive shell...\n'
94 # lassert - Lustre test framework assert
96 # Arguments: failure code, failure message, expression/statement
98 # lassert evaluates the expression given, and, if false, calls
99 # error() to trigger test failure. If REPL_ON_LASSERT is true then
100 # lassert will call lrepl() to give the user an interactive shell.
101 # If the REPL sets retcode=0 then the assertion failure will be
108 echo "checking $* ($(eval echo \""$*"\"))..."
109 eval "$@" && return 0;
111 if ${REPL_ON_LASSERT:-false}; then
112 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
117 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
122 # setmodopts - set module options for subsequent calls to load_modules
124 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
125 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
127 # In the second usage the new value is appended to the old.
131 if [ "$1" = -a ]; then
136 local _var=MODOPTS_$1
141 # Dynamic naming of variables is a pain in bash. In ksh93 we could
142 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
143 # to opts_var. Associative arrays would also help, alternatively.
144 # Alas, we're stuck with eval until all distros move to a more recent
145 # version of bash. Fortunately we don't need to eval unset and export.
147 if [ -z "$_newvalue" ]; then
153 $_append && _newvalue="$_oldvalue $_newvalue"
154 export $_var="$_newvalue"
155 echo setmodopts: ${_var}=${_newvalue}
157 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - echo all arguments, but to standard error instead of stdout.
echoerr () {
	echo "$@" >&2
}
163 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
165 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
172 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
173 local command="$mpirun $@"
174 local mpilog=$TMP/mpi.log
177 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
178 echo "+ chmod 0777 $MOUNT"
180 command="su $MPI_USER sh -c \"$command \""
185 eval $command 2>&1 | tee $mpilog || true
188 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
196 for i in ${1//,/ }; do
197 list="$list $i@$NETTYPE"
202 # FIXME: all setup/cleanup can be done without rpc.sh
205 [ x$1 = x--verbose ] && verbose=true
207 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
208 [ "$LST_SESSION" == "" ] && return
217 lst_session_cleanup_all () {
218 local list=$(comma_list $(nodes_list))
219 do_rpc_nodes $list lst_end_session
223 lsmod | grep -q lnet_selftest && \
224 rmmod lnet_selftest > /dev/null 2>&1 || true
228 local list=$(comma_list $(nodes_list))
230 # lst end_session needs to be executed only locally
231 # i.e. on node where lst new_session was called
232 lst_end_session --verbose
233 do_rpc_nodes $list lst_cleanup
237 load_module lnet_selftest
241 local list=$(comma_list $(nodes_list))
242 do_rpc_nodes $list lst_setup
248 # Passed a single argument, strips everything off following
249 # and includes the first period.
250 # client-20.lab.whamcloud.com becomes client-20
252 echo $(sed 's/\..*//' <<< $1)
262 echo "${var}=${!var}"
264 [ -e $MACHINEFILE ] && cat $MACHINEFILE
270 # 5 min * cbench_RUNS
274 # compile dir kernel-1 680MB
275 # required space 680MB * cbench_IDIRS = ~7 Gb
277 cbench_DIR=${cbench_DIR:-""}
278 cbench_IDIRS=${cbench_IDIRS:-4}
279 cbench_RUNS=${cbench_RUNS:-4}
281 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
283 [ x$cbench_DIR = x ] &&
284 { skip_env "compilebench not found" && return; }
286 [ -e $cbench_DIR/compilebench ] || \
287 { skip_env "No compilebench build" && return; }
289 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
290 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
291 cbench_IDIRS=$(( space / 680 / 1024))
292 [ $cbench_IDIRS = 0 ] && \
293 skip_env "Need free space atleast 680 Mb, have $space" && return
295 log free space=$space, reducing initial dirs to $cbench_IDIRS
298 # t-f _base needs to be modified to set tdir properly
299 # for new "test_foo" functions names
300 # local testdir=$DIR/$tdir
301 local testdir=$DIR/d0.compilebench
306 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
307 -r $cbench_RUNS --makej"
316 [ $rc = 0 ] || error "compilebench failed: $rc"
322 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
323 mbench_NFILES=${mbench_NFILES:-30400}
325 mbench_THREADS=${mbench_THREADS:-4}
327 [ x$METABENCH = x ] &&
328 { skip_env "metabench not found" && return; }
331 # Need space estimation here.
333 print_opts METABENCH clients mbench_NFILES mbench_THREADS
335 local testdir=$DIR/d0.metabench
337 # mpi_run uses mpiuser
340 # -C Run the file creation tests.
341 # -S Run the file stat tests.
342 # -c nfile Number of files to be used in each test.
343 # -k Cleanup. Remove the test directories.
344 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
347 # find out if we need to use srun by checking $SRUN_PARTITION
348 if [ "$SRUN_PARTITION" ]; then
349 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
350 -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
352 mpi_run -np $((num_clients * $mbench_THREADS)) \
353 -machinefile ${MACHINEFILE} $cmd
357 if [ $rc != 0 ] ; then
358 error "metabench failed! $rc"
365 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
367 simul_THREADS=${simul_THREADS:-2}
368 simul_REP=${simul_REP:-20}
370 if [ "$NFSCLIENT" ]; then
371 skip "skipped for NFSCLIENT mode"
376 { skip_env "simul not found" && return; }
379 # Need space estimation here.
381 print_opts SIMUL clients simul_REP simul_THREADS
383 local testdir=$DIR/d0.simul
385 # mpi_run uses mpiuser
388 # -n # : repeat each test # times
389 # -N # : repeat the entire set of tests # times
391 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
394 # find out if we need to use srun by checking $SRUN_PARTITION
395 if [ "$SRUN_PARTITION" ]; then
396 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
397 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
399 mpi_run -np $((num_clients * simul_THREADS)) \
400 -machinefile ${MACHINEFILE} $cmd
404 if [ $rc != 0 ] ; then
405 error "simul failed! $rc"
412 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
414 mdtest_THREADS=${mdtest_THREADS:-2}
415 mdtest_nFiles=${mdtest_nFiles:-"100000"}
416 # We divide the number of files by the number of cores
417 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
418 mdtest_iteration=${mdtest_iteration:-1}
420 local type=${1:-"ssf"}
422 if [ "$NFSCLIENT" ]; then
423 skip "skipped for NFSCLIENT mode"
428 { skip_env "mdtest not found" && return; }
431 # Need space estimation here.
433 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
435 local testdir=$DIR/d0.mdtest
437 # mpi_run uses mpiuser
440 # -i # : repeat each test # times
442 # -n # : number of file/dir to create/stat/remove
443 # -u : each process create/stat/remove individually
445 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
446 [ $type = "fpp" ] && cmd="$cmd -u"
449 # find out if we need to use srun by checking $SRUN_PARTITION
450 if [ "$SRUN_PARTITION" ]; then
451 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
452 -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
454 mpi_run -np $((num_clients * mdtest_THREADS)) \
455 -machinefile ${MACHINEFILE} $cmd
459 if [ $rc != 0 ] ; then
460 error "mdtest failed! $rc"
467 cnt_DIR=${cnt_DIR:-""}
468 cnt_NRUN=${cnt_NRUN:-10}
470 print_opts cnt_DIR cnt_NRUN
473 { skip_env "connectathon dir not found" && return; }
475 [ -e $cnt_DIR/runtests ] || \
476 { skip_env "No connectathon runtests found" && return; }
478 local testdir=$DIR/d0.connectathon
485 # cthon options (must be in this order)
487 # -N numpasses - will be passed to the runtests script. This argument
488 # is optional. It specifies the number of times to run
491 # One of these test types
496 # -a all of the above
498 # -f a quick functionality test
502 # Include lock tests unless we're running on nfsv4
503 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
504 echo "$testdir: $fstype"
505 if [[ $fstype != "nfs4" ]]; then
509 for test in $tests; do
510 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
516 [ $rc = 0 ] || error "connectathon failed: $rc"
524 local type=${1:="ssf"}
526 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
528 ior_THREADS=${ior_THREADS:-2}
529 ior_iteration=${ior_iteration:-1}
530 ior_blockSize=${ior_blockSize:-6} # GB
531 ior_xferSize=${ior_xferSize:-2m}
532 ior_type=${ior_type:-POSIX}
533 ior_DURATION=${ior_DURATION:-30} # minutes
536 { skip_env "IOR not found" && return; }
538 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
539 local total_threads=$(( num_clients * ior_THREADS ))
540 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
541 if [ $((space / 2)) -le \
542 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
543 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
544 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
545 [ $ior_blockSize = 0 ] && \
546 skip_env "Need free space more than $((2 * total_threads))GB: \
547 $((total_threads *1024 *1024*2)), have $space" && return
549 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
550 echo "free space=$space, Need: $reduced_size GB"
551 echo "(blockSize reduced to $ior_blockSize Gb)"
554 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
556 local testdir=$DIR/d0.ior.$type
558 # mpi_run uses mpiuser
560 if [ "$NFSCLIENT" ]; then
561 setstripe_nfsserver $testdir -c -1 ||
562 { error "setstripe on nfsserver failed" && return 1; }
564 $LFS setstripe $testdir -c -1 ||
565 { error "setstripe failed" && return 2; }
569 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
571 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
572 # -w writeFile -- write file"
573 # -r readFile -- read existing file"
574 # -T maxTimeDuration -- max time in minutes to run tests"
575 # -k keepFile -- keep testFile(s) on program exit
577 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
578 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
579 [ $type = "fpp" ] && cmd="$cmd -F"
582 # find out if we need to use srun by checking $SRUN_PARTITION
583 if [ "$SRUN_PARTITION" ]; then
584 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
585 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
587 mpi_run -np $((num_clients * $ior_THREADS)) \
588 -machinefile ${MACHINEFILE} $cmd
592 if [ $rc != 0 ] ; then
593 error "ior failed! $rc"
600 MIB=${MIB:=$(which mib 2> /dev/null || true)}
602 mib_THREADS=${mib_THREADS:-2}
603 mib_xferSize=${mib_xferSize:-1m}
604 mib_xferLimit=${mib_xferLimit:-5000}
605 mib_timeLimit=${mib_timeLimit:-300}
607 if [ "$NFSCLIENT" ]; then
608 skip "skipped for NFSCLIENT mode"
613 { skip_env "MIB not found" && return; }
615 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
618 local testdir=$DIR/d0.mib
620 # mpi_run uses mpiuser
622 $LFS setstripe $testdir -c -1 ||
623 { error "setstripe failed" && return 2; }
625 # -I Show intermediate values in output
626 # -H Show headers in output
627 # -L Do not issue new system calls after this many seconds
628 # -s Use system calls of this size
630 # -l Issue no more than this many system calls
631 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
632 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
635 # find out if we need to use srun by checking $SRUN_PARTITION
636 if [ "$SRUN_PARTITION" ]; then
637 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
638 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
640 mpi_run -np $((num_clients * mib_THREADS)) \
641 -machinefile ${MACHINEFILE} $cmd
645 if [ $rc != 0 ] ; then
646 error "mib failed! $rc"
653 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
655 casc_THREADS=${casc_THREADS:-2}
656 casc_REP=${casc_REP:-300}
658 if [ "$NFSCLIENT" ]; then
659 skip "skipped for NFSCLIENT mode"
664 { skip_env "cascading_rw not found" && return; }
667 # Need space estimation here.
669 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
671 local testdir=$DIR/d0.cascading_rw
673 # mpi_run uses mpiuser
677 # -n: repeat test # times
679 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
682 mpi_run -np $((num_clients * $casc_THREADS)) \
683 -machinefile ${MACHINEFILE} $cmd
686 if [ $rc != 0 ] ; then
687 error "cascading_rw failed! $rc"
692 run_write_append_truncate() {
695 write_THREADS=${write_THREADS:-8}
696 write_REP=${write_REP:-10000}
698 if [ "$NFSCLIENT" ]; then
699 skip "skipped for NFSCLIENT mode"
703 # location is lustre/tests dir
704 if ! which write_append_truncate > /dev/null 2>&1 ; then
705 skip_env "write_append_truncate not found"
710 # Need space estimation here.
712 local testdir=$DIR/d0.write_append_truncate
713 local file=$testdir/f0.wat
715 print_opts clients write_REP write_THREADS MACHINEFILE
718 # mpi_run uses mpiuser
721 local cmd="write_append_truncate -n $write_REP $file"
724 mpi_run -np $((num_clients * $write_THREADS)) \
725 -machinefile ${MACHINEFILE} $cmd
728 if [ $rc != 0 ] ; then
729 error "write_append_truncate failed! $rc"
735 run_write_disjoint() {
737 WRITE_DISJOINT=${WRITE_DISJOINT:-\
738 $(which write_disjoint 2> /dev/null || true)}
740 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
741 wdisjoint_REP=${wdisjoint_REP:-10000}
743 if [ "$NFSCLIENT" ]; then
744 skip "skipped for NFSCLIENT mode"
748 [ x$WRITE_DISJOINT = x ] &&
749 { skip_env "write_disjoint not found" && return; }
752 # Need space estimation here.
754 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
756 local testdir=$DIR/d0.write_disjoint
758 # mpi_run uses mpiuser
761 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
764 mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
765 -machinefile ${MACHINEFILE} $cmd
768 if [ $rc != 0 ] ; then
769 error "write_disjoint failed! $rc"
774 run_parallel_grouplock() {
776 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-\
777 $(which parallel_grouplock 2> /dev/null || true)}
778 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
780 if [ "$NFSCLIENT" ]; then
781 skip "skipped for NFSCLIENT mode"
785 [ x$PARALLEL_GROUPLOCK = x ] &&
786 { skip "PARALLEL_GROUPLOCK not found" && return; }
788 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
790 local testdir=$DIR/d0.parallel_grouplock
792 # mpi_run uses mpiuser
795 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
796 error "set_param max_rw_chunk=0 failed "
801 for i in $(seq 12); do
803 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
806 mpi_run -np $parallel_grouplock_MINTASKS \
807 -machinefile ${MACHINEFILE} $cmd
809 if [ $rc != 0 ] ; then
810 error_noexit "parallel_grouplock subtests $subtest failed! $rc"
812 echo "parallel_grouplock subtests $subtest PASS"
814 let status=$((status + rc))
815 # clear debug to collect one log per one test
816 do_nodes $(comma_list $(nodes_list)) lctl clear
818 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
822 cleanup_statahead () {
829 for i in $(seq 0 $num_mntpts);do
830 zconf_umount_clients $clients ${mntpt_root}$i ||
831 error_exit "Failed to umount lustre on ${mntpt_root}$i"
837 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
838 statahead_NUMFILES=${statahead_NUMFILES:-500000}
840 if [[ -n $NFSCLIENT ]]; then
841 skip "Statahead testing is not supported on NFS clients."
846 { skip_env "mdsrate not found" && return; }
848 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
852 # do not use default "d[0-9]*" dir name
853 # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
855 local testdir=$DIR/$dir
857 # cleanup only if dir exists
858 # cleanup only $statahead_NUMFILES number of files
859 # ignore the other files created by someone else
861 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
862 $statahead_NUMFILES $testdir 'f%%d' --ignore
865 # mpi_run uses mpiuser
868 local num_files=$statahead_NUMFILES
870 local IFree=$(inodes_available)
871 if [ $IFree -lt $num_files ]; then
877 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
878 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
879 local cmd="$cmd1 $cmd2"
882 mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
885 if [ $rc != 0 ] ; then
886 error "mdsrate failed to create $rc"
890 local num_mntpts=$statahead_NUMMNTPTS
891 local mntpt_root=$TMP/mntpt/lustre
892 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
894 echo "Mounting $num_mntpts lustre clients starts on $clients"
895 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
896 for i in $(seq 0 $num_mntpts); do
897 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
898 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
901 do_rpc_nodes $clients cancel_lru_locks mdc
903 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
905 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
906 $num_files $testdir 'f%%d' --ignore
908 # use rm instead of rmdir because of
909 # testdir could contain the files created by someone else,
910 # or by a previous run where the previous num_files > the current num_files
912 cleanup_statahead $clients $mntpt_root $num_mntpts