2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple functions used by run_*.sh scripts
# NOTE(review): fragment of an environment-assertion helper; the enclosing
# function definition and loop header are not visible in this extract.
# "${!name}" is bash indirect expansion: it tests the variable whose NAME
# is stored in $name.
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
# Abort if any required variable was missing; '|| true' keeps a shell
# running under 'set -e' when nothing failed.
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
# NOTE(review): the lines below (help text shown to the user) are part of
# a here-document, not comments — do not edit them as documentation.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
# Inner loop: accumulate backslash-continued lines into $line before eval.
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
# NOTE(review): fragment — the function's opening line and part of its
# body are not visible in this extract.
# Print both the raw expression and its expanded form, then evaluate it;
# a true result short-circuits with success.
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
# On failure, either drop into the interactive REPL or report the error.
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
124 # setmodopts - set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
# '-a' selects append mode for the MODOPTS_<module> variable.
133 if [ "$1" = -a ]; then
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
149 if [ -z "$_newvalue" ]; then
# Append mode: prepend the old value so the new options extend it.
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
# Optionally save the previous value into the caller-named variable.
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - write all arguments to standard error instead of stdout.
echoerr () { echo "$@" >&2 ; }
# NOTE(review): fragment of a termination-signal handler for client-load
# scripts; the enclosing function definition is not visible here.
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
# Look up the process-group ID of the parent so the whole load can be
# addressed; matches $PPID in the ps output — TODO confirm robustness of
# the awk pattern against unrelated PIDs containing the same digits.
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
# NOTE(review): fragment of the mpi_run wrapper; the function's opening
# line and parts of the body are missing from this extract.
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
# When a non-root MPI user is configured, open up the mount point and
# re-run the command under that user via su.
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
# '|| true' lets the exit status be inspected afterwards without
# aborting under 'set -e'; output is captured to $mpilog.
187 eval $command 2>&1 | tee $mpilog || true
# Some MPI stacks exit 0 even on error; also scan the log for p4_error.
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
# NOTE(review): fragment — builds a list of LNET NIDs by appending
# "@$NETTYPE" to each comma-separated host in $1.
198 for i in ${1//,/ }; do
199 list="$list $i@$NETTYPE"
204 # FIXME: all setup/cleanup can be done without rpc.sh
# NOTE(review): fragment of lst_end_session; optional --verbose flag.
207 [ x$1 = x--verbose ] && verbose=true
# Nothing to do when no LNET selftest session exists.
209 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
210 [ "$LST_SESSION" == "" ] && return
# End the LNET selftest session on every node in the cluster via RPC.
219 lst_session_cleanup_all () {
220 local list=$(comma_list $(nodes_list))
221 do_rpc_nodes $list lst_end_session
# NOTE(review): fragment of lst_cleanup — unload the lnet_selftest
# module if it is loaded; best-effort, errors are ignored.
225 lsmod | grep -q lnet_selftest && \
226 rmmod lnet_selftest > /dev/null 2>&1 || true
# NOTE(review): fragment of lst_cleanup_all.
230 local list=$(comma_list $(nodes_list))
232 # lst end_session needs to be executed only locally
233 # i.e. on node where lst new_session was called
234 lst_end_session --verbose
# Module unload, however, happens on every node.
235 do_rpc_nodes $list lst_cleanup
# NOTE(review): fragments of lst_setup (load the selftest module) and
# lst_setup_all (run the setup on every node via RPC).
239 load_module lnet_selftest
243 local list=$(comma_list $(nodes_list))
244 do_rpc_nodes $list lst_setup
250 # Passed a single argument, strips everything off following
251 # and includes the first period.
252 # client-20.lab.whamcloud.com becomes client-20
# NOTE(review): the enclosing function definition line is not visible.
254 echo $(sed 's/\..*//' <<< $1)
# NOTE(review): fragment of print_opts — print each named variable as
# name=value (via indirect expansion), then dump the machine file if any.
264 echo "${var}=${!var}"
266 [ -e $MACHINEFILE ] && cat $MACHINEFILE
# NOTE(review): fragment of the run_compilebench benchmark wrapper;
# the function's opening line and some body lines are missing here.
271 # compile dir kernel-0 ~1GB
272 # required space ~1GB * cbench_IDIRS
274 cbench_DIR=${cbench_DIR:-""}
275 cbench_IDIRS=${cbench_IDIRS:-2}
276 cbench_RUNS=${cbench_RUNS:-2}
278 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
# Skip (do not fail) when compilebench is not available.
280 [ x$cbench_DIR = x ] &&
281 { skip_env "compilebench not found" && return; }
283 [ -e $cbench_DIR/compilebench ] || \
284 { skip_env "No compilebench build" && return; }
# Shrink the number of initial dirs to fit the available space (df -P
# reports 1K blocks; each dir needs ~1GB).
286 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
287 if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
288 cbench_IDIRS=$((space / 1024 / 1024))
289 [[ $cbench_IDIRS -eq 0 ]] &&
290 skip_env "Need free space at least 1GB, have $space" &&
293 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
297 # t-f _base needs to be modified to set tdir properly
298 # for new "test_foo" functions names
299 # local testdir=$DIR/$tdir
300 local testdir=$DIR/d0.compilebench
305 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
306 -r $cbench_RUNS --makej"
315 [ $rc = 0 ] || error "compilebench failed: $rc"
# NOTE(review): fragment of the run_metabench wrapper (metadata benchmark
# run over MPI); opening line and some body lines are missing here.
321 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
322 mbench_NFILES=${mbench_NFILES:-30400}
324 mbench_THREADS=${mbench_THREADS:-4}
326 [ x$METABENCH = x ] &&
327 { skip_env "metabench not found" && return; }
330 # Need space estimation here.
332 print_opts METABENCH clients mbench_NFILES mbench_THREADS
334 local testdir=$DIR/d0.metabench
336 # mpi_run uses mpiuser
339 # -C Run the file creation tests.
340 # -S Run the file stat tests.
341 # -c nfile Number of files to be used in each test.
342 # -k Cleanup. Remove the test directories.
343 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
346 # find out if we need to use srun by checking $SRUN_PARTITION
347 if [ "$SRUN_PARTITION" ]; then
348 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
349 -n $((num_clients * mbench_THREADS)) \
350 -p $SRUN_PARTITION -- $cmd
# Otherwise launch the same command through the generic mpi_run wrapper.
352 mpi_run -np $((num_clients * $mbench_THREADS)) \
353 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
357 if [ $rc != 0 ] ; then
358 error "metabench failed! $rc"
# NOTE(review): fragment of the run_simul wrapper (simultaneous-ops MPI
# benchmark); opening line and some body lines are missing here.
365 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
367 simul_THREADS=${simul_THREADS:-2}
368 simul_REP=${simul_REP:-20}
370 if [ "$NFSCLIENT" ]; then
371 skip "skipped for NFSCLIENT mode"
376 { skip_env "simul not found" && return; }
379 # Need space estimation here.
381 print_opts SIMUL clients simul_REP simul_THREADS
383 local testdir=$DIR/d0.simul
385 # mpi_run uses mpiuser
388 # -n # : repeat each test # times
389 # -N # : repeat the entire set of tests # times
391 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
394 # find out if we need to use srun by checking $SRUN_PARTITION
395 if [ "$SRUN_PARTITION" ]; then
396 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
397 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
400 mpi_run -np $((num_clients * simul_THREADS)) \
401 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
405 if [ $rc != 0 ] ; then
406 error "simul failed! $rc"
# NOTE(review): fragment of the run_mdtest wrapper; opening line and some
# body lines are missing here. $1 selects layout: "ssf" (shared single
# file namespace, default) or "fpp" (file per process, adds -u).
413 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
415 mdtest_THREADS=${mdtest_THREADS:-2}
416 mdtest_nFiles=${mdtest_nFiles:-"100000"}
417 # We divide the total file count across all ranks (threads x clients).
418 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
419 mdtest_iteration=${mdtest_iteration:-1}
421 local type=${1:-"ssf"}
423 if [ "$NFSCLIENT" ]; then
424 skip "skipped for NFSCLIENT mode"
429 { skip_env "mdtest not found" && return; }
432 # Need space estimation here.
434 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
436 local testdir=$DIR/d0.mdtest
438 # mpi_run uses mpiuser
441 # -i # : repeat each test # times
443 # -n # : number of file/dir to create/stat/remove
444 # -u : each process create/stat/remove individually
446 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
447 [ $type = "fpp" ] && cmd="$cmd -u"
450 # find out if we need to use srun by checking $SRUN_PARTITION
451 if [ "$SRUN_PARTITION" ]; then
452 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
453 -n $((num_clients * mdtest_THREADS)) \
454 -p $SRUN_PARTITION -- $cmd
456 mpi_run -np $((num_clients * mdtest_THREADS)) \
457 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
461 if [ $rc != 0 ] ; then
462 error "mdtest failed! $rc"
# NOTE(review): fragment of the run_connectathon wrapper (NFS
# connectathon test suite); opening line and some body lines are missing.
469 cnt_DIR=${cnt_DIR:-""}
470 cnt_NRUN=${cnt_NRUN:-10}
472 print_opts cnt_DIR cnt_NRUN
475 { skip_env "connectathon dir not found" && return; }
477 [ -e $cnt_DIR/runtests ] || \
478 { skip_env "No connectathon runtests found" && return; }
480 local testdir=$DIR/d0.connectathon
487 # cthon options (must be in this order)
489 # -N numpasses - will be passed to the runtests script. This argument
490 # is optional. It specifies the number of times to run
493 # One of these test types
498 # -a all of the above
500 # -f a quick functionality test
504 # Include lock tests unless we're running on nfsv4
505 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
506 echo "$testdir: $fstype"
507 if [[ $fstype != "nfs4" ]]; then
# Run each selected connectathon test type in turn.
511 for test in $tests; do
512 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
518 [ $rc = 0 ] || error "connectathon failed: $rc"
# NOTE(review): fragment of the run_ior wrapper (IOR bandwidth benchmark
# over MPI); the function's opening line and some body lines are missing.
# $1 selects layout: "ssf" (default) or "fpp" (file per process, -F).
526 local type=${1:="ssf"}
528 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
530 ior_THREADS=${ior_THREADS:-2}
531 ior_iteration=${ior_iteration:-1}
532 ior_blockSize=${ior_blockSize:-6} # GB
533 ior_xferSize=${ior_xferSize:-2m}
534 ior_type=${ior_type:-POSIX}
535 ior_DURATION=${ior_DURATION:-30} # minutes
538 { skip_env "IOR not found" && return; }
# Shrink the per-task block size so the aggregate data set fits in at
# most half of the available space (df -P reports 1K blocks).
540 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
541 local total_threads=$(( num_clients * ior_THREADS ))
542 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
543 if [ $((space / 2)) -le \
544 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
545 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
546 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
547 [ $ior_blockSize = 0 ] && \
548 skip_env "Need free space more than $((2 * total_threads))GB: \
549 $((total_threads *1024 *1024*2)), have $space" && return
551 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
552 echo "free space=$space, Need: $reduced_size GB"
553 echo "(blockSize reduced to $ior_blockSize Gb)"
556 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
558 local testdir=$DIR/d0.ior.$type
560 # mpi_run uses mpiuser
# Stripe the test dir across all OSTs (-c -1); NFS clients must ask the
# server to do it.
562 if [ "$NFSCLIENT" ]; then
563 setstripe_nfsserver $testdir -c -1 ||
564 { error "setstripe on nfsserver failed" && return 1; }
566 $LFS setstripe $testdir -c -1 ||
567 { error "setstripe failed" && return 2; }
571 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
573 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
574 # -w writeFile -- write file"
575 # -r readFile -- read existing file"
576 # -T maxTimeDuration -- max time in minutes to run tests"
577 # -k keepFile -- keep testFile(s) on program exit
579 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
580 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
581 [ $type = "fpp" ] && cmd="$cmd -F"
584 # find out if we need to use srun by checking $SRUN_PARTITION
585 if [ "$SRUN_PARTITION" ]; then
586 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
587 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
590 mpi_run -np $((num_clients * $ior_THREADS)) \
591 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
595 if [ $rc != 0 ] ; then
596 error "ior failed! $rc"
# NOTE(review): fragment of the run_mib wrapper (MIB parallel I/O
# benchmark); opening line and some body lines are missing here.
603 MIB=${MIB:=$(which mib 2> /dev/null || true)}
605 mib_THREADS=${mib_THREADS:-2}
606 mib_xferSize=${mib_xferSize:-1m}
607 mib_xferLimit=${mib_xferLimit:-5000}
608 mib_timeLimit=${mib_timeLimit:-300}
610 if [ "$NFSCLIENT" ]; then
611 skip "skipped for NFSCLIENT mode"
616 { skip_env "MIB not found" && return; }
618 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
621 local testdir=$DIR/d0.mib
623 # mpi_run uses mpiuser
# Stripe the test dir across all OSTs before running.
625 $LFS setstripe $testdir -c -1 ||
626 { error "setstripe failed" && return 2; }
628 # -I Show intermediate values in output
629 # -H Show headers in output
630 # -L Do not issue new system calls after this many seconds
631 # -s Use system calls of this size
633 # -l Issue no more than this many system calls
634 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
635 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
638 # find out if we need to use srun by checking $SRUN_PARTITION
639 if [ "$SRUN_PARTITION" ]; then
640 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
641 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
644 mpi_run -np $((num_clients * mib_THREADS)) \
645 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
649 if [ $rc != 0 ] ; then
650 error "mib failed! $rc"
# NOTE(review): fragment of the run_cascading_rw wrapper; opening line
# and some body lines are missing here.
657 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
659 casc_THREADS=${casc_THREADS:-2}
660 casc_REP=${casc_REP:-300}
662 if [ "$NFSCLIENT" ]; then
663 skip "skipped for NFSCLIENT mode"
668 { skip_env "cascading_rw not found" && return; }
671 # Need space estimation here.
673 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
675 local testdir=$DIR/d0.cascading_rw
677 # mpi_run uses mpiuser
681 # -n: repeat test # times
683 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
686 mpi_run -np $((num_clients * $casc_THREADS)) ${MACHINEFILE_OPTION} \
690 if [ $rc != 0 ] ; then
691 error "cascading_rw failed! $rc"
# Run the write_append_truncate MPI test; skipped on NFS clients and
# when the binary is not on PATH.
# NOTE(review): interior lines are missing from this extract.
696 run_write_append_truncate() {
699 write_THREADS=${write_THREADS:-8}
700 write_REP=${write_REP:-10000}
702 if [ "$NFSCLIENT" ]; then
703 skip "skipped for NFSCLIENT mode"
707 # location is lustre/tests dir
708 if ! which write_append_truncate > /dev/null 2>&1 ; then
709 skip_env "write_append_truncate not found"
714 # Need space estimation here.
716 local testdir=$DIR/d0.write_append_truncate
717 local file=$testdir/f0.wat
719 print_opts clients write_REP write_THREADS MACHINEFILE
722 # mpi_run uses mpiuser
725 local cmd="write_append_truncate -n $write_REP $file"
728 mpi_run -np $((num_clients * $write_THREADS)) ${MACHINEFILE_OPTION} \
732 if [ $rc != 0 ] ; then
733 error "write_append_truncate failed! $rc"
# Run the write_disjoint MPI test (disjoint regions of a shared file);
# skipped on NFS clients and when the binary is not found.
# NOTE(review): interior lines are missing from this extract.
739 run_write_disjoint() {
741 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
742 2> /dev/null || true)}
744 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
745 wdisjoint_REP=${wdisjoint_REP:-10000}
747 if [ "$NFSCLIENT" ]; then
748 skip "skipped for NFSCLIENT mode"
752 [ x$WRITE_DISJOINT = x ] &&
753 { skip_env "write_disjoint not found" && return; }
756 # Need space estimation here.
758 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
760 local testdir=$DIR/d0.write_disjoint
762 # mpi_run uses mpiuser
765 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
768 mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
769 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
772 if [ $rc != 0 ] ; then
773 error "write_disjoint failed! $rc"
# Run the parallel_grouplock MPI subtests (group-lock semantics); each
# of the 12 subtests is run and failures are accumulated in $status.
# NOTE(review): interior lines are missing from this extract.
778 run_parallel_grouplock() {
780 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
781 2> /dev/null || true)}
782 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
784 if [ "$NFSCLIENT" ]; then
785 skip "skipped for NFSCLIENT mode"
789 [ x$PARALLEL_GROUPLOCK = x ] &&
790 { skip "PARALLEL_GROUPLOCK not found" && return; }
792 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
794 local testdir=$DIR/d0.parallel_grouplock
796 # mpi_run uses mpiuser
# Disable max_rw_chunk on all clients so group locks are exercised.
799 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
800 error "set_param max_rw_chunk=0 failed "
805 for i in $(seq 12); do
807 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
810 mpi_run -np $parallel_grouplock_MINTASKS ${MACHINEFILE_OPTION} \
# Report but do not abort on a subtest failure; keep accumulating.
813 if [ $rc != 0 ] ; then
814 error_noexit "parallel_grouplock subtests $subtest " \
817 echo "parallel_grouplock subtests $subtest PASS"
819 let status=$((status + rc))
820 # clear debug to collect one log per one test
821 do_nodes $(comma_list $(nodes_list)) lctl clear
823 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
# Unmount every extra statahead mount point (${mntpt_root}0..N) on the
# given clients. Arguments (per caller at run_statahead): clients,
# mntpt_root, num_mntpts.
# NOTE(review): interior lines are missing from this extract.
827 cleanup_statahead () {
834 for i in $(seq 0 $num_mntpts);do
835 zconf_umount_clients $clients ${mntpt_root}$i ||
836 error_exit "Failed to umount lustre on ${mntpt_root}$i"
# NOTE(review): fragment of the run_statahead test — create many files
# with mdsrate, mount extra client instances, and ls them to exercise
# statahead; opening line and some body lines are missing here.
842 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
843 statahead_NUMFILES=${statahead_NUMFILES:-500000}
845 if [[ -n $NFSCLIENT ]]; then
846 skip "Statahead testing is not supported on NFS clients."
851 { skip_env "mdsrate not found" && return; }
853 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
857 # do not use default "d[0-9]*" dir name
858 # to avoid rm of $statahead_NUMFILES (500k) files in t-f cleanup
860 local testdir=$DIR/$dir
862 # cleanup only if dir exists
863 # cleanup only $statahead_NUMFILES number of files
864 # ignore the other files created by someone else
866 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
867 $statahead_NUMFILES $testdir 'f%%d' --ignore
870 # mpi_run uses mpiuser
873 local num_files=$statahead_NUMFILES
# Scale the file count down if the filesystem lacks enough free inodes.
875 local IFree=$(inodes_available)
876 if [ $IFree -lt $num_files ]; then
882 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
883 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
884 local cmd="$cmd1 $cmd2"
887 mpi_run -np $((num_clients * 32)) ${MACHINEFILE_OPTION} ${MACHINEFILE} \
891 if [ $rc != 0 ] ; then
892 error "mdsrate failed to create $rc"
896 local num_mntpts=$statahead_NUMMNTPTS
897 local mntpt_root=$TMP/mntpt/lustre
898 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
900 echo "Mounting $num_mntpts lustre clients starts on $clients"
# Ensure the extra mounts are torn down on any exit or error.
901 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
902 for i in $(seq 0 $num_mntpts); do
903 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
904 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
# Drop cached MDC locks so the ls actually triggers statahead RPCs.
907 do_rpc_nodes $clients cancel_lru_locks mdc
909 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
911 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
912 $num_files $testdir 'f%%d' --ignore
914 # use rm instead of rmdir because
915 # testdir could contain the files created by someone else,
916 # or by a previous run where num_files was larger than it is now
918 cleanup_statahead $clients $mntpt_root $num_mntpts