2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple function used by run_*.sh scripts
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
124 # setmodopts - set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
133 if [ "$1" = -a ]; then
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
149 if [ -z "$_newvalue" ]; then
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - echo all arguments to stderr instead of stdout,
# so diagnostic messages do not pollute captured command output.
162 echoerr () { echo "$@" 1>&2 ; }
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
187 eval $command 2>&1 | tee $mpilog || true
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
198 for i in ${1//,/ }; do
199 list="$list $i@$NETTYPE"
204 # FIXME: all setup/cleanup can be done without rpc.sh
207 [ x$1 = x--verbose ] && verbose=true
209 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
210 [ "$LST_SESSION" == "" ] && return
219 lst_session_cleanup_all () {
220 local list=$(comma_list $(nodes_list))
221 do_rpc_nodes $list lst_end_session
225 lsmod | grep -q lnet_selftest && \
226 rmmod lnet_selftest > /dev/null 2>&1 || true
230 local list=$(comma_list $(nodes_list))
232 # lst end_session needs to be executed only locally
233 # i.e. on node where lst new_session was called
234 lst_end_session --verbose
235 do_rpc_nodes $list lst_cleanup
239 load_module lnet_selftest
243 local list=$(comma_list $(nodes_list))
244 do_rpc_nodes $list lst_setup
250 # Passed a single argument, strips everything off following
251 # and includes the first period.
252 # client-20.lab.whamcloud.com becomes client-20
254 echo $(sed 's/\..*//' <<< $1)
260 # Find remote nodename, stripped of any domain, etc.
261 # 'hostname -s' is easy, but not implemented on all systems
263 local rname=$(do_node $1 "uname -n" || echo -1)
264 if [[ "$rname" = "-1" ]]; then
267 echo $(short_hostname $rname)
277 echo "${var}=${!var}"
279 [ -e $MACHINEFILE ] && cat $MACHINEFILE
284 # compile dir kernel-0 ~1GB
285 # required space ~1GB * cbench_IDIRS
287 cbench_DIR=${cbench_DIR:-""}
288 cbench_IDIRS=${cbench_IDIRS:-2}
289 cbench_RUNS=${cbench_RUNS:-2}
291 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
293 [ x$cbench_DIR = x ] &&
294 { skip_env "compilebench not found" && return; }
296 [ -e $cbench_DIR/compilebench ] || \
297 { skip_env "No compilebench build" && return; }
299 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
300 if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
301 cbench_IDIRS=$((space / 1024 / 1024))
302 [[ $cbench_IDIRS -eq 0 ]] &&
303 skip_env "Need free space at least 1GB, have $space" &&
306 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
310 # t-f _base needs to be modified to set tdir properly
311 # for new "test_foo" function names
312 # local testdir=$DIR/$tdir
313 local testdir=$DIR/d0.compilebench
318 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
319 -r $cbench_RUNS --makej"
328 [ $rc = 0 ] || error "compilebench failed: $rc"
334 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
335 mbench_NFILES=${mbench_NFILES:-30400}
337 mbench_THREADS=${mbench_THREADS:-4}
339 [ x$METABENCH = x ] &&
340 { skip_env "metabench not found" && return; }
343 # Need space estimation here.
345 print_opts METABENCH clients mbench_NFILES mbench_THREADS
347 local testdir=$DIR/d0.metabench
349 # mpi_run uses mpiuser
352 # -C Run the file creation tests.
353 # -S Run the file stat tests.
354 # -c nfile Number of files to be used in each test.
355 # -k Cleanup. Remove the test directories.
356 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
359 # find out if we need to use srun by checking $SRUN_PARTITION
360 if [ "$SRUN_PARTITION" ]; then
361 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
362 -n $((num_clients * mbench_THREADS)) \
363 -p $SRUN_PARTITION -- $cmd
365 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
366 -np $((num_clients * $mbench_THREADS)) $cmd
370 if [ $rc != 0 ] ; then
371 error "metabench failed! $rc"
378 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
380 simul_THREADS=${simul_THREADS:-2}
381 simul_REP=${simul_REP:-20}
383 if [ "$NFSCLIENT" ]; then
384 skip "skipped for NFSCLIENT mode"
389 { skip_env "simul not found" && return; }
392 # Need space estimation here.
394 print_opts SIMUL clients simul_REP simul_THREADS
396 local testdir=$DIR/d0.simul
398 # mpi_run uses mpiuser
401 # -n # : repeat each test # times
402 # -N # : repeat the entire set of tests # times
404 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
407 # find out if we need to use srun by checking $SRUN_PARTITION
408 if [ "$SRUN_PARTITION" ]; then
409 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
410 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
413 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
414 -np $((num_clients * simul_THREADS)) $cmd
418 if [ $rc != 0 ] ; then
419 error "simul failed! $rc"
426 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
428 mdtest_THREADS=${mdtest_THREADS:-2}
429 mdtest_nFiles=${mdtest_nFiles:-"100000"}
430 # We divide the files by the number of cores
431 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
432 mdtest_iteration=${mdtest_iteration:-1}
434 local type=${1:-"ssf"}
436 if [ "$NFSCLIENT" ]; then
437 skip "skipped for NFSCLIENT mode"
442 { skip_env "mdtest not found" && return; }
445 # Need space estimation here.
447 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
449 local testdir=$DIR/d0.mdtest
451 # mpi_run uses mpiuser
454 # -i # : repeat each test # times
456 # -n # : number of file/dir to create/stat/remove
457 # -u : each process create/stat/remove individually
459 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
460 [ $type = "fpp" ] && cmd="$cmd -u"
463 # find out if we need to use srun by checking $SRUN_PARTITION
464 if [ "$SRUN_PARTITION" ]; then
465 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
466 -n $((num_clients * mdtest_THREADS)) \
467 -p $SRUN_PARTITION -- $cmd
469 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
470 -np $((num_clients * mdtest_THREADS)) $cmd
474 if [ $rc != 0 ] ; then
475 error "mdtest failed! $rc"
482 cnt_DIR=${cnt_DIR:-""}
483 cnt_NRUN=${cnt_NRUN:-10}
485 print_opts cnt_DIR cnt_NRUN
488 { skip_env "connectathon dir not found" && return; }
490 [ -e $cnt_DIR/runtests ] || \
491 { skip_env "No connectathon runtests found" && return; }
493 local testdir=$DIR/d0.connectathon
500 # cthon options (must be in this order)
502 # -N numpasses - will be passed to the runtests script. This argument
503 # is optional. It specifies the number of times to run
506 # One of these test types
511 # -a all of the above
513 # -f a quick functionality test
517 # Include lock tests unless we're running on nfsv4
518 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
519 echo "$testdir: $fstype"
520 if [[ $fstype != "nfs4" ]]; then
524 for test in $tests; do
525 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
531 [ $rc = 0 ] || error "connectathon failed: $rc"
539 local type=${1:="ssf"}
541 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
543 ior_THREADS=${ior_THREADS:-2}
544 ior_iteration=${ior_iteration:-1}
545 ior_blockSize=${ior_blockSize:-6} # GB
546 ior_xferSize=${ior_xferSize:-2m}
547 ior_type=${ior_type:-POSIX}
548 ior_DURATION=${ior_DURATION:-30} # minutes
551 { skip_env "IOR not found" && return; }
553 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
554 local total_threads=$(( num_clients * ior_THREADS ))
555 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
556 if [ $((space / 2)) -le \
557 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
558 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
559 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
560 [ $ior_blockSize = 0 ] && \
561 skip_env "Need free space more than $((2 * total_threads))GB: \
562 $((total_threads *1024 *1024*2)), have $space" && return
564 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
565 echo "free space=$space, Need: $reduced_size GB"
566 echo "(blockSize reduced to $ior_blockSize Gb)"
569 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
571 local testdir=$DIR/d0.ior.$type
573 # mpi_run uses mpiuser
575 if [ "$NFSCLIENT" ]; then
576 setstripe_nfsserver $testdir -c -1 ||
577 { error "setstripe on nfsserver failed" && return 1; }
579 $LFS setstripe $testdir -c -1 ||
580 { error "setstripe failed" && return 2; }
584 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
586 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
587 # -w writeFile -- write file"
588 # -r readFile -- read existing file"
589 # -W checkWrite -- check read after write"
590 # -C reorderTasks -- changes task ordering to n+1 ordering for readback
591 # -T maxTimeDuration -- max time in minutes to run tests"
592 # -k keepFile -- keep testFile(s) on program exit
594 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
595 -t $ior_xferSize -v -C -w -r -W -i $ior_iteration -T $ior_DURATION -k"
596 [ $type = "fpp" ] && cmd="$cmd -F"
599 # find out if we need to use srun by checking $SRUN_PARTITION
600 if [ "$SRUN_PARTITION" ]; then
601 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
602 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
605 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
606 -np $((num_clients * $ior_THREADS)) $cmd
610 if [ $rc != 0 ] ; then
611 error "ior failed! $rc"
618 MIB=${MIB:=$(which mib 2> /dev/null || true)}
620 mib_THREADS=${mib_THREADS:-2}
621 mib_xferSize=${mib_xferSize:-1m}
622 mib_xferLimit=${mib_xferLimit:-5000}
623 mib_timeLimit=${mib_timeLimit:-300}
625 if [ "$NFSCLIENT" ]; then
626 skip "skipped for NFSCLIENT mode"
631 { skip_env "MIB not found" && return; }
633 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
636 local testdir=$DIR/d0.mib
638 # mpi_run uses mpiuser
640 $LFS setstripe $testdir -c -1 ||
641 { error "setstripe failed" && return 2; }
643 # -I Show intermediate values in output
644 # -H Show headers in output
645 # -L Do not issue new system calls after this many seconds
646 # -s Use system calls of this size
648 # -l Issue no more than this many system calls
649 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
650 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
653 # find out if we need to use srun by checking $SRUN_PARTITION
654 if [ "$SRUN_PARTITION" ]; then
655 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
656 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
659 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
660 -np $((num_clients * mib_THREADS)) $cmd
664 if [ $rc != 0 ] ; then
665 error "mib failed! $rc"
672 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
674 casc_THREADS=${casc_THREADS:-2}
675 casc_REP=${casc_REP:-300}
677 if [ "$NFSCLIENT" ]; then
678 skip "skipped for NFSCLIENT mode"
683 { skip_env "cascading_rw not found" && return; }
686 # Need space estimation here.
688 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
690 local testdir=$DIR/d0.cascading_rw
692 # mpi_run uses mpiuser
696 # -n: repeat test # times
698 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
701 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
702 -np $((num_clients * $casc_THREADS)) $cmd
705 if [ $rc != 0 ] ; then
706 error "cascading_rw failed! $rc"
711 run_write_append_truncate() {
714 write_THREADS=${write_THREADS:-8}
715 write_REP=${write_REP:-10000}
717 if [ "$NFSCLIENT" ]; then
718 skip "skipped for NFSCLIENT mode"
722 # location is lustre/tests dir
723 if ! which write_append_truncate > /dev/null 2>&1 ; then
724 skip_env "write_append_truncate not found"
729 # Need space estimation here.
731 local testdir=$DIR/d0.write_append_truncate
732 local file=$testdir/f0.wat
734 print_opts clients write_REP write_THREADS MACHINEFILE
737 # mpi_run uses mpiuser
740 local cmd="write_append_truncate -n $write_REP $file"
743 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
744 -np $((num_clients * $write_THREADS)) $cmd
747 if [ $rc != 0 ] ; then
748 error "write_append_truncate failed! $rc"
754 run_write_disjoint() {
756 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
757 2> /dev/null || true)}
759 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
760 wdisjoint_REP=${wdisjoint_REP:-10000}
762 if [ "$NFSCLIENT" ]; then
763 skip "skipped for NFSCLIENT mode"
767 [ x$WRITE_DISJOINT = x ] &&
768 { skip_env "write_disjoint not found" && return; }
771 # Need space estimation here.
773 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
775 local testdir=$DIR/d0.write_disjoint
777 # mpi_run uses mpiuser
780 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
783 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
784 -np $((num_clients * $wdisjoint_THREADS)) $cmd
787 if [ $rc != 0 ] ; then
788 error "write_disjoint failed! $rc"
793 run_parallel_grouplock() {
795 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
796 2> /dev/null || true)}
797 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
799 if [ "$NFSCLIENT" ]; then
800 skip "skipped for NFSCLIENT mode"
804 [ x$PARALLEL_GROUPLOCK = x ] &&
805 { skip "PARALLEL_GROUPLOCK not found" && return; }
807 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
809 local testdir=$DIR/d0.parallel_grouplock
811 # mpi_run uses mpiuser
814 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
815 error "set_param max_rw_chunk=0 failed "
820 for i in $(seq 12); do
822 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
825 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
826 -np $parallel_grouplock_MINTASKS $cmd
828 if [ $rc != 0 ] ; then
829 error_noexit "parallel_grouplock subtests $subtest " \
832 echo "parallel_grouplock subtests $subtest PASS"
834 let status=$((status + rc))
835 # clear debug to collect one log per one test
836 do_nodes $(comma_list $(nodes_list)) lctl clear
838 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
842 cleanup_statahead () {
849 for i in $(seq 0 $num_mntpts);do
850 zconf_umount_clients $clients ${mntpt_root}$i ||
851 error_exit "Failed to umount lustre on ${mntpt_root}$i"
857 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
858 statahead_NUMFILES=${statahead_NUMFILES:-500000}
860 if [[ -n $NFSCLIENT ]]; then
861 skip "Statahead testing is not supported on NFS clients."
866 { skip_env "mdsrate not found" && return; }
868 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
872 # do not use default "d[0-9]*" dir name
873 # to avoid rm'ing $statahead_NUMFILES (500k) files in t-f cleanup
875 local testdir=$DIR/$dir
877 # cleanup only if dir exists
878 # cleanup only $statahead_NUMFILES number of files
879 # ignore the other files created by someone else
881 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
882 $statahead_NUMFILES $testdir 'f%%d' --ignore
885 # mpi_run uses mpiuser
888 local num_files=$statahead_NUMFILES
890 local IFree=$(inodes_available)
891 if [ $IFree -lt $num_files ]; then
897 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
898 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
899 local cmd="$cmd1 $cmd2"
902 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
903 -np $((num_clients * 32)) $cmd
906 if [ $rc != 0 ] ; then
907 error "mdsrate failed to create $rc"
911 local num_mntpts=$statahead_NUMMNTPTS
912 local mntpt_root=$TMP/mntpt/lustre
913 local mntopts=$MNTOPTSTATAHEAD
915 echo "Mounting $num_mntpts lustre clients starts on $clients"
916 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
917 for i in $(seq 0 $num_mntpts); do
918 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
919 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
922 do_rpc_nodes $clients cancel_lru_locks mdc
924 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
926 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
927 $num_files $testdir 'f%%d' --ignore
929 # use rm instead of rmdir because
930 # testdir could contain files created by someone else,
931 # or by a previous run where num_files was larger than it is now
933 cleanup_statahead $clients $mntpt_root $num_mntpts