2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple function used by run_*.sh scripts
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
# setmodopts - set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
133 if [ "$1" = -a ]; then
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
149 if [ -z "$_newvalue" ]; then
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - print all arguments to standard error, mirroring echo's behavior.
echoerr () {
	echo "$@" >&2
}
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
187 eval $command 2>&1 | tee $mpilog || true
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
199 for i in ${1//,/ }; do
200 if [ "$list" = "" ]; then
203 list="$list$escape $i@$NETTYPE"
209 # FIXME: all setup/cleanup can be done without rpc.sh
212 [ x$1 = x--verbose ] && verbose=true
214 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
215 [ "$LST_SESSION" == "" ] && return
224 lst_session_cleanup_all () {
225 local list=$(comma_list $(nodes_list))
226 do_rpc_nodes $list lst_end_session
230 lsmod | grep -q lnet_selftest && \
231 rmmod lnet_selftest > /dev/null 2>&1 || true
235 local list=$(comma_list $(nodes_list))
237 # lst end_session needs to be executed only locally
238 # i.e. on node where lst new_session was called
239 lst_end_session --verbose
240 do_rpc_nodes $list lst_cleanup
244 load_module lnet_selftest
248 local list=$(comma_list $(nodes_list))
249 do_rpc_nodes $list lst_setup
# Passed a single argument, strips off the first period
# and everything following it.
257 # client-20.lab.whamcloud.com becomes client-20
259 echo $(sed 's/\..*//' <<< $1)
265 # Find remote nodename, stripped of any domain, etc.
266 # 'hostname -s' is easy, but not implemented on all systems
268 local rname=$(do_node $1 "uname -n" || echo -1)
269 if [[ "$rname" = "-1" ]]; then
272 echo $(short_hostname $rname)
282 echo "${var}=${!var}"
284 [ -e $MACHINEFILE ] && cat $MACHINEFILE
289 # compile dir kernel-0 ~1GB
290 # required space ~1GB * cbench_IDIRS
294 cbench_DIR=${cbench_DIR:-""}
295 cbench_IDIRS=${cbench_IDIRS:-2}
296 cbench_RUNS=${cbench_RUNS:-2}
298 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
300 [ x$cbench_DIR = x ] &&
301 { skip_env "compilebench not found" && return; }
303 [ -e $cbench_DIR/compilebench ] || \
304 { skip_env "No compilebench build" && return; }
306 local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
307 if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
308 cbench_IDIRS=$((space / 1024 / 1024))
309 [[ $cbench_IDIRS -eq 0 ]] &&
310 skip_env "Need free space at least 1GB, have $space" &&
313 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
# t-f _base needs to be modified to properly set tdir
318 # for new "test_foo" functions names
319 # local testdir=$DIR/$tdir
320 local testdir=$dir/d0.compilebench
325 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
326 -r $cbench_RUNS --makej"
335 [ $rc = 0 ] || error "compilebench failed: $rc"
341 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
342 mbench_NFILES=${mbench_NFILES:-30400}
344 mbench_THREADS=${mbench_THREADS:-4}
345 mbench_OPTIONS=${mbench_OPTIONS:-}
347 [ x$METABENCH = x ] &&
348 { skip_env "metabench not found" && return; }
351 # Need space estimation here.
353 print_opts METABENCH clients mbench_NFILES mbench_THREADS
355 local testdir=$DIR/d0.metabench
357 # mpi_run uses mpiuser
360 # -C Run the file creation tests.
361 # -S Run the file stat tests.
362 # -c nfile Number of files to be used in each test.
363 # -k Cleanup. Remove the test directories.
364 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k $mbench_OPTIONS"
367 # find out if we need to use srun by checking $SRUN_PARTITION
368 if [ "$SRUN_PARTITION" ]; then
369 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
370 -n $((num_clients * mbench_THREADS)) \
371 -p $SRUN_PARTITION -- $cmd
373 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
374 -np $((num_clients * $mbench_THREADS)) $cmd
378 if [ $rc != 0 ] ; then
379 error "metabench failed! $rc"
386 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
388 simul_THREADS=${simul_THREADS:-2}
389 simul_REP=${simul_REP:-20}
391 if [ "$NFSCLIENT" ]; then
392 skip "skipped for NFSCLIENT mode"
397 { skip_env "simul not found" && return; }
400 # Need space estimation here.
402 print_opts SIMUL clients simul_REP simul_THREADS
404 local testdir=$DIR/d0.simul
406 # mpi_run uses mpiuser
409 # -n # : repeat each test # times
410 # -N # : repeat the entire set of tests # times
412 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
415 # find out if we need to use srun by checking $SRUN_PARTITION
416 if [ "$SRUN_PARTITION" ]; then
417 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
418 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
421 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
422 -np $((num_clients * simul_THREADS)) $cmd
426 if [ $rc != 0 ] ; then
427 error "simul failed! $rc"
434 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
436 mdtest_THREADS=${mdtest_THREADS:-2}
437 mdtest_nFiles=${mdtest_nFiles:-"100000"}
# We divide the files by the number of cores
439 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
440 mdtest_iteration=${mdtest_iteration:-1}
441 local mdtest_custom_params=${mdtest_custom_params:-""}
443 local type=${1:-"ssf"}
445 if [ "$NFSCLIENT" ]; then
446 skip "skipped for NFSCLIENT mode"
451 { skip_env "mdtest not found" && return; }
454 # Need space estimation here.
456 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
458 local testdir=$DIR/d0.mdtest
460 # mpi_run uses mpiuser
463 # -i # : repeat each test # times
465 # -n # : number of file/dir to create/stat/remove
466 # -u : each process create/stat/remove individually
468 local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
469 -n $mdtest_nFiles $mdtest_custom_params"
471 [ $type = "fpp" ] && cmd="$cmd -u"
474 # find out if we need to use srun by checking $SRUN_PARTITION
475 if [ "$SRUN_PARTITION" ]; then
476 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
477 -n $((num_clients * mdtest_THREADS)) \
478 -p $SRUN_PARTITION -- $cmd
480 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
481 -np $((num_clients * mdtest_THREADS)) $cmd
485 if [ $rc != 0 ] ; then
486 error "mdtest failed! $rc"
493 cnt_DIR=${cnt_DIR:-""}
494 cnt_NRUN=${cnt_NRUN:-10}
496 print_opts cnt_DIR cnt_NRUN
499 { skip_env "connectathon dir not found" && return; }
501 [ -e $cnt_DIR/runtests ] || \
502 { skip_env "No connectathon runtests found" && return; }
504 local testdir=$DIR/d0.connectathon
511 # cthon options (must be in this order)
513 # -N numpasses - will be passed to the runtests script. This argument
514 # is optional. It specifies the number of times to run
517 # One of these test types
522 # -a all of the above
524 # -f a quick functionality test
528 # Include lock tests unless we're running on nfsv4
529 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
530 echo "$testdir: $fstype"
531 if [[ $fstype != "nfs4" ]]; then
535 for test in $tests; do
536 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
542 [ $rc = 0 ] || error "connectathon failed: $rc"
550 local type=${1:="ssf"}
552 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
554 ior_THREADS=${ior_THREADS:-2}
555 ior_iteration=${ior_iteration:-1}
556 ior_blockSize=${ior_blockSize:-6}
557 ior_blockUnit=${ior_blockUnit:-M} # K, M, G
558 ior_xferSize=${ior_xferSize:-1M}
559 ior_type=${ior_type:-POSIX}
560 ior_DURATION=${ior_DURATION:-30} # minutes
562 case ${ior_blockUnit} in
564 multiplier=$((1024 * 1024 * 1024))
567 multiplier=$((1024 * 1024))
572 *) error "Incorrect block unit should be one of [KMG]"
577 { skip_env "IOR not found" && return; }
579 # calculate the space in bytes
580 local space=$(df -B 1 -P $DIR | tail -n 1 | awk '{ print $4 }')
581 local total_threads=$((num_clients * ior_THREADS))
582 echo "+ $ior_blockSize * $multiplier * $total_threads "
583 if [ $((space / 2)) -le \
584 $((ior_blockSize * multiplier * total_threads)) ]; then
585 ior_blockSize=$((space / 2 / multiplier / total_threads))
586 [ $ior_blockSize -eq 0 ] && \
587 skip_env "Need free space more than $((2 * total_threads)) \
588 ${ior_blockUnit}: have $((space / multiplier))" &&
591 echo "(reduced blockSize to $ior_blockSize \
592 ${ior_blockUnit} bytes)"
595 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
597 local testdir=$DIR/d0.ior.$type
599 # mpi_run uses mpiuser
601 if [ "$NFSCLIENT" ]; then
602 setstripe_nfsserver $testdir -c -1 ||
603 { error "setstripe on nfsserver failed" && return 1; }
605 $LFS setstripe $testdir -c -1 ||
606 { error "setstripe failed" && return 2; }
610 # contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
612 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
613 # -w writeFile -- write file"
614 # -r readFile -- read existing file"
615 # -W checkWrite -- check read after write"
616 # -C reorderTasks -- changes task ordering to n+1 ordering for readback
617 # -T maxTimeDuration -- max time in minutes to run tests"
618 # -k keepFile -- keep testFile(s) on program exit
620 local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
621 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
622 -i $ior_iteration -T $ior_DURATION -k"
624 [ $type = "fpp" ] && cmd="$cmd -F"
627 # find out if we need to use srun by checking $SRUN_PARTITION
628 if [ "$SRUN_PARTITION" ]; then
629 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
630 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
633 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
634 -np $((num_clients * $ior_THREADS)) $cmd
638 if [ $rc != 0 ] ; then
639 error "ior failed! $rc"
646 MIB=${MIB:=$(which mib 2> /dev/null || true)}
648 mib_THREADS=${mib_THREADS:-2}
649 mib_xferSize=${mib_xferSize:-1m}
650 mib_xferLimit=${mib_xferLimit:-5000}
651 mib_timeLimit=${mib_timeLimit:-300}
653 if [ "$NFSCLIENT" ]; then
654 skip "skipped for NFSCLIENT mode"
659 { skip_env "MIB not found" && return; }
661 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
664 local testdir=$DIR/d0.mib
666 # mpi_run uses mpiuser
668 $LFS setstripe $testdir -c -1 ||
669 { error "setstripe failed" && return 2; }
671 # -I Show intermediate values in output
672 # -H Show headers in output
673 # -L Do not issue new system calls after this many seconds
674 # -s Use system calls of this size
676 # -l Issue no more than this many system calls
677 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
678 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
681 # find out if we need to use srun by checking $SRUN_PARTITION
682 if [ "$SRUN_PARTITION" ]; then
683 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
684 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
687 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
688 -np $((num_clients * mib_THREADS)) $cmd
692 if [ $rc != 0 ] ; then
693 error "mib failed! $rc"
700 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
702 casc_THREADS=${casc_THREADS:-2}
703 casc_REP=${casc_REP:-300}
705 if [ "$NFSCLIENT" ]; then
706 skip "skipped for NFSCLIENT mode"
711 { skip_env "cascading_rw not found" && return; }
714 # Need space estimation here.
716 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
718 local testdir=$DIR/d0.cascading_rw
720 # mpi_run uses mpiuser
724 # -n: repeat test # times
726 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
729 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
730 -np $((num_clients * $casc_THREADS)) $cmd
733 if [ $rc != 0 ] ; then
734 error "cascading_rw failed! $rc"
739 run_write_append_truncate() {
742 write_THREADS=${write_THREADS:-8}
743 write_REP=${write_REP:-10000}
745 if [ "$NFSCLIENT" ]; then
746 skip "skipped for NFSCLIENT mode"
750 # location is lustre/tests dir
751 if ! which write_append_truncate > /dev/null 2>&1 ; then
752 skip_env "write_append_truncate not found"
757 # Need space estimation here.
759 local testdir=$DIR/d0.write_append_truncate
760 local file=$testdir/f0.wat
762 print_opts clients write_REP write_THREADS MACHINEFILE
765 # mpi_run uses mpiuser
768 local cmd="write_append_truncate -n $write_REP $file"
771 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
772 -np $((num_clients * $write_THREADS)) $cmd
775 if [ $rc != 0 ] ; then
776 error "write_append_truncate failed! $rc"
782 run_write_disjoint() {
784 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
785 2> /dev/null || true)}
787 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
788 wdisjoint_REP=${wdisjoint_REP:-10000}
790 if [ "$NFSCLIENT" ]; then
791 skip "skipped for NFSCLIENT mode"
795 [ x$WRITE_DISJOINT = x ] &&
796 { skip_env "write_disjoint not found" && return; }
799 # Need space estimation here.
801 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
803 local testdir=$DIR/d0.write_disjoint
805 # mpi_run uses mpiuser
808 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
811 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
812 -np $((num_clients * $wdisjoint_THREADS)) $cmd
815 if [ $rc != 0 ] ; then
816 error "write_disjoint failed! $rc"
821 run_parallel_grouplock() {
823 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
824 2> /dev/null || true)}
825 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
827 if [ "$NFSCLIENT" ]; then
828 skip "skipped for NFSCLIENT mode"
832 [ x$PARALLEL_GROUPLOCK = x ] &&
833 { skip "PARALLEL_GROUPLOCK not found" && return; }
835 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
837 local testdir=$DIR/d0.parallel_grouplock
839 # mpi_run uses mpiuser
845 for i in $(seq 12); do
847 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
850 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
851 -np $parallel_grouplock_MINTASKS $cmd
853 if [ $rc != 0 ] ; then
854 error_noexit "parallel_grouplock subtests $subtest " \
857 echo "parallel_grouplock subtests $subtest PASS"
859 let status=$((status + rc))
860 # clear debug to collect one log per one test
861 do_nodes $(comma_list $(nodes_list)) lctl clear
863 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
867 cleanup_statahead () {
874 for i in $(seq 0 $num_mntpts);do
875 zconf_umount_clients $clients ${mntpt_root}$i ||
876 error_exit "Failed to umount lustre on ${mntpt_root}$i"
882 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
883 statahead_NUMFILES=${statahead_NUMFILES:-500000}
885 if [[ -n $NFSCLIENT ]]; then
886 skip "Statahead testing is not supported on NFS clients."
891 { skip_env "mdsrate not found" && return; }
893 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
897 # do not use default "d[0-9]*" dir name
# to avoid rm'ing $statahead_NUMFILES (500k) files in t-f cleanup
900 local testdir=$DIR/$dir
902 # cleanup only if dir exists
903 # cleanup only $statahead_NUMFILES number of files
904 # ignore the other files created by someone else
906 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
907 $statahead_NUMFILES $testdir 'f%%d' --ignore
910 # mpi_run uses mpiuser
913 local num_files=$statahead_NUMFILES
915 local IFree=$(inodes_available)
916 if [ $IFree -lt $num_files ]; then
922 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
923 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
924 local cmd="$cmd1 $cmd2"
927 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
928 -np $((num_clients * 32)) $cmd
931 if [ $rc != 0 ] ; then
932 error "mdsrate failed to create $rc"
936 local num_mntpts=$statahead_NUMMNTPTS
937 local mntpt_root=$TMP/mntpt/lustre
938 local mntopts=$MNTOPTSTATAHEAD
940 echo "Mounting $num_mntpts lustre clients starts on $clients"
941 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
942 for i in $(seq 0 $num_mntpts); do
943 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
944 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
947 do_rpc_nodes $clients cancel_lru_locks mdc
949 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
951 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
952 $num_files $testdir 'f%%d' --ignore
954 # use rm instead of rmdir because of
955 # testdir could contain the files created by someone else,
# or by a previous run where the previous num_files > current num_files
958 cleanup_statahead $clients $mntpt_root $num_mntpts