# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
# vim:shiftwidth=4:softtabstop=4:tabstop=4:

# Simple function used by run_*.sh scripts
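# e.g. a run_*.sh script might call the checker below as (variable names
# hypothetical):
#     assert_env CLIENTS MPIRUN MACHINEFILE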
	if [ -z "${!name}" ]; then
		echo "$0: $name must be set"

	[ $failed ] && exit 1 || true
# lrepl - Lustre test Read-Eval-Print Loop.
#
# This function implements a REPL for the Lustre test framework. It
# doesn't exec an actual shell because the user may want to inspect
# variables and use functions from the test framework.
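#
# A hypothetical way in: export REPL_ON_LASSERT=true so that a failing
# lassert (below) drops you into the loop, or call lrepl directly from a
# test script.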
	This is an interactive read-eval-print loop shell simulation
	that you can use to debug failing tests. You can enter most
	bash command lines (see notes below).

	Use this REPL to inspect variables, set them, and call test
	framework shell functions, etcetera.

	Type 'exit' or press Ctrl-D (EOF) to leave this shell.

	Set \$retcode to 0 to cause the assertion failure that
	triggered this REPL to be ignored.

	Examples:
	    do_facet ost1 lctl get_param ost.*.ost.threads_*
	    do_rpc_nodes \$OSTNODES unload_modules

	NOTES:
	    All but the last line of a multi-line statement or block
	    must end in a backslash.

	    "Here documents" are not supported.

	    History is not supported, but command-line editing is.
	# Prompt escapes don't work in read -p, sadly.
	prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename "$PWD")% "

	# We use read -r to get close to a real shell experience
	while read -e -r -p "$prompt" rawline; do
		# Don't want to exit-exit, just exit the REPL

		# We need to handle continuations, and read -r doesn't do
		# that for us. Yet we need read -r.
		#
		# We also use case/esac to compare lines read to "*\\",
		# because [ "$line" = *\\ ] and variants of that don't do
		# pattern matching; case does.
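		#
		# A minimal sketch of that comparison:
		#     case $rawline in
		#     *\\) ;;	# line ends in a backslash: read a continuation
		#     esac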
		while read -e -r -p '> ' rawline

			line="$line"$'\n'"$rawline"

			# We could check for here documents by matching
			# against *<<*, but who cares.

		# Finally! Time to eval.

	echo $'\n\tExiting interactive shell...\n'
# lassert - Lustre test framework assert
#
# Arguments: failure code, failure message, expression/statement
#
# lassert evaluates the expression given, and, if false, calls
# error() to trigger test failure. If REPL_ON_LASSERT is true then
# lassert will call lrepl() to give the user an interactive shell.
# If the REPL sets retcode=0 then the assertion failure will be
# ignored.
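#
# A hypothetical call site (failure code 42, message and expression invented):
#     lassert 42 "client count must be positive" '[ $num_clients -gt 0 ]'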
	echo "checking $* ($(eval echo \""$*"\"))..."
	eval "$@" && return 0

	if ${REPL_ON_LASSERT:-false}; then
		echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
$msg"

	error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
$msg"
# setmodopts - set module options for subsequent calls to load_modules
#
# Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
#        setmodopts -a module_name new_value [var_in_which_to_save_old_value]
#
# In the second usage the new value is appended to the old.
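#
# Example (module name and option string are hypothetical):
#     setmodopts -a libcfs "libcfs_debug=-1" old_opts	# append, save old
#     load_modules
#     ...
#     setmodopts libcfs "$old_opts"			# restore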
	if [ "$1" = -a ]; then

	local _var=MODOPTS_$1
	# Dynamic naming of variables is a pain in bash. In ksh93 we could
	# write "nameref opts_var=MODOPTS_${modname}" and then assign
	# directly to opts_var. Associative arrays would also help. Alas,
	# we're stuck with eval until all distros move to a more recent
	# version of bash. Fortunately we don't need to eval unset and export.
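	#
	# (On bash 4.3+ a nameref would be a sketch-level alternative:
	#     declare -n _ref=MODOPTS_$1; _oldvalue=$_ref; _ref=$_newvalue
	# but the eval below also runs on older bash.)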
	if [ -z "$_newvalue" ]; then

	$_append && _newvalue="$_oldvalue $_newvalue"
	export $_var="$_newvalue"
	echo "setmodopts: ${_var}=${_newvalue}"

	[ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
echoerr () { echo "$@" 1>&2 ; }

	echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
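	# "ps -eo %c %p %r" prints command, PID, and PGID for every process;
	# the awk match on our parent's PID ($PPID) extracts the process
	# group ID so the whole client load can be signaled as a group.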
	local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
	local mpirun="$MPIRUN $MPIRUN_OPTIONS"
	local command="$mpirun $@"
	local mpilog=$TMP/mpi.log

	if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
		echo "+ chmod 0777 $MOUNT"

		command="su $MPI_USER sh -c \"$command \""

	eval $command 2>&1 | tee $mpilog || true

	if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog; then
	for i in ${1//,/ }; do
		list="$list $i@$NETTYPE"
# FIXME: all setup/cleanup can be done without rpc.sh
	[ x$1 = x--verbose ] && verbose=true

	export LST_SESSION=$($LST show_session 2>/dev/null | awk '{print $5}')
	[ -z "$LST_SESSION" ] && return
lst_session_cleanup_all () {
	local list=$(comma_list $(nodes_list))
	do_rpc_nodes $list lst_end_session

	lsmod | grep -q lnet_selftest && \
		rmmod lnet_selftest > /dev/null 2>&1 || true
	local list=$(comma_list $(nodes_list))

	# lst end_session needs to be executed only locally,
	# i.e. on the node where lst new_session was called
	lst_end_session --verbose
	do_rpc_nodes $list lst_cleanup

	load_module lnet_selftest
	local list=$(comma_list $(nodes_list))
	do_rpc_nodes $list lst_setup
# Given a single argument, strips everything from the first period
# onward (inclusive):
# client-20.lab.whamcloud.com becomes client-20
	sed 's/\..*//' <<<"$1"
		echo "${var}=${!var}"

	[ -e $MACHINEFILE ] && cat $MACHINEFILE
	# Time estimation: ~5 min * cbench_RUNS
	# Space estimation:
	#     compile dir kernel-1  680MB
	#     required space        680MB * cbench_IDIRS
	#                           (~2.7GB with the default cbench_IDIRS=4)
	cbench_DIR=${cbench_DIR:-""}
	cbench_IDIRS=${cbench_IDIRS:-4}
	cbench_RUNS=${cbench_RUNS:-4}

	print_opts cbench_DIR cbench_IDIRS cbench_RUNS
	[ x$cbench_DIR = x ] &&
		{ skip_env "compilebench not found" && return; }

	[ -e $cbench_DIR/compilebench ] || \
		{ skip_env "No compilebench build" && return; }

	local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
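	# df -P reports free space in 1K blocks, so one 680MB compile dir is
	# 680 * 1024 blocks: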
	if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
		cbench_IDIRS=$((space / 680 / 1024))
		[ $cbench_IDIRS = 0 ] && \
			skip_env "Need at least 680MB free, have ${space}KB" && return
		log "free space=${space}KB, reducing initial dirs to $cbench_IDIRS"
	# test-framework.sh needs to be modified to set $tdir properly for
	# the new "test_foo" function names; until then use a fixed dir:
	# local testdir=$DIR/$tdir
	local testdir=$DIR/d0.compilebench
	local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
		-r $cbench_RUNS --makej"

	[ $rc = 0 ] || error "compilebench failed: $rc"
	METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
	mbench_NFILES=${mbench_NFILES:-30400}
	mbench_THREADS=${mbench_THREADS:-4}

	[ x$METABENCH = x ] &&
		{ skip_env "metabench not found" && return; }

	# Need space estimation here.

	print_opts METABENCH clients mbench_NFILES mbench_THREADS

	local testdir=$DIR/d0.metabench

	# mpi_run uses mpiuser
	# -C       Run the file creation tests.
	# -S       Run the file stat tests.
	# -c nfile Number of files to be used in each test.
	# -k       Cleanup. Remove the test directories.
	local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
	# find out if we need to use srun by checking $SRUN_PARTITION
	if [ "$SRUN_PARTITION" ]; then
		$SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
			-n $((num_clients * mbench_THREADS)) \
			-p $SRUN_PARTITION -- $cmd

		mpi_run -np $((num_clients * mbench_THREADS)) \
			${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
	if [ $rc != 0 ]; then
		error "metabench failed! $rc"
	SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
	simul_THREADS=${simul_THREADS:-2}
	simul_REP=${simul_REP:-20}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

		{ skip_env "simul not found" && return; }
	# Need space estimation here.

	print_opts SIMUL clients simul_REP simul_THREADS

	local testdir=$DIR/d0.simul

	# mpi_run uses mpiuser
	# -n # : repeat each test # times
	# -N # : repeat the entire set of tests # times
	local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
	# find out if we need to use srun by checking $SRUN_PARTITION
	if [ "$SRUN_PARTITION" ]; then
		$SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
			-n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \

		mpi_run -np $((num_clients * simul_THREADS)) \
			${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
	if [ $rc != 0 ]; then
		error "simul failed! $rc"
	MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
	mdtest_THREADS=${mdtest_THREADS:-2}
	mdtest_nFiles=${mdtest_nFiles:-"100000"}
	# divide the total file count across all client threads
	mdtest_nFiles=$((mdtest_nFiles / mdtest_THREADS / num_clients))
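	# e.g. with the defaults on a hypothetical 2-client cluster:
	# 100000 / 2 threads / 2 clients = 25000 files per task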
	mdtest_iteration=${mdtest_iteration:-1}
	local type=${1:-"ssf"}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

		{ skip_env "mdtest not found" && return; }

	# Need space estimation here.

	print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles

	local testdir=$DIR/d0.mdtest
	# mpi_run uses mpiuser
	# -i # : repeat each test # times
	# -n # : number of files/dirs to create/stat/remove
	# -u   : each process creates/stats/removes its own files
	local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
	[ $type = "fpp" ] && cmd="$cmd -u"
	# find out if we need to use srun by checking $SRUN_PARTITION
	if [ "$SRUN_PARTITION" ]; then
		$SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
			-n $((num_clients * mdtest_THREADS)) \
			-p $SRUN_PARTITION -- $cmd

		mpi_run -np $((num_clients * mdtest_THREADS)) \
			${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
	if [ $rc != 0 ]; then
		error "mdtest failed! $rc"
	cnt_DIR=${cnt_DIR:-""}
	cnt_NRUN=${cnt_NRUN:-10}

	print_opts cnt_DIR cnt_NRUN

		{ skip_env "connectathon dir not found" && return; }

	[ -e $cnt_DIR/runtests ] || \
		{ skip_env "No connectathon runtests found" && return; }

	local testdir=$DIR/d0.connectathon
	# cthon options (must be in this order)
	#
	# -N numpasses - will be passed to the runtests script. This argument
	#	is optional. It specifies the number of times to run
	#	through the tests.
	#
	# One of these test types
	#	-b	basic
	#	-g	general
	#	-s	special
	#	-l	lock
	#	-a	all of the above
	#
	# -f	a quick functionality test
	# Include lock tests unless we're running on nfsv4
	local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
	echo "$testdir: $fstype"
	if [[ $fstype != "nfs4" ]]; then
	for test in $tests; do
		local cmd="./runtests -N $cnt_NRUN $test -f $testdir"

	[ $rc = 0 ] || error "connectathon failed: $rc"
	local type=${1:-"ssf"}

	IOR=${IOR:-$(which IOR 2> /dev/null || true)}
	ior_THREADS=${ior_THREADS:-2}
	ior_iteration=${ior_iteration:-1}
	ior_blockSize=${ior_blockSize:-6}	# GB
	ior_xferSize=${ior_xferSize:-2m}
	ior_type=${ior_type:-POSIX}
	ior_DURATION=${ior_DURATION:-30}	# minutes
		{ skip_env "IOR not found" && return; }
	local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
	local total_threads=$((num_clients * ior_THREADS))
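	# ior_blockSize is in GB while df -P reports 1K blocks, hence the
	# 1024 * 1024 conversion below; at most half the free space may be
	# consumed: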
	echo "+ $ior_blockSize * 1024 * 1024 * $total_threads"
	if [ $((space / 2)) -le \
		$((ior_blockSize * 1024 * 1024 * total_threads)) ]; then
		echo "+ $space / 2 / 1024 / 1024 / $num_clients / $ior_THREADS"
		ior_blockSize=$((space / 2 / 1024 / 1024 / num_clients / ior_THREADS))
		[ $ior_blockSize = 0 ] && \
			skip_env "Need more than $((2 * total_threads))GB free \
($((total_threads * 1024 * 1024 * 2))KB), have ${space}KB" && return

		local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
		echo "free space=${space}KB, need $reduced_size GB"
		echo "(blockSize reduced to ${ior_blockSize}GB)"
	print_opts IOR ior_THREADS ior_DURATION MACHINEFILE

	local testdir=$DIR/d0.ior.$type

	# mpi_run uses mpiuser
	if [ "$NFSCLIENT" ]; then
		setstripe_nfsserver $testdir -c -1 ||
			{ error "setstripe on nfsserver failed" && return 1; }

		$LFS setstripe $testdir -c -1 ||
			{ error "setstripe failed" && return 2; }
	# -b N	blockSize -- contiguous bytes to write per task
	#	(e.g.: 8, 4k, 2m, 1g)
	# -t N	transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)
	# -w	writeFile -- write file
	# -r	readFile -- read existing file
	# -T	maxTimeDuration -- max time in minutes to run tests
	# -k	keepFile -- keep testFile(s) on program exit
	local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
		-t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
	[ $type = "fpp" ] && cmd="$cmd -F"
	# find out if we need to use srun by checking $SRUN_PARTITION
	if [ "$SRUN_PARTITION" ]; then
		$SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
			-n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \

		mpi_run -np $((num_clients * ior_THREADS)) \
			${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
	if [ $rc != 0 ]; then
		error "ior failed! $rc"
	MIB=${MIB:=$(which mib 2> /dev/null || true)}
	mib_THREADS=${mib_THREADS:-2}
	mib_xferSize=${mib_xferSize:-1m}
	mib_xferLimit=${mib_xferLimit:-5000}
	mib_timeLimit=${mib_timeLimit:-300}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

		{ skip_env "MIB not found" && return; }

	print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
	local testdir=$DIR/d0.mib

	# mpi_run uses mpiuser
	$LFS setstripe $testdir -c -1 ||
		{ error "setstripe failed" && return 2; }
	# -I	Show intermediate values in output
	# -H	Show headers in output
	# -L	Do not issue new system calls after this many seconds
	# -s	Use system calls of this size
	# -l	Issue no more than this many system calls
	local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
		-L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
	# find out if we need to use srun by checking $SRUN_PARTITION
	if [ "$SRUN_PARTITION" ]; then
		$SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
			-n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \

		mpi_run -np $((num_clients * mib_THREADS)) \
			${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd

	if [ $rc != 0 ]; then
		error "mib failed! $rc"
	CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
	casc_THREADS=${casc_THREADS:-2}
	casc_REP=${casc_REP:-300}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

		{ skip_env "cascading_rw not found" && return; }

	# Need space estimation here.

	print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE

	local testdir=$DIR/d0.cascading_rw

	# mpi_run uses mpiuser
	# -n: repeat test # times
	local cmd="$CASC_RW -g -d $testdir -n $casc_REP"

	mpi_run -np $((num_clients * casc_THREADS)) ${MACHINEFILE_OPTION} \

	if [ $rc != 0 ]; then
		error "cascading_rw failed! $rc"
run_write_append_truncate() {
	write_THREADS=${write_THREADS:-8}
	write_REP=${write_REP:-10000}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"
	# write_append_truncate is built in the lustre/tests dir, which is
	# expected to be in $PATH
	if ! which write_append_truncate > /dev/null 2>&1; then
		skip_env "write_append_truncate not found"
	# Need space estimation here.

	local testdir=$DIR/d0.write_append_truncate
	local file=$testdir/f0.wat

	print_opts clients write_REP write_THREADS MACHINEFILE

	# mpi_run uses mpiuser

	local cmd="write_append_truncate -n $write_REP $file"

	mpi_run -np $((num_clients * write_THREADS)) ${MACHINEFILE_OPTION} \

	if [ $rc != 0 ]; then
		error "write_append_truncate failed! $rc"
run_write_disjoint() {
	WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
		2> /dev/null || true)}
	wdisjoint_THREADS=${wdisjoint_THREADS:-4}
	wdisjoint_REP=${wdisjoint_REP:-10000}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

	[ x$WRITE_DISJOINT = x ] &&
		{ skip_env "write_disjoint not found" && return; }
	# Need space estimation here.

	print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \

	local testdir=$DIR/d0.write_disjoint

	# mpi_run uses mpiuser

	local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"

	mpi_run -np $((num_clients * wdisjoint_THREADS)) \
		${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd

	if [ $rc != 0 ]; then
		error "write_disjoint failed! $rc"
run_parallel_grouplock() {
	PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
		2> /dev/null || true)}
	parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}

	if [ "$NFSCLIENT" ]; then
		skip "skipped for NFSCLIENT mode"

	[ x$PARALLEL_GROUPLOCK = x ] &&
		{ skip "PARALLEL_GROUPLOCK not found" && return; }
	print_opts clients parallel_grouplock_MINTASKS MACHINEFILE

	local testdir=$DIR/d0.parallel_grouplock

	# mpi_run uses mpiuser
	do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
		error "set_param max_rw_chunk=0 failed"
	for i in $(seq 12); do
		local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"

		mpi_run -np $parallel_grouplock_MINTASKS ${MACHINEFILE_OPTION} \

		if [ $rc != 0 ]; then
			error_noexit "parallel_grouplock subtests $subtest " \

			echo "parallel_grouplock subtests $subtest PASS"
		status=$((status + rc))
		# clear debug to collect one log per test
		do_nodes $(comma_list $(nodes_list)) lctl clear

	[ $status -eq 0 ] || error "parallel_grouplock status: $status"
cleanup_statahead () {
	for i in $(seq 0 $num_mntpts); do
		zconf_umount_clients $clients ${mntpt_root}$i ||
			error_exit "Failed to umount lustre on ${mntpt_root}$i"
	statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
	statahead_NUMFILES=${statahead_NUMFILES:-500000}

	if [[ -n $NFSCLIENT ]]; then
		skip "Statahead testing is not supported on NFS clients."

		{ skip_env "mdsrate not found" && return; }

	print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
	# do not use the default "d[0-9]*" dir name, so the test-framework
	# cleanup does not have to rm all $statahead_NUMFILES (500k) files
	local testdir=$DIR/$dir
	# cleanup only if the dir exists, and remove only the
	# $statahead_NUMFILES files this test creates, ignoring any others
	# created by someone else
		mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
			$statahead_NUMFILES $testdir 'f%%d' --ignore
	# mpi_run uses mpiuser

	local num_files=$statahead_NUMFILES

	local IFree=$(inodes_available)
	if [ $IFree -lt $num_files ]; then

	local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
	local cmd2="--nfiles $num_files --filefmt 'f%%d'"
	local cmd="$cmd1 $cmd2"
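	# $cmd might expand to, e.g. (path and counts hypothetical):
	#     mdsrate --mknod --dir /mnt/lustre/dstatahead \
	#         --nfiles 500000 --filefmt 'f%%d'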
	mpi_run -np $((num_clients * 32)) ${MACHINEFILE_OPTION} ${MACHINEFILE} \

	if [ $rc != 0 ]; then
		error "mdsrate failed to create files: $rc"
	local num_mntpts=$statahead_NUMMNTPTS
	local mntpt_root=$TMP/mntpt/lustre
	local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
	echo "Mounting $num_mntpts extra lustre clients on $clients"
	trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
	for i in $(seq 0 $num_mntpts); do
		zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
			error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
	do_rpc_nodes $clients cancel_lru_locks mdc

	do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir

	mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
		$num_files $testdir 'f%%d' --ignore
	# use rm instead of rmdir because testdir could contain files
	# created by someone else, or by a previous run where num_files
	# was larger than it is now

	cleanup_statahead $clients $mntpt_root $num_mntpts