2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple function used by run_*.sh scripts
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
124 # setmodopts - set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
133 if [ "$1" = -a ]; then
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
149 if [ -z "$_newvalue" ]; then
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - print all arguments to stderr.
#
# Uses printf rather than echo so that arguments beginning with an
# echo option (e.g. "-n", "-e") are printed literally instead of
# being consumed as options.
echoerr () { printf '%s\n' "$*" 1>&2 ; }
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
187 eval $command 2>&1 | tee $mpilog || true
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
198 for i in ${1//,/ }; do
199 list="$list $i@$NETTYPE"
204 # FIXME: all setup/cleanup can be done without rpc.sh
207 [ x$1 = x--verbose ] && verbose=true
209 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
210 [ "$LST_SESSION" == "" ] && return
219 lst_session_cleanup_all () {
220 local list=$(comma_list $(nodes_list))
221 do_rpc_nodes $list lst_end_session
225 lsmod | grep -q lnet_selftest && \
226 rmmod lnet_selftest > /dev/null 2>&1 || true
230 local list=$(comma_list $(nodes_list))
232 # lst end_session needs to be executed only locally
233 # i.e. on node where lst new_session was called
234 lst_end_session --verbose
235 do_rpc_nodes $list lst_cleanup
239 load_module lnet_selftest
243 local list=$(comma_list $(nodes_list))
244 do_rpc_nodes $list lst_setup
250 # Passed a single argument, strips everything from the
251 # first period onward, including the period itself.
252 # client-20.lab.whamcloud.com becomes client-20
254 echo $(sed 's/\..*//' <<< $1)
264 echo "${var}=${!var}"
266 [ -e $MACHINEFILE ] && cat $MACHINEFILE
272 # compile dir kernel-1 680MB
273 # required space 680MB * cbench_IDIRS = ~1.4 Gb
275 cbench_DIR=${cbench_DIR:-""}
276 cbench_IDIRS=${cbench_IDIRS:-2}
277 cbench_RUNS=${cbench_RUNS:-2}
279 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
281 [ x$cbench_DIR = x ] &&
282 { skip_env "compilebench not found" && return; }
284 [ -e $cbench_DIR/compilebench ] || \
285 { skip_env "No compilebench build" && return; }
287 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
288 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
289 cbench_IDIRS=$(( space / 680 / 1024))
290 [ $cbench_IDIRS = 0 ] && \
291 skip_env "Need free space atleast 680 Mb, have $space" && return
293 log free space=$space, reducing initial dirs to $cbench_IDIRS
296 # t-f _base needs to be modified to set tdir properly
297 # for new "test_foo" functions names
298 # local testdir=$DIR/$tdir
299 local testdir=$DIR/d0.compilebench
304 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
305 -r $cbench_RUNS --makej"
314 [ $rc = 0 ] || error "compilebench failed: $rc"
320 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
321 mbench_NFILES=${mbench_NFILES:-30400}
323 mbench_THREADS=${mbench_THREADS:-4}
325 [ x$METABENCH = x ] &&
326 { skip_env "metabench not found" && return; }
329 # Need space estimation here.
331 print_opts METABENCH clients mbench_NFILES mbench_THREADS
333 local testdir=$DIR/d0.metabench
335 # mpi_run uses mpiuser
338 # -C Run the file creation tests.
339 # -S Run the file stat tests.
340 # -c nfile Number of files to be used in each test.
341 # -k Cleanup. Remove the test directories.
342 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
345 # find out if we need to use srun by checking $SRUN_PARTITION
346 if [ "$SRUN_PARTITION" ]; then
347 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
348 -n $((num_clients * mbench_THREADS)) \
349 -p $SRUN_PARTITION -- $cmd
351 mpi_run -np $((num_clients * $mbench_THREADS)) \
352 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
356 if [ $rc != 0 ] ; then
357 error "metabench failed! $rc"
364 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
366 simul_THREADS=${simul_THREADS:-2}
367 simul_REP=${simul_REP:-20}
369 if [ "$NFSCLIENT" ]; then
370 skip "skipped for NFSCLIENT mode"
375 { skip_env "simul not found" && return; }
378 # Need space estimation here.
380 print_opts SIMUL clients simul_REP simul_THREADS
382 local testdir=$DIR/d0.simul
384 # mpi_run uses mpiuser
387 # -n # : repeat each test # times
388 # -N # : repeat the entire set of tests # times
390 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
393 # find out if we need to use srun by checking $SRUN_PARTITION
394 if [ "$SRUN_PARTITION" ]; then
395 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
396 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
399 mpi_run -np $((num_clients * simul_THREADS)) \
400 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
404 if [ $rc != 0 ] ; then
405 error "simul failed! $rc"
412 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
414 mdtest_THREADS=${mdtest_THREADS:-2}
415 mdtest_nFiles=${mdtest_nFiles:-"100000"}
416 # We divide the files by the number of cores
417 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
418 mdtest_iteration=${mdtest_iteration:-1}
420 local type=${1:-"ssf"}
422 if [ "$NFSCLIENT" ]; then
423 skip "skipped for NFSCLIENT mode"
428 { skip_env "mdtest not found" && return; }
431 # Need space estimation here.
433 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
435 local testdir=$DIR/d0.mdtest
437 # mpi_run uses mpiuser
440 # -i # : repeat each test # times
442 # -n # : number of file/dir to create/stat/remove
443 # -u : each process create/stat/remove individually
445 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
446 [ $type = "fpp" ] && cmd="$cmd -u"
449 # find out if we need to use srun by checking $SRUN_PARTITION
450 if [ "$SRUN_PARTITION" ]; then
451 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
452 -n $((num_clients * mdtest_THREADS)) \
453 -p $SRUN_PARTITION -- $cmd
455 mpi_run -np $((num_clients * mdtest_THREADS)) \
456 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
460 if [ $rc != 0 ] ; then
461 error "mdtest failed! $rc"
468 cnt_DIR=${cnt_DIR:-""}
469 cnt_NRUN=${cnt_NRUN:-10}
471 print_opts cnt_DIR cnt_NRUN
474 { skip_env "connectathon dir not found" && return; }
476 [ -e $cnt_DIR/runtests ] || \
477 { skip_env "No connectathon runtests found" && return; }
479 local testdir=$DIR/d0.connectathon
486 # cthon options (must be in this order)
488 # -N numpasses - will be passed to the runtests script. This argument
489 # is optional. It specifies the number of times to run
492 # One of these test types
497 # -a all of the above
499 # -f a quick functionality test
503 # Include lock tests unless we're running on nfsv4
504 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
505 echo "$testdir: $fstype"
506 if [[ $fstype != "nfs4" ]]; then
510 for test in $tests; do
511 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
517 [ $rc = 0 ] || error "connectathon failed: $rc"
525 local type=${1:="ssf"}
527 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
529 ior_THREADS=${ior_THREADS:-2}
530 ior_iteration=${ior_iteration:-1}
531 ior_blockSize=${ior_blockSize:-6} # GB
532 ior_xferSize=${ior_xferSize:-2m}
533 ior_type=${ior_type:-POSIX}
534 ior_DURATION=${ior_DURATION:-30} # minutes
537 { skip_env "IOR not found" && return; }
539 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
540 local total_threads=$(( num_clients * ior_THREADS ))
541 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
542 if [ $((space / 2)) -le \
543 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
544 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
545 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
546 [ $ior_blockSize = 0 ] && \
547 skip_env "Need free space more than $((2 * total_threads))GB: \
548 $((total_threads *1024 *1024*2)), have $space" && return
550 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
551 echo "free space=$space, Need: $reduced_size GB"
552 echo "(blockSize reduced to $ior_blockSize Gb)"
555 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
557 local testdir=$DIR/d0.ior.$type
559 # mpi_run uses mpiuser
561 if [ "$NFSCLIENT" ]; then
562 setstripe_nfsserver $testdir -c -1 ||
563 { error "setstripe on nfsserver failed" && return 1; }
565 $LFS setstripe $testdir -c -1 ||
566 { error "setstripe failed" && return 2; }
570 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
572 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
573 # -w writeFile -- write file"
574 # -r readFile -- read existing file"
575 # -T maxTimeDuration -- max time in minutes to run tests"
576 # -k keepFile -- keep testFile(s) on program exit
578 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
579 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
580 [ $type = "fpp" ] && cmd="$cmd -F"
583 # find out if we need to use srun by checking $SRUN_PARTITION
584 if [ "$SRUN_PARTITION" ]; then
585 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
586 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
589 mpi_run -np $((num_clients * $ior_THREADS)) \
590 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
594 if [ $rc != 0 ] ; then
595 error "ior failed! $rc"
602 MIB=${MIB:=$(which mib 2> /dev/null || true)}
604 mib_THREADS=${mib_THREADS:-2}
605 mib_xferSize=${mib_xferSize:-1m}
606 mib_xferLimit=${mib_xferLimit:-5000}
607 mib_timeLimit=${mib_timeLimit:-300}
609 if [ "$NFSCLIENT" ]; then
610 skip "skipped for NFSCLIENT mode"
615 { skip_env "MIB not found" && return; }
617 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
620 local testdir=$DIR/d0.mib
622 # mpi_run uses mpiuser
624 $LFS setstripe $testdir -c -1 ||
625 { error "setstripe failed" && return 2; }
627 # -I Show intermediate values in output
628 # -H Show headers in output
629 # -L Do not issue new system calls after this many seconds
630 # -s Use system calls of this size
632 # -l Issue no more than this many system calls
633 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
634 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
637 # find out if we need to use srun by checking $SRUN_PARTITION
638 if [ "$SRUN_PARTITION" ]; then
639 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
640 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
643 mpi_run -np $((num_clients * mib_THREADS)) \
644 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
648 if [ $rc != 0 ] ; then
649 error "mib failed! $rc"
656 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
658 casc_THREADS=${casc_THREADS:-2}
659 casc_REP=${casc_REP:-300}
661 if [ "$NFSCLIENT" ]; then
662 skip "skipped for NFSCLIENT mode"
667 { skip_env "cascading_rw not found" && return; }
670 # Need space estimation here.
672 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
674 local testdir=$DIR/d0.cascading_rw
676 # mpi_run uses mpiuser
680 # -n: repeat test # times
682 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
685 mpi_run -np $((num_clients * $casc_THREADS)) ${MACHINEFILE_OPTION} \
689 if [ $rc != 0 ] ; then
690 error "cascading_rw failed! $rc"
695 run_write_append_truncate() {
698 write_THREADS=${write_THREADS:-8}
699 write_REP=${write_REP:-10000}
701 if [ "$NFSCLIENT" ]; then
702 skip "skipped for NFSCLIENT mode"
706 # location is lustre/tests dir
707 if ! which write_append_truncate > /dev/null 2>&1 ; then
708 skip_env "write_append_truncate not found"
713 # Need space estimation here.
715 local testdir=$DIR/d0.write_append_truncate
716 local file=$testdir/f0.wat
718 print_opts clients write_REP write_THREADS MACHINEFILE
721 # mpi_run uses mpiuser
724 local cmd="write_append_truncate -n $write_REP $file"
727 mpi_run -np $((num_clients * $write_THREADS)) ${MACHINEFILE_OPTION} \
731 if [ $rc != 0 ] ; then
732 error "write_append_truncate failed! $rc"
738 run_write_disjoint() {
740 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
741 2> /dev/null || true)}
743 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
744 wdisjoint_REP=${wdisjoint_REP:-10000}
746 if [ "$NFSCLIENT" ]; then
747 skip "skipped for NFSCLIENT mode"
751 [ x$WRITE_DISJOINT = x ] &&
752 { skip_env "write_disjoint not found" && return; }
755 # Need space estimation here.
757 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
759 local testdir=$DIR/d0.write_disjoint
761 # mpi_run uses mpiuser
764 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
767 mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
768 ${MACHINEFILE_OPTION} ${MACHINEFILE} $cmd
771 if [ $rc != 0 ] ; then
772 error "write_disjoint failed! $rc"
777 run_parallel_grouplock() {
779 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
780 2> /dev/null || true)}
781 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
783 if [ "$NFSCLIENT" ]; then
784 skip "skipped for NFSCLIENT mode"
788 [ x$PARALLEL_GROUPLOCK = x ] &&
789 { skip "PARALLEL_GROUPLOCK not found" && return; }
791 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
793 local testdir=$DIR/d0.parallel_grouplock
795 # mpi_run uses mpiuser
798 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
799 error "set_param max_rw_chunk=0 failed "
804 for i in $(seq 12); do
806 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
809 mpi_run -np $parallel_grouplock_MINTASKS ${MACHINEFILE_OPTION} \
812 if [ $rc != 0 ] ; then
813 error_noexit "parallel_grouplock subtests $subtest " \
816 echo "parallel_grouplock subtests $subtest PASS"
818 let status=$((status + rc))
819 # clear debug to collect one log per one test
820 do_nodes $(comma_list $(nodes_list)) lctl clear
822 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
826 cleanup_statahead () {
833 for i in $(seq 0 $num_mntpts);do
834 zconf_umount_clients $clients ${mntpt_root}$i ||
835 error_exit "Failed to umount lustre on ${mntpt_root}$i"
841 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
842 statahead_NUMFILES=${statahead_NUMFILES:-500000}
844 if [[ -n $NFSCLIENT ]]; then
845 skip "Statahead testing is not supported on NFS clients."
850 { skip_env "mdsrate not found" && return; }
852 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
856 # do not use default "d[0-9]*" dir name
857 # to avoid removing $statahead_NUMFILES (500k) files in t-f cleanup
859 local testdir=$DIR/$dir
861 # cleanup only if dir exists
862 # cleanup only $statahead_NUMFILES number of files
863 # ignore the other files created by someone else
865 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
866 $statahead_NUMFILES $testdir 'f%%d' --ignore
869 # mpi_run uses mpiuser
872 local num_files=$statahead_NUMFILES
874 local IFree=$(inodes_available)
875 if [ $IFree -lt $num_files ]; then
881 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
882 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
883 local cmd="$cmd1 $cmd2"
886 mpi_run -np $((num_clients * 32)) ${MACHINEFILE_OPTION} ${MACHINEFILE} \
890 if [ $rc != 0 ] ; then
891 error "mdsrate failed to create $rc"
895 local num_mntpts=$statahead_NUMMNTPTS
896 local mntpt_root=$TMP/mntpt/lustre
897 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
899 echo "Mounting $num_mntpts lustre clients starts on $clients"
900 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
901 for i in $(seq 0 $num_mntpts); do
902 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
903 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
906 do_rpc_nodes $clients cancel_lru_locks mdc
908 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
910 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
911 $num_files $testdir 'f%%d' --ignore
913 # use rm instead of rmdir because
914 # testdir could contain files created by someone else,
915 # or by a previous run where the previous num_files > current num_files
917 cleanup_statahead $clients $mntpt_root $num_mntpts