2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple function used by run_*.sh scripts
# NOTE(review): this listing is elided (embedded line numbers jump), so the
# loop/function that drives the check below is not visible here.
# ${!name} is bash indirect expansion: test the variable whose NAME is in
# $name; presumably $failed is set when any required variable is empty.
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
# Abort the script if any required environment variable was missing.
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
# The indented text below is the REPL's interactive help banner (part of a
# heredoc whose delimiters are elided from this listing); it is runtime
# output and must not be edited as if it were comments.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
# Inner loop: keep prompting with '> ' while the accumulated command
# ends in a backslash continuation (test itself is elided here).
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
# (comment continues in elided lines: ...ignored rather than fatal.)
# Echo the raw expression and its expanded form, then evaluate it; a true
# result short-circuits with success.
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
# On failure: either drop into the interactive REPL (debug mode) or call
# error() to fail the test. The echo/error strings appear to open a
# multi-line quoted message whose remainder is elided from this listing.
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
124 # setmodopts- set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
# With -a, remember that we are appending rather than replacing.
133 if [ "$1" = -a ]; then
# The module's options live in a dynamically named variable MODOPTS_<module>.
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
# Empty new value presumably means "clear the options" (branch body elided).
149 if [ -z "$_newvalue" ]; then
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
# Optionally save the previous value into the caller-named variable.
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - echo all arguments to stderr.
162 echoerr () { echo "$@" 1>&2 ; }
# Fragment of a signal/termination handler for client load scripts:
# log the event, then look up the process-group ID of our parent so the
# whole group can be signalled (surrounding function elided).
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
# mpi_run fragment: build an mpirun command line from MPIRUN/MPIRUN_OPTIONS
# plus the caller's arguments, optionally re-run it as $MPI_USER, and tee
# output to a log so MPICH "p4_error" failures can be detected afterwards.
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
# Run as the non-root MPI user when one is configured (chmod of $MOUNT
# presumably makes the test dir writable for that user; lines elided).
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
# '|| true' keeps set -e (if active) from aborting; $rc is captured in
# elided lines, then MPICH-style p4_error output downgrades a "success".
187 eval $command 2>&1 | tee $mpilog || true
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
# Fragment: turn a comma-separated node list ($1) into a space-separated
# list of LNet NIDs by appending @$NETTYPE to each entry.
198 for i in ${1//,/ }; do
199 list="$list $i@$NETTYPE"
# --- LNet selftest (lst) session helpers (bodies partially elided) ---
204 # FIXME: all setup/cleanup can be done without rpc.sh
# lst_end_session fragment: find the current LST session id; nothing to do
# if no session exists.
207 [ x$1 = x--verbose ] && verbose=true
209 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
210 [ "$LST_SESSION" == "" ] && return
# End the LST session on every node in the cluster via RPC.
219 lst_session_cleanup_all () {
220 local list=$(comma_list $(nodes_list))
221 do_rpc_nodes $list lst_end_session
# lst_cleanup fragment: unload the lnet_selftest module if loaded;
# failures are deliberately ignored (best-effort cleanup).
225 lsmod | grep -q lnet_selftest && \
226 rmmod lnet_selftest > /dev/null 2>&1 || true
# lst_cleanup_all fragment: end the session locally, then clean up remotely.
230 local list=$(comma_list $(nodes_list))
232 # lst end_session needs to be executed only locally
233 # i.e. on node where lst new_session was called
234 lst_end_session --verbose
235 do_rpc_nodes $list lst_cleanup
# lst_setup fragment: load the selftest module locally / on all nodes.
239 load_module lnet_selftest
243 local list=$(comma_list $(nodes_list))
244 do_rpc_nodes $list lst_setup
250 # Passed a single argument, strips everything off following
251 # and includes the first period.
252 # client-20.lab.whamcloud.com becomes client-20
# Delete everything from the first '.' onward.
254 echo $(sed 's/\..*//' <<< $1)
260 # Find remote nodename, stripped of any domain, etc.
261 # 'hostname -s' is easy, but not implemented on all systems
# Run uname -n on the remote node ($1); "-1" marks an unreachable node
# (error branch elided), otherwise strip the domain as above.
263 local rname=$(do_node $1 "uname -n" || echo -1)
264 if [[ "$rname" = "-1" ]]; then
267 echo $(short_hostname $rname)
# print_opts fragment: print each named variable as name=value (indirect
# expansion), then dump the MPI machinefile if present.
277 echo "${var}=${!var}"
279 [ -e $MACHINEFILE ] && cat $MACHINEFILE
# run_compilebench fragment: run the compilebench metadata benchmark in
# $DIR, scaling the number of initial dirs down to fit free space.
285 # compile dir kernel-1 680MB
286 # required space 680MB * cbench_IDIRS = ~1.4 Gb
288 cbench_DIR=${cbench_DIR:-""}
289 cbench_IDIRS=${cbench_IDIRS:-2}
290 cbench_RUNS=${cbench_RUNS:-2}
292 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
294 [ x$cbench_DIR = x ] &&
295 { skip_env "compilebench not found" && return; }
297 [ -e $cbench_DIR/compilebench ] || \
298 { skip_env "No compilebench build" && return; }
# Free space (KB) on the test filesystem; shrink cbench_IDIRS to fit.
300 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
301 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
302 cbench_IDIRS=$(( space / 680 / 1024))
303 [ $cbench_IDIRS = 0 ] && \
304 skip_env "Need free space atleast 680 Mb, have $space" && return
306 log free space=$space, reducing initial dirs to $cbench_IDIRS
309 # t-f _base needs to be modified to set tdir properly
310 # for new "test_foo" function names
311 # local testdir=$DIR/$tdir
312 local testdir=$DIR/d0.compilebench
317 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
318 -r $cbench_RUNS --makej"
# $rc is captured from the benchmark run in elided lines.
327 [ $rc = 0 ] || error "compilebench failed: $rc"
# run_metabench fragment: MPI metadata benchmark (create/stat files).
333 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
334 mbench_NFILES=${mbench_NFILES:-30400}
336 mbench_THREADS=${mbench_THREADS:-4}
338 [ x$METABENCH = x ] &&
339 { skip_env "metabench not found" && return; }
342 # Need space estimation here.
344 print_opts METABENCH clients mbench_NFILES mbench_THREADS
346 local testdir=$DIR/d0.metabench
348 # mpi_run uses mpiuser
351 # -C Run the file creation tests.
352 # -S Run the file stat tests.
353 # -c nfile Number of files to be used in each test.
354 # -k Cleanup. Remove the test directories.
355 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
358 # find out if we need to use srun by checking $SRUN_PARTITION
359 if [ "$SRUN_PARTITION" ]; then
360 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
361 -n $((num_clients * mbench_THREADS)) \
362 -p $SRUN_PARTITION -- $cmd
# else branch: launch via mpi_run with the machinefile.
364 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
365 -np $((num_clients * $mbench_THREADS)) $cmd
# $rc is captured from the launcher in elided lines.
369 if [ $rc != 0 ] ; then
370 error "metabench failed! $rc"
# run_simul fragment: MPI-coordinated simultaneous-ops test (simul).
377 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
379 simul_THREADS=${simul_THREADS:-2}
380 simul_REP=${simul_REP:-20}
# simul requires a real parallel filesystem mount, not NFS.
382 if [ "$NFSCLIENT" ]; then
383 skip "skipped for NFSCLIENT mode"
388 { skip_env "simul not found" && return; }
391 # Need space estimation here.
393 print_opts SIMUL clients simul_REP simul_THREADS
395 local testdir=$DIR/d0.simul
397 # mpi_run uses mpiuser
400 # -n # : repeat each test # times
401 # -N # : repeat the entire set of tests # times
403 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
406 # find out if we need to use srun by checking $SRUN_PARTITION
407 if [ "$SRUN_PARTITION" ]; then
408 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
409 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
# else branch: launch via mpi_run with the machinefile.
412 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
413 -np $((num_clients * simul_THREADS)) $cmd
417 if [ $rc != 0 ] ; then
418 error "simul failed! $rc"
# run_mdtest fragment: MPI metadata benchmark; $1 selects layout type,
# "ssf" (single shared file/dir, default) or "fpp" (file per process, -u).
425 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
427 mdtest_THREADS=${mdtest_THREADS:-2}
428 mdtest_nFiles=${mdtest_nFiles:-"100000"}
429 # We divide the files by the number of cores
430 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
431 mdtest_iteration=${mdtest_iteration:-1}
433 local type=${1:-"ssf"}
435 if [ "$NFSCLIENT" ]; then
436 skip "skipped for NFSCLIENT mode"
441 { skip_env "mdtest not found" && return; }
444 # Need space estimation here.
446 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
448 local testdir=$DIR/d0.mdtest
450 # mpi_run uses mpiuser
453 # -i # : repeat each test # times
455 # -n # : number of file/dir to create/stat/remove
456 # -u : each process create/stat/remove individually
458 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
459 [ $type = "fpp" ] && cmd="$cmd -u"
462 # find out if we need to use srun by checking $SRUN_PARTITION
463 if [ "$SRUN_PARTITION" ]; then
464 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
465 -n $((num_clients * mdtest_THREADS)) \
466 -p $SRUN_PARTITION -- $cmd
# else branch: launch via mpi_run with the machinefile.
468 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
469 -np $((num_clients * mdtest_THREADS)) $cmd
473 if [ $rc != 0 ] ; then
474 error "mdtest failed! $rc"
# run_connectathon fragment: run the Connectathon NFS test suite
# (runtests) against the test directory, $cnt_NRUN passes per test type.
481 cnt_DIR=${cnt_DIR:-""}
482 cnt_NRUN=${cnt_NRUN:-10}
484 print_opts cnt_DIR cnt_NRUN
487 { skip_env "connectathon dir not found" && return; }
489 [ -e $cnt_DIR/runtests ] || \
490 { skip_env "No connectathon runtests found" && return; }
492 local testdir=$DIR/d0.connectathon
499 # cthon options (must be in this order)
501 # -N numpasses - will be passed to the runtests script. This argument
502 # is optional. It specifies the number of times to run
505 # One of these test types
510 # -a all of the above
512 # -f a quick functionality test
516 # Include lock tests unless we're running on nfsv4
517 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
518 echo "$testdir: $fstype"
519 if [[ $fstype != "nfs4" ]]; then
# Run each selected test type; $tests is assembled in elided lines.
523 for test in $tests; do
524 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
530 [ $rc = 0 ] || error "connectathon failed: $rc"
# run_ior fragment: IOR parallel I/O benchmark; $1 selects "ssf" (single
# shared file, default) or "fpp" (file per process, -F). Block size is
# reduced to fit the free space available on the test filesystem.
538 local type=${1:="ssf"}
540 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
542 ior_THREADS=${ior_THREADS:-2}
543 ior_iteration=${ior_iteration:-1}
544 ior_blockSize=${ior_blockSize:-6} # GB
545 ior_xferSize=${ior_xferSize:-2m}
546 ior_type=${ior_type:-POSIX}
547 ior_DURATION=${ior_DURATION:-30} # minutes
550 { skip_env "IOR not found" && return; }
# Free space (KB); require half of it to cover blockSize * total_threads.
552 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
553 local total_threads=$(( num_clients * ior_THREADS ))
554 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
555 if [ $((space / 2)) -le \
556 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
557 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
558 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
559 [ $ior_blockSize = 0 ] && \
560 skip_env "Need free space more than $((2 * total_threads))GB: \
561 $((total_threads *1024 *1024*2)), have $space" && return
563 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
564 echo "free space=$space, Need: $reduced_size GB"
565 echo "(blockSize reduced to $ior_blockSize Gb)"
568 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
570 local testdir=$DIR/d0.ior.$type
572 # mpi_run uses mpiuser
# Stripe the test dir across all OSTs (-c -1); on NFS clients the stripe
# must be set on the NFS server side instead.
574 if [ "$NFSCLIENT" ]; then
575 setstripe_nfsserver $testdir -c -1 ||
576 { error "setstripe on nfsserver failed" && return 1; }
578 $LFS setstripe $testdir -c -1 ||
579 { error "setstripe failed" && return 2; }
583 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
585 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
586 # -w writeFile -- write file"
587 # -r readFile -- read existing file"
588 # -T maxTimeDuration -- max time in minutes to run tests"
589 # -k keepFile -- keep testFile(s) on program exit
591 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
592 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
593 [ $type = "fpp" ] && cmd="$cmd -F"
596 # find out if we need to use srun by checking $SRUN_PARTITION
597 if [ "$SRUN_PARTITION" ]; then
598 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
599 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
# else branch: launch via mpi_run with the machinefile.
602 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
603 -np $((num_clients * $ior_THREADS)) $cmd
607 if [ $rc != 0 ] ; then
608 error "ior failed! $rc"
# run_mib fragment: mib parallel I/O benchmark, bounded both by a syscall
# count limit (-l) and a time limit (-L).
615 MIB=${MIB:=$(which mib 2> /dev/null || true)}
617 mib_THREADS=${mib_THREADS:-2}
618 mib_xferSize=${mib_xferSize:-1m}
619 mib_xferLimit=${mib_xferLimit:-5000}
620 mib_timeLimit=${mib_timeLimit:-300}
622 if [ "$NFSCLIENT" ]; then
623 skip "skipped for NFSCLIENT mode"
628 { skip_env "MIB not found" && return; }
630 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
633 local testdir=$DIR/d0.mib
635 # mpi_run uses mpiuser
# Stripe the test dir across all OSTs.
637 $LFS setstripe $testdir -c -1 ||
638 { error "setstripe failed" && return 2; }
640 # -I Show intermediate values in output
641 # -H Show headers in output
642 # -L Do not issue new system calls after this many seconds
643 # -s Use system calls of this size
645 # -l Issue no more than this many system calls
646 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
647 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
650 # find out if we need to use srun by checking $SRUN_PARTITION
651 if [ "$SRUN_PARTITION" ]; then
652 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
653 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
# else branch: launch via mpi_run with the machinefile.
656 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
657 -np $((num_clients * mib_THREADS)) $cmd
661 if [ $rc != 0 ] ; then
662 error "mib failed! $rc"
# run_cascading_rw fragment: MPI cascading read/write test.
669 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
671 casc_THREADS=${casc_THREADS:-2}
672 casc_REP=${casc_REP:-300}
674 if [ "$NFSCLIENT" ]; then
675 skip "skipped for NFSCLIENT mode"
680 { skip_env "cascading_rw not found" && return; }
683 # Need space estimation here.
685 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
687 local testdir=$DIR/d0.cascading_rw
689 # mpi_run uses mpiuser
693 # -n: repeat test # times
695 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
698 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
699 -np $((num_clients * $casc_THREADS)) $cmd
# $rc is captured from mpi_run in elided lines.
702 if [ $rc != 0 ] ; then
703 error "cascading_rw failed! $rc"
# Run the write_append_truncate MPI test: concurrent writers append to and
# truncate a single shared file ($testdir/f0.wat).
708 run_write_append_truncate() {
711 write_THREADS=${write_THREADS:-8}
712 write_REP=${write_REP:-10000}
714 if [ "$NFSCLIENT" ]; then
715 skip "skipped for NFSCLIENT mode"
719 # location is lustre/tests dir
720 if ! which write_append_truncate > /dev/null 2>&1 ; then
721 skip_env "write_append_truncate not found"
726 # Need space estimation here.
728 local testdir=$DIR/d0.write_append_truncate
729 local file=$testdir/f0.wat
731 print_opts clients write_REP write_THREADS MACHINEFILE
734 # mpi_run uses mpiuser
737 local cmd="write_append_truncate -n $write_REP $file"
740 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
741 -np $((num_clients * $write_THREADS)) $cmd
# $rc is captured from mpi_run in elided lines.
744 if [ $rc != 0 ] ; then
745 error "write_append_truncate failed! $rc"
# Run the write_disjoint MPI test: each task writes a disjoint region of a
# shared file and verifies no region is corrupted by its neighbours.
751 run_write_disjoint() {
753 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
754 2> /dev/null || true)}
756 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
757 wdisjoint_REP=${wdisjoint_REP:-10000}
759 if [ "$NFSCLIENT" ]; then
760 skip "skipped for NFSCLIENT mode"
764 [ x$WRITE_DISJOINT = x ] &&
765 { skip_env "write_disjoint not found" && return; }
768 # Need space estimation here.
770 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
772 local testdir=$DIR/d0.write_disjoint
774 # mpi_run uses mpiuser
777 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
780 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
781 -np $((num_clients * $wdisjoint_THREADS)) $cmd
# $rc is captured from mpi_run in elided lines.
784 if [ $rc != 0 ] ; then
785 error "write_disjoint failed! $rc"
# Run the parallel_grouplock MPI test: 12 subtests exercising Lustre group
# locks; subtest failures are collected (error_noexit) and reported at the
# end so every subtest runs even when an earlier one fails.
790 run_parallel_grouplock() {
792 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
793 2> /dev/null || true)}
794 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
796 if [ "$NFSCLIENT" ]; then
797 skip "skipped for NFSCLIENT mode"
801 [ x$PARALLEL_GROUPLOCK = x ] &&
802 { skip "PARALLEL_GROUPLOCK not found" && return; }
804 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
806 local testdir=$DIR/d0.parallel_grouplock
808 # mpi_run uses mpiuser
# Disable read/write chunking so group-lock semantics are exercised
# directly.
811 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
812 error "set_param max_rw_chunk=0 failed "
# One iteration per subtest; $subtest is derived from $i in elided lines.
817 for i in $(seq 12); do
819 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
822 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
823 -np $parallel_grouplock_MINTASKS $cmd
825 if [ $rc != 0 ] ; then
826 error_noexit "parallel_grouplock subtests $subtest " \
829 echo "parallel_grouplock subtests $subtest PASS"
# Accumulate per-subtest return codes into an overall status.
831 let status=$((status + rc))
832 # clear debug to collect one log per one test
833 do_nodes $(comma_list $(nodes_list)) lctl clear
835 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
# Unmount the extra Lustre client mountpoints created by run_statahead
# (args, per the caller: clients, mntpt_root, num_mntpts — assignments
# elided from this listing).
839 cleanup_statahead () {
846 for i in $(seq 0 $num_mntpts);do
847 zconf_umount_clients $clients ${mntpt_root}$i ||
848 error_exit "Failed to umount lustre on ${mntpt_root}$i"
# run_statahead fragment: create many files with mdsrate, mount several
# extra client instances, and drive `ls` across them to exercise the MDS
# statahead path; cleanup is hooked via trap and cleanup_statahead().
854 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
855 statahead_NUMFILES=${statahead_NUMFILES:-500000}
857 if [[ -n $NFSCLIENT ]]; then
858 skip "Statahead testing is not supported on NFS clients."
863 { skip_env "mdsrate not found" && return; }
865 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
869 # do not use default "d[0-9]*" dir name
870 # to avoid rm'ing $statahead_NUMFILES (500k) files in the t-f cleanup
872 local testdir=$DIR/$dir
874 # cleanup only if dir exists
875 # cleanup only $statahead_NUMFILES number of files
876 # ignore the other files created by someone else
878 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
879 $statahead_NUMFILES $testdir 'f%%d' --ignore
882 # mpi_run uses mpiuser
885 local num_files=$statahead_NUMFILES
# Scale the file count down if the filesystem lacks enough free inodes
# (reduction branch elided).
887 local IFree=$(inodes_available)
888 if [ $IFree -lt $num_files ]; then
# Build the mdsrate command that mknods $num_files files named f<N>.
894 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
895 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
896 local cmd="$cmd1 $cmd2"
899 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
900 -np $((num_clients * 32)) $cmd
903 if [ $rc != 0 ] ; then
904 error "mdsrate failed to create $rc"
# Mount num_mntpts extra client instances per client node under $TMP.
908 local num_mntpts=$statahead_NUMMNTPTS
909 local mntpt_root=$TMP/mntpt/lustre
910 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
912 echo "Mounting $num_mntpts lustre clients starts on $clients"
# Ensure the extra mounts are torn down on any exit or error.
913 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
914 for i in $(seq 0 $num_mntpts); do
915 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
916 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
# Drop cached MDC locks so the subsequent ls actually triggers statahead.
919 do_rpc_nodes $clients cancel_lru_locks mdc
921 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
923 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
924 $num_files $testdir 'f%%d' --ignore
926 # use rm instead of rmdir because
927 # testdir could contain files created by someone else,
928 # or by a previous run whose num_files was larger than the current one
930 cleanup_statahead $clients $mntpt_root $num_mntpts