3 # Simple function used by run_*.sh scripts
8 if [ -z "${!name}" ]; then
9 echo "$0: $name must be set"
13 [ $failed ] && exit 1 || true
# Print all arguments, space-separated, on stderr (for diagnostics).
echoerr () { echo "$@" >&2 ; }
19 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
21 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
28 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
29 local command="$mpirun $@"
30 local mpilog=$TMP/mpi.log
34 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
35 echo "+ chmod 0777 $MOUNT"
37 command="su $MPI_USER sh -c \"$command \""
42 eval $command 2>&1 | tee $mpilog || true
45 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
54 for i in ${1//,/ }; do
55 list="$list $i@$NETTYPE"
60 # FIXME: all setup/cleanup can be done without rpc.sh
63 [ x$1 = x--verbose ] && verbose=true
65 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
66 [ "$LST_SESSION" == "" ] && return
75 lst_session_cleanup_all () {
76 local list=$(comma_list $(nodes_list))
77 do_rpc_nodes $list lst_end_session
81 lsmod | grep -q lnet_selftest &&
82 rmmod lnet_selftest > /dev/null 2>&1 || true
86 local list=$(comma_list $(nodes_list))
88 # lst end_session needs to be executed only locally
89 # i.e. on node where lst new_session was called
90 lst_end_session --verbose
91 do_rpc_nodes $list lst_cleanup
95 load_module lnet_selftest
99 local list=$(comma_list $(nodes_list))
100 do_rpc_nodes $list lst_setup
110 echo "${var}=${!var}"
112 [ -e $MACHINEFILE ] && cat $MACHINEFILE
118 # 5 min * cbench_RUNS
122 # compile dir kernel-1 680MB
123 # required space 680MB * cbench_IDIRS = ~7 Gb
125 cbench_DIR=${cbench_DIR:-""}
126 cbench_IDIRS=${cbench_IDIRS:-4}
127 cbench_RUNS=${cbench_RUNS:-4}
129 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
131 [ x$cbench_DIR = x ] &&
132 { skip_env "compilebench not found" && return; }
134 [ -e $cbench_DIR/compilebench ] || \
135 { skip_env "No compilebench build" && return; }
137 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
138 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
139 cbench_IDIRS=$(( space / 680 / 1024))
140 [ $cbench_IDIRS = 0 ] && \
141 skip_env "Need free space atleast 680 Mb, have $space" && return
143 log free space=$space, reducing initial dirs to $cbench_IDIRS
146 # t-f _base needs to be modified to set tdir properly
147 # for new "test_foo" functions names
148 # local testdir=$DIR/$tdir
149 local testdir=$DIR/d0.compilebench
154 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
155 -r $cbench_RUNS --makej"
164 [ $rc = 0 ] || error "compilebench failed: $rc"
170 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
171 mbench_NFILES=${mbench_NFILES:-30400}
173 mbench_THREADS=${mbench_THREADS:-4}
175 [ x$METABENCH = x ] &&
176 { skip_env "metabench not found" && return; }
179 # Need space estimation here.
181 print_opts METABENCH clients mbench_NFILES mbench_THREADS
183 local testdir=$DIR/d0.metabench
185 # mpi_run uses mpiuser
188 # -C Run the file creation tests.
189 # -S Run the file stat tests.
190 # -c nfile Number of files to be used in each test.
191 # -k Cleanup. Remove the test directories.
192 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
195 # find out if we need to use srun by checking $SRUN_PARTITION
196 if [ "$SRUN_PARTITION" ]; then
197 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
198 -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
200 mpi_run -np $((num_clients * $mbench_THREADS)) \
201 -machinefile ${MACHINEFILE} $cmd
205 if [ $rc != 0 ] ; then
206 error "metabench failed! $rc"
213 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
215 simul_THREADS=${simul_THREADS:-2}
216 simul_REP=${simul_REP:-20}
218 if [ "$NFSCLIENT" ]; then
219 skip "skipped for NFSCLIENT mode"
224 { skip_env "simul not found" && return; }
227 # Need space estimation here.
229 print_opts SIMUL clients simul_REP simul_THREADS
231 local testdir=$DIR/d0.simul
233 # mpi_run uses mpiuser
236 # -n # : repeat each test # times
237 # -N # : repeat the entire set of tests # times
239 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
242 # find out if we need to use srun by checking $SRUN_PARTITION
243 if [ "$SRUN_PARTITION" ]; then
244 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
245 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
247 mpi_run -np $((num_clients * simul_THREADS)) \
248 -machinefile ${MACHINEFILE} $cmd
252 if [ $rc != 0 ] ; then
253 error "simul failed! $rc"
260 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
262 mdtest_THREADS=${mdtest_THREADS:-2}
263 mdtest_nFiles=${mdtest_nFiles:-"100000"}
264 # We divide the files by the number of cores
265 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
266 mdtest_iteration=${mdtest_iteration:-1}
268 local type=${1:-"ssf"}
270 if [ "$NFSCLIENT" ]; then
271 skip "skipped for NFSCLIENT mode"
276 { skip_env "mdtest not found" && return; }
279 # Need space estimation here.
281 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
283 local testdir=$DIR/d0.mdtest
285 # mpi_run uses mpiuser
288 # -i # : repeat each test # times
290 # -n # : number of file/dir to create/stat/remove
291 # -u : each process create/stat/remove individually
293 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
294 [ $type = "fpp" ] && cmd="$cmd -u"
297 # find out if we need to use srun by checking $SRUN_PARTITION
298 if [ "$SRUN_PARTITION" ]; then
299 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
300 -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
302 mpi_run -np $((num_clients * mdtest_THREADS)) \
303 -machinefile ${MACHINEFILE} $cmd
307 if [ $rc != 0 ] ; then
308 error "mdtest failed! $rc"
315 cnt_DIR=${cnt_DIR:-""}
316 cnt_NRUN=${cnt_NRUN:-10}
318 print_opts cnt_DIR cnt_NRUN
321 { skip_env "connectathon dir not found" && return; }
323 [ -e $cnt_DIR/runtests ] || \
324 { skip_env "No connectathon runtests found" && return; }
326 local testdir=$DIR/d0.connectathon
333 # cthon options (must be in this order)
335 # -N numpasses - will be passed to the runtests script. This argument
336 # is optional. It specifies the number of times to run
339 # One of these test types
344 # -a all of the above
346 # -f a quick functionality test
350 # Include lock tests unless we're running on nfsv4
351 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
352 echo "$testdir: $fstype"
353 if [[ $fstype != "nfs4" ]]; then
357 for test in $tests; do
358 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
364 [ $rc = 0 ] || error "connectathon failed: $rc"
372 local type=${1:="ssf"}
374 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
376 ior_THREADS=${ior_THREADS:-2}
377 ior_iteration=${ior_iteration:-1}
378 ior_blockSize=${ior_blockSize:-6} # GB
379 ior_xferSize=${ior_xferSize:-2m}
380 ior_type=${ior_type:-POSIX}
381 ior_DURATION=${ior_DURATION:-30} # minutes
384 { skip_env "IOR not found" && return; }
386 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
387 local total_threads=$(( num_clients * ior_THREADS ))
388 echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
389 if [ $((space / 2)) -le \
390 $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
391 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
392 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
393 [ $ior_blockSize = 0 ] && \
394 skip_env "Need free space more than $((2 * total_threads))GB: \
395 $((total_threads *1024 *1024*2)), have $space" && return
397 local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
398 echo "free space=$space, Need: $reduced_size GB"
399 echo "(blockSize reduced to $ior_blockSize Gb)"
402 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
404 local testdir=$DIR/d0.ior.$type
406 # mpi_run uses mpiuser
408 if [ "$NFSCLIENT" ]; then
409 setstripe_nfsserver $testdir -c -1 ||
410 { error "setstripe on nfsserver failed" && return 1; }
412 $LFS setstripe $testdir -c -1 ||
413 { error "setstripe failed" && return 2; }
417 # contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
419 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
420 # -w writeFile -- write file"
421 # -r readFile -- read existing file"
422 # -T maxTimeDuration -- max time in minutes to run tests"
423 # -k keepFile -- keep testFile(s) on program exit
425 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
426 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
427 [ $type = "fpp" ] && cmd="$cmd -F"
430 # find out if we need to use srun by checking $SRUN_PARTITION
431 if [ "$SRUN_PARTITION" ]; then
432 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
433 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
435 mpi_run -np $((num_clients * $ior_THREADS)) \
436 -machinefile ${MACHINEFILE} $cmd
440 if [ $rc != 0 ] ; then
441 error "ior failed! $rc"
448 MIB=${MIB:=$(which mib 2> /dev/null || true)}
450 mib_THREADS=${mib_THREADS:-2}
451 mib_xferSize=${mib_xferSize:-1m}
452 mib_xferLimit=${mib_xferLimit:-5000}
453 mib_timeLimit=${mib_timeLimit:-300}
455 if [ "$NFSCLIENT" ]; then
456 skip "skipped for NFSCLIENT mode"
461 { skip_env "MIB not found" && return; }
463 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
466 local testdir=$DIR/d0.mib
468 # mpi_run uses mpiuser
470 $LFS setstripe $testdir -c -1 ||
471 { error "setstripe failed" && return 2; }
473 # -I Show intermediate values in output
474 # -H Show headers in output
475 # -L Do not issue new system calls after this many seconds
476 # -s Use system calls of this size
478 # -l Issue no more than this many system calls
479 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
480 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
483 # find out if we need to use srun by checking $SRUN_PARTITION
484 if [ "$SRUN_PARTITION" ]; then
485 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
486 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
488 mpi_run -np $((num_clients * mib_THREADS)) \
489 -machinefile ${MACHINEFILE} $cmd
493 if [ $rc != 0 ] ; then
494 error "mib failed! $rc"
501 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
503 casc_THREADS=${casc_THREADS:-2}
504 casc_REP=${casc_REP:-300}
506 if [ "$NFSCLIENT" ]; then
507 skip "skipped for NFSCLIENT mode"
512 { skip_env "cascading_rw not found" && return; }
515 # Need space estimation here.
517 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
519 local testdir=$DIR/d0.cascading_rw
521 # mpi_run uses mpiuser
525 # -n: repeat test # times
527 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
530 mpi_run -np $((num_clients * $casc_THREADS)) \
531 -machinefile ${MACHINEFILE} $cmd
534 if [ $rc != 0 ] ; then
535 error "cascading_rw failed! $rc"
540 run_write_append_truncate() {
543 write_THREADS=${write_THREADS:-8}
544 write_REP=${write_REP:-10000}
546 if [ "$NFSCLIENT" ]; then
547 skip "skipped for NFSCLIENT mode"
551 # location is lustre/tests dir
552 if ! which write_append_truncate > /dev/null 2>&1 ; then
553 skip_env "write_append_truncate not found"
558 # Need space estimation here.
560 local testdir=$DIR/d0.write_append_truncate
561 local file=$testdir/f0.wat
563 print_opts clients write_REP write_THREADS MACHINEFILE
566 # mpi_run uses mpiuser
569 local cmd="write_append_truncate -n $write_REP $file"
572 mpi_run -np $((num_clients * $write_THREADS)) \
573 -machinefile ${MACHINEFILE} $cmd
576 if [ $rc != 0 ] ; then
577 error "write_append_truncate failed! $rc"
583 run_write_disjoint() {
585 WRITE_DISJOINT=${WRITE_DISJOINT:-\
586 $(which write_disjoint 2> /dev/null || true)}
588 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
589 wdisjoint_REP=${wdisjoint_REP:-10000}
591 if [ "$NFSCLIENT" ]; then
592 skip "skipped for NFSCLIENT mode"
596 [ x$WRITE_DISJOINT = x ] &&
597 { skip_env "write_disjoint not found" && return; }
600 # Need space estimation here.
602 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
604 local testdir=$DIR/d0.write_disjoint
606 # mpi_run uses mpiuser
609 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
612 mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
613 -machinefile ${MACHINEFILE} $cmd
616 if [ $rc != 0 ] ; then
617 error "write_disjoint failed! $rc"
622 run_parallel_grouplock() {
624 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-\
625 $(which parallel_grouplock 2> /dev/null || true)}
626 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
628 if [ "$NFSCLIENT" ]; then
629 skip "skipped for NFSCLIENT mode"
633 [ x$PARALLEL_GROUPLOCK = x ] &&
634 { skip "PARALLEL_GROUPLOCK not found" && return; }
636 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
638 local testdir=$DIR/d0.parallel_grouplock
640 # mpi_run uses mpiuser
643 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
644 error "set_param max_rw_chunk=0 failed "
649 for i in $(seq 12); do
651 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
654 mpi_run -np $parallel_grouplock_MINTASKS \
655 -machinefile ${MACHINEFILE} $cmd
657 if [ $rc != 0 ] ; then
658 error_noexit "parallel_grouplock subtests $subtest failed! $rc"
660 echo "parallel_grouplock subtests $subtest PASS"
662 let status=$((status + rc))
663 # clear debug to collect one log per one test
664 do_nodes $(comma_list $(nodes_list)) lctl clear
666 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
670 cleanup_statahead () {
677 for i in $(seq 0 $num_mntpts);do
678 zconf_umount_clients $clients ${mntpt_root}$i ||
679 error_exit "Failed to umount lustre on ${mntpt_root}$i"
685 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
686 statahead_NUMFILES=${statahead_NUMFILES:-500000}
688 if [[ -n $NFSCLIENT ]]; then
689 skip "Statahead testing is not supported on NFS clients."
694 { skip_env "mdsrate not found" && return; }
696 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
700 # do not use default "d[0-9]*" dir name
701 # to avoid removing $statahead_NUMFILES (500k) files in t-f cleanup
703 local testdir=$DIR/$dir
705 # cleanup only if dir exists
706 # cleanup only $statahead_NUMFILES number of files
707 # ignore the other files created by someone else
709 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
710 $statahead_NUMFILES $testdir 'f%%d' --ignore
713 # mpi_run uses mpiuser
716 local num_files=$statahead_NUMFILES
718 local IFree=$(inodes_available)
719 if [ $IFree -lt $num_files ]; then
725 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
726 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
727 local cmd="$cmd1 $cmd2"
730 mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
733 if [ $rc != 0 ] ; then
734 error "mdsrate failed to create $rc"
738 local num_mntpts=$statahead_NUMMNTPTS
739 local mntpt_root=$TMP/mntpt/lustre
740 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
742 echo "Mounting $num_mntpts lustre clients starts on $clients"
743 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
744 for i in $(seq 0 $num_mntpts); do
745 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
746 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
749 do_rpc_nodes $clients cancel_lru_locks mdc
751 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
753 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
754 $num_files $testdir 'f%%d' --ignore
756 # use rm instead of rmdir because of
757 # testdir could contain the files created by someone else,
758 # or by a previous run where the previous num_files exceeded the current num_files
760 cleanup_statahead $clients $mntpt_root $num_mntpts
763 # bug 17764 accessing files via nfs,
764 # ASSERTION(!mds_inode_is_orphan(dchild->d_inode)) failed
765 run_nfsread_orphan_file() {
766 if [ ! "$NFSCLIENT" ]; then
767 skip "not NFSCLIENT mode, skipped"
771 # copy file to lustre server
772 local nfsserver=$(nfs_server $MOUNT)
773 do_nodev $nfsserver cp /etc/passwd $DIR/$tfile
774 zconf_mount $nfsserver $MOUNT2
776 # open, wait, unlink and close
777 rmultiop_start --uniq unlink $nfsserver $DIR/$tfile o_uc
778 echo "1. unlinker on NFS server $nfsserver opened the file $DIR/$tfile"
781 # open $DIR2/$tfile and wait
782 rmultiop_start --uniq open $nfsserver $DIR2/$tfile o_c
783 echo "2. open on NFS server $nfsserver opened the file $DIR2/$tfile"
786 # open $DIR/$tfile on nfs client, wait, read
787 multiop_bg_pause $DIR/$tfile o_r10c
789 echo "3. NFS client readder opened the file $DIR/$tfile"
793 rmultiop_stop --uniq unlink $nfsserver
794 echo "4. unlink, close completed"
798 kill -USR1 $NFSREADPID
799 echo "5. NFS client read completed"
803 rmultiop_stop --uniq open $nfsserver
804 zconf_umount $nfsserver $MOUNT2