lustre/tests/functions.sh

   1 #!/bin/bash
   2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
   3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
   4
# Simple functions used by run_*.sh scripts
   6
   7 assert_env() {
   8     local failed=""
   9     for name in $@; do
  10         if [ -z "${!name}" ]; then
  11             echo "$0: $name must be set"
  12             failed=1
  13         fi
  14     done
  15     [ $failed ] && exit 1 || true
  16 }
  17
  18 # lrepl - Lustre test Read-Eval-Print Loop.
  19 #
  20 # This function implements a REPL for the Lustre test framework.  It
  21 # doesn't exec an actual shell because the user may want to inspect
  22 # variables and use functions from the test framework.
lrepl() {
    # line    - the complete (possibly multi-line) statement handed to eval
    # rawline - the text most recently returned by read
    # prompt  - primary prompt string, computed once before the loop
    local line
    local rawline
    local prompt

    # Print the usage banner.  The here-document delimiter is unquoted, so
    # the \$ escapes keep "$retcode" and "$OSTNODES" literal in the output.
    cat <<EOF
        This is an interactive read-eval-print loop interactive shell
        simulation that you can use to debug failing tests.  You can
        enter most bash command lines (see notes below).

        Use this REPL to inspect variables, set them, call test
        framework shell functions, etcetera.

        'exit' or EOF to exit this shell.

        set \$retcode to 0 to cause the assertion failure that
        triggered this REPL to be ignored.

        Examples:
            do_facet ost1 lctl get_param ost.*.ost.threads_*
            do_rpc_nodes \$OSTNODES unload_modules

        NOTES:
            All but the last line of multi-line statements or blocks
            must end in a backslash.

            "Here documents" are not supported.

            History is not supported, but command-line editing is.

EOF

    # Prompt escapes don't work in read -p, sadly.
    # The prompt shows the test number (or UNKNOWN when $testnum is unset
    # or empty), the node name and the basename of the current directory.
    prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "

    # We use read -r to get close to a shell experience
    # The outer loop ends when read fails (EOF) or on one of the breaks below.
    while read -e -r -p "$prompt" rawline; do
        line=
        case "$rawline" in
        # Don't want to exit-exit, just exit the REPL
        exit) break;;
        # We need to handle continuations, and read -r doesn't do
        # that for us.  Yet we need read -r.
        #
        # We also use case/esac to compare lines read to "*\\"
        # because [ "$line" = *\\ ] and variants of that don't work.
        *\\) line="$rawline"
            # Keep reading with the secondary prompt and append each line,
            # separated by a newline, until one does not end in a backslash.
            while read -e -r -p '> ' rawline
            do
                line="$line"$'\n'"$rawline"
                case "$rawline" in
                # We could check for here documents by matching
                # against *<<*, but who cares.
                *\\) continue;;
                *) break;;
                esac
            done
            ;;
        *) line=$rawline
        esac

        # A statement that still ends in a backslash here means the
        # continuation loop stopped because read failed (EOF) before the
        # statement was completed: leave the REPL instead of evaluating it.
        case "$line" in
        *\\) break;;
        esac

        # Finally!  Time to eval.
        # eval runs in the current shell, so the entered command can read
        # and modify the variables and functions visible to this function.
        eval "$line"
    done

    echo $'\n\tExiting interactive shell...\n'
    return 0
}
  95
  96 # lassert - Lustre test framework assert
  97 #
  98 # Arguments: failure code, failure message, expression/statement
  99 #
 100 # lassert evaluates the expression given, and, if false, calls
 101 # error() to trigger test failure.  If REPL_ON_LASSERT is true then
 102 # lassert will call lrepl() to give the user an interactive shell.
 103 # If the REPL sets retcode=0 then the assertion failure will be
 104 # ignored.
 105 lassert() {
 106     local retcode=$1
 107     local msg=$2
 108     shift 2
 109
 110     echo "checking $* ($(eval echo \""$*"\"))..."
 111     eval "$@" && return 0;
 112
 113     if ${REPL_ON_LASSERT:-false}; then
 114         echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 115 $msg"
 116         lrepl
 117     fi
 118
 119     error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 120 $msg"
 121     return $retcode
 122 }
 123
# setmodopts - set module options for subsequent calls to load_modules
 125 #
 126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
 127 #        setmodopts -a module_name new_value [var_in_which_to_save_old_value]
 128 #
 129 # In the second usage the new value is appended to the old.
 130 setmodopts() {
 131         local _append=false
 132
 133         if [ "$1" = -a ]; then
 134             _append=true
 135             shift
 136         fi
 137
 138         local _var=MODOPTS_$1
 139         local _newvalue=$2
 140         local _savevar=$3
 141         local _oldvalue
 142
 143         # Dynamic naming of variables is a pain in bash.  In ksh93 we could
 144         # write "nameref opts_var=${modname}_MODOPTS" then assign directly
 145         # to opts_var.  Associative arrays would also help, alternatively.
 146         # Alas, we're stuck with eval until all distros move to a more recent
 147         # version of bash.  Fortunately we don't need to eval unset and export.
 148
 149         if [ -z "$_newvalue" ]; then
 150             unset $_var
 151             return 0
 152         fi
 153
 154         _oldvalue=${!var}
 155         $_append && _newvalue="$_oldvalue $_newvalue"
 156         export $_var="$_newvalue"
 157         echo setmodopts: ${_var}=${_newvalue}
 158
 159         [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
 160 }
 161
 162 echoerr () { echo "$@" 1>&2 ; }
 163
 164 signaled() {
 165     echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
 166
 167     local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
 168     kill -TERM -$PGID
 169     sleep 5
 170     kill -KILL -$PGID
 171 }
 172
 173 mpi_run () {
 174     local mpirun="$MPIRUN $MPIRUN_OPTIONS"
 175     local command="$mpirun $@"
 176     local mpilog=$TMP/mpi.log
 177     local rc
 178
 179     if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
 180         echo "+ chmod 0777 $MOUNT"
 181         chmod 0777 $MOUNT
 182         command="su $MPI_USER sh -c \"$command \""
 183     fi
 184
 185     ls -ald $MOUNT
 186     echo "+ $command"
 187     eval $command 2>&1 | tee $mpilog || true
 188
 189     rc=${PIPESTATUS[0]}
 190     if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
 191        rc=1
 192     fi
 193     return $rc
 194 }
 195
 196 nids_list () {
 197         local list
 198         local escape="$2"
 199         for i in ${1//,/ }; do
 200                 if [ "$list" = "" ]; then
 201                         list="$i@$NETTYPE"
 202                 else
 203                         list="$list$escape $i@$NETTYPE"
 204                 fi
 205         done
 206         echo $list
 207 }
 208
 209 # FIXME: all setup/cleanup can be done without rpc.sh
 210 lst_end_session () {
 211     local verbose=false
 212     [ x$1 = x--verbose ] && verbose=true
 213
 214     export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
 215     [ "$LST_SESSION" == "" ] && return
 216
 217         $LST stop b
 218     if $verbose; then
 219         $LST show_error c s
 220     fi
 221     $LST end_session
 222 }
 223
 224 lst_session_cleanup_all () {
 225     local list=$(comma_list $(nodes_list))
 226     do_rpc_nodes $list lst_end_session
 227 }
 228
 229 lst_cleanup () {
 230     lsmod | grep -q lnet_selftest && \
 231         rmmod lnet_selftest > /dev/null 2>&1 || true
 232 }
 233
 234 lst_cleanup_all () {
 235    local list=$(comma_list $(nodes_list))
 236
 237    # lst end_session needs to be executed only locally
 238    # i.e. on node where lst new_session was called
 239    lst_end_session --verbose
 240    do_rpc_nodes $list lst_cleanup
 241 }
 242
 243 lst_setup () {
 244     load_module lnet_selftest
 245 }
 246
 247 lst_setup_all () {
 248     local list=$(comma_list $(nodes_list))
 249     do_rpc_nodes $list lst_setup
 250 }
 251
 252 ###
 253 # short_hostname
 254 #
# Passed a single argument, strips everything from (and including)
# the first period onward.
 257 # client-20.lab.whamcloud.com becomes client-20
 258 short_hostname() {
 259   echo $(sed 's/\..*//' <<< $1)
 260 }
 261
 262 ###
 263 # short_nodename
 264 #
 265 # Find remote nodename, stripped of any domain, etc.
 266 # 'hostname -s' is easy, but not implemented on all systems
 267 short_nodename() {
 268         local rname=$(do_node $1 "uname -n" || echo -1)
 269         if [[ "$rname" = "-1" ]]; then
 270                 rname=$1
 271         fi
 272         echo $(short_hostname $rname)
 273 }
 274
 275 print_opts () {
 276     local var
 277
 278     echo OPTIONS:
 279
 280     for i in $@; do
 281         var=$i
 282         echo "${var}=${!var}"
 283     done
 284     [ -e $MACHINEFILE ] && cat $MACHINEFILE
 285 }
 286
 287 run_compilebench() {
 288         # Space estimation:
 289         # compile dir kernel-0  ~1GB
 290         # required space        ~1GB * cbench_IDIRS
 291
 292         local dir=${1:-$DIR}
 293
 294     cbench_DIR=${cbench_DIR:-""}
 295     cbench_IDIRS=${cbench_IDIRS:-2}
 296     cbench_RUNS=${cbench_RUNS:-2}
 297
 298     print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 299
 300     [ x$cbench_DIR = x ] &&
 301         { skip_env "compilebench not found" && return; }
 302
 303     [ -e $cbench_DIR/compilebench ] || \
 304         { skip_env "No compilebench build" && return; }
 305
 306         local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
 307         if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
 308                 cbench_IDIRS=$((space / 1024 / 1024))
 309                 [[ $cbench_IDIRS -eq 0 ]] &&
 310                         skip_env "Need free space at least 1GB, have $space" &&
 311                         return
 312
 313                 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
 314         fi
 315
 316     # FIXME:
 317     # t-f _base needs to be modifyed to set properly tdir
 318     # for new "test_foo" functions names
 319     # local testdir=$DIR/$tdir
 320     local testdir=$dir/d0.compilebench.$$
 321     mkdir -p $testdir
 322
 323     local savePWD=$PWD
 324     cd $cbench_DIR
 325     local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
 326         -r $cbench_RUNS --makej"
 327
 328     log "$cmd"
 329
 330     local rc=0
 331     eval $cmd
 332     rc=$?
 333
 334     cd $savePWD
 335     [ $rc = 0 ] || error "compilebench failed: $rc"
 336     rm -rf $testdir
 337 }
 338
 339 run_metabench() {
 340         local dir=${1:-$DIR}
 341         METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
 342         mbench_NFILES=${mbench_NFILES:-30400}
 343         # threads per client
 344         mbench_THREADS=${mbench_THREADS:-4}
 345         mbench_OPTIONS=${mbench_OPTIONS:-}
 346         mbench_CLEANUP=${mbench_CLEANUP:-true}
 347
 348         [ x$METABENCH = x ] &&
 349                 { skip_env "metabench not found" && return; }
 350
 351         # FIXME
 352         # Need space estimation here.
 353
 354         print_opts METABENCH clients mbench_NFILES mbench_THREADS
 355
 356         local testdir=$dir/d0.metabench
 357         mkdir -p $testdir
 358         # mpi_run uses mpiuser
 359         chmod 0777 $testdir
 360
 361         # -C             Run the file creation tests.
 362         # -S             Run the file stat tests.
 363         # -c nfile       Number of files to be used in each test.
 364         # -k             => dont cleanup files when finished.
 365         local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k $mbench_OPTIONS"
 366         echo "+ $cmd"
 367
 368         # find out if we need to use srun by checking $SRUN_PARTITION
 369         if [ "$SRUN_PARTITION" ]; then
 370                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 371                         -n $((num_clients * mbench_THREADS)) \
 372                         -p $SRUN_PARTITION -- $cmd
 373         else
 374                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 375                         -np $((num_clients * $mbench_THREADS)) $cmd
 376         fi
 377
 378     local rc=$?
 379     if [ $rc != 0 ] ; then
 380         error "metabench failed! $rc"
 381     fi
 382
 383         if $mbench_CLEANUP; then
 384                 rm -rf $testdir
 385         else
 386                 mv $dir/d0.metabench $dir/_xxx.$(date +%s).d0.metabench
 387         fi
 388 }
 389
 390 run_simul() {
 391
 392     SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
 393     # threads per client
 394     simul_THREADS=${simul_THREADS:-2}
 395     simul_REP=${simul_REP:-20}
 396
 397     if [ "$NFSCLIENT" ]; then
 398         skip "skipped for NFSCLIENT mode"
 399         return
 400     fi
 401
 402     [ x$SIMUL = x ] &&
 403         { skip_env "simul not found" && return; }
 404
 405     # FIXME
 406     # Need space estimation here.
 407
 408     print_opts SIMUL clients simul_REP simul_THREADS
 409
 410     local testdir=$DIR/d0.simul
 411     mkdir -p $testdir
 412     # mpi_run uses mpiuser
 413     chmod 0777 $testdir
 414
 415     # -n # : repeat each test # times
 416     # -N # : repeat the entire set of tests # times
 417
 418     local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
 419
 420         echo "+ $cmd"
 421         # find out if we need to use srun by checking $SRUN_PARTITION
 422         if [ "$SRUN_PARTITION" ]; then
 423                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 424                         -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
 425                         -- $cmd
 426         else
 427                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 428                         -np $((num_clients * simul_THREADS)) $cmd
 429         fi
 430
 431     local rc=$?
 432     if [ $rc != 0 ] ; then
 433         error "simul failed! $rc"
 434     fi
 435     rm -rf $testdir
 436 }
 437
 438 run_mdtest() {
 439
 440     MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
 441     # threads per client
 442     mdtest_THREADS=${mdtest_THREADS:-2}
 443     mdtest_nFiles=${mdtest_nFiles:-"100000"}
 444     # We devide the files by number of core
 445     mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
 446     mdtest_iteration=${mdtest_iteration:-1}
 447         local mdtest_custom_params=${mdtest_custom_params:-""}
 448
 449     local type=${1:-"ssf"}
 450
 451     if [ "$NFSCLIENT" ]; then
 452         skip "skipped for NFSCLIENT mode"
 453         return
 454     fi
 455
 456     [ x$MDTEST = x ] &&
 457         { skip_env "mdtest not found" && return; }
 458
 459     # FIXME
 460     # Need space estimation here.
 461
 462     print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
 463
 464     local testdir=$DIR/d0.mdtest
 465     mkdir -p $testdir
 466     # mpi_run uses mpiuser
 467     chmod 0777 $testdir
 468
 469     # -i # : repeat each test # times
 470     # -d   : test dir
 471     # -n # : number of file/dir to create/stat/remove
 472     # -u   : each process create/stat/remove individually
 473
 474         local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
 475                 -n $mdtest_nFiles $mdtest_custom_params"
 476
 477         [ $type = "fpp" ] && cmd="$cmd -u"
 478
 479         echo "+ $cmd"
 480         # find out if we need to use srun by checking $SRUN_PARTITION
 481         if [ "$SRUN_PARTITION" ]; then
 482                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 483                         -n $((num_clients * mdtest_THREADS)) \
 484                         -p $SRUN_PARTITION -- $cmd
 485         else
 486                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 487                         -np $((num_clients * mdtest_THREADS)) $cmd
 488         fi
 489
 490     local rc=$?
 491     if [ $rc != 0 ] ; then
 492         error "mdtest failed! $rc"
 493     fi
 494     rm -rf $testdir
 495 }
 496
 497 run_connectathon() {
 498         local dir=${1:-$DIR}
 499         cnt_DIR=${cnt_DIR:-""}
 500         cnt_NRUN=${cnt_NRUN:-10}
 501
 502         print_opts cnt_DIR cnt_NRUN
 503
 504         [ x$cnt_DIR = x ] &&
 505                 { skip_env "connectathon dir not found" && return; }
 506
 507         [ -e $cnt_DIR/runtests ] || \
 508                 { skip_env "No connectathon runtests found" && return; }
 509
 510         local testdir=$dir/d0.connectathon
 511         mkdir -p $testdir
 512
 513     local savePWD=$PWD
 514     cd $cnt_DIR
 515
 516     #
 517     # cthon options (must be in this order)
 518     #
 519     # -N numpasses - will be passed to the runtests script.  This argument
 520     #         is optional.  It specifies the number of times to run
 521     #         through the tests.
 522     #
 523     # One of these test types
 524     #    -b  basic
 525     #    -g  general
 526     #    -s  special
 527     #    -l  lock
 528     #    -a  all of the above
 529     #
 530     # -f      a quick functionality test
 531     #
 532
 533     tests="-b -g -s"
 534     # Include lock tests unless we're running on nfsv4
 535     local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
 536     echo "$testdir: $fstype"
 537     if [[ $fstype != "nfs4" ]]; then
 538         tests="$tests -l"
 539     fi
 540     echo "tests: $tests"
 541     for test in $tests; do
 542         local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
 543         local rc=0
 544
 545         log "$cmd"
 546         eval $cmd
 547         rc=$?
 548         [ $rc = 0 ] || error "connectathon failed: $rc"
 549     done
 550
 551     cd $savePWD
 552     rm -rf $testdir
 553 }
 554
 555 run_ior() {
 556         local type=${1:="ssf"}
 557         local dir=${2:-$DIR}
 558         local testdir=$dir/d0.ior.$type
 559         local nfs_srvmntpt=$3
 560
 561         if [ "$NFSCLIENT" ]; then
 562                 [[ -n $nfs_srvmntpt ]] ||
 563                         { error "NFSCLIENT mode, but nfs exported dir"\
 564                                 "is not set!" && return 1; }
 565         fi
 566
 567         IOR=${IOR:-$(which IOR 2> /dev/null || true)}
 568         [ x$IOR = x ] &&
 569                 { skip_env "IOR not found" && return; }
 570
 571         # threads per client
 572         ior_THREADS=${ior_THREADS:-2}
 573         ior_iteration=${ior_iteration:-1}
 574         ior_blockSize=${ior_blockSize:-6}
 575         ior_blockUnit=${ior_blockUnit:-M}   # K, M, G
 576         ior_xferSize=${ior_xferSize:-1M}
 577         ior_type=${ior_type:-POSIX}
 578         ior_DURATION=${ior_DURATION:-30}        # minutes
 579         local multiplier=1
 580         case ${ior_blockUnit} in
 581                 [G])
 582                         multiplier=$((1024 * 1024 * 1024))
 583                         ;;
 584                 [M])
 585                         multiplier=$((1024 * 1024))
 586                         ;;
 587                 [K])
 588                         multiplier=1024
 589                         ;;
 590                 *)      error "Incorrect block unit should be one of [KMG]"
 591                         ;;
 592         esac
 593
 594         # calculate the space in bytes
 595         local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
 596         local total_threads=$((num_clients * ior_THREADS))
 597         echo "+ $ior_blockSize * $multiplier * $total_threads "
 598         if [ $((space / 2)) -le \
 599              $((ior_blockSize * multiplier * total_threads)) ]; then
 600                 ior_blockSize=$((space / 2 / multiplier / total_threads))
 601                 [ $ior_blockSize -eq 0 ] && \
 602                 skip_env "Need free space more than $((2 * total_threads)) \
 603                          ${ior_blockUnit}: have $((space / multiplier))" &&
 604                          return
 605
 606                 echo "(reduced blockSize to $ior_blockSize \
 607                      ${ior_blockUnit} bytes)"
 608         fi
 609
 610     print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 611
 612     mkdir -p $testdir
 613     # mpi_run uses mpiuser
 614     chmod 0777 $testdir
 615         if [ "$NFSCLIENT" ]; then
 616                 setstripe_nfsserver $testdir $nfs_srvmntpt -c -1 ||
 617                         { error "setstripe on nfsserver failed" && return 1; }
 618         else
 619                 $LFS setstripe $testdir -c -1 ||
 620                         { error "setstripe failed" && return 2; }
 621         fi
 622         #
 623         # -b N  blockSize --
 624         #       contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
 625         # -o S  testFileName
 626         # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
 627         # -w    writeFile -- write file"
 628         # -r    readFile -- read existing file"
 629         # -W    checkWrite -- check read after write"
 630         # -C    reorderTasks -- changes task ordering to n+1 ordering for readback
 631         # -T    maxTimeDuration -- max time in minutes to run tests"
 632         # -k    keepFile -- keep testFile(s) on program exit
 633
 634         local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
 635                 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
 636                 -i $ior_iteration -T $ior_DURATION -k"
 637
 638         [ $type = "fpp" ] && cmd="$cmd -F"
 639
 640         echo "+ $cmd"
 641         # find out if we need to use srun by checking $SRUN_PARTITION
 642         if [ "$SRUN_PARTITION" ]; then
 643                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 644                         -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
 645                         -- $cmd
 646         else
 647                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 648                         -np $((num_clients * $ior_THREADS)) $cmd
 649         fi
 650
 651     local rc=$?
 652     if [ $rc != 0 ] ; then
 653         error "ior failed! $rc"
 654     fi
 655     rm -rf $testdir
 656 }
 657
 658 run_mib() {
 659
 660     MIB=${MIB:=$(which mib 2> /dev/null || true)}
 661     # threads per client
 662     mib_THREADS=${mib_THREADS:-2}
 663     mib_xferSize=${mib_xferSize:-1m}
 664     mib_xferLimit=${mib_xferLimit:-5000}
 665     mib_timeLimit=${mib_timeLimit:-300}
 666
 667     if [ "$NFSCLIENT" ]; then
 668         skip "skipped for NFSCLIENT mode"
 669         return
 670     fi
 671
 672     [ x$MIB = x ] &&
 673         { skip_env "MIB not found" && return; }
 674
 675     print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
 676         MACHINEFILE
 677
 678     local testdir=$DIR/d0.mib
 679     mkdir -p $testdir
 680     # mpi_run uses mpiuser
 681     chmod 0777 $testdir
 682     $LFS setstripe $testdir -c -1 ||
 683         { error "setstripe failed" && return 2; }
 684     #
 685     # -I    Show intermediate values in output
 686     # -H    Show headers in output
 687     # -L    Do not issue new system calls after this many seconds
 688     # -s    Use system calls of this size
 689     # -t    test dir
 690     # -l    Issue no more than this many system calls
 691     local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
 692         -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
 693
 694         echo "+ $cmd"
 695         # find out if we need to use srun by checking $SRUN_PARTITION
 696         if [ "$SRUN_PARTITION" ]; then
 697                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 698                         -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
 699                         -- $cmd
 700         else
 701                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 702                         -np $((num_clients * mib_THREADS)) $cmd
 703         fi
 704
 705     local rc=$?
 706     if [ $rc != 0 ] ; then
 707         error "mib failed! $rc"
 708     fi
 709     rm -rf $testdir
 710 }
 711
 712 run_cascading_rw() {
 713
 714     CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
 715     # threads per client
 716     casc_THREADS=${casc_THREADS:-2}
 717     casc_REP=${casc_REP:-300}
 718
 719     if [ "$NFSCLIENT" ]; then
 720         skip "skipped for NFSCLIENT mode"
 721         return
 722     fi
 723
 724     [ x$CASC_RW = x ] &&
 725         { skip_env "cascading_rw not found" && return; }
 726
 727     # FIXME
 728     # Need space estimation here.
 729
 730     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 731
 732     local testdir=$DIR/d0.cascading_rw
 733     mkdir -p $testdir
 734     # mpi_run uses mpiuser
 735     chmod 0777 $testdir
 736
 737     # -g: debug mode
 738     # -n: repeat test # times
 739
 740     local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
 741
 742         echo "+ $cmd"
 743         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 744                 -np $((num_clients * $casc_THREADS)) $cmd
 745
 746     local rc=$?
 747     if [ $rc != 0 ] ; then
 748         error "cascading_rw failed! $rc"
 749     fi
 750     rm -rf $testdir
 751 }
 752
 753 run_write_append_truncate() {
 754
 755     # threads per client
 756     write_THREADS=${write_THREADS:-8}
 757     write_REP=${write_REP:-10000}
 758
 759     if [ "$NFSCLIENT" ]; then
 760         skip "skipped for NFSCLIENT mode"
 761         return
 762     fi
 763
 764     # location is lustre/tests dir
 765     if ! which write_append_truncate > /dev/null 2>&1 ; then
 766         skip_env "write_append_truncate not found"
 767         return
 768     fi
 769
 770     # FIXME
 771     # Need space estimation here.
 772
 773     local testdir=$DIR/d0.write_append_truncate
 774     local file=$testdir/f0.wat
 775
 776     print_opts clients write_REP write_THREADS MACHINEFILE
 777
 778     mkdir -p $testdir
 779     # mpi_run uses mpiuser
 780     chmod 0777 $testdir
 781
 782     local cmd="write_append_truncate -n $write_REP $file"
 783
 784         echo "+ $cmd"
 785         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 786                 -np $((num_clients * $write_THREADS)) $cmd
 787
 788     local rc=$?
 789     if [ $rc != 0 ] ; then
 790         error "write_append_truncate failed! $rc"
 791         return $rc
 792     fi
 793     rm -rf $testdir
 794 }
 795
 796 run_write_disjoint() {
 797
 798     WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
 799         2> /dev/null || true)}
 800     # threads per client
 801     wdisjoint_THREADS=${wdisjoint_THREADS:-4}
 802     wdisjoint_REP=${wdisjoint_REP:-10000}
 803
 804     if [ "$NFSCLIENT" ]; then
 805         skip "skipped for NFSCLIENT mode"
 806         return
 807     fi
 808
 809     [ x$WRITE_DISJOINT = x ] &&
 810         { skip_env "write_disjoint not found" && return; }
 811
 812     # FIXME
 813     # Need space estimation here.
 814
 815     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
 816         MACHINEFILE
 817     local testdir=$DIR/d0.write_disjoint
 818     mkdir -p $testdir
 819     # mpi_run uses mpiuser
 820     chmod 0777 $testdir
 821
 822     local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
 823
 824         echo "+ $cmd"
 825         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 826                 -np $((num_clients * $wdisjoint_THREADS)) $cmd
 827
 828     local rc=$?
 829     if [ $rc != 0 ] ; then
 830         error "write_disjoint failed! $rc"
 831     fi
 832     rm -rf $testdir
 833 }
 834
 835 run_parallel_grouplock() {
 836
 837     PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
 838         2> /dev/null || true)}
 839     parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
 840
 841     if [ "$NFSCLIENT" ]; then
 842         skip "skipped for NFSCLIENT mode"
 843         return
 844     fi
 845
 846     [ x$PARALLEL_GROUPLOCK = x ] &&
 847         { skip "PARALLEL_GROUPLOCK not found" && return; }
 848
 849     print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
 850
 851     local testdir=$DIR/d0.parallel_grouplock
 852     mkdir -p $testdir
 853     # mpi_run uses mpiuser
 854     chmod 0777 $testdir
 855
 856     local cmd
 857     local status=0
 858     local subtest
 859         for i in $(seq 12); do
 860                 subtest="-t $i"
 861                 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
 862                 echo "+ $cmd"
 863
 864                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 865                         -np $parallel_grouplock_MINTASKS $cmd
 866                 local rc=$?
 867                 if [ $rc != 0 ] ; then
 868                         error_noexit "parallel_grouplock subtests $subtest " \
 869                                      "failed! $rc"
 870                 else
 871                         echo "parallel_grouplock subtests $subtest PASS"
 872                 fi
 873                 let status=$((status + rc))
 874                 # clear debug to collect one log per one test
 875                 do_nodes $(comma_list $(nodes_list)) lctl clear
 876         done
 877         [ $status -eq 0 ] || error "parallel_grouplock status: $status"
 878         rm -rf $testdir
 879 }
 880
 881 cleanup_statahead () {
 882     trap 0
 883
 884     local clients=$1
 885     local mntpt_root=$2
 886     local num_mntpts=$3
 887
 888     for i in $(seq 0 $num_mntpts);do
 889         zconf_umount_clients $clients ${mntpt_root}$i ||
 890             error_exit "Failed to umount lustre on ${mntpt_root}$i"
 891     done
 892 }
 893
 894 run_statahead () {
 895
 896     statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
 897     statahead_NUMFILES=${statahead_NUMFILES:-500000}
 898
 899     if [[ -n $NFSCLIENT ]]; then
 900         skip "Statahead testing is not supported on NFS clients."
 901         return 0
 902     fi
 903
 904     [ x$MDSRATE = x ] &&
 905         { skip_env "mdsrate not found" && return; }
 906
 907     print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
 908
 909     # create large dir
 910
 911     # do not use default "d[0-9]*" dir name
 912     # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
 913     local dir=dstatahead
 914     local testdir=$DIR/$dir
 915
 916     # cleanup only if dir exists
 917     # cleanup only $statahead_NUMFILES number of files
 918     # ignore the other files created by someone else
 919     [ -d $testdir ] &&
 920         mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 921             $statahead_NUMFILES $testdir 'f%%d' --ignore
 922
 923     mkdir -p $testdir
 924     # mpi_run uses mpiuser
 925     chmod 0777 $testdir
 926
 927     local num_files=$statahead_NUMFILES
 928
 929     local IFree=$(inodes_available)
 930     if [ $IFree -lt $num_files ]; then
 931       num_files=$IFree
 932     fi
 933
 934     cancel_lru_locks mdc
 935
 936     local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
 937     local cmd2="--nfiles $num_files --filefmt 'f%%d'"
 938     local cmd="$cmd1 $cmd2"
 939     echo "+ $cmd"
 940
 941         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 942                 -np $((num_clients * 32)) $cmd
 943
 944     local rc=$?
 945     if [ $rc != 0 ] ; then
 946         error "mdsrate failed to create $rc"
 947         return $rc
 948     fi
 949
 950     local num_mntpts=$statahead_NUMMNTPTS
 951     local mntpt_root=$TMP/mntpt/lustre
 952     local mntopts=$MNTOPTSTATAHEAD
 953
 954     echo "Mounting $num_mntpts lustre clients starts on $clients"
 955     trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
 956     for i in $(seq 0 $num_mntpts); do
 957         zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
 958             error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
 959     done
 960
 961     do_rpc_nodes $clients cancel_lru_locks mdc
 962
 963     do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
 964
 965     mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 966         $num_files $testdir 'f%%d' --ignore
 967
 968     # use rm instead of rmdir because of
 969     # testdir could contain the files created by someone else,
 970     # or by previous run where is num_files prev > num_files current
 971     rm -rf $testdir
 972     cleanup_statahead $clients $mntpt_root $num_mntpts
 973 }
 974
 975 run_fs_test() {
 976         # fs_test.x is the default name for exe
 977         FS_TEST=${FS_TEST:=$(which fs_test.x 2> /dev/null || true)}
 978
 979         local clients=${CLIENTS:-$(hostname)}
 980         local testdir=$DIR/d0.fs_test
 981         local file=${testdir}/fs_test
 982         fs_test_threads=${fs_test_threads:-2}
 983         fs_test_type=${fs_test_type:-1}
 984         fs_test_nobj=${fs_test_nobj:-10}
 985         fs_test_check=${fs_test_check:-3}
 986         fs_test_strided=${fs_test_strided:-1}
 987         fs_test_touch=${fs_test_touch:-3}
 988         fs_test_supersize=${fs_test_supersize:-1}
 989         fs_test_op=${fs_test_op:-write}
 990         fs_test_barriers=${fs_test_barriers:-bopen,bwrite,bclose}
 991         fs_test_io=${fs_test_io:-mpi}
 992         fs_test_objsize=${fs_test_objsize:-100}
 993         fs_test_objunit=${fs_test_objunit:-1048576} # 1 mb
 994         fs_test_ndirs=${fs_test_ndirs:-80000}
 995
 996         [ x$FS_TEST = x ] &&
 997                 { skip "FS_TEST not found" && return; }
 998
 999         # Space estimation  in bytes
1000         local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
1001         local total_threads=$((num_clients * fs_test_threads))
1002         echo "+ $fs_test_objsize * $fs_test_objunit * $total_threads "
1003         if [ $((space / 2)) -le \
1004                 $((fs_test_objsize * fs_test_objunit * total_threads)) ]; then
1005                         fs_test_objsize=$((space / 2 / fs_test_objunit / \
1006                                 total_threads))
1007                         [ $fs_test_objsize -eq 0 ] && \
1008                         skip_env "Need free space more than \
1009                                 $((2 * total_threads * fs_test_objunit)) \
1010                                 : have $((space / fs_test_objunit))" &&
1011                                 return
1012
1013                         echo "(reduced objsize to \
1014                                 $((fs_test_objsize * fs_test_objunit)) bytes)"
1015         fi
1016
1017         print_opts FS_TEST clients fs_test_threads fs_test_objsize MACHINEFILE
1018
1019         mkdir -p $testdir
1020         # mpi_run uses mpiuser
1021         chmod 0777 $testdir
1022
1023         # --nodb          Turn off the database code at runtime
1024         # -g --target     The path to the data file
1025         # -t --type       Whether to do N-N (1) or N-1 (2)
1026         # -n --nobj       The number of objects written/read by each proc
1027         # -z --size       The size of each object
1028         # -d ---num_nn_dirs Number of subdirectories for files
1029         # -C --check      Check every byte using argument 3.
1030         # --collective    Whether to use collective I/O (for N-1, mpi-io only)
1031         # -s --strided    Whether to use a strided pattern (for N-1 only)
1032         # -T --touch      Touch every byte using argument 3
1033         # -o --op         Whether to read only (read) or write only (write)
1034         # -b --barriers   When to barrier.
1035         # -i --io         Use POSIX, MPI, or PLFS IO routines (mpi|posix|plfs)
1036         # -S --supersize  Specify how many objects per superblock
1037
1038         local cmd="$FS_TEST -nodb -g $file -t $fs_test_type -n $fs_test_nobj \
1039                 -z $((fs_test_objsize * fs_test_objunit)) -d $fs_test_ndirs \
1040                 -C $fs_test_check -collective -s $fs_test_strided \
1041                 -T $fs_test_touch -o $fs_test_op -b $fs_test_barriers \
1042                 -i $fs_test_io -S $fs_test_supersize"
1043
1044         echo "+ $cmd"
1045         mpi_run "-np $((num_clients * fs_test_threads))" $cmd
1046
1047         local rc=$?
1048         if [ $rc != 0 ] ; then
1049                 error "fs_test failed! $rc"
1050         fi
1051
1052         rm -rf $testdir
1053 }
1054
# ior_mdtest_parallel - run run_ior in the background while run_mdtest
# runs in the foreground, both with the same argument.
#
# $1 - passed through to run_ior and run_mdtest
#
# Prints one line for each of the two that returned non-zero (mdtest
# first, then ior once it has been waited for).  Returns 1 if either
# failed, 0 otherwise.
ior_mdtest_parallel() {
        local type=$1
        local ior_rc=0
        local mdtest_rc=0
        local ior_pid

        # start ior first so that mdtest overlaps with it
        run_ior $type &
        ior_pid=$!

        run_mdtest $type || mdtest_rc=$?
        if (( mdtest_rc != 0 )); then
                echo "mdtest failed with error $mdtest_rc"
        fi

        # collect the background ior status only after mdtest is done
        wait $ior_pid || ior_rc=$?
        if (( ior_rc != 0 )); then
                echo "ior failed with error $ior_rc"
        fi

        if (( ior_rc == 0 && mdtest_rc == 0 )); then
                return 0
        fi
        return 1
}
1072
# run_fio - run fio with a job file on every client node.
#
# Globals read:
#   FIO          path to fio; when unset or empty it is looked up with
#                "which fio" and stored in FIO
#   CLIENTS      nodes to run on, defaults to $(hostname)
#   fio_jobNum   number of [jobN] sections to generate (default 4)
#   fio_jobFile  job file path (default $TMP/fiojobfile.<epoch seconds>)
#   fio_bs       job N is written with bs=(fio_bs * N)m (default 1)
#   SLOW         runtime is 60 when SLOW is "no", 600 otherwise
#   DIR          the per-node data file lives in $DIR/d0.fio
#
# An existing job file is used as is; otherwise a simple one is
# generated.  The file is copied to the clients when it was just
# generated or when the "[ -f ... ]" check on the clients fails.
# The test is skipped when FIO is empty; error() is called when the
# copy or the fio run fails.
run_fio() {
        FIO=${FIO:=$(which fio 2> /dev/null || true)}

        local clients=${CLIENTS:-$(hostname)}
        local fio_jobNum=${fio_jobNum:-4}
        local fio_jobFile=${fio_jobFile:-$TMP/fiojobfile.$(date +%s)}
        local fio_bs=${fio_bs:-1}
        local testdir=$DIR/d0.fio
        local file=${testdir}/fio
        local runtime=60
        local propagate=false
        # keep the loop counter out of the caller's scope
        local i

        [ "$SLOW" = "no" ] || runtime=600

        [ x$FIO = x ] &&
                { skip_env "FIO not found" && return; }

        mkdir -p $testdir

        # use fio job file if exists,
        # create a simple one if missing
        if ! [ -f $fio_jobFile ]; then
                # \$(hostname) is written literally into the job file
                cat >> $fio_jobFile <<EOF
[global]
rw=randwrite
size=128m
time_based=1
runtime=$runtime
filename=${file}_\$(hostname)
EOF
                # bs size increased by $i for each job
                for ((i=1; i<=fio_jobNum; i++)); do
                        cat >> $fio_jobFile <<EOF

[job$i]
bs=$(( fio_bs * i ))m
EOF
                done
                # job file is created, should be propagated to all clients
                propagate=true
        fi


        # propagate the job file if not all clients have it yet or
        # if the job file was created during the test run
        if ! do_nodesv $clients " [ -f $fio_jobFile ] " ||
           $propagate; then
                local cfg=$(cat $fio_jobFile)
                do_nodes $clients "echo \\\"$cfg\\\" > ${fio_jobFile}" ||
                        error "job file $fio_jobFile is not propagated"
                do_nodesv $clients "cat ${fio_jobFile}"
        fi

        # local, so that a caller's own "cmd" is not overwritten
        local cmd="$FIO $fio_jobFile"
        echo "+ $cmd"

        log "clients: $clients $cmd"

        local rc=0
        do_nodesv $clients "$cmd "
        rc=$?

        [ $rc = 0 ] || error "fio failed: $rc"
        rm -rf $testdir
}
1138