2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Simple function used by run_*.sh scripts
10 if [ -z "${!name}" ]; then
11 echo "$0: $name must be set"
15 [ $failed ] && exit 1 || true
18 # lrepl - Lustre test Read-Eval-Print Loop.
20 # This function implements a REPL for the Lustre test framework. It
21 # doesn't exec an actual shell because the user may want to inspect
22 # variables and use functions from the test framework.
29 This is an interactive read-eval-print loop interactive shell
30 simulation that you can use to debug failing tests. You can
31 enter most bash command lines (see notes below).
33 Use this REPL to inspect variables, set them, call test
34 framework shell functions, etcetera.
36 'exit' or EOF to exit this shell.
38 set \$retcode to 0 to cause the assertion failure that
39 triggered this REPL to be ignored.
42 do_facet ost1 lctl get_param ost.*.ost.threads_*
43 do_rpc_nodes \$OSTNODES unload_modules
46 All but the last line of multi-line statements or blocks
47 must end in a backslash.
49 "Here documents" are not supported.
51 History is not supported, but command-line editing is.
55 # Prompt escapes don't work in read -p, sadly.
56 prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
58 # We use read -r to get close to a shell experience
59 while read -e -r -p "$prompt" rawline; do
62 # Don't want to exit-exit, just exit the REPL
64 # We need to handle continuations, and read -r doesn't do
65 # that for us. Yet we need read -r.
67 # We also use case/esac to compare lines read to "*\\"
68 # because [ "$line" = *\\ ] and variants of that don't work.
70 while read -e -r -p '> ' rawline
72 line="$line"$'\n'"$rawline"
74 # We could check for here documents by matching
75 # against *<<*, but who cares.
88 # Finally! Time to eval.
92 echo $'\n\tExiting interactive shell...\n'
96 # lassert - Lustre test framework assert
98 # Arguments: failure code, failure message, expression/statement
100 # lassert evaluates the expression given, and, if false, calls
101 # error() to trigger test failure. If REPL_ON_LASSERT is true then
102 # lassert will call lrepl() to give the user an interactive shell.
103 # If the REPL sets retcode=0 then the assertion failure will be
110 echo "checking $* ($(eval echo \""$*"\"))..."
111 eval "$@" && return 0;
113 if ${REPL_ON_LASSERT:-false}; then
114 echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
119 error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
# setmodopts - set module options for subsequent calls to load_modules
126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
127 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
129 # In the second usage the new value is appended to the old.
133 if [ "$1" = -a ]; then
138 local _var=MODOPTS_$1
143 # Dynamic naming of variables is a pain in bash. In ksh93 we could
144 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
145 # to opts_var. Associative arrays would also help, alternatively.
146 # Alas, we're stuck with eval until all distros move to a more recent
147 # version of bash. Fortunately we don't need to eval unset and export.
149 if [ -z "$_newvalue" ]; then
155 $_append && _newvalue="$_oldvalue $_newvalue"
156 export $_var="$_newvalue"
157 echo setmodopts: ${_var}=${_newvalue}
159 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - print all arguments to standard error, mirroring echo's behavior.
echoerr () {
	echo "$@" >&2
}
165 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
167 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
174 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
175 local command="$mpirun $@"
176 local mpilog=$TMP/mpi.log
179 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
180 echo "+ chmod 0777 $MOUNT"
182 command="su $MPI_USER sh -c \"$command \""
187 eval $command 2>&1 | tee $mpilog || true
190 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
199 for i in ${1//,/ }; do
200 if [ "$list" = "" ]; then
203 list="$list$escape $i@$NETTYPE"
209 # FIXME: all setup/cleanup can be done without rpc.sh
212 [ x$1 = x--verbose ] && verbose=true
214 export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
215 [ "$LST_SESSION" == "" ] && return
224 lst_session_cleanup_all () {
225 local list=$(comma_list $(nodes_list))
226 do_rpc_nodes $list lst_end_session
230 lsmod | grep -q lnet_selftest && \
231 rmmod lnet_selftest > /dev/null 2>&1 || true
235 local list=$(comma_list $(nodes_list))
237 # lst end_session needs to be executed only locally
238 # i.e. on node where lst new_session was called
239 lst_end_session --verbose
240 do_rpc_nodes $list lst_cleanup
244 load_module lnet_selftest
248 local list=$(comma_list $(nodes_list))
249 do_rpc_nodes $list lst_setup
# Passed a single argument, strips off the first period
# and everything following it.
257 # client-20.lab.whamcloud.com becomes client-20
259 echo $(sed 's/\..*//' <<< $1)
265 # Find remote nodename, stripped of any domain, etc.
266 # 'hostname -s' is easy, but not implemented on all systems
268 local rname=$(do_node $1 "uname -n" || echo -1)
269 if [[ "$rname" = "-1" ]]; then
272 echo $(short_hostname $rname)
282 echo "${var}=${!var}"
284 [ -e $MACHINEFILE ] && cat $MACHINEFILE
289 # compile dir kernel-0 ~1GB
290 # required space ~1GB * cbench_IDIRS
294 cbench_DIR=${cbench_DIR:-""}
295 cbench_IDIRS=${cbench_IDIRS:-2}
296 cbench_RUNS=${cbench_RUNS:-2}
298 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
300 [ x$cbench_DIR = x ] &&
301 { skip_env "compilebench not found" && return; }
303 [ -e $cbench_DIR/compilebench ] || \
304 { skip_env "No compilebench build" && return; }
306 local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
307 if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
308 cbench_IDIRS=$((space / 1024 / 1024))
309 [[ $cbench_IDIRS -eq 0 ]] &&
310 skip_env "Need free space at least 1GB, have $space" &&
313 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
# t-f _base needs to be modified to properly set tdir
318 # for new "test_foo" functions names
319 # local testdir=$DIR/$tdir
320 local testdir=$dir/d0.compilebench
325 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
326 -r $cbench_RUNS --makej"
335 [ $rc = 0 ] || error "compilebench failed: $rc"
341 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
342 mbench_NFILES=${mbench_NFILES:-30400}
344 mbench_THREADS=${mbench_THREADS:-4}
345 mbench_OPTIONS=${mbench_OPTIONS:-}
347 [ x$METABENCH = x ] &&
348 { skip_env "metabench not found" && return; }
351 # Need space estimation here.
353 print_opts METABENCH clients mbench_NFILES mbench_THREADS
355 local testdir=$DIR/d0.metabench
357 # mpi_run uses mpiuser
360 # -C Run the file creation tests.
361 # -S Run the file stat tests.
362 # -c nfile Number of files to be used in each test.
363 # -k Cleanup. Remove the test directories.
364 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k $mbench_OPTIONS"
367 # find out if we need to use srun by checking $SRUN_PARTITION
368 if [ "$SRUN_PARTITION" ]; then
369 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
370 -n $((num_clients * mbench_THREADS)) \
371 -p $SRUN_PARTITION -- $cmd
373 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
374 -np $((num_clients * $mbench_THREADS)) $cmd
378 if [ $rc != 0 ] ; then
379 error "metabench failed! $rc"
386 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
388 simul_THREADS=${simul_THREADS:-2}
389 simul_REP=${simul_REP:-20}
391 if [ "$NFSCLIENT" ]; then
392 skip "skipped for NFSCLIENT mode"
397 { skip_env "simul not found" && return; }
400 # Need space estimation here.
402 print_opts SIMUL clients simul_REP simul_THREADS
404 local testdir=$DIR/d0.simul
406 # mpi_run uses mpiuser
409 # -n # : repeat each test # times
410 # -N # : repeat the entire set of tests # times
412 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
415 # find out if we need to use srun by checking $SRUN_PARTITION
416 if [ "$SRUN_PARTITION" ]; then
417 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
418 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
421 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
422 -np $((num_clients * simul_THREADS)) $cmd
426 if [ $rc != 0 ] ; then
427 error "simul failed! $rc"
434 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
436 mdtest_THREADS=${mdtest_THREADS:-2}
437 mdtest_nFiles=${mdtest_nFiles:-"100000"}
# We divide the files by the number of cores
439 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
440 mdtest_iteration=${mdtest_iteration:-1}
441 local mdtest_custom_params=${mdtest_custom_params:-""}
443 local type=${1:-"ssf"}
445 if [ "$NFSCLIENT" ]; then
446 skip "skipped for NFSCLIENT mode"
451 { skip_env "mdtest not found" && return; }
454 # Need space estimation here.
456 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
458 local testdir=$DIR/d0.mdtest
460 # mpi_run uses mpiuser
463 # -i # : repeat each test # times
465 # -n # : number of file/dir to create/stat/remove
466 # -u : each process create/stat/remove individually
468 local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
469 -n $mdtest_nFiles $mdtest_custom_params"
471 [ $type = "fpp" ] && cmd="$cmd -u"
474 # find out if we need to use srun by checking $SRUN_PARTITION
475 if [ "$SRUN_PARTITION" ]; then
476 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
477 -n $((num_clients * mdtest_THREADS)) \
478 -p $SRUN_PARTITION -- $cmd
480 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
481 -np $((num_clients * mdtest_THREADS)) $cmd
485 if [ $rc != 0 ] ; then
486 error "mdtest failed! $rc"
493 cnt_DIR=${cnt_DIR:-""}
494 cnt_NRUN=${cnt_NRUN:-10}
496 print_opts cnt_DIR cnt_NRUN
499 { skip_env "connectathon dir not found" && return; }
501 [ -e $cnt_DIR/runtests ] || \
502 { skip_env "No connectathon runtests found" && return; }
504 local testdir=$DIR/d0.connectathon
511 # cthon options (must be in this order)
513 # -N numpasses - will be passed to the runtests script. This argument
514 # is optional. It specifies the number of times to run
517 # One of these test types
522 # -a all of the above
524 # -f a quick functionality test
528 # Include lock tests unless we're running on nfsv4
529 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
530 echo "$testdir: $fstype"
531 if [[ $fstype != "nfs4" ]]; then
535 for test in $tests; do
536 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
542 [ $rc = 0 ] || error "connectathon failed: $rc"
550 local type=${1:="ssf"}
552 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
554 ior_THREADS=${ior_THREADS:-2}
555 ior_iteration=${ior_iteration:-1}
556 ior_blockSize=${ior_blockSize:-6}
557 ior_blockUnit=${ior_blockUnit:-M} # K, M, G
558 ior_xferSize=${ior_xferSize:-1M}
559 ior_type=${ior_type:-POSIX}
560 ior_DURATION=${ior_DURATION:-30} # minutes
562 case ${ior_blockUnit} in
564 multiplier=$((1024 * 1024 * 1024))
567 multiplier=$((1024 * 1024))
572 *) error "Incorrect block unit should be one of [KMG]"
577 { skip_env "IOR not found" && return; }
579 # calculate the space in bytes
580 local space=$(df -B 1 -P $DIR | tail -n 1 | awk '{ print $4 }')
581 local total_threads=$((num_clients * ior_THREADS))
582 echo "+ $ior_blockSize * $multiplier * $total_threads "
583 if [ $((space / 2)) -le \
584 $((ior_blockSize * multiplier * total_threads)) ]; then
585 ior_blockSize=$((space / 2 / multiplier / total_threads))
586 [ $ior_blockSize -eq 0 ] && \
587 skip_env "Need free space more than $((2 * total_threads)) \
588 ${ior_blockUnit}: have $((space / multiplier))" &&
591 echo "(reduced blockSize to $ior_blockSize \
592 ${ior_blockUnit} bytes)"
595 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
597 local testdir=$DIR/d0.ior.$type
599 # mpi_run uses mpiuser
601 if [ "$NFSCLIENT" ]; then
602 setstripe_nfsserver $testdir -c -1 ||
603 { error "setstripe on nfsserver failed" && return 1; }
605 $LFS setstripe $testdir -c -1 ||
606 { error "setstripe failed" && return 2; }
610 # contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
612 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
613 # -w writeFile -- write file"
614 # -r readFile -- read existing file"
615 # -W checkWrite -- check read after write"
616 # -C reorderTasks -- changes task ordering to n+1 ordering for readback
617 # -T maxTimeDuration -- max time in minutes to run tests"
618 # -k keepFile -- keep testFile(s) on program exit
620 local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
621 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
622 -i $ior_iteration -T $ior_DURATION -k"
624 [ $type = "fpp" ] && cmd="$cmd -F"
627 # find out if we need to use srun by checking $SRUN_PARTITION
628 if [ "$SRUN_PARTITION" ]; then
629 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
630 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
633 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
634 -np $((num_clients * $ior_THREADS)) $cmd
638 if [ $rc != 0 ] ; then
639 error "ior failed! $rc"
646 MIB=${MIB:=$(which mib 2> /dev/null || true)}
648 mib_THREADS=${mib_THREADS:-2}
649 mib_xferSize=${mib_xferSize:-1m}
650 mib_xferLimit=${mib_xferLimit:-5000}
651 mib_timeLimit=${mib_timeLimit:-300}
653 if [ "$NFSCLIENT" ]; then
654 skip "skipped for NFSCLIENT mode"
659 { skip_env "MIB not found" && return; }
661 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
664 local testdir=$DIR/d0.mib
666 # mpi_run uses mpiuser
668 $LFS setstripe $testdir -c -1 ||
669 { error "setstripe failed" && return 2; }
671 # -I Show intermediate values in output
672 # -H Show headers in output
673 # -L Do not issue new system calls after this many seconds
674 # -s Use system calls of this size
676 # -l Issue no more than this many system calls
677 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
678 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
681 # find out if we need to use srun by checking $SRUN_PARTITION
682 if [ "$SRUN_PARTITION" ]; then
683 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
684 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
687 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
688 -np $((num_clients * mib_THREADS)) $cmd
692 if [ $rc != 0 ] ; then
693 error "mib failed! $rc"
700 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
702 casc_THREADS=${casc_THREADS:-2}
703 casc_REP=${casc_REP:-300}
705 if [ "$NFSCLIENT" ]; then
706 skip "skipped for NFSCLIENT mode"
711 { skip_env "cascading_rw not found" && return; }
714 # Need space estimation here.
716 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
718 local testdir=$DIR/d0.cascading_rw
720 # mpi_run uses mpiuser
724 # -n: repeat test # times
726 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
729 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
730 -np $((num_clients * $casc_THREADS)) $cmd
733 if [ $rc != 0 ] ; then
734 error "cascading_rw failed! $rc"
739 run_write_append_truncate() {
742 write_THREADS=${write_THREADS:-8}
743 write_REP=${write_REP:-10000}
745 if [ "$NFSCLIENT" ]; then
746 skip "skipped for NFSCLIENT mode"
750 # location is lustre/tests dir
751 if ! which write_append_truncate > /dev/null 2>&1 ; then
752 skip_env "write_append_truncate not found"
757 # Need space estimation here.
759 local testdir=$DIR/d0.write_append_truncate
760 local file=$testdir/f0.wat
762 print_opts clients write_REP write_THREADS MACHINEFILE
765 # mpi_run uses mpiuser
768 local cmd="write_append_truncate -n $write_REP $file"
771 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
772 -np $((num_clients * $write_THREADS)) $cmd
775 if [ $rc != 0 ] ; then
776 error "write_append_truncate failed! $rc"
782 run_write_disjoint() {
784 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
785 2> /dev/null || true)}
787 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
788 wdisjoint_REP=${wdisjoint_REP:-10000}
790 if [ "$NFSCLIENT" ]; then
791 skip "skipped for NFSCLIENT mode"
795 [ x$WRITE_DISJOINT = x ] &&
796 { skip_env "write_disjoint not found" && return; }
799 # Need space estimation here.
801 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
803 local testdir=$DIR/d0.write_disjoint
805 # mpi_run uses mpiuser
808 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
811 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
812 -np $((num_clients * $wdisjoint_THREADS)) $cmd
815 if [ $rc != 0 ] ; then
816 error "write_disjoint failed! $rc"
821 run_parallel_grouplock() {
823 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
824 2> /dev/null || true)}
825 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
827 if [ "$NFSCLIENT" ]; then
828 skip "skipped for NFSCLIENT mode"
832 [ x$PARALLEL_GROUPLOCK = x ] &&
833 { skip "PARALLEL_GROUPLOCK not found" && return; }
835 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
837 local testdir=$DIR/d0.parallel_grouplock
839 # mpi_run uses mpiuser
845 for i in $(seq 12); do
847 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
850 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
851 -np $parallel_grouplock_MINTASKS $cmd
853 if [ $rc != 0 ] ; then
854 error_noexit "parallel_grouplock subtests $subtest " \
857 echo "parallel_grouplock subtests $subtest PASS"
859 let status=$((status + rc))
860 # clear debug to collect one log per one test
861 do_nodes $(comma_list $(nodes_list)) lctl clear
863 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
867 cleanup_statahead () {
874 for i in $(seq 0 $num_mntpts);do
875 zconf_umount_clients $clients ${mntpt_root}$i ||
876 error_exit "Failed to umount lustre on ${mntpt_root}$i"
882 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
883 statahead_NUMFILES=${statahead_NUMFILES:-500000}
885 if [[ -n $NFSCLIENT ]]; then
886 skip "Statahead testing is not supported on NFS clients."
891 { skip_env "mdsrate not found" && return; }
893 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
897 # do not use default "d[0-9]*" dir name
# to avoid rm'ing $statahead_NUMFILES (500k) files in t-f cleanup
900 local testdir=$DIR/$dir
902 # cleanup only if dir exists
903 # cleanup only $statahead_NUMFILES number of files
904 # ignore the other files created by someone else
906 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
907 $statahead_NUMFILES $testdir 'f%%d' --ignore
910 # mpi_run uses mpiuser
913 local num_files=$statahead_NUMFILES
915 local IFree=$(inodes_available)
916 if [ $IFree -lt $num_files ]; then
922 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
923 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
924 local cmd="$cmd1 $cmd2"
927 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
928 -np $((num_clients * 32)) $cmd
931 if [ $rc != 0 ] ; then
932 error "mdsrate failed to create $rc"
936 local num_mntpts=$statahead_NUMMNTPTS
937 local mntpt_root=$TMP/mntpt/lustre
938 local mntopts=$MNTOPTSTATAHEAD
940 echo "Mounting $num_mntpts lustre clients starts on $clients"
941 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
942 for i in $(seq 0 $num_mntpts); do
943 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
944 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
947 do_rpc_nodes $clients cancel_lru_locks mdc
949 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
951 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
952 $num_files $testdir 'f%%d' --ignore
954 # use rm instead of rmdir because of
955 # testdir could contain the files created by someone else,
# or by a previous run where the previous num_files > current num_files
958 cleanup_statahead $clients $mntpt_root $num_mntpts