lustre/tests/functions.sh

   1 #!/bin/bash
   2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
   3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
   4
   5 # Simple function used by run_*.sh scripts
   6
   7 assert_env() {
   8     local failed=""
   9     for name in $@; do
  10         if [ -z "${!name}" ]; then
  11             echo "$0: $name must be set"
  12             failed=1
  13         fi
  14     done
  15     [ $failed ] && exit 1 || true
  16 }
  17
  18 # lrepl - Lustre test Read-Eval-Print Loop.
  19 #
  20 # This function implements a REPL for the Lustre test framework.  It
  21 # doesn't exec an actual shell because the user may want to inspect
  22 # variables and use functions from the test framework.
  23 lrepl() {
  24     local line
  25     local rawline
  26     local prompt
  27
  28     cat <<EOF
  29         This is an interactive read-eval-print loop interactive shell
  30         simulation that you can use to debug failing tests.  You can
  31         enter most bash command lines (see notes below).
  32
  33         Use this REPL to inspect variables, set them, call test
  34         framework shell functions, etcetera.
  35
  36         'exit' or EOF to exit this shell.
  37
  38         set \$retcode to 0 to cause the assertion failure that
  39         triggered this REPL to be ignored.
  40
  41         Examples:
  42             do_facet ost1 lctl get_param ost.*.ost.threads_*
  43             do_rpc_nodes \$OSTNODES unload_modules
  44
  45         NOTES:
  46             All but the last line of multi-line statements or blocks
  47             must end in a backslash.
  48
  49             "Here documents" are not supported.
  50
  51             History is not supported, but command-line editing is.
  52
  53 EOF
  54
  55     # Prompt escapes don't work in read -p, sadly.
  56     prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
  57
  58     # We use read -r to get close to a shell experience
  59     while read -e -r -p "$prompt" rawline; do
  60         line=
  61         case "$rawline" in
  62         # Don't want to exit-exit, just exit the REPL
  63         exit) break;;
  64         # We need to handle continuations, and read -r doesn't do
  65         # that for us.  Yet we need read -r.
  66         #
  67         # We also use case/esac to compare lines read to "*\\"
  68         # because [ "$line" = *\\ ] and variants of that don't work.
  69         *\\) line="$rawline"
  70             while read -e -r -p '> ' rawline
  71             do
  72                 line="$line"$'\n'"$rawline"
  73                 case "$rawline" in
  74                 # We could check for here documents by matching
  75                 # against *<<*, but who cares.
  76                 *\\) continue;;
  77                 *) break;;
  78                 esac
  79             done
  80             ;;
  81         *) line=$rawline
  82         esac
  83
  84         case "$line" in
  85         *\\) break;;
  86         esac
  87
  88         # Finally!  Time to eval.
  89         eval "$line"
  90     done
  91
  92     echo $'\n\tExiting interactive shell...\n'
  93     return 0
  94 }
  95
  96 # lassert - Lustre test framework assert
  97 #
  98 # Arguments: failure code, failure message, expression/statement
  99 #
 100 # lassert evaluates the expression given, and, if false, calls
 101 # error() to trigger test failure.  If REPL_ON_LASSERT is true then
 102 # lassert will call lrepl() to give the user an interactive shell.
 103 # If the REPL sets retcode=0 then the assertion failure will be
 104 # ignored.
 105 lassert() {
 106     local retcode=$1
 107     local msg=$2
 108     shift 2
 109
 110     echo "checking $* ($(eval echo \""$*"\"))..."
 111     eval "$@" && return 0;
 112
 113     if ${REPL_ON_LASSERT:-false}; then
 114         echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 115 $msg"
 116         lrepl
 117     fi
 118
 119     error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 120 $msg"
 121     return $retcode
 122 }
 123
 124 # setmodopts- set module options for subsequent calls to load_modules
 125 #
 126 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
 127 #        setmodopts -a module_name new_value [var_in_which_to_save_old_value]
 128 #
 129 # In the second usage the new value is appended to the old.
 130 setmodopts() {
 131         local _append=false
 132
 133         if [ "$1" = -a ]; then
 134             _append=true
 135             shift
 136         fi
 137
 138         local _var=MODOPTS_$1
 139         local _newvalue=$2
 140         local _savevar=$3
 141         local _oldvalue
 142
 143         # Dynamic naming of variables is a pain in bash.  In ksh93 we could
 144         # write "nameref opts_var=${modname}_MODOPTS" then assign directly
 145         # to opts_var.  Associative arrays would also help, alternatively.
 146         # Alas, we're stuck with eval until all distros move to a more recent
 147         # version of bash.  Fortunately we don't need to eval unset and export.
 148
 149         if [ -z "$_newvalue" ]; then
 150             unset $_var
 151             return 0
 152         fi
 153
 154         _oldvalue=${!var}
 155         $_append && _newvalue="$_oldvalue $_newvalue"
 156         export $_var="$_newvalue"
 157         echo setmodopts: ${_var}=${_newvalue}
 158
 159         [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
 160 }
 161
 162 echoerr () { echo "$@" 1>&2 ; }
 163
 164 signaled() {
 165     echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
 166
 167     local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
 168     kill -TERM -$PGID
 169     sleep 5
 170     kill -KILL -$PGID
 171 }
 172
 173 mpi_run () {
 174     local mpirun="$MPIRUN $MPIRUN_OPTIONS"
 175     local command="$mpirun $@"
 176     local mpilog=$TMP/mpi.log
 177     local rc
 178
 179     if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
 180         echo "+ chmod 0777 $MOUNT"
 181         chmod 0777 $MOUNT
 182         command="su $MPI_USER sh -c \"$command \""
 183     fi
 184
 185     ls -ald $MOUNT
 186     echo "+ $command"
 187     eval $command 2>&1 | tee $mpilog || true
 188
 189     rc=${PIPESTATUS[0]}
 190     if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
 191        rc=1
 192     fi
 193     return $rc
 194 }
 195
 196 nids_list () {
 197         local list
 198         local escape="$2"
 199         for i in ${1//,/ }; do
 200                 if [ "$list" = "" ]; then
 201                         list="$i@$NETTYPE"
 202                 else
 203                         list="$list$escape $i@$NETTYPE"
 204                 fi
 205         done
 206         echo $list
 207 }
 208
 209 # FIXME: all setup/cleanup can be done without rpc.sh
 210 lst_end_session () {
 211     local verbose=false
 212     [ x$1 = x--verbose ] && verbose=true
 213
 214     export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
 215     [ "$LST_SESSION" == "" ] && return
 216
 217         $LST stop b
 218     if $verbose; then
 219         $LST show_error c s
 220     fi
 221     $LST end_session
 222 }
 223
 224 lst_session_cleanup_all () {
 225     local list=$(comma_list $(nodes_list))
 226     do_rpc_nodes $list lst_end_session
 227 }
 228
 229 lst_cleanup () {
 230     lsmod | grep -q lnet_selftest && \
 231         rmmod lnet_selftest > /dev/null 2>&1 || true
 232 }
 233
 234 lst_cleanup_all () {
 235    local list=$(comma_list $(nodes_list))
 236
 237    # lst end_session needs to be executed only locally
 238    # i.e. on node where lst new_session was called
 239    lst_end_session --verbose
 240    do_rpc_nodes $list lst_cleanup
 241 }
 242
 243 lst_setup () {
 244     load_module lnet_selftest
 245 }
 246
 247 lst_setup_all () {
 248     local list=$(comma_list $(nodes_list))
 249     do_rpc_nodes $list lst_setup
 250 }
 251
 252 ###
 253 # short_hostname
 254 #
 255 # Passed a single argument, strips everything off following
 256 # and includes the first period.
 257 # client-20.lab.whamcloud.com becomes client-20
 258 short_hostname() {
 259   echo $(sed 's/\..*//' <<< $1)
 260 }
 261
 262 ###
 263 # short_nodename
 264 #
 265 # Find remote nodename, stripped of any domain, etc.
 266 # 'hostname -s' is easy, but not implemented on all systems
 267 short_nodename() {
 268         local rname=$(do_node $1 "uname -n" || echo -1)
 269         if [[ "$rname" = "-1" ]]; then
 270                 rname=$1
 271         fi
 272         echo $(short_hostname $rname)
 273 }
 274
 275 print_opts () {
 276     local var
 277
 278     echo OPTIONS:
 279
 280     for i in $@; do
 281         var=$i
 282         echo "${var}=${!var}"
 283     done
 284     [ -e $MACHINEFILE ] && cat $MACHINEFILE
 285 }
 286
 287 run_compilebench() {
 288         # Space estimation:
 289         # compile dir kernel-0  ~1GB
 290         # required space        ~1GB * cbench_IDIRS
 291
 292         local dir=${1:-$DIR}
 293
 294     cbench_DIR=${cbench_DIR:-""}
 295     cbench_IDIRS=${cbench_IDIRS:-2}
 296     cbench_RUNS=${cbench_RUNS:-2}
 297
 298     print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 299
 300     [ x$cbench_DIR = x ] &&
 301         { skip_env "compilebench not found" && return; }
 302
 303     [ -e $cbench_DIR/compilebench ] || \
 304         { skip_env "No compilebench build" && return; }
 305
 306         local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
 307         if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
 308                 cbench_IDIRS=$((space / 1024 / 1024))
 309                 [[ $cbench_IDIRS -eq 0 ]] &&
 310                         skip_env "Need free space at least 1GB, have $space" &&
 311                         return
 312
 313                 echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
 314         fi
 315
 316     # FIXME:
 317     # t-f _base needs to be modifyed to set properly tdir
 318     # for new "test_foo" functions names
 319     # local testdir=$DIR/$tdir
 320     local testdir=$dir/d0.compilebench
 321     mkdir -p $testdir
 322
 323     local savePWD=$PWD
 324     cd $cbench_DIR
 325     local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
 326         -r $cbench_RUNS --makej"
 327
 328     log "$cmd"
 329
 330     local rc=0
 331     eval $cmd
 332     rc=$?
 333
 334     cd $savePWD
 335     [ $rc = 0 ] || error "compilebench failed: $rc"
 336     rm -rf $testdir
 337 }
 338
 339 run_metabench() {
 340
 341     METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
 342     mbench_NFILES=${mbench_NFILES:-30400}
 343     # threads per client
 344     mbench_THREADS=${mbench_THREADS:-4}
 345         mbench_OPTIONS=${mbench_OPTIONS:-}
 346         mbench_CLEANUP=${mbench_CLEANUP:-true}
 347
 348     [ x$METABENCH = x ] &&
 349         { skip_env "metabench not found" && return; }
 350
 351     # FIXME
 352     # Need space estimation here.
 353
 354     print_opts METABENCH clients mbench_NFILES mbench_THREADS
 355
 356     local testdir=$DIR/d0.metabench
 357     mkdir -p $testdir
 358     # mpi_run uses mpiuser
 359     chmod 0777 $testdir
 360
 361         # -C             Run the file creation tests.
 362         # -S             Run the file stat tests.
 363         # -c nfile       Number of files to be used in each test.
 364         # -k             => dont cleanup files when finished.
 365         local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k $mbench_OPTIONS"
 366         echo "+ $cmd"
 367
 368         # find out if we need to use srun by checking $SRUN_PARTITION
 369         if [ "$SRUN_PARTITION" ]; then
 370                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 371                         -n $((num_clients * mbench_THREADS)) \
 372                         -p $SRUN_PARTITION -- $cmd
 373         else
 374                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 375                         -np $((num_clients * $mbench_THREADS)) $cmd
 376         fi
 377
 378     local rc=$?
 379     if [ $rc != 0 ] ; then
 380         error "metabench failed! $rc"
 381     fi
 382
 383         if $mbench_CLEANUP; then
 384                 rm -rf $testdir
 385         else
 386                 mv $dir/d0.metabench $dir/_xxx.$(date +%s).d0.metabench
 387         fi
 388 }
 389
 390 run_simul() {
 391
 392     SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
 393     # threads per client
 394     simul_THREADS=${simul_THREADS:-2}
 395     simul_REP=${simul_REP:-20}
 396
 397     if [ "$NFSCLIENT" ]; then
 398         skip "skipped for NFSCLIENT mode"
 399         return
 400     fi
 401
 402     [ x$SIMUL = x ] &&
 403         { skip_env "simul not found" && return; }
 404
 405     # FIXME
 406     # Need space estimation here.
 407
 408     print_opts SIMUL clients simul_REP simul_THREADS
 409
 410     local testdir=$DIR/d0.simul
 411     mkdir -p $testdir
 412     # mpi_run uses mpiuser
 413     chmod 0777 $testdir
 414
 415     # -n # : repeat each test # times
 416     # -N # : repeat the entire set of tests # times
 417
 418     local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
 419
 420         echo "+ $cmd"
 421         # find out if we need to use srun by checking $SRUN_PARTITION
 422         if [ "$SRUN_PARTITION" ]; then
 423                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 424                         -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
 425                         -- $cmd
 426         else
 427                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 428                         -np $((num_clients * simul_THREADS)) $cmd
 429         fi
 430
 431     local rc=$?
 432     if [ $rc != 0 ] ; then
 433         error "simul failed! $rc"
 434     fi
 435     rm -rf $testdir
 436 }
 437
 438 run_mdtest() {
 439
 440     MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
 441     # threads per client
 442     mdtest_THREADS=${mdtest_THREADS:-2}
 443     mdtest_nFiles=${mdtest_nFiles:-"100000"}
 444     # We devide the files by number of core
 445     mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
 446     mdtest_iteration=${mdtest_iteration:-1}
 447         local mdtest_custom_params=${mdtest_custom_params:-""}
 448
 449     local type=${1:-"ssf"}
 450
 451     if [ "$NFSCLIENT" ]; then
 452         skip "skipped for NFSCLIENT mode"
 453         return
 454     fi
 455
 456     [ x$MDTEST = x ] &&
 457         { skip_env "mdtest not found" && return; }
 458
 459     # FIXME
 460     # Need space estimation here.
 461
 462     print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
 463
 464     local testdir=$DIR/d0.mdtest
 465     mkdir -p $testdir
 466     # mpi_run uses mpiuser
 467     chmod 0777 $testdir
 468
 469     # -i # : repeat each test # times
 470     # -d   : test dir
 471     # -n # : number of file/dir to create/stat/remove
 472     # -u   : each process create/stat/remove individually
 473
 474         local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
 475                 -n $mdtest_nFiles $mdtest_custom_params"
 476
 477         [ $type = "fpp" ] && cmd="$cmd -u"
 478
 479         echo "+ $cmd"
 480         # find out if we need to use srun by checking $SRUN_PARTITION
 481         if [ "$SRUN_PARTITION" ]; then
 482                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 483                         -n $((num_clients * mdtest_THREADS)) \
 484                         -p $SRUN_PARTITION -- $cmd
 485         else
 486                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 487                         -np $((num_clients * mdtest_THREADS)) $cmd
 488         fi
 489
 490     local rc=$?
 491     if [ $rc != 0 ] ; then
 492         error "mdtest failed! $rc"
 493     fi
 494     rm -rf $testdir
 495 }
 496
 497 run_connectathon() {
 498
 499     cnt_DIR=${cnt_DIR:-""}
 500     cnt_NRUN=${cnt_NRUN:-10}
 501
 502     print_opts cnt_DIR cnt_NRUN
 503
 504     [ x$cnt_DIR = x ] &&
 505         { skip_env "connectathon dir not found" && return; }
 506
 507     [ -e $cnt_DIR/runtests ] || \
 508         { skip_env "No connectathon runtests found" && return; }
 509
 510     local testdir=$DIR/d0.connectathon
 511     mkdir -p $testdir
 512
 513     local savePWD=$PWD
 514     cd $cnt_DIR
 515
 516     #
 517     # cthon options (must be in this order)
 518     #
 519     # -N numpasses - will be passed to the runtests script.  This argument
 520     #         is optional.  It specifies the number of times to run
 521     #         through the tests.
 522     #
 523     # One of these test types
 524     #    -b  basic
 525     #    -g  general
 526     #    -s  special
 527     #    -l  lock
 528     #    -a  all of the above
 529     #
 530     # -f      a quick functionality test
 531     #
 532
 533     tests="-b -g -s"
 534     # Include lock tests unless we're running on nfsv4
 535     local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
 536     echo "$testdir: $fstype"
 537     if [[ $fstype != "nfs4" ]]; then
 538         tests="$tests -l"
 539     fi
 540     echo "tests: $tests"
 541     for test in $tests; do
 542         local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
 543         local rc=0
 544
 545         log "$cmd"
 546         eval $cmd
 547         rc=$?
 548         [ $rc = 0 ] || error "connectathon failed: $rc"
 549     done
 550
 551     cd $savePWD
 552     rm -rf $testdir
 553 }
 554
 555 run_ior() {
 556         local type=${1:="ssf"}
 557
 558         IOR=${IOR:-$(which IOR 2> /dev/null || true)}
 559         # threads per client
 560         ior_THREADS=${ior_THREADS:-2}
 561         ior_iteration=${ior_iteration:-1}
 562         ior_blockSize=${ior_blockSize:-6}
 563         ior_blockUnit=${ior_blockUnit:-M}   # K, M, G
 564         ior_xferSize=${ior_xferSize:-1M}
 565         ior_type=${ior_type:-POSIX}
 566         ior_DURATION=${ior_DURATION:-30}        # minutes
 567         local multiplier=1
 568         case ${ior_blockUnit} in
 569                 [G])
 570                         multiplier=$((1024 * 1024 * 1024))
 571                         ;;
 572                 [M])
 573                         multiplier=$((1024 * 1024))
 574                         ;;
 575                 [K])
 576                         multiplier=1024
 577                         ;;
 578                 *)      error "Incorrect block unit should be one of [KMG]"
 579                         ;;
 580         esac
 581
 582         [ x$IOR = x ] &&
 583         { skip_env "IOR not found" && return; }
 584
 585         # calculate the space in bytes
 586         local space=$(df -B 1 -P $DIR | tail -n 1 | awk '{ print $4 }')
 587         local total_threads=$((num_clients * ior_THREADS))
 588         echo "+ $ior_blockSize * $multiplier * $total_threads "
 589         if [ $((space / 2)) -le \
 590              $((ior_blockSize * multiplier * total_threads)) ]; then
 591                 ior_blockSize=$((space / 2 / multiplier / total_threads))
 592                 [ $ior_blockSize -eq 0 ] && \
 593                 skip_env "Need free space more than $((2 * total_threads)) \
 594                          ${ior_blockUnit}: have $((space / multiplier))" &&
 595                          return
 596
 597                 echo "(reduced blockSize to $ior_blockSize \
 598                      ${ior_blockUnit} bytes)"
 599         fi
 600
 601     print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 602
 603     local testdir=$DIR/d0.ior.$type
 604     mkdir -p $testdir
 605     # mpi_run uses mpiuser
 606     chmod 0777 $testdir
 607     if [ "$NFSCLIENT" ]; then
 608         setstripe_nfsserver $testdir -c -1 ||
 609             { error "setstripe on nfsserver failed" && return 1; }
 610     else
 611         $LFS setstripe $testdir -c -1 ||
 612             { error "setstripe failed" && return 2; }
 613     fi
 614         #
 615         # -b N  blockSize --
 616         #       contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
 617         # -o S  testFileName
 618         # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
 619         # -w    writeFile -- write file"
 620         # -r    readFile -- read existing file"
 621         # -W    checkWrite -- check read after write"
 622         # -C    reorderTasks -- changes task ordering to n+1 ordering for readback
 623         # -T    maxTimeDuration -- max time in minutes to run tests"
 624         # -k    keepFile -- keep testFile(s) on program exit
 625
 626         local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
 627                 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
 628                 -i $ior_iteration -T $ior_DURATION -k"
 629
 630         [ $type = "fpp" ] && cmd="$cmd -F"
 631
 632         echo "+ $cmd"
 633         # find out if we need to use srun by checking $SRUN_PARTITION
 634         if [ "$SRUN_PARTITION" ]; then
 635                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 636                         -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
 637                         -- $cmd
 638         else
 639                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 640                         -np $((num_clients * $ior_THREADS)) $cmd
 641         fi
 642
 643     local rc=$?
 644     if [ $rc != 0 ] ; then
 645         error "ior failed! $rc"
 646     fi
 647     rm -rf $testdir
 648 }
 649
 650 run_mib() {
 651
 652     MIB=${MIB:=$(which mib 2> /dev/null || true)}
 653     # threads per client
 654     mib_THREADS=${mib_THREADS:-2}
 655     mib_xferSize=${mib_xferSize:-1m}
 656     mib_xferLimit=${mib_xferLimit:-5000}
 657     mib_timeLimit=${mib_timeLimit:-300}
 658
 659     if [ "$NFSCLIENT" ]; then
 660         skip "skipped for NFSCLIENT mode"
 661         return
 662     fi
 663
 664     [ x$MIB = x ] &&
 665         { skip_env "MIB not found" && return; }
 666
 667     print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
 668         MACHINEFILE
 669
 670     local testdir=$DIR/d0.mib
 671     mkdir -p $testdir
 672     # mpi_run uses mpiuser
 673     chmod 0777 $testdir
 674     $LFS setstripe $testdir -c -1 ||
 675         { error "setstripe failed" && return 2; }
 676     #
 677     # -I    Show intermediate values in output
 678     # -H    Show headers in output
 679     # -L    Do not issue new system calls after this many seconds
 680     # -s    Use system calls of this size
 681     # -t    test dir
 682     # -l    Issue no more than this many system calls
 683     local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
 684         -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
 685
 686         echo "+ $cmd"
 687         # find out if we need to use srun by checking $SRUN_PARTITION
 688         if [ "$SRUN_PARTITION" ]; then
 689                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 690                         -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
 691                         -- $cmd
 692         else
 693                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 694                         -np $((num_clients * mib_THREADS)) $cmd
 695         fi
 696
 697     local rc=$?
 698     if [ $rc != 0 ] ; then
 699         error "mib failed! $rc"
 700     fi
 701     rm -rf $testdir
 702 }
 703
 704 run_cascading_rw() {
 705
 706     CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
 707     # threads per client
 708     casc_THREADS=${casc_THREADS:-2}
 709     casc_REP=${casc_REP:-300}
 710
 711     if [ "$NFSCLIENT" ]; then
 712         skip "skipped for NFSCLIENT mode"
 713         return
 714     fi
 715
 716     [ x$CASC_RW = x ] &&
 717         { skip_env "cascading_rw not found" && return; }
 718
 719     # FIXME
 720     # Need space estimation here.
 721
 722     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 723
 724     local testdir=$DIR/d0.cascading_rw
 725     mkdir -p $testdir
 726     # mpi_run uses mpiuser
 727     chmod 0777 $testdir
 728
 729     # -g: debug mode
 730     # -n: repeat test # times
 731
 732     local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
 733
 734         echo "+ $cmd"
 735         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 736                 -np $((num_clients * $casc_THREADS)) $cmd
 737
 738     local rc=$?
 739     if [ $rc != 0 ] ; then
 740         error "cascading_rw failed! $rc"
 741     fi
 742     rm -rf $testdir
 743 }
 744
 745 run_write_append_truncate() {
 746
 747     # threads per client
 748     write_THREADS=${write_THREADS:-8}
 749     write_REP=${write_REP:-10000}
 750
 751     if [ "$NFSCLIENT" ]; then
 752         skip "skipped for NFSCLIENT mode"
 753         return
 754     fi
 755
 756     # location is lustre/tests dir
 757     if ! which write_append_truncate > /dev/null 2>&1 ; then
 758         skip_env "write_append_truncate not found"
 759         return
 760     fi
 761
 762     # FIXME
 763     # Need space estimation here.
 764
 765     local testdir=$DIR/d0.write_append_truncate
 766     local file=$testdir/f0.wat
 767
 768     print_opts clients write_REP write_THREADS MACHINEFILE
 769
 770     mkdir -p $testdir
 771     # mpi_run uses mpiuser
 772     chmod 0777 $testdir
 773
 774     local cmd="write_append_truncate -n $write_REP $file"
 775
 776         echo "+ $cmd"
 777         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 778                 -np $((num_clients * $write_THREADS)) $cmd
 779
 780     local rc=$?
 781     if [ $rc != 0 ] ; then
 782         error "write_append_truncate failed! $rc"
 783         return $rc
 784     fi
 785     rm -rf $testdir
 786 }
 787
 788 run_write_disjoint() {
 789
 790     WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint \
 791         2> /dev/null || true)}
 792     # threads per client
 793     wdisjoint_THREADS=${wdisjoint_THREADS:-4}
 794     wdisjoint_REP=${wdisjoint_REP:-10000}
 795
 796     if [ "$NFSCLIENT" ]; then
 797         skip "skipped for NFSCLIENT mode"
 798         return
 799     fi
 800
 801     [ x$WRITE_DISJOINT = x ] &&
 802         { skip_env "write_disjoint not found" && return; }
 803
 804     # FIXME
 805     # Need space estimation here.
 806
 807     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
 808         MACHINEFILE
 809     local testdir=$DIR/d0.write_disjoint
 810     mkdir -p $testdir
 811     # mpi_run uses mpiuser
 812     chmod 0777 $testdir
 813
 814     local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
 815
 816         echo "+ $cmd"
 817         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 818                 -np $((num_clients * $wdisjoint_THREADS)) $cmd
 819
 820     local rc=$?
 821     if [ $rc != 0 ] ; then
 822         error "write_disjoint failed! $rc"
 823     fi
 824     rm -rf $testdir
 825 }
 826
 827 run_parallel_grouplock() {
 828
 829     PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
 830         2> /dev/null || true)}
 831     parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
 832
 833     if [ "$NFSCLIENT" ]; then
 834         skip "skipped for NFSCLIENT mode"
 835         return
 836     fi
 837
 838     [ x$PARALLEL_GROUPLOCK = x ] &&
 839         { skip "PARALLEL_GROUPLOCK not found" && return; }
 840
 841     print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
 842
 843     local testdir=$DIR/d0.parallel_grouplock
 844     mkdir -p $testdir
 845     # mpi_run uses mpiuser
 846     chmod 0777 $testdir
 847
 848     local cmd
 849     local status=0
 850     local subtest
 851         for i in $(seq 12); do
 852                 subtest="-t $i"
 853                 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
 854                 echo "+ $cmd"
 855
 856                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 857                         -np $parallel_grouplock_MINTASKS $cmd
 858                 local rc=$?
 859                 if [ $rc != 0 ] ; then
 860                         error_noexit "parallel_grouplock subtests $subtest " \
 861                                      "failed! $rc"
 862                 else
 863                         echo "parallel_grouplock subtests $subtest PASS"
 864                 fi
 865                 let status=$((status + rc))
 866                 # clear debug to collect one log per one test
 867                 do_nodes $(comma_list $(nodes_list)) lctl clear
 868         done
 869         [ $status -eq 0 ] || error "parallel_grouplock status: $status"
 870         rm -rf $testdir
 871 }
 872
 873 cleanup_statahead () {
 874     trap 0
 875
 876     local clients=$1
 877     local mntpt_root=$2
 878     local num_mntpts=$3
 879
 880     for i in $(seq 0 $num_mntpts);do
 881         zconf_umount_clients $clients ${mntpt_root}$i ||
 882             error_exit "Failed to umount lustre on ${mntpt_root}$i"
 883     done
 884 }
 885
 886 run_statahead () {
 887
 888     statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
 889     statahead_NUMFILES=${statahead_NUMFILES:-500000}
 890
 891     if [[ -n $NFSCLIENT ]]; then
 892         skip "Statahead testing is not supported on NFS clients."
 893         return 0
 894     fi
 895
 896     [ x$MDSRATE = x ] &&
 897         { skip_env "mdsrate not found" && return; }
 898
 899     print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
 900
 901     # create large dir
 902
 903     # do not use default "d[0-9]*" dir name
 904     # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
 905     local dir=dstatahead
 906     local testdir=$DIR/$dir
 907
 908     # cleanup only if dir exists
 909     # cleanup only $statahead_NUMFILES number of files
 910     # ignore the other files created by someone else
 911     [ -d $testdir ] &&
 912         mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 913             $statahead_NUMFILES $testdir 'f%%d' --ignore
 914
 915     mkdir -p $testdir
 916     # mpi_run uses mpiuser
 917     chmod 0777 $testdir
 918
 919     local num_files=$statahead_NUMFILES
 920
 921     local IFree=$(inodes_available)
 922     if [ $IFree -lt $num_files ]; then
 923       num_files=$IFree
 924     fi
 925
 926     cancel_lru_locks mdc
 927
 928     local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
 929     local cmd2="--nfiles $num_files --filefmt 'f%%d'"
 930     local cmd="$cmd1 $cmd2"
 931     echo "+ $cmd"
 932
 933         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 934                 -np $((num_clients * 32)) $cmd
 935
 936     local rc=$?
 937     if [ $rc != 0 ] ; then
 938         error "mdsrate failed to create $rc"
 939         return $rc
 940     fi
 941
 942     local num_mntpts=$statahead_NUMMNTPTS
 943     local mntpt_root=$TMP/mntpt/lustre
 944     local mntopts=$MNTOPTSTATAHEAD
 945
 946     echo "Mounting $num_mntpts lustre clients starts on $clients"
 947     trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
 948     for i in $(seq 0 $num_mntpts); do
 949         zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
 950             error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
 951     done
 952
 953     do_rpc_nodes $clients cancel_lru_locks mdc
 954
 955     do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
 956
 957     mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 958         $num_files $testdir 'f%%d' --ignore
 959
 960     # use rm instead of rmdir because of
 961     # testdir could contain the files created by someone else,
 962     # or by previous run where is num_files prev > num_files current
 963     rm -rf $testdir
 964     cleanup_statahead $clients $mntpt_root $num_mntpts
 965 }
 966
 967 run_fs_test() {
 968         # fs_test.x is the default name for exe
 969         FS_TEST=${FS_TEST:=$(which fs_test.x 2> /dev/null || true)}
 970
 971         local clients=${CLIENTS:-$(hostname)}
 972         local testdir=$DIR/d0.fs_test
 973         local file=${testdir}/fs_test
 974         fs_test_threads=${fs_test_threads:-2}
 975         fs_test_type=${fs_test_type:-1}
 976         fs_test_nobj=${fs_test_nobj:-10}
 977         fs_test_check=${fs_test_check:-3}
 978         fs_test_strided=${fs_test_strided:-1}
 979         fs_test_touch=${fs_test_touch:-3}
 980         fs_test_supersize=${fs_test_supersize:-1}
 981         fs_test_op=${fs_test_op:-write}
 982         fs_test_barriers=${fs_test_barriers:-bopen,bwrite,bclose}
 983         fs_test_io=${fs_test_io:-mpi}
 984         fs_test_objsize=${fs_test_objsize:-100}
 985         fs_test_objunit=${fs_test_objunit:-1048576} # 1 mb
 986         fs_test_ndirs=${fs_test_ndirs:-80000}
 987
 988         [ x$FS_TEST = x ] &&
 989                 { skip "FS_TEST not found" && return; }
 990
 991         # Space estimation  in bytes
 992         local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
 993         local total_threads=$((num_clients * fs_test_threads))
 994         echo "+ $fs_test_objsize * $fs_test_objunit * $total_threads "
 995         if [ $((space / 2)) -le \
 996                 $((fs_test_objsize * fs_test_objunit * total_threads)) ]; then
 997                         fs_test_objsize=$((space / 2 / fs_test_objunit / \
 998                                 total_threads))
 999                         [ $fs_test_objsize -eq 0 ] && \
1000                         skip_env "Need free space more than \
1001                                 $((2 * total_threads * fs_test_objunit)) \
1002                                 : have $((space / fs_test_objunit))" &&
1003                                 return
1004
1005                         echo "(reduced objsize to \
1006                                 $((fs_test_objsize * fs_test_objunit)) bytes)"
1007         fi
1008
1009         print_opts FS_TEST clients fs_test_threads fs_test_objsize MACHINEFILE
1010
1011         mkdir -p $testdir
1012         # mpi_run uses mpiuser
1013         chmod 0777 $testdir
1014
1015         # --nodb          Turn off the database code at runtime
1016         # -g --target     The path to the data file
1017         # -t --type       Whether to do N-N (1) or N-1 (2)
1018         # -n --nobj       The number of objects written/read by each proc
1019         # -z --size       The size of each object
1020         # -d ---num_nn_dirs Number of subdirectories for files
1021         # -C --check      Check every byte using argument 3.
1022         # --collective    Whether to use collective I/O (for N-1, mpi-io only)
1023         # -s --strided    Whether to use a strided pattern (for N-1 only)
1024         # -T --touch      Touch every byte using argument 3
1025         # -o --op         Whether to read only (read) or write only (write)
1026         # -b --barriers   When to barrier.
1027         # -i --io         Use POSIX, MPI, or PLFS IO routines (mpi|posix|plfs)
1028         # -S --supersize  Specify how many objects per superblock
1029
1030         local cmd="$FS_TEST -nodb -g $file -t $fs_test_type -n $fs_test_nobj \
1031                 -z $((fs_test_objsize * fs_test_objunit)) -d $fs_test_ndirs \
1032                 -C $fs_test_check -collective -s $fs_test_strided \
1033                 -T $fs_test_touch -o $fs_test_op -b $fs_test_barriers \
1034                 -i $fs_test_io -S $fs_test_supersize"
1035
1036         echo "+ $cmd"
1037         mpi_run "-np $((num_clients * fs_test_threads))" $cmd
1038
1039         local rc=$?
1040         if [ $rc != 0 ] ; then
1041                 error "fs_test failed! $rc"
1042         fi
1043
1044         rm -rf $testdir
1045 }
1046
# ior_mdtest_parallel - run IOR in the background while mdtest runs in
# the foreground, then report on both.
#
# $1 is handed unchanged to both run_ior and run_mdtest.
# Prints one line per failing tool and returns 1 if either of them
# failed, 0 when both succeeded.
ior_mdtest_parallel() {
        local type=$1
        local ior_status=0
        local mdtest_status=0
        local ior_pid

        # Start IOR first so that it overlaps with the mdtest run.
        run_ior $type &
        ior_pid=$!

        # The status is captured in the else branch because $? would be
        # reset by a negated condition.
        if run_mdtest $type; then
                :
        else
                mdtest_status=$?
                echo "mdtest failed with error $mdtest_status"
        fi

        # Collect the background IOR run.
        if wait $ior_pid; then
                :
        else
                ior_status=$?
                echo "ior failed with error $ior_status"
        fi

        if [[ $ior_status -eq 0 && $mdtest_status -eq 0 ]]; then
                return 0
        fi
        return 1
}