lustre/tests/functions.sh

   1 #!/bin/bash
   2
   3 # Simple function used by run_*.sh scripts
   4
   5 assert_env() {
   6     local failed=""
   7     for name in $@; do
   8         if [ -z "${!name}" ]; then
   9             echo "$0: $name must be set"
  10             failed=1
  11         fi
  12     done
  13     [ $failed ] && exit 1 || true
  14 }
  15
  16 # lrepl - Lustre test Read-Eval-Print Loop.
  17 #
  18 # This function implements a REPL for the Lustre test framework.  It
  19 # doesn't exec an actual shell because the user may want to inspect
  20 # variables and use functions from the test framework.
  21 lrepl() {
  22     local line
  23     local rawline
  24     local prompt
  25
  26     cat <<EOF
  27         This is an interactive read-eval-print loop interactive shell
  28         simulation that you can use to debug failing tests.  You can
  29         enter most bash command lines (see notes below).
  30
  31         Use this REPL to inspect variables, set them, call test
  32         framework shell functions, etcetera.
  33
  34         'exit' or EOF to exit this shell.
  35
  36         set \$retcode to 0 to cause the assertion failure that
  37         triggered this REPL to be ignored.
  38
  39         Examples:
  40             do_facet ost1 lctl get_param ost.*.ost.threads_*
  41             do_rpc_nodes \$OSTNODES unload_modules
  42
  43         NOTES:
  44             All but the last line of multi-line statements or blocks
  45             must end in a backslash.
  46
  47             "Here documents" are not supported.
  48
  49             History is not supported, but command-line editing is.
  50
  51 EOF
  52
  53     # Prompt escapes don't work in read -p, sadly.
  54     prompt=":test_${testnum:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
  55
  56     # We use read -r to get close to a shell experience
  57     while read -e -r -p "$prompt" rawline; do
  58         line=
  59         case "$rawline" in
  60         # Don't want to exit-exit, just exit the REPL
  61         exit) break;;
  62         # We need to handle continuations, and read -r doesn't do
  63         # that for us.  Yet we need read -r.
  64         #
  65         # We also use case/esac to compare lines read to "*\\"
  66         # because [ "$line" = *\\ ] and variants of that don't work.
  67         *\\) line="$rawline"
  68             while read -e -r -p '> ' rawline
  69             do
  70                 line="$line"$'\n'"$rawline"
  71                 case "$rawline" in
  72                 # We could check for here documents by matching
  73                 # against *<<*, but who cares.
  74                 *\\) continue;;
  75                 *) break;;
  76                 esac
  77             done
  78             ;;
  79         *) line=$rawline
  80         esac
  81
  82         case "$line" in
  83         *\\) break;;
  84         esac
  85
  86         # Finally!  Time to eval.
  87         eval "$line"
  88     done
  89
  90     echo $'\n\tExiting interactive shell...\n'
  91     return 0
  92 }
  93
  94 # lassert - Lustre test framework assert
  95 #
  96 # Arguments: failure code, failure message, expression/statement
  97 #
  98 # lassert evaluates the expression given, and, if false, calls
  99 # error() to trigger test failure.  If REPL_ON_LASSERT is true then
 100 # lassert will call lrepl() to give the user an interactive shell.
 101 # If the REPL sets retcode=0 then the assertion failure will be
 102 # ignored.
 103 lassert() {
 104     local retcode=$1
 105     local msg=$2
 106     shift 2
 107
 108     echo "checking $* ($(eval echo \""$*"\"))..."
 109     eval "$@" && return 0;
 110
 111     if ${REPL_ON_LASSERT:-false}; then
 112         echo "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 113 $msg"
 114         lrepl
 115     fi
 116
 117     error "Assertion $retcode failed: $* (expanded: $(eval echo \""$*"\"))
 118 $msg"
 119     return $retcode
 120 }
 121
# setmodopts - set module options for subsequent calls to load_modules
 123 #
 124 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
 125 #        setmodopts -a module_name new_value [var_in_which_to_save_old_value]
 126 #
 127 # In the second usage the new value is appended to the old.
 128 setmodopts() {
 129         local _append=false
 130
 131         if [ "$1" = -a ]; then
 132             _append=true
 133             shift
 134         fi
 135
 136         local _var=MODOPTS_$1
 137         local _newvalue=$2
 138         local _savevar=$3
 139         local _oldvalue
 140
 141         # Dynamic naming of variables is a pain in bash.  In ksh93 we could
 142         # write "nameref opts_var=${modname}_MODOPTS" then assign directly
 143         # to opts_var.  Associative arrays would also help, alternatively.
 144         # Alas, we're stuck with eval until all distros move to a more recent
 145         # version of bash.  Fortunately we don't need to eval unset and export.
 146
 147         if [ -z "$_newvalue" ]; then
 148             unset $_var
 149             return 0
 150         fi
 151
 152         _oldvalue=${!var}
 153         $_append && _newvalue="$_oldvalue $_newvalue"
 154         export $_var="$_newvalue"
 155         echo setmodopts: ${_var}=${_newvalue}
 156
 157         [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
 158 }
 159
 160 echoerr () { echo "$@" 1>&2 ; }
 161
 162 signaled() {
 163     echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
 164
 165     local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
 166     kill -TERM -$PGID
 167     sleep 5
 168     kill -KILL -$PGID
 169 }
 170
 171 mpi_run () {
 172     local mpirun="$MPIRUN $MPIRUN_OPTIONS"
 173     local command="$mpirun $@"
 174     local mpilog=$TMP/mpi.log
 175     local rc
 176
 177     if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
 178         echo "+ chmod 0777 $MOUNT"
 179         chmod 0777 $MOUNT
 180         command="su $MPI_USER sh -c \"$command \""
 181     fi
 182
 183     ls -ald $MOUNT
 184     echo "+ $command"
 185     eval $command 2>&1 | tee $mpilog || true
 186
 187     rc=${PIPESTATUS[0]}
 188     if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
 189        rc=1
 190     fi
 191     return $rc
 192 }
 193
 194 nids_list () {
 195         local list
 196         local escape="$2"
 197         for i in ${1//,/ }; do
 198                 if [ "$list" = "" ]; then
 199                         list="$i@$NETTYPE"
 200                 else
 201                         list="$list$escape $i@$NETTYPE"
 202                 fi
 203         done
 204         echo $list
 205 }
 206
 207 # FIXME: all setup/cleanup can be done without rpc.sh
 208 lst_end_session () {
 209     local verbose=false
 210     [ x$1 = x--verbose ] && verbose=true
 211
 212     export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
 213     [ "$LST_SESSION" == "" ] && return
 214
 215         $LST stop b
 216     if $verbose; then
 217         $LST show_error c s
 218     fi
 219     $LST end_session
 220 }
 221
 222 lst_session_cleanup_all () {
 223     local list=$(comma_list $(nodes_list))
 224     do_rpc_nodes $list lst_end_session
 225 }
 226
 227 lst_cleanup () {
 228     lsmod | grep -q lnet_selftest && \
 229         rmmod lnet_selftest > /dev/null 2>&1 || true
 230 }
 231
 232 lst_cleanup_all () {
 233    local list=$(comma_list $(nodes_list))
 234
 235    # lst end_session needs to be executed only locally
 236    # i.e. on node where lst new_session was called
 237    lst_end_session --verbose
 238    do_rpc_nodes $list lst_cleanup
 239 }
 240
 241 lst_setup () {
 242     load_module lnet_selftest
 243 }
 244
 245 lst_setup_all () {
 246     local list=$(comma_list $(nodes_list))
 247     do_rpc_nodes $list lst_setup
 248 }
 249
 250 ###
 251 # short_hostname
 252 #
# Passed a single argument, strips the first period and everything
# following it.
 255 # client-20.lab.whamcloud.com becomes client-20
 256 short_hostname() {
 257   echo $(sed 's/\..*//' <<< $1)
 258 }
 259
 260 ###
 261 # short_nodename
 262 #
 263 # Find remote nodename, stripped of any domain, etc.
 264 # 'hostname -s' is easy, but not implemented on all systems
 265 short_nodename() {
 266         local rname=$(do_node $1 "uname -n" || echo -1)
 267         if [[ "$rname" = "-1" ]]; then
 268                 rname=$1
 269         fi
 270         echo $(short_hostname $rname)
 271 }
 272
 273 print_opts () {
 274     local var
 275
 276     echo OPTIONS:
 277
 278     for i in $@; do
 279         var=$i
 280         echo "${var}=${!var}"
 281     done
 282     [ -e $MACHINEFILE ] && cat $MACHINEFILE
 283 }
 284
 285 run_compilebench() {
 286         local dir=${1:-$DIR}
 287         local cbench_DIR=${cbench_DIR:-""}
 288         local cbench_IDIRS=${cbench_IDIRS:-2}
 289         local cbench_RUNS=${cbench_RUNS:-2}
 290
 291         print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 292
 293         [ x$cbench_DIR = x ] &&
 294                 { skip_env "compilebench not found" && return; }
 295
 296         [ -e $cbench_DIR/compilebench ] ||
 297                 { skip_env "No compilebench build" && return; }
 298
 299         # Space estimation:
 300         # compile dir kernel-0  ~1GB
 301         # required space        ~1GB * cbench_IDIRS
 302         local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
 303         if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
 304                 cbench_IDIRS=$((space / 1024 / 1024))
 305                 [[ $cbench_IDIRS -eq 0 ]] &&
 306                         skip_env "Need free space at least 1GB, have $space" &&
 307                         return
 308
 309                 echo "reducing initial dirs to $cbench_IDIRS"
 310         fi
 311         echo "free space = $space KB"
 312
 313         # FIXME:
 314         # t-f _base needs to be modifyed to set properly tdir
 315         # for new "test_foo" functions names
 316         # local testdir=$DIR/$tdir
 317         local testdir=$dir/d0.compilebench.$$
 318         mkdir -p $testdir
 319
 320     local savePWD=$PWD
 321     cd $cbench_DIR
 322     local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
 323         -r $cbench_RUNS --makej"
 324
 325     log "$cmd"
 326
 327     local rc=0
 328     eval $cmd
 329     rc=$?
 330
 331     cd $savePWD
 332     [ $rc = 0 ] || error "compilebench failed: $rc"
 333     rm -rf $testdir
 334 }
 335
 336 run_metabench() {
 337         local dir=${1:-$DIR}
 338         local mntpt=${2:-$MOUNT}
 339         METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
 340         mbench_NFILES=${mbench_NFILES:-30400}
 341         # threads per client
 342         mbench_THREADS=${mbench_THREADS:-4}
 343         mbench_OPTIONS=${mbench_OPTIONS:-}
 344         mbench_CLEANUP=${mbench_CLEANUP:-true}
 345
 346         [ x$METABENCH = x ] &&
 347                 { skip_env "metabench not found" && return; }
 348
 349         print_opts METABENCH clients mbench_NFILES mbench_THREADS
 350
 351         local testdir=$dir/d0.metabench
 352         mkdir -p $testdir
 353         # mpi_run uses mpiuser
 354         chmod 0777 $testdir
 355
 356         # -C             Run the file creation tests. Creates zero byte files.
 357         # -S             Run the file stat tests.
 358         # -c nfile       Number of files to be used in each test.
 359         # -k             Cleanup files when finished.
 360         local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S $mbench_OPTIONS"
 361         echo "+ $cmd"
 362
 363         # find out if we need to use srun by checking $SRUN_PARTITION
 364         if [ "$SRUN_PARTITION" ]; then
 365                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 366                         -n $((num_clients * mbench_THREADS)) \
 367                         -p $SRUN_PARTITION -- $cmd
 368         else
 369                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 370                         -np $((num_clients * $mbench_THREADS)) $cmd
 371         fi
 372
 373         local rc=$?
 374         if [ $rc != 0 ] ; then
 375                 error "metabench failed! $rc"
 376         fi
 377
 378         if $mbench_CLEANUP; then
 379                 rm -rf $testdir
 380         else
 381                 mv $dir/d0.metabench $mntpt/_xxx.$(date +%s).d0.metabench
 382         fi
 383 }
 384
 385 run_simul() {
 386
 387     SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
 388     # threads per client
 389     simul_THREADS=${simul_THREADS:-2}
 390     simul_REP=${simul_REP:-20}
 391
 392     if [ "$NFSCLIENT" ]; then
 393         skip "skipped for NFSCLIENT mode"
 394         return
 395     fi
 396
 397     [ x$SIMUL = x ] &&
 398         { skip_env "simul not found" && return; }
 399
 400     # FIXME
 401     # Need space estimation here.
 402
 403     print_opts SIMUL clients simul_REP simul_THREADS
 404
 405     local testdir=$DIR/d0.simul
 406     mkdir -p $testdir
 407     # mpi_run uses mpiuser
 408     chmod 0777 $testdir
 409
 410     # -n # : repeat each test # times
 411     # -N # : repeat the entire set of tests # times
 412
 413     local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
 414
 415         echo "+ $cmd"
 416         # find out if we need to use srun by checking $SRUN_PARTITION
 417         if [ "$SRUN_PARTITION" ]; then
 418                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 419                         -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
 420                         -- $cmd
 421         else
 422                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 423                         -np $((num_clients * simul_THREADS)) $cmd
 424         fi
 425
 426     local rc=$?
 427     if [ $rc != 0 ] ; then
 428         error "simul failed! $rc"
 429     fi
 430     rm -rf $testdir
 431 }
 432
 433 run_mdtest() {
 434
 435     MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
 436     # threads per client
 437     mdtest_THREADS=${mdtest_THREADS:-2}
 438     mdtest_nFiles=${mdtest_nFiles:-"100000"}
 439     # We devide the files by number of core
 440     mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
 441     mdtest_iteration=${mdtest_iteration:-1}
 442         local mdtest_custom_params=${mdtest_custom_params:-""}
 443
 444     local type=${1:-"ssf"}
 445
 446     if [ "$NFSCLIENT" ]; then
 447         skip "skipped for NFSCLIENT mode"
 448         return
 449     fi
 450
 451     [ x$MDTEST = x ] &&
 452         { skip_env "mdtest not found" && return; }
 453
 454     # FIXME
 455     # Need space estimation here.
 456
 457     print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
 458
 459     local testdir=$DIR/d0.mdtest
 460     mkdir -p $testdir
 461     # mpi_run uses mpiuser
 462     chmod 0777 $testdir
 463
 464     # -i # : repeat each test # times
 465     # -d   : test dir
 466     # -n # : number of file/dir to create/stat/remove
 467     # -u   : each process create/stat/remove individually
 468
 469         local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
 470                 -n $mdtest_nFiles $mdtest_custom_params"
 471
 472         [ $type = "fpp" ] && cmd="$cmd -u"
 473
 474         echo "+ $cmd"
 475         # find out if we need to use srun by checking $SRUN_PARTITION
 476         if [ "$SRUN_PARTITION" ]; then
 477                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 478                         -n $((num_clients * mdtest_THREADS)) \
 479                         -p $SRUN_PARTITION -- $cmd
 480         else
 481                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 482                         -np $((num_clients * mdtest_THREADS)) $cmd
 483         fi
 484
 485     local rc=$?
 486     if [ $rc != 0 ] ; then
 487         error "mdtest failed! $rc"
 488     fi
 489     rm -rf $testdir
 490 }
 491
 492 run_connectathon() {
 493         local dir=${1:-$DIR}
 494         cnt_DIR=${cnt_DIR:-""}
 495         cnt_NRUN=${cnt_NRUN:-10}
 496
 497         print_opts cnt_DIR cnt_NRUN
 498
 499         [ x$cnt_DIR = x ] &&
 500                 { skip_env "connectathon dir not found" && return; }
 501
 502         [ -e $cnt_DIR/runtests ] ||
 503                 { skip_env "No connectathon runtests found" && return; }
 504
 505         # Space estimation:
 506         # "special" tests create a 30 MB file + misc. small files
 507         # required space ~40 MB
 508         local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
 509         if [[ $space -le $((1024 * 40)) ]]; then
 510                 skip_env "Need free space at least 40MB, have $space KB" &&
 511                 return
 512         fi
 513         echo "free space = $space KB"
 514
 515         local testdir=$dir/d0.connectathon
 516         mkdir -p $testdir
 517
 518         local savePWD=$PWD
 519         cd $cnt_DIR
 520
 521         #
 522         # To run connectathon:
 523         # runtests [-a|-b|-g|-s|-l] [-f|-n|-t] [-N numpasses] [test-directory]
 524         #
 525         # One of the following test types
 526         #    -b  basic
 527         #    -g  general
 528         #    -s  special
 529         #    -l  lock
 530         #    -a  all of the above
 531         #
 532         # -f  a quick functional test
 533         # -n  suppress directory operations (mkdir and rmdir)
 534         # -t  run with time statistics (default for basic tests)
 535         #
 536         # -N numpasses - specifies the number of times to run
 537         #                the tests. Optional.
 538
 539         tests="-b -g -s"
 540         # Include lock tests unless we're running on nfsv4
 541         local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
 542         echo "$testdir: $fstype"
 543         if [[ $fstype != "nfs4" ]]; then
 544                 tests="$tests -l"
 545         fi
 546         echo "tests: $tests"
 547         for test in $tests; do
 548                 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
 549                 local rc=0
 550
 551                 log "$cmd"
 552                 eval $cmd
 553                 rc=$?
 554                 [ $rc = 0 ] || error "connectathon failed: $rc"
 555         done
 556
 557         cd $savePWD
 558         rm -rf $testdir
 559 }
 560
 561 run_ior() {
 562         local type=${1:="ssf"}
 563         local dir=${2:-$DIR}
 564         local testdir=$dir/d0.ior.$type
 565         local nfs_srvmntpt=$3
 566
 567         if [ "$NFSCLIENT" ]; then
 568                 [[ -n $nfs_srvmntpt ]] ||
 569                         { error "NFSCLIENT mode, but nfs exported dir"\
 570                                 "is not set!" && return 1; }
 571         fi
 572
 573         IOR=${IOR:-$(which IOR 2> /dev/null || true)}
 574         [ x$IOR = x ] &&
 575                 { skip_env "IOR not found" && return; }
 576
 577         # threads per client
 578         ior_THREADS=${ior_THREADS:-2}
 579         ior_iteration=${ior_iteration:-1}
 580         ior_blockSize=${ior_blockSize:-6}
 581         ior_blockUnit=${ior_blockUnit:-M}   # K, M, G
 582         ior_xferSize=${ior_xferSize:-1M}
 583         ior_type=${ior_type:-POSIX}
 584         ior_DURATION=${ior_DURATION:-30}        # minutes
 585         local multiplier=1
 586         case ${ior_blockUnit} in
 587                 [G])
 588                         multiplier=$((1024 * 1024 * 1024))
 589                         ;;
 590                 [M])
 591                         multiplier=$((1024 * 1024))
 592                         ;;
 593                 [K])
 594                         multiplier=1024
 595                         ;;
 596                 *)      error "Incorrect block unit should be one of [KMG]"
 597                         ;;
 598         esac
 599
 600         # calculate the space in bytes
 601         local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
 602         local total_threads=$((num_clients * ior_THREADS))
 603         echo "+ $ior_blockSize * $multiplier * $total_threads "
 604         if [ $((space / 2)) -le \
 605              $((ior_blockSize * multiplier * total_threads)) ]; then
 606                 ior_blockSize=$((space / 2 / multiplier / total_threads))
 607                 [ $ior_blockSize -eq 0 ] && \
 608                 skip_env "Need free space more than $((2 * total_threads)) \
 609                          ${ior_blockUnit}: have $((space / multiplier))" &&
 610                          return
 611
 612                 echo "(reduced blockSize to $ior_blockSize \
 613                      ${ior_blockUnit} bytes)"
 614         fi
 615
 616         print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 617
 618         mkdir -p $testdir
 619         # mpi_run uses mpiuser
 620         chmod 0777 $testdir
 621         if [ -z "$NFSCLIENT" ]; then
 622                 $LFS setstripe $testdir -c -1 ||
 623                         { error "setstripe failed" && return 2; }
 624         fi
 625
 626         #
 627         # -b N  blockSize --
 628         #       contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
 629         # -o S  testFileName
 630         # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
 631         # -w    writeFile -- write file"
 632         # -r    readFile -- read existing file"
 633         # -W    checkWrite -- check read after write"
 634         # -C    reorderTasks -- changes task ordering to n+1 ordering for readback
 635         # -T    maxTimeDuration -- max time in minutes to run tests"
 636         # -k    keepFile -- keep testFile(s) on program exit
 637
 638         local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
 639                 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
 640                 -i $ior_iteration -T $ior_DURATION -k"
 641
 642         [ $type = "fpp" ] && cmd="$cmd -F"
 643
 644         echo "+ $cmd"
 645         # find out if we need to use srun by checking $SRUN_PARTITION
 646         if [ "$SRUN_PARTITION" ]; then
 647                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 648                         -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
 649                         -- $cmd
 650         else
 651                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 652                         -np $((num_clients * $ior_THREADS)) $cmd
 653         fi
 654
 655     local rc=$?
 656     if [ $rc != 0 ] ; then
 657         error "ior failed! $rc"
 658     fi
 659     rm -rf $testdir
 660 }
 661
 662 run_mib() {
 663
 664     MIB=${MIB:=$(which mib 2> /dev/null || true)}
 665     # threads per client
 666     mib_THREADS=${mib_THREADS:-2}
 667     mib_xferSize=${mib_xferSize:-1m}
 668     mib_xferLimit=${mib_xferLimit:-5000}
 669     mib_timeLimit=${mib_timeLimit:-300}
 670
 671     if [ "$NFSCLIENT" ]; then
 672         skip "skipped for NFSCLIENT mode"
 673         return
 674     fi
 675
 676     [ x$MIB = x ] &&
 677         { skip_env "MIB not found" && return; }
 678
 679     print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
 680         MACHINEFILE
 681
 682     local testdir=$DIR/d0.mib
 683     mkdir -p $testdir
 684     # mpi_run uses mpiuser
 685     chmod 0777 $testdir
 686     $LFS setstripe $testdir -c -1 ||
 687         { error "setstripe failed" && return 2; }
 688     #
 689     # -I    Show intermediate values in output
 690     # -H    Show headers in output
 691     # -L    Do not issue new system calls after this many seconds
 692     # -s    Use system calls of this size
 693     # -t    test dir
 694     # -l    Issue no more than this many system calls
 695     local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
 696         -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
 697
 698         echo "+ $cmd"
 699         # find out if we need to use srun by checking $SRUN_PARTITION
 700         if [ "$SRUN_PARTITION" ]; then
 701                 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 702                         -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
 703                         -- $cmd
 704         else
 705                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 706                         -np $((num_clients * mib_THREADS)) $cmd
 707         fi
 708
 709     local rc=$?
 710     if [ $rc != 0 ] ; then
 711         error "mib failed! $rc"
 712     fi
 713     rm -rf $testdir
 714 }
 715
 716 run_cascading_rw() {
 717
 718     CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
 719     # threads per client
 720     casc_THREADS=${casc_THREADS:-2}
 721     casc_REP=${casc_REP:-300}
 722
 723     if [ "$NFSCLIENT" ]; then
 724         skip "skipped for NFSCLIENT mode"
 725         return
 726     fi
 727
 728     [ x$CASC_RW = x ] &&
 729         { skip_env "cascading_rw not found" && return; }
 730
 731     # FIXME
 732     # Need space estimation here.
 733
 734     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 735
 736     local testdir=$DIR/d0.cascading_rw
 737     mkdir -p $testdir
 738     # mpi_run uses mpiuser
 739     chmod 0777 $testdir
 740
 741     # -g: debug mode
 742     # -n: repeat test # times
 743
 744     local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
 745
 746         echo "+ $cmd"
 747         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 748                 -np $((num_clients * $casc_THREADS)) $cmd
 749
 750     local rc=$?
 751     if [ $rc != 0 ] ; then
 752         error "cascading_rw failed! $rc"
 753     fi
 754     rm -rf $testdir
 755 }
 756
 757 run_write_append_truncate() {
 758
 759     # threads per client
 760     write_THREADS=${write_THREADS:-8}
 761     write_REP=${write_REP:-10000}
 762
 763     if [ "$NFSCLIENT" ]; then
 764         skip "skipped for NFSCLIENT mode"
 765         return
 766     fi
 767
 768     # location is lustre/tests dir
 769     if ! which write_append_truncate > /dev/null 2>&1 ; then
 770         skip_env "write_append_truncate not found"
 771         return
 772     fi
 773
 774     # FIXME
 775     # Need space estimation here.
 776
 777     local testdir=$DIR/d0.write_append_truncate
 778     local file=$testdir/f0.wat
 779
 780     print_opts clients write_REP write_THREADS MACHINEFILE
 781
 782     mkdir -p $testdir
 783     # mpi_run uses mpiuser
 784     chmod 0777 $testdir
 785
 786     local cmd="write_append_truncate -n $write_REP $file"
 787
 788         echo "+ $cmd"
 789         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 790                 -np $((num_clients * $write_THREADS)) $cmd
 791
 792     local rc=$?
 793     if [ $rc != 0 ] ; then
 794         error "write_append_truncate failed! $rc"
 795         return $rc
 796     fi
 797     rm -rf $testdir
 798 }
 799
 800 run_write_disjoint() {
 801         if [ "$NFSCLIENT" ]; then
 802                 skip "skipped for NFSCLIENT mode"
 803                 return
 804         fi
 805
 806         WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null ||
 807                                            true)}
 808
 809         [ x$WRITE_DISJOINT = x ] &&
 810                 { skip_env "write_disjoint not found" && return; }
 811
 812         # threads per client
 813         wdisjoint_THREADS=${wdisjoint_THREADS:-4}
 814         wdisjoint_REP=${wdisjoint_REP:-10000}
 815         chunk_size_limit=$1
 816
 817     # FIXME
 818     # Need space estimation here.
 819
 820     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
 821         MACHINEFILE
 822     local testdir=$DIR/d0.write_disjoint
 823     mkdir -p $testdir
 824     # mpi_run uses mpiuser
 825     chmod 0777 $testdir
 826
 827         local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP -m \
 828                         $chunk_size_limit"
 829
 830         echo "+ $cmd"
 831         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 832                 -np $((num_clients * $wdisjoint_THREADS)) $cmd
 833
 834     local rc=$?
 835     if [ $rc != 0 ] ; then
 836         error "write_disjoint failed! $rc"
 837     fi
 838     rm -rf $testdir
 839 }
 840
 841 run_parallel_grouplock() {
 842
 843     PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
 844         2> /dev/null || true)}
 845     parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
 846
 847     if [ "$NFSCLIENT" ]; then
 848         skip "skipped for NFSCLIENT mode"
 849         return
 850     fi
 851
 852     [ x$PARALLEL_GROUPLOCK = x ] &&
 853         { skip "PARALLEL_GROUPLOCK not found" && return; }
 854
 855     print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
 856
 857     local testdir=$DIR/d0.parallel_grouplock
 858     mkdir -p $testdir
 859     # mpi_run uses mpiuser
 860     chmod 0777 $testdir
 861
 862     local cmd
 863     local status=0
 864     local subtest
 865         for i in $(seq 12); do
 866                 subtest="-t $i"
 867                 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
 868                 echo "+ $cmd"
 869
 870                 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 871                         -np $parallel_grouplock_MINTASKS $cmd
 872                 local rc=$?
 873                 if [ $rc != 0 ] ; then
 874                         error_noexit "parallel_grouplock subtests $subtest " \
 875                                      "failed! $rc"
 876                 else
 877                         echo "parallel_grouplock subtests $subtest PASS"
 878                 fi
 879                 let status=$((status + rc))
 880                 # clear debug to collect one log per one test
 881                 do_nodes $(comma_list $(nodes_list)) lctl clear
 882         done
 883         [ $status -eq 0 ] || error "parallel_grouplock status: $status"
 884         rm -rf $testdir
 885 }
 886
 887 cleanup_statahead () {
 888     trap 0
 889
 890     local clients=$1
 891     local mntpt_root=$2
 892     local num_mntpts=$3
 893
 894     for i in $(seq 0 $num_mntpts);do
 895         zconf_umount_clients $clients ${mntpt_root}$i ||
 896             error_exit "Failed to umount lustre on ${mntpt_root}$i"
 897     done
 898 }
 899
 900 run_statahead () {
 901
 902     statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
 903     statahead_NUMFILES=${statahead_NUMFILES:-500000}
 904
 905     if [[ -n $NFSCLIENT ]]; then
 906         skip "Statahead testing is not supported on NFS clients."
 907         return 0
 908     fi
 909
 910     [ x$MDSRATE = x ] &&
 911         { skip_env "mdsrate not found" && return; }
 912
 913     print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
 914
 915     # create large dir
 916
 917     # do not use default "d[0-9]*" dir name
 918     # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
 919     local dir=dstatahead
 920     local testdir=$DIR/$dir
 921
 922     # cleanup only if dir exists
 923     # cleanup only $statahead_NUMFILES number of files
 924     # ignore the other files created by someone else
 925     [ -d $testdir ] &&
 926         mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 927             $statahead_NUMFILES $testdir 'f%%d' --ignore
 928
 929     mkdir -p $testdir
 930     # mpi_run uses mpiuser
 931     chmod 0777 $testdir
 932
 933     local num_files=$statahead_NUMFILES
 934
 935     local IFree=$(inodes_available)
 936     if [ $IFree -lt $num_files ]; then
 937       num_files=$IFree
 938     fi
 939
 940     cancel_lru_locks mdc
 941
 942     local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
 943     local cmd2="--nfiles $num_files --filefmt 'f%%d'"
 944     local cmd="$cmd1 $cmd2"
 945     echo "+ $cmd"
 946
 947         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
 948                 -np $((num_clients * 32)) $cmd
 949
 950     local rc=$?
 951     if [ $rc != 0 ] ; then
 952         error "mdsrate failed to create $rc"
 953         return $rc
 954     fi
 955
 956     local num_mntpts=$statahead_NUMMNTPTS
 957     local mntpt_root=$TMP/mntpt/lustre
 958     local mntopts=$MNTOPTSTATAHEAD
 959
 960     echo "Mounting $num_mntpts lustre clients starts on $clients"
 961     trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
 962     for i in $(seq 0 $num_mntpts); do
 963         zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
 964             error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
 965     done
 966
 967     do_rpc_nodes $clients cancel_lru_locks mdc
 968
 969     do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
 970
 971     mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 972         $num_files $testdir 'f%%d' --ignore
 973
 974     # use rm instead of rmdir because of
 975     # testdir could contain the files created by someone else,
 976     # or by previous run where is num_files prev > num_files current
 977     rm -rf $testdir
 978     cleanup_statahead $clients $mntpt_root $num_mntpts
 979 }
 980
 981 run_fs_test() {
 982         # fs_test.x is the default name for exe
 983         FS_TEST=${FS_TEST:=$(which fs_test.x 2> /dev/null || true)}
 984
 985         local clients=${CLIENTS:-$(hostname)}
 986         local testdir=$DIR/d0.fs_test
 987         local file=${testdir}/fs_test
 988         fs_test_threads=${fs_test_threads:-2}
 989         fs_test_type=${fs_test_type:-1}
 990         fs_test_nobj=${fs_test_nobj:-10}
 991         fs_test_check=${fs_test_check:-3}
 992         fs_test_strided=${fs_test_strided:-1}
 993         fs_test_touch=${fs_test_touch:-3}
 994         fs_test_supersize=${fs_test_supersize:-1}
 995         fs_test_op=${fs_test_op:-write}
 996         fs_test_barriers=${fs_test_barriers:-bopen,bwrite,bclose}
 997         fs_test_io=${fs_test_io:-mpi}
 998         fs_test_objsize=${fs_test_objsize:-100}
 999         fs_test_objunit=${fs_test_objunit:-1048576} # 1 mb
1000         fs_test_ndirs=${fs_test_ndirs:-80000}
1001
1002         [ x$FS_TEST = x ] &&
1003                 { skip "FS_TEST not found" && return; }
1004
1005         # Space estimation  in bytes
1006         local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
1007         local total_threads=$((num_clients * fs_test_threads))
1008         echo "+ $fs_test_objsize * $fs_test_objunit * $total_threads "
1009         if [ $((space / 2)) -le \
1010                 $((fs_test_objsize * fs_test_objunit * total_threads)) ]; then
1011                         fs_test_objsize=$((space / 2 / fs_test_objunit / \
1012                                 total_threads))
1013                         [ $fs_test_objsize -eq 0 ] && \
1014                         skip_env "Need free space more than \
1015                                 $((2 * total_threads * fs_test_objunit)) \
1016                                 : have $((space / fs_test_objunit))" &&
1017                                 return
1018
1019                         echo "(reduced objsize to \
1020                                 $((fs_test_objsize * fs_test_objunit)) bytes)"
1021         fi
1022
1023         print_opts FS_TEST clients fs_test_threads fs_test_objsize MACHINEFILE
1024
1025         mkdir -p $testdir
1026         # mpi_run uses mpiuser
1027         chmod 0777 $testdir
1028
1029         # --nodb          Turn off the database code at runtime
1030         # -g --target     The path to the data file
1031         # -t --type       Whether to do N-N (1) or N-1 (2)
1032         # -n --nobj       The number of objects written/read by each proc
1033         # -z --size       The size of each object
1034         # -d ---num_nn_dirs Number of subdirectories for files
1035         # -C --check      Check every byte using argument 3.
1036         # --collective    Whether to use collective I/O (for N-1, mpi-io only)
1037         # -s --strided    Whether to use a strided pattern (for N-1 only)
1038         # -T --touch      Touch every byte using argument 3
1039         # -o --op         Whether to read only (read) or write only (write)
1040         # -b --barriers   When to barrier.
1041         # -i --io         Use POSIX, MPI, or PLFS IO routines (mpi|posix|plfs)
1042         # -S --supersize  Specify how many objects per superblock
1043
1044         local cmd="$FS_TEST -nodb -g $file -t $fs_test_type -n $fs_test_nobj \
1045                 -z $((fs_test_objsize * fs_test_objunit)) -d $fs_test_ndirs \
1046                 -C $fs_test_check -collective -s $fs_test_strided \
1047                 -T $fs_test_touch -o $fs_test_op -b $fs_test_barriers \
1048                 -i $fs_test_io -S $fs_test_supersize"
1049
1050         echo "+ $cmd"
1051         mpi_run "-np $((num_clients * fs_test_threads))" $cmd
1052
1053         local rc=$?
1054         if [ $rc != 0 ] ; then
1055                 error "fs_test failed! $rc"
1056         fi
1057
1058         rm -rf $testdir
1059 }
1060
# Run IOR and mdtest concurrently.
#
# run_ior is started as a background job, run_mdtest runs in the
# foreground, and the background job is then reaped with wait.  $1 is
# forwarded unchanged to both.  One line is printed for each side that
# finished with a non-zero status.
#
# Returns 0 when both succeeded, 1 otherwise.
ior_mdtest_parallel() {
        local type=$1
        local ior_pid
        local ior_status=0
        local mdtest_status=0

        run_ior $type &
        ior_pid=$!

        run_mdtest $type || {
                mdtest_status=$?
                echo "mdtest failed with error $mdtest_status"
        }

        wait $ior_pid || {
                ior_status=$?
                echo "ior failed with error $ior_status"
        }

        if (( ior_status == 0 && mdtest_status == 0 )); then
                return 0
        fi
        return 1
}
1078
# Run fio on the client nodes from a job file.
#
# Tunables (environment, defaults applied here):
#   FIO         - fio executable (looked up with which when empty)
#   fio_jobNum  - number of [jobN] sections in a generated job file (4)
#   fio_jobFile - job file path ($TMP/fiojobfile.<epoch seconds>)
#   fio_bs      - block size step in MiB; job N uses fio_bs * N (1)
# Globals read: CLIENTS, DIR, TMP, SLOW
#
# If the job file does not exist locally a simple one is generated.  The
# job file is copied to the clients through do_nodes when it was just
# generated or when the existence check on the clients fails.  The
# runtime written to a generated file is 60 seconds when SLOW is "no",
# 600 otherwise.  The test is skipped when FIO is empty; a non-zero
# status from the fio run is reported through error.
run_fio() {
        FIO=${FIO:=$(which fio 2> /dev/null || true)}

        local clients=${CLIENTS:-$(hostname)}
        local fio_jobNum=${fio_jobNum:-4}
        local fio_jobFile=${fio_jobFile:-$TMP/fiojobfile.$(date +%s)}
        local fio_bs=${fio_bs:-1}
        local testdir=$DIR/d0.fio
        local file=${testdir}/fio
        local runtime=60
        local propagate=false
        # keep the command line and the loop counter out of the caller's
        # scope, as the other run_* helpers do with "local cmd"
        local cmd
        local i

        [ "$SLOW" = "no" ] || runtime=600

        [ x$FIO = x ] &&
                { skip_env "FIO not found" && return; }

        mkdir -p $testdir

        # use fio job file if exists,
        # create a simple one if missing
        if ! [ -f $fio_jobFile ]; then
                # \$(hostname) is written literally so that it is expanded
                # when the file content is echoed on each client
                cat >> $fio_jobFile <<EOF
[global]
rw=randwrite
size=128m
time_based=1
runtime=$runtime
filename=${file}_\$(hostname)
EOF
                # bs size increased by $i for each job
                for ((i=1; i<=fio_jobNum; i++)); do
                        cat >> $fio_jobFile <<EOF

[job$i]
bs=$(( fio_bs * i ))m
EOF
                done
                # job file is created, should be propagated to all clients
                propagate=true
        fi


        # propagate the job file if not all clients have it yet or
        # if the job file was created during the test run
        if ! do_nodesv $clients " [ -f $fio_jobFile ] " ||
           $propagate; then
                local cfg=$(cat $fio_jobFile)
                do_nodes $clients "echo \\\"$cfg\\\" > ${fio_jobFile}" ||
                        error "job file $fio_jobFile is not propagated"
                do_nodesv $clients "cat ${fio_jobFile}"
        fi

        cmd="$FIO $fio_jobFile"
        echo "+ $cmd"

        log "clients: $clients $cmd"

        local rc=0
        do_nodesv $clients "$cmd "
        rc=$?

        [ $rc = 0 ] || error "fio failed: $rc"
        rm -rf $testdir
}
1144
# Run the xdd I/O benchmark on the client nodes.
#
# Tunables (environment, defaults applied here): XDD (executable, looked
# up with which when empty), xdd_queuedepth (4), xdd_blocksize (512),
# xdd_reqsize (128), xdd_mbytes (100), xdd_passes (40), xdd_rwratio (0),
# xdd_ntargets (6) and xdd_custom_params (extra xdd options).
# Globals read: CLIENTS, DIR
#
# Target files are $DIR/d0.xdd/xdd0 .. xdd<ntargets-1>.  The test is
# skipped when XDD is empty; a non-zero status from the xdd run is
# reported through error.
run_xdd() {
        XDD=${XDD:=$(which xdd 2> /dev/null || true)}

        local clients=${CLIENTS:-$(hostname)}
        local testdir=$DIR/d0.xdd
        xdd_queuedepth=${xdd_queuedepth:-4}
        xdd_blocksize=${xdd_blocksize:-512}
        xdd_reqsize=${xdd_reqsize:-128}
        xdd_mbytes=${xdd_mbytes:-100}
        xdd_passes=${xdd_passes:-40}
        xdd_rwratio=${xdd_rwratio:-0}
        xdd_ntargets=${xdd_ntargets:-6}
        local xdd_custom_params=${xdd_custom_params:-"-dio -stoponerror \
                -maxpri -minall -noproclock -nomemlock"}

        [ x$XDD = x ] &&
                { skip "XDD not found" && return; }

        print_opts XDD clients xdd_queuedepth xdd_blocksize xdd_reqsize \
                xdd_mbytes xdd_passes xdd_rwratio

        mkdir -p $testdir

        local files=""
        # loop counter; local so the caller's "i" is not overwritten
        local i
        # Target files creates based on the given number of targets
        for (( i=0; i < $xdd_ntargets; i++ ))
        do
                files+="${testdir}/xdd"$i" "
        done

        # -targets      specifies the devices or files to perform operation
        # -reqsize      number of 'blocks' per operation
        # -mbytes       number of 1024*1024-byte blocks to transfer
        # -blocksize    size of a single 'block'
        # -passes       number of times to read mbytes
        # -queuedepth   number of commands to queue on the target
        # -rwratio      percentage of read to write operations
        # -verbose      will print out statistics on each pass

        local cmd="$XDD -targets $xdd_ntargets $files -reqsize $xdd_reqsize \
                -mbytes $xdd_mbytes -blocksize $xdd_blocksize \
                -passes $xdd_passes -queuedepth $xdd_queuedepth \
                -rwratio $xdd_rwratio -verbose $xdd_custom_params"
        echo "+ $cmd"

        local rc=0
        do_nodesv $clients "$cmd "
        rc=$?

        [ $rc = 0 ] || error "xdd failed: $rc"

        rm -rf $testdir
}