3 # functions used by other scripts
9 if [ -z "${!name}" ]; then
10 echo "$0: $name must be set"
14 [ $failed ] && exit 1 || true
17 # lrepl - Lustre test Read-Eval-Print Loop.
19 # This function implements a REPL for the Lustre test framework. It
20 # doesn't exec an actual shell because the user may want to inspect
21 # variables and use functions from the test framework.
28 This is an interactive read-eval-print loop interactive shell
29 simulation that you can use to debug failing tests. You can
30 enter most bash command lines (see notes below).
32 Use this REPL to inspect variables, set them, call test
33 framework shell functions, etcetera.
35 'exit' or EOF to exit this shell.
37 set \$retcode to 0 to cause the assertion failure that
38 triggered this REPL to be ignored.
41 do_facet ost1 lctl get_param ost.*.ost.threads_*
42 do_rpc_nodes \$OSTNODES unload_modules
45 All but the last line of multi-line statements or blocks
46 must end in a backslash.
48 "Here documents" are not supported.
50 History is not supported, but command-line editing is.
54 # Prompt escapes don't work in read -p, sadly.
55 prompt=":${TESTNAME:-UNKNOWN}:$(uname -n):$(basename $PWD)% "
57 # We use read -r to get close to a shell experience
58 while read -e -r -p "$prompt" rawline; do
61 # Don't want to exit-exit, just exit the REPL
63 # We need to handle continuations, and read -r doesn't do
64 # that for us. Yet we need read -r.
66 # We also use case/esac to compare lines read to "*\\"
67 # because [ "$line" = *\\ ] and variants of that don't work.
69 while read -e -r -p '> ' rawline
71 line="$line"$'\n'"$rawline"
73 # We could check for here documents by matching
74 # against *<<*, but who cares.
87 # Finally! Time to eval.
91 echo $'\n\tExiting interactive shell...\n'
95 # lassert - Lustre test framework assert
97 # Arguments: failure code, failure message, expression/statement
99 # lassert evaluates the expression given, and, if false, calls
100 # error() to trigger test failure. If REPL_ON_LASSERT is true then
101 # lassert will call lrepl() to give the user an interactive shell.
102 # If the REPL sets retcode=0 then the assertion failure will be
109 echo "checking $* ($(eval echo \""$*"\"))..."
110 eval "$@" && return 0;
112 if ${REPL_ON_LASSERT:-false}; then
113 echo "Assertion $retcode failed: $*
114 (expanded: $(eval echo \""$*"\")) $msg"
118 error "Assertion $retcode failed: $*
119 (expanded: $(eval echo \""$*"\")) $msg"
123 # setmodopts - set module options for subsequent calls to load_modules
125 # Usage: setmodopts module_name new_value [var_in_which_to_save_old_value]
126 # setmodopts -a module_name new_value [var_in_which_to_save_old_value]
128 # In the second usage the new value is appended to the old.
132 if [ "$1" = -a ]; then
137 local _var=MODOPTS_$1
142 # Dynamic naming of variables is a pain in bash. In ksh93 we could
143 # write "nameref opts_var=${modname}_MODOPTS" then assign directly
144 # to opts_var. Associative arrays would also help, alternatively.
145 # Alas, we're stuck with eval until all distros move to a more recent
146 # version of bash. Fortunately we don't need to eval unset and export.
148 if [ -z "$_newvalue" ]; then
154 $_append && _newvalue="$_oldvalue $_newvalue"
155 export $_var="$_newvalue"
156 echo setmodopts: ${_var}=${_newvalue}
158 [ -n "$_savevar" ] && eval $_savevar=\""$_oldvalue"\"
# echoerr - print all arguments to stderr (stdout stays clean for data).
echoerr () {
	echo "$@" >&2
}
164 echoerr "$(date +'%F %H:%M:%S'): client load was signaled to terminate"
166 local PGID=$(ps -eo "%c %p %r" | awk "/ $PPID / {print \$3}")
173 local mpirun="$MPIRUN $MPIRUN_OPTIONS"
174 local command="$mpirun $@"
175 local mpilog=$TMP/mpi.log
178 if [ -n "$MPI_USER" -a "$MPI_USER" != root -a -n "$mpirun" ]; then
179 echo "+ chmod 0777 $MOUNT"
181 command="su $MPI_USER bash -c \"$command \""
186 eval $command 2>&1 | tee $mpilog || true
189 if [ $rc -eq 0 ] && grep -q "p4_error:" $mpilog ; then
198 for i in ${1//,/ }; do
199 if [ "$list" = "" ]; then
202 list="$list$escape $i@$NETTYPE"
208 # FIXME: all setup/cleanup can be done without rpc.sh
211 [ x$1 = x--verbose ] && verbose=true
213 export LST_SESSION=`$LST show_session 2>/dev/null | awk '{print $5}'`
214 [ "$LST_SESSION" == "" ] && return
223 lst_session_cleanup_all () {
224 local list=$(comma_list $(nodes_list))
225 do_rpc_nodes $list lst_end_session
229 lsmod | grep -q lnet_selftest && \
230 rmmod lnet_selftest > /dev/null 2>&1 || true
234 local list=$(comma_list $(nodes_list))
236 # lst end_session needs to be executed only locally
237 # i.e. on node where lst new_session was called
238 lst_end_session --verbose
239 do_rpc_nodes $list lst_cleanup
243 load_module lnet_selftest
247 local list=$(comma_list $(nodes_list))
248 do_rpc_nodes $list lst_setup
254 # Passed a single argument, strips everything off following
255 # and includes the first period.
256 # client-20.lab.whamcloud.com becomes client-20
258 echo $(sed 's/\..*//' <<< $1)
264 # Find remote nodename, stripped of any domain, etc.
265 # 'hostname -s' is easy, but not implemented on all systems
267 local rname=$(do_node $1 "uname -n" || echo -1)
268 if [[ "$rname" = "-1" ]]; then
271 echo $(short_hostname $rname)
281 echo "${var}=${!var}"
283 [ -e $MACHINEFILE ] && cat $MACHINEFILE
287 [ "$(stat -f -c %T $1)" = "lustre" ]
290 setstripe_getstripe () {
295 is_lustre $file || return 0
297 if [ -n "$params" ]; then
298 echo "setstripe option: $params"
299 $LFS setstripe $params $file ||
300 error "setstripe $params failed"
302 $LFS getstripe -d $file ||
303 error "getstripe $file failed"
308 local cbench_DIR=${cbench_DIR:-""}
309 local cbench_IDIRS=${cbench_IDIRS:-2}
310 local cbench_RUNS=${cbench_RUNS:-2}
312 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
314 [ x$cbench_DIR = x ] &&
315 skip_env "compilebench not found"
317 [ -e $cbench_DIR/compilebench ] ||
318 skip_env "No compilebench build"
321 # compile dir kernel-0 ~1GB
322 # required space ~1GB * cbench_IDIRS
323 local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
324 if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
325 cbench_IDIRS=$((space / 1024 / 1024))
326 [[ $cbench_IDIRS -eq 0 ]] &&
327 skip_env "Need free space at least 1GB, have $space"
329 echo "reducing initial dirs to $cbench_IDIRS"
331 echo "free space = $space KB"
334 # t-f _base needs to be modified to properly set tdir
335 # for new "test_foo" functions names
336 # local testdir=$DIR/$tdir
337 local testdir=$dir/d0.compilebench.$$
338 test_mkdir -p $testdir
339 setstripe_getstripe $testdir $cbench_STRIPEPARAMS
343 local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
344 -r $cbench_RUNS --makej"
353 [ $rc = 0 ] || error "compilebench failed: $rc"
357 # try to understand why a test is running out of space/quota
360 local tmpfile=$(mktemp)
362 $LFS df $dir || df $dir
363 $LFS df -i $dir || df -i $dir
364 $LFS quota -u mpiuser $dir
365 $LFS quota -u root $dir
367 du -skx $dir/../* | sort -nr | tee $tmpfile
368 local topdir=$(awk '{ print $2; exit; }' $tmpfile)
369 du -skx $topdir/* | sort -nr | tee $tmpfile
370 topdir=$(awk '{ print $2; exit; }' $tmpfile)
371 du -skx $topdir/* | sort -nr
377 local mntpt=${2:-$MOUNT}
378 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
379 mbench_NFILES=${mbench_NFILES:-30400}
381 mbench_THREADS=${mbench_THREADS:-4}
382 mbench_OPTIONS=${mbench_OPTIONS:-}
383 mbench_CLEANUP=${mbench_CLEANUP:-true}
385 [ x$METABENCH = x ] && skip_env "metabench not found"
387 print_opts METABENCH clients mbench_NFILES mbench_THREADS
389 local testdir=$dir/d0.metabench
390 test_mkdir -p $testdir
391 setstripe_getstripe $testdir $mbench_STRIPEPARAMS
393 # mpi_run uses mpiuser
396 # try to understand why this test is running out of space/quota
397 find_space_usage $dir
399 # -C Run the file creation tests. Creates zero byte files.
400 # -S Run the file stat tests.
401 # -c nfile Number of files to be used in each test.
402 # -k Cleanup files when finished.
403 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S $mbench_OPTIONS"
406 # find out if we need to use srun by checking $SRUN_PARTITION
407 if [ "$SRUN_PARTITION" ]; then
408 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
409 -n $((num_clients * mbench_THREADS)) \
410 -p $SRUN_PARTITION -- $cmd
412 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
413 -np $((num_clients * $mbench_THREADS)) $cmd
417 if [ $rc != 0 ] ; then
418 find_space_usage $dir
419 error "metabench failed! $rc"
422 if $mbench_CLEANUP; then
425 mv $dir/d0.metabench $mntpt/_xxx.$(date +%s).d0.metabench
430 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
431 [ x$SIMUL = x ] && skip_env "simul not found"
432 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
435 simul_THREADS=${simul_THREADS:-2}
436 simul_REP=${simul_REP:-20}
439 # Need space estimation here.
441 print_opts SIMUL clients simul_REP simul_THREADS
443 local testdir=$DIR/d0.simul
445 setstripe_getstripe $testdir $simul_STRIPEPARAMS
447 # mpi_run uses mpiuser
450 # -n # : repeat each test # times
451 # -N # : repeat the entire set of tests # times
453 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
456 # find out if we need to use srun by checking $SRUN_PARTITION
457 if [ "$SRUN_PARTITION" ]; then
458 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
459 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
462 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
463 -np $((num_clients * simul_THREADS)) $cmd
468 if [ $rc != 0 ] ; then
469 error "simul failed! $rc"
475 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
476 [ x$MDTEST = x ] && skip_env "mdtest not found"
477 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
480 mdtest_THREADS=${mdtest_THREADS:-2}
481 mdtest_nFiles=${mdtest_nFiles:-"100000"}
482 # We divide the files by the number of cores
483 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
484 mdtest_iteration=${mdtest_iteration:-1}
485 local mdtest_custom_params=${mdtest_custom_params:-""}
486 local type=${1:-"ssf"}
488 local mdtest_Nmntp=${mdtest_Nmntp:-1}
490 if [ $type = "ssf" ] && [ $mdtest_Nmntp -ne 1 ]; then
491 skip "shared directory mode is not compatible" \
492 "with multiple directory paths"
496 # Need space estimation here.
498 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
500 local testdir=$DIR/d0.mdtest
502 setstripe_getstripe $testdir $mdtest_STRIPEPARAMS
505 for ((i=1; i<mdtest_Nmntp; i++)); do
506 zconf_mount_clients $clients $MOUNT$i "$mntopts" ||
507 error_exit "Failed $clients on $MOUNT$i"
508 local dir=$DIR$i/d0.mdtest$i
510 setstripe_getstripe $dir $mdtest_SETSTRIPEPARAMS
512 testdir="$testdir@$dir"
514 # mpi_run uses mpiuser
516 # -i # : repeat each test # times
518 # -n # : number of file/dir to create/stat/remove
519 # -u : each process create/stat/remove individually
521 local cmd="$MDTEST -d $testdir -i $mdtest_iteration \
522 -n $mdtest_nFiles $mdtest_custom_params"
524 [ $type = "fpp" ] && cmd="$cmd -u"
527 # find out if we need to use srun by checking $SRUN_PARTITION
528 if [ "$SRUN_PARTITION" ]; then
529 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
530 -n $((num_clients * mdtest_THREADS)) \
531 -p $SRUN_PARTITION -- $cmd
533 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
534 -np $((num_clients * mdtest_THREADS)) $cmd
539 if [ $rc != 0 ] ; then
540 error "mdtest failed! $rc"
543 for ((i=1; i<mdtest_Nmntp; i++)); do
544 local dir=$DIR$i/d0.mdtest$i
546 zconf_umount_clients $clients $MOUNT$i ||
547 error_exit "Failed umount $MOUNT$i on $clients"
553 cnt_DIR=${cnt_DIR:-""}
554 cnt_NRUN=${cnt_NRUN:-10}
556 print_opts cnt_DIR cnt_NRUN
558 [ x$cnt_DIR = x ] && skip_env "connectathon dir not found"
559 [ -e $cnt_DIR/runtests ] || skip_env "No connectathon runtests found"
562 # "special" tests create a 30 MB file + misc. small files
563 # required space ~40 MB
564 local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
565 if [[ $space -le $((1024 * 40)) ]]; then
566 skip_env "Need free space at least 40MB, have $space KB"
568 echo "free space = $space KB"
570 local testdir=$dir/d0.connectathon
571 test_mkdir -p $testdir
572 setstripe_getstripe $testdir $cnt_STRIPEPARAMS
578 # To run connectathon:
579 # runtests [-a|-b|-g|-s|-l] [-f|-n|-t] [-N numpasses] [test-directory]
581 # One of the following test types
586 # -a all of the above
588 # -f a quick functional test
589 # -n suppress directory operations (mkdir and rmdir)
590 # -t run with time statistics (default for basic tests)
592 # -N numpasses - specifies the number of times to run
593 # the tests. Optional.
596 # Include lock tests unless we're running on nfsv4
597 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
598 echo "$testdir: $fstype"
599 if [[ $fstype != "nfs4" ]]; then
603 for test in $tests; do
604 local cmd="bash ./runtests -N $cnt_NRUN $test -f $testdir"
610 [ $rc = 0 ] || error "connectathon failed: $rc"
618 local type=${1:="ssf"}
620 local testdir=$dir/d0.ior.$type
621 local nfs_srvmntpt=$3
623 if [ "$NFSCLIENT" ]; then
624 [[ -n $nfs_srvmntpt ]] ||
625 { error "NFSCLIENT mode, but nfs exported dir"\
626 "is not set!" && return 1; }
629 IOR=${IOR:-$(which ior 2> /dev/null)}
630 [[ -z "$IOR" ]] && IOR=$(which IOR 2> /dev/null)
631 [[ -n "$IOR" ]] || skip_env "IOR/ior not found"
634 ior_THREADS=${ior_THREADS:-2}
635 ior_iteration=${ior_iteration:-1}
636 ior_blockSize=${ior_blockSize:-6}
637 ior_blockUnit=${ior_blockUnit:-M} # K, M, G
638 ior_xferSize=${ior_xferSize:-1M}
639 ior_type=${ior_type:-POSIX}
640 ior_DURATION=${ior_DURATION:-30} # minutes
641 ior_CLEANUP=${ior_CLEANUP:-true}
643 case ${ior_blockUnit} in
645 multiplier=$((1024 * 1024 * 1024))
648 multiplier=$((1024 * 1024))
653 *) error "Incorrect block unit should be one of [KMG]"
657 # calculate the space in bytes
658 local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
659 local total_threads=$((num_clients * ior_THREADS))
660 echo "+ $ior_blockSize * $multiplier * $total_threads "
661 if [ $((space / 2)) -le \
662 $((ior_blockSize * multiplier * total_threads)) ]; then
663 ior_blockSize=$((space / 2 / multiplier / total_threads))
664 [ $ior_blockSize -eq 0 ] &&
665 skip_env "Need free space more than $((2 * total_threads)) \
666 ${ior_blockUnit}: have $((space / multiplier))"
668 echo "(reduced blockSize to $ior_blockSize \
669 ${ior_blockUnit} bytes)"
672 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
674 client_load_mkdir $testdir
676 # mpi_run uses mpiuser
678 [[ "$ior_stripe_params" && -z "$ior_STRIPEPARAMS" ]] &&
679 ior_STRIPEPARAMS="$ior_stripe_params" &&
680 echo "got deprecated ior_stripe_params,"\
681 "use ior_STRIPEPARAMS instead"
682 setstripe_getstripe $testdir $ior_STRIPEPARAMS
686 # contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)"
688 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)"
689 # -w writeFile -- write file"
690 # -r readFile -- read existing file"
691 # -W checkWrite -- check read after write"
692 # -C reorderTasks -- changes task ordering to n+1 ordering for readback
693 # -T maxTimeDuration -- max time in minutes to run tests"
694 # -k keepFile -- keep testFile(s) on program exit
697 if [ -n "$ior_custom_params" ]; then
698 cmd="$IOR -o $testdir/iorData $ior_custom_params"
700 cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
701 -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
702 -i $ior_iteration -T $ior_DURATION -k"
705 [ $type = "fpp" ] && cmd="$cmd -F"
708 # find out if we need to use srun by checking $SRUN_PARTITION
709 if [ "$SRUN_PARTITION" ]; then
710 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
711 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
714 mpi_ior_custom_threads=${mpi_ior_custom_threads:-"$((num_clients * ior_THREADS))"}
715 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
716 -np $mpi_ior_custom_threads $cmd
721 if [ $rc != 0 ] ; then
722 error "ior failed! $rc"
724 $ior_CLEANUP && rm -rf $testdir || true
728 MIB=${MIB:=$(which mib 2> /dev/null || true)}
729 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
730 [ x$MIB = x ] && skip_env "MIB not found"
733 mib_THREADS=${mib_THREADS:-2}
734 mib_xferSize=${mib_xferSize:-1m}
735 mib_xferLimit=${mib_xferLimit:-5000}
736 mib_timeLimit=${mib_timeLimit:-300}
737 mib_STRIPEPARAMS=${mib_STRIPEPARAMS:-"-c -1"}
739 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
742 local testdir=$DIR/d0.mib
744 setstripe_getstripe $testdir $mib_STRIPEPARAMS
746 # mpi_run uses mpiuser
750 # -I Show intermediate values in output
751 # -H Show headers in output
752 # -L Do not issue new system calls after this many seconds
753 # -s Use system calls of this size
755 # -l Issue no more than this many system calls
756 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
757 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
760 # find out if we need to use srun by checking $SRUN_PARTITION
761 if [ "$SRUN_PARTITION" ]; then
762 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
763 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
766 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
767 -np $((num_clients * mib_THREADS)) $cmd
772 if [ $rc != 0 ] ; then
773 error "mib failed! $rc"
779 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
780 [ x$CASC_RW = x ] && skip_env "cascading_rw not found"
781 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
784 casc_THREADS=${casc_THREADS:-2}
785 casc_REP=${casc_REP:-300}
788 # Need space estimation here.
790 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
792 local testdir=$DIR/d0.cascading_rw
794 setstripe_getstripe $testdir $casc_STRIPEPARAMS
796 # mpi_run uses mpiuser
800 # -n: repeat test # times
802 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
805 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
806 -np $((num_clients * $casc_THREADS)) $cmd
810 if [ $rc != 0 ] ; then
811 error "cascading_rw failed! $rc"
816 run_write_append_truncate() {
817 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
818 # location is lustre/tests dir
819 WRITE_APPEND_TRUNCATE=${WRITE_APPEND_TRUNCATE:-$(which \
820 write_append_truncate 2> /dev/null || true)}
821 [[ -n "$WRITE_APPEND_TRUNCATE" ]] ||
822 skip_env "write_append_truncate not found"
825 write_THREADS=${write_THREADS:-8}
826 write_REP=${write_REP:-10000}
829 # Need space estimation here.
831 local testdir=$DIR/d0.write_append_truncate
832 local file=$testdir/f0.wat
834 print_opts clients write_REP write_THREADS MACHINEFILE
837 # mpi_run uses mpiuser
838 setstripe_getstripe $testdir $write_STRIPEPARAMS
842 local cmd="$WRITE_APPEND_TRUNCATE -n $write_REP $file"
845 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
846 -np $((num_clients * $write_THREADS)) $cmd
850 if [ $rc != 0 ] ; then
851 error "write_append_truncate failed! $rc"
857 run_write_disjoint() {
858 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null ||
860 [ x$WRITE_DISJOINT = x ] && skip_env "write_disjoint not found"
861 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
864 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
865 wdisjoint_REP=${wdisjoint_REP:-10000}
869 # Need space estimation here.
871 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
873 local testdir=$DIR/d0.write_disjoint
875 setstripe_getstripe $testdir $wdisjoint_STRIPEPARAMS
877 # mpi_run uses mpiuser
880 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP -m \
884 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
885 -np $((num_clients * $wdisjoint_THREADS)) $cmd
889 if [ $rc != 0 ] ; then
890 error "write_disjoint failed! $rc"
895 run_parallel_grouplock() {
896 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock \
897 2> /dev/null || true)}
899 [ x$PARALLEL_GROUPLOCK = x ] && skip "PARALLEL_GROUPLOCK not found"
900 [ "$NFSCLIENT" ] && skip "skipped for NFSCLIENT mode"
902 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
904 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
906 local testdir=$DIR/d0.parallel_grouplock
908 setstripe_getstripe $testdir $parallel_grouplock_STRIPEPARAMS
910 # mpi_run uses mpiuser
916 for i in $(seq 12); do
918 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
921 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
922 -np $parallel_grouplock_MINTASKS $cmd
924 if [ $rc != 0 ] ; then
925 error_noexit "parallel_grouplock subtests $subtest " \
928 echo "parallel_grouplock subtests $subtest PASS"
930 let status=$((status + rc))
931 # clear debug to collect one log per one test
932 do_nodes $(comma_list $(nodes_list)) lctl clear
934 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
938 cleanup_statahead () {
945 for i in $(seq 0 $num_mntpts);do
946 zconf_umount_clients $clients ${mntpt_root}$i ||
947 error_exit "Failed to umount lustre on ${mntpt_root}$i"
952 if [[ -n $NFSCLIENT ]]; then
953 skip "Statahead testing is not supported on NFS clients."
955 [ x$MDSRATE = x ] && skip_env "mdsrate not found"
957 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
958 statahead_NUMFILES=${statahead_NUMFILES:-500000}
960 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
964 # do not use default "d[0-9]*" dir name
965 # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
967 local testdir=$DIR/$dir
969 # cleanup only if dir exists
970 # cleanup only $statahead_NUMFILES number of files
971 # ignore the other files created by someone else
973 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
974 $statahead_NUMFILES $testdir 'f%%d' --ignore
977 setstripe_getstripe $testdir $statahead_STRIPEPARAMS
979 # mpi_run uses mpiuser
982 local num_files=$statahead_NUMFILES
984 local IFree=$(inodes_available)
985 if [ $IFree -lt $num_files ]; then
991 local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
992 local cmd2="--nfiles $num_files --filefmt 'f%%d'"
993 local cmd="$cmd1 $cmd2"
996 mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
997 -np $((num_clients * 32)) $cmd
1001 if [ $rc != 0 ] ; then
1002 error "mdsrate failed to create $rc"
1006 local num_mntpts=$statahead_NUMMNTPTS
1007 local mntpt_root=$TMP/mntpt/lustre
1008 local mntopts=$MNTOPTSTATAHEAD
1010 echo "Mounting $num_mntpts lustre clients starts on $clients"
1011 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
1012 for i in $(seq 0 $num_mntpts); do
1013 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
1014 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
1017 do_rpc_nodes $clients cancel_lru_locks mdc
1019 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
1021 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
1022 $num_files $testdir 'f%%d' --ignore
1024 # use rm instead of rmdir because of
1025 # testdir could contain the files created by someone else,
1026 # or by previous run where is num_files prev > num_files current
1028 cleanup_statahead $clients $mntpt_root $num_mntpts
1031 cleanup_rr_alloc () {
1033 local mntpt_root="$2"
1034 local rr_alloc_MNTPTS="$3"
1035 local mntpt_dir=$(dirname ${mntpt_root})
1037 $LFS find $DIR/$tdir -type f | xargs -n1 -P8 unlink
1038 for ((i=0; i < rr_alloc_MNTPTS; i++)); do
1039 zconf_umount_clients $clients ${mntpt_root}$i ||
1040 error_exit "Failed to umount lustre on ${mntpt_root}$i"
1042 do_nodes $clients "rm -rf $mntpt_dir"
1046 remote_mds_nodsh && skip "remote MDS with nodsh"
1048 RR_ALLOC=${RR_ALLOC:-$(which rr_alloc 2> /dev/null || true)}
1049 [[ -n "$RR_ALLOC" ]] || skip_env "rr_alloc not found"
1051 echo "===Test gives more reproduction percentage if number of "
1052 echo " client and ost are more. Test with 44 or more clients "
1053 echo " and 73 or more OSTs gives 100% reproduction rate=="
1055 declare -a diff_max_min_arr
1057 local qos_prec_objs="${TMP}/qos_and_precreated_objects"
1058 local rr_alloc_NFILES=${rr_alloc_NFILES:-555}
1059 local rr_alloc_MNTPTS=${rr_alloc_MNTPTS:-11}
1060 local total_MNTPTS=$((rr_alloc_MNTPTS * num_clients))
1061 local mntpt_root="${TMP}/rr_alloc_mntpt/lustre"
1062 test_mkdir -c $MDSCOUNT $DIR/$tdir
1063 setstripe_getstripe $DIR/$tdir $rr_alloc_STRIPEPARAMS
1065 ost_set_temp_seq_width_all $DATA_SEQ_MAX_WIDTH
1067 (( ONLY_REPEAT_ITER == 1 )) || wait_delete_completed
1070 $LFS df -i $DIR/$tdir
1071 chmod 0777 $DIR/$tdir
1073 stack_trap "cleanup_rr_alloc $clients $mntpt_root $rr_alloc_MNTPTS"
1074 for ((i=0; i < rr_alloc_MNTPTS; i++)); do
1075 zconf_mount_clients $clients ${mntpt_root}$i $MOUNT_OPTS ||
1076 error_exit "Failed to mount lustre on ${mntpt_root}$i $clients"
1079 # Save mdt values, set threshold to 100% i.e always Round Robin,
1080 # restore the saved values again after creating files...
1081 save_lustre_params mds1 \
1082 "lod.$FSNAME-MDT0000*.qos_threshold_rr" > $qos_prec_objs
1083 save_lustre_params mds1 \
1084 "osp.$FSNAME-OST*-osc-MDT0000.create_count" >> $qos_prec_objs
1085 stack_trap "restore_lustre_params <$qos_prec_objs; rm -f $qos_prec_objs"
1087 # Make sure that every osp has enough precreated objects for the file
1090 # The MDS does not precreate objects if there are at least
1091 # create_count / 2 precreated objects available for the OST.
1092 # Set 'create_count' to 2x required number to force creation.
1094 # foeo = file on each ost. calc = calculated.
1095 local foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT))
1096 local create_count=$((2 * foeo_calc))
1097 local max_create_count=$(do_facet $SINGLEMDS "$LCTL get_param -n \
1098 osp.*OST0000*MDT0000.max_create_count")
1100 # create_count accepted values:
1101 # [OST_MIN_PRECREATE=32, OST_MAX_PRECREATE=20000]
1102 # values exceeding OST_MAX_PRECREATE are lowered to half of the maximum.
1103 (( create_count >= 32 )) || create_count=32
1104 (( create_count <= max_create_count )) ||
1105 create_count=$((max_create_count / 2))
1107 local mdts=$(comma_list $(mdts_nodes))
1109 do_nodes $mdts "$LCTL set_param lod.*.qos_threshold_rr=100 \
1110 osp.*.create_count=$create_count"
1112 # Check for enough precreated objects... We should not
1113 # fail here because code(osp_precreate.c) also takes care of it.
1114 # So we have good chances of passing test even if this check fails.
1115 local stop=$((SECONDS + 60))
1119 while ((SECONDS < stop)); do
1122 for ((mdt_idx = 0; mdt_idx < $MDSCOUNT; mdt_idx++)); do
1123 for ((ost_idx = 0; ost_idx < $OSTCOUNT; ost_idx++)); do
1124 local count=$(precreated_ost_obj_count \
1126 if ((count < foeo_calc / 6)); then
1127 local this_pair=mdt$mdt_idx.$ost_idx
1131 # allow one iteration to precreate,
1132 # then force create new sequence once
1133 [[ "$forced" =~ "$this_pair" ]] &&
1136 if [[ "$waited" =~ "$this_pair" ]]; then
1137 mkdir -p $DIR/$tdir.2
1138 force_new_seq_ost $DIR/$tdir.2 \
1139 mds$((mdt_idx+1)) $ost_idx
1140 forced="$forced $this_pair"
1142 waited="$waited $this_pair"
1148 (( sleep > 0 )) || break
1152 [[ -d $DIR/$tdir.2 ]] && stack_trap "rm -rf $DIR/$tdir.2"
1154 local cmd="$RR_ALLOC $mntpt_root/$tdir/f $rr_alloc_NFILES $num_clients"
1156 if [[ $total_MNTPTS -ne 0 ]]; then
1157 # Now start the actual file creation app.
1158 mpi_run "-np $total_MNTPTS" $cmd || return
1160 error "No mount point"
1163 diff_max_min_arr=($($LFS getstripe -r $DIR/$tdir/ |
1164 awk '/lmm_stripe_offset:/ {print $2}' |
1165 sort | uniq -c | tee /dev/stderr |
1166 awk 'NR==1 {min=max=$1} \
1167 { $1<min ? min=$1:min; $1>max ? max=$1:max} \
1168 END {print max-min, max, min}'))
1170 # In case of fairly large number of file creation using RR (round-robin)
1171 # there can be two cases in which deviation will occur than the regular
1172 # RR algo behaviour-
1173 # 1- When rr_alloc does not start right with 'lqr_start_count' reseeded,
1174 # 2- When rr_alloc does not finish with 'lqr_start_count == 0'.
1175 # So the difference of files for any 2 OST should not be more than 2-3.
1176 # In some cases it may be more, but shouldn't be > .3% of the files.
1177 local max_diff=$((create_count > 600 ? create_count / 200 : $MDSCOUNT))
1179 (( ${diff_max_min_arr[0]} <= $max_diff )) || {
1181 $LFS df -i $DIR/$tdir
1183 error "max/min OST objects (${diff_max_min_arr[1]} : ${diff_max_min_arr[2]}) too different"
1188 # fs_test.x is the default name for exe
1189 FS_TEST=${FS_TEST:=$(which fs_test.x 2> /dev/null || true)}
1191 local clients=${CLIENTS:-$(hostname)}
1192 local testdir=$DIR/d0.fs_test
1193 local file=${testdir}/fs_test
1194 fs_test_threads=${fs_test_threads:-2}
1195 fs_test_type=${fs_test_type:-1}
1196 fs_test_nobj=${fs_test_nobj:-10}
1197 fs_test_check=${fs_test_check:-3}
1198 fs_test_strided=${fs_test_strided:-1}
1199 fs_test_touch=${fs_test_touch:-3}
1200 fs_test_supersize=${fs_test_supersize:-1}
1201 fs_test_op=${fs_test_op:-write}
1202 fs_test_barriers=${fs_test_barriers:-bopen,bwrite,bclose}
1203 fs_test_io=${fs_test_io:-mpi}
1204 fs_test_objsize=${fs_test_objsize:-100}
1205 fs_test_objunit=${fs_test_objunit:-1048576} # 1 mb
1206 fs_test_ndirs=${fs_test_ndirs:-80000}
1208 [ x$FS_TEST = x ] && skip "FS_TEST not found"
1210 # Space estimation in bytes
1211 local space=$(df -B 1 -P $dir | tail -n 1 | awk '{ print $4 }')
1212 local total_threads=$((num_clients * fs_test_threads))
1213 echo "+ $fs_test_objsize * $fs_test_objunit * $total_threads "
1214 if [ $((space / 2)) -le \
1215 $((fs_test_objsize * fs_test_objunit * total_threads)) ]; then
1216 fs_test_objsize=$((space / 2 / fs_test_objunit /
1218 [ $fs_test_objsize -eq 0 ] &&
1219 skip_env "Need free space more than \
1220 $((2 * total_threads * fs_test_objunit)) \
1221 : have $((space / fs_test_objunit))"
1223 echo "(reduced objsize to \
1224 $((fs_test_objsize * fs_test_objunit)) bytes)"
1227 print_opts FS_TEST clients fs_test_threads fs_test_objsize MACHINEFILE
1230 setstripe_getstripe $testdir $fs_test_STRIPEPARAMS
1232 # mpi_run uses mpiuser
1235 # --nodb Turn off the database code at runtime
1236 # -g --target The path to the data file
1237 # -t --type Whether to do N-N (1) or N-1 (2)
1238 # -n --nobj The number of objects written/read by each proc
1239 # -z --size The size of each object
1240 # -d ---num_nn_dirs Number of subdirectories for files
1241 # -C --check Check every byte using argument 3.
1242 # --collective Whether to use collective I/O (for N-1, mpi-io only)
1243 # -s --strided Whether to use a strided pattern (for N-1 only)
1244 # -T --touch Touch every byte using argument 3
1245 # -o --op Whether to read only (read) or write only (write)
1246 # -b --barriers When to barrier.
1247 # -i --io Use POSIX, MPI, or PLFS IO routines (mpi|posix|plfs)
1248 # -S --supersize Specify how many objects per superblock
1250 local cmd="$FS_TEST -nodb -g $file -t $fs_test_type -n $fs_test_nobj \
1251 -z $((fs_test_objsize * fs_test_objunit)) -d $fs_test_ndirs \
1252 -C $fs_test_check -collective -s $fs_test_strided \
1253 -T $fs_test_touch -o $fs_test_op -b $fs_test_barriers \
1254 -i $fs_test_io -S $fs_test_supersize"
1257 mpi_run "-np $((num_clients * fs_test_threads))" $cmd
1260 if [ $rc != 0 ] ; then
1261 error "fs_test failed! $rc"
1267 ior_mdtest_parallel() {
1275 run_mdtest $type || rc2=$?
1276 [[ $rc2 -ne 0 ]] && echo "mdtest failed with error $rc2"
1278 wait $pids || rc1=$?
1279 [[ $rc1 -ne 0 ]] && echo "ior failed with error $rc1"
1281 [[ $rc1 -ne 0 || $rc2 -ne 0 ]] && return 1
1286 FIO=${FIO:=$(which fio 2> /dev/null || true)}
1288 local clients=${CLIENTS:-$(hostname)}
1289 local fio_jobNum=${fio_jobNum:-4}
1290 local fio_jobFile=${fio_jobFile:-$TMP/fiojobfile.$(date +%s)}
1291 local fio_bs=${fio_bs:-1}
1292 local testdir=$DIR/d0.fio
1293 local file=${testdir}/fio
1295 local propagate=false
1297 [ "$SLOW" = "no" ] || runtime=600
1299 [ x$FIO = x ] && skip_env "FIO not found"
1302 setstripe_getstripe $testdir $fio_STRIPEPARAMS
1304 # use fio job file if exists,
1305 # create a simple one if missing
1306 if ! [ -f $fio_jobFile ]; then
1307 cat >> $fio_jobFile <<EOF
1313 filename=${file}_\$(hostname)
1315 # bs size increased by $i for each job
1316 for ((i=1; i<=fio_jobNum; i++)); do
1317 cat >> $fio_jobFile <<EOF
1320 bs=$(( fio_bs * i ))m
1323 # job file is created, should be propagated to all clients
1328 # propagate the job file if not all clients have it yet or
1329 # if the job file was created during the test run
1330 if ! do_nodesv $clients " [ -f $fio_jobFile ] " ||
1332 local cfg=$(cat $fio_jobFile)
1333 do_nodes $clients "echo \\\"$cfg\\\" > ${fio_jobFile}" ||
1334 error "job file $fio_jobFile is not propagated"
1335 do_nodesv $clients "cat ${fio_jobFile}"
1338 cmd="$FIO $fio_jobFile"
1341 log "clients: $clients $cmd"
1344 do_nodesv $clients "$cmd "
1347 [ $rc = 0 ] || error "fio failed: $rc"
1352 XDD=${XDD:=$(which xdd 2> /dev/null || true)}
1354 local clients=${CLIENTS:-$(hostname)}
1355 local testdir=$DIR/d0.xdd
1356 xdd_queuedepth=${xdd_queuedepth:-4}
1357 xdd_blocksize=${xdd_blocksize:-512}
1358 xdd_reqsize=${xdd_reqsize:-128}
1359 xdd_mbytes=${xdd_mbytes:-100}
1360 xdd_passes=${xdd_passes:-40}
1361 xdd_rwratio=${xdd_rwratio:-0}
1362 xdd_ntargets=${xdd_ntargets:-6}
1363 local xdd_custom_params=${xdd_custom_params:-"-dio -stoponerror \
1364 -maxpri -minall -noproclock -nomemlock"}
1366 [ x$XDD = x ] && skip "XDD not found"
1368 print_opts XDD clients xdd_queuedepth xdd_blocksize xdd_reqsize \
1369 xdd_mbytes xdd_passes xdd_rwratio
1372 setstripe_getstripe $testdir $xdd_STRIPEPARAMS
1375 # Target files creates based on the given number of targets
1376 for (( i=0; i < $xdd_ntargets; i++ ))
1378 files+="${testdir}/xdd"$i" "
1381 # -targets specifies the devices or files to perform operation
1382 # -reqsize number of 'blocks' per operation
1383 # -mbytes number of 1024*1024-byte blocks to transfer
1384 # -blocksize size of a single 'block'
1385 # -passes number of times to read mbytes
1386 # -queuedepth number of commands to queue on the target
1387 # -rwratio percentage of read to write operations
1388 # -verbose will print out statistics on each pass
1390 local cmd="$XDD -targets $xdd_ntargets $files -reqsize $xdd_reqsize \
1391 -mbytes $xdd_mbytes -blocksize $xdd_blocksize \
1392 -passes $xdd_passes -queuedepth $xdd_queuedepth \
1393 -rwratio $xdd_rwratio -verbose $xdd_custom_params"
1397 do_nodesv $clients "$cmd "
1400 [ $rc = 0 ] || error "xdd failed: $rc"
1405 client_load_mkdir () {
1407 local parent=$(dirname $dir)
1409 local mdtcount=$($LFS df $parent 2> /dev/null | grep -c MDT)
1410 if [ $mdtcount -le 1 ] || ! is_lustre ${parent}; then
1411 mkdir $dir || return 1
1414 mdt_idx=$((RANDOM % mdtcount))
1415 if $RECOVERY_SCALE_ENABLE_STRIPED_DIRS; then
1416 # stripe_count in range [1,mdtcount]
1417 # $LFS mkdir treats stripe_count 0 and 1 the same
1418 stripe_count_opt="-c$((RANDOM % mdtcount + 1))"
1424 if $RECOVERY_SCALE_ENABLE_REMOTE_DIRS ||
1425 $RECOVERY_SCALE_ENABLE_STRIPED_DIRS; then
1426 $LFS mkdir -i$mdt_idx $stripe_count_opt $dir ||
1429 mkdir $dir || return 1
1431 $LFS getdirstripe $dir || return 1
1433 if [ -n "$client_load_SETSTRIPEPARAMS" ]; then
1434 $LFS setstripe $client_load_SETSTRIPEPARAMS $dir ||
1437 $LFS getstripe $dir || return 1
1440 enospc_detected () {
1441 grep "No space left on device" $1 | grep -qv grep