# Locate the Lustre tree, source the shared test framework and the per-site
# config, then build the MPI machine file from the client list.
# NOTE(review): this chunk is an excerpt — each line carries a stray original
# line-number prefix and the numbering has gaps, so lines are missing between
# visible statements. Confirm against the full script before running.
5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
6 . $LUSTRE/tests/test-framework.sh
8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# parallel_grouplock is excluded from the default run; $PARALLEL_SCALE_EXCEPT
# lets the site config add further exclusions.
12 ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
16 MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
17 clients=${CLIENTS:-$HOSTNAME}
18 generate_machine_file $clients $MACHINEFILE || error "Failed to generate machine file"
# get_node_count expects a space-separated list, hence the ',' -> ' ' expansion.
19 num_clients=$(get_node_count ${clients//,/ })
# ---------------------------------------------------------------------------
# Per-benchmark tunables. Each variable honors a pre-set environment value and
# falls back to a default; SLOW=no shrinks workloads for quick runs.
# NOTE(review): gaps in the embedded numbering mean some tunable lines (and at
# least one if/fi body, below) are missing from this excerpt.
# ---------------------------------------------------------------------------
# compilebench
24 cbench_DIR=${cbench_DIR:-""}
25 cbench_IDIRS=${cbench_IDIRS:-4}
26 cbench_RUNS=${cbench_RUNS:-4} # FIXME: the wiki page requires 30 — do we really need 30?
# NOTE(review): dangling 'if' — its body and 'fi' are missing from this excerpt.
28 if [ "$SLOW" = "no" ]; then
# metabench
36 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
37 mbench_NFILES=${mbench_NFILES:-30400}
38 [ "$SLOW" = "no" ] && mbench_NFILES=10000
40 mbench_THREADS=${mbench_THREADS:-4}
# simul
45 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
47 simul_THREADS=${simul_THREADS:-2}
48 simul_REP=${simul_REP:-20}
49 [ "$SLOW" = "no" ] && simul_REP=2
# mib
54 MIB=${MIB:=$(which mib 2> /dev/null || true)}
56 mib_THREADS=${mib_THREADS:-2}
57 mib_xferSize=${mib_xferSize:-1m}
58 mib_xferLimit=${mib_xferLimit:-5000}
59 mib_timeLimit=${mib_timeLimit:-300}
# mdtest
64 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
66 mdtest_THREADS=${mdtest_THREADS:-2}
67 mdtest_nFiles=${mdtest_nFiles:-"100000"}
68 # We divide the files by the number of cores
69 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
70 mdtest_iteration=${mdtest_iteration:-1}
# connectathon
75 cnt_DIR=${cnt_DIR:-""}
76 cnt_NRUN=${cnt_NRUN:-10}
77 [ "$SLOW" = "no" ] && cnt_NRUN=2
# cascading_rw
82 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
84 casc_THREADS=${casc_THREADS:-2}
85 casc_REP=${casc_REP:-300}
86 [ "$SLOW" = "no" ] && casc_REP=10
# IOR
91 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
93 ior_THREADS=${ior_THREADS:-2}
94 ior_iteration=${ior_iteration:-1}
95 ior_blockSize=${ior_blockSize:-6} # Gb
96 ior_xferSize=${ior_xferSize:-2m}
97 ior_type=${ior_type:-POSIX}
98 ior_DURATION=${ior_DURATION:-30} # minutes
99 [ "$SLOW" = "no" ] && ior_DURATION=5
102 # write_append_truncate
105 write_THREADS=${write_THREADS:-8}
106 write_REP=${write_REP:-10000}
107 [ "$SLOW" = "no" ] && write_REP=100
# write_disjoint
112 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null || true)}
114 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
115 wdisjoint_REP=${wdisjoint_REP:-10000}
# parallel_grouplock
116 [ "$SLOW" = "no" ] && wdisjoint_REP=100
122 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 2> /dev/null || true)}
123 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
# Mount/verify the Lustre filesystem and set up the unprivileged MPI user
# (runas + optional Kerberos ticket refresh).
126 check_and_setup_lustre
128 get_mpiuser_id $MPI_USER
129 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
130 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
# NOTE(review): the two lines below appear to be the interior of a print_opts
# helper (indirect ${!var} expansion of each option name) — the surrounding
# function definition is missing from this excerpt; confirm against the full file.
140 echo "${var}=${!var}"
142 [ -e $MACHINEFILE ] && cat $MACHINEFILE
# test_compilebench: run compilebench against $DIR, shrinking the number of
# initial dirs if free space is short. Expected runtime/space notes below.
# NOTE(review): interior lines (fi, the run command, rc capture) are missing
# from this excerpt — the visible body is incomplete.
146 # 5 min * cbench_RUNS
150 # compile dir kernel-1 680MB
151 # required space 680MB * cbench_IDIRS = ~7 Gb
153 test_compilebench() {
154 print_opts cbench_DIR cbench_IDIRS cbench_RUNS
156 [ x$cbench_DIR = x ] &&
157 { skip_env "compilebench not found" && return; }
159 [ -e $cbench_DIR/compilebench ] || \
160 { skip_env "No compilebench build" && return; }
# Available space in KB on $DIR; each initial dir needs ~680 MB.
162 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
163 if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
164 cbench_IDIRS=$(( space / 680 / 1024))
165 [ $cbench_IDIRS = 0 ] && \
166 skip_env "Need free space atleast 680 Mb, have $space" && return
168 log free space=$space, reducing initial dirs to $cbench_IDIRS
171 # t-f _base needs to be modified to set tdir properly
172 # for the new "test_foo" function names
173 # local testdir=$DIR/$tdir
174 local testdir=$DIR/d0.compilebench
179 local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
188 [ $rc = 0 ] || error "compilebench failed: $rc"
191 run_test compilebench "compilebench"
# test_metabench: metadata benchmark (create + stat + cleanup) via mpi_run,
# or srun when $SRUN_PARTITION is set.
# NOTE(review): the "test_metabench() {" header, the else/fi of the srun
# branch, and the closing brace are missing from this excerpt.
194 [ x$METABENCH = x ] &&
195 { skip_env "metabench not found" && return; }
198 # Need space estimation here.
200 print_opts METABENCH clients mbench_NFILES mbench_THREADS
202 local testdir=$DIR/d0.metabench
204 # mpi_run uses mpiuser
207 # -C Run the file creation tests.
208 # -S Run the file stat tests.
209 # -c nfile Number of files to be used in each test.
210 # -k Cleanup. Remove the test directories.
211 local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
214 # find out if we need to use srun by checking $SRUN_PARTITION
215 if [ "$SRUN_PARTITION" ]; then
216 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
217 -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
219 mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
223 if [ $rc != 0 ] ; then
224 error "metabench failed! $rc"
228 run_test metabench "metabench"
# test_simul: simultaneous-ops POSIX consistency test, skipped on NFS clients.
# NOTE(review): function header, several fi/else lines, and the closing brace
# are missing from this excerpt.
231 if [ "$NFSCLIENT" ]; then
232 skip "skipped for NFSCLIENT mode"
237 { skip_env "simul not found" && return; }
240 # Need space estimation here.
242 print_opts SIMUL clients simul_REP simul_THREADS
244 local testdir=$DIR/d0.simul
246 # mpi_run uses mpiuser
249 # -n # : repeat each test # times
250 # -N # : repeat the entire set of tests # times
# NOTE(review): both -n and -N are fed from $simul_REP — verify that is the
# intended behavior and not a copy/paste slip.
252 local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
255 # find out if we need to use srun by checking $SRUN_PARTITION
256 if [ "$SRUN_PARTITION" ]; then
257 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
258 -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
260 mpi_run -np $((num_clients * simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
264 if [ $rc != 0 ] ; then
265 error "simul failed! $rc"
269 run_test simul "simul"
# mdtest runner shared by the ssf (single shared file) and fpp (file per
# process, adds -u) variants; skipped on NFS clients.
# NOTE(review): the enclosing function header, the test_mdtestssf/fpp wrapper
# definitions, and several fi/closing lines are missing from this excerpt.
272 local type=${1:-"ssf"}
274 if [ "$NFSCLIENT" ]; then
275 skip "skipped for NFSCLIENT mode"
280 { skip_env "mdtest not found" && return; }
283 # Need space estimation here.
285 print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
287 local testdir=$DIR/d0.mdtest
289 # mpi_run uses mpiuser
292 # -i # : repeat each test # times
294 # -n # : number of file/dir to create/stat/remove
295 # -u : each process create/stat/remove individually
297 local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
298 [ $type = "fpp" ] && cmd="$cmd -u"
301 # find out if we need to use srun by checking $SRUN_PARTITION
302 if [ "$SRUN_PARTITION" ]; then
303 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
304 -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
306 mpi_run -np $((num_clients * mdtest_THREADS)) -machinefile ${MACHINEFILE} $cmd
310 if [ $rc != 0 ] ; then
311 error "mdtest failed! $rc"
319 run_test mdtestssf "mdtestssf"
324 run_test mdtestfpp "mdtestfpp"
# test_connectathon: run the cthon04 NFS test suite against $testdir, once per
# selected test type, skipping lock tests on nfsv4 mounts.
# NOTE(review): the $tests list assembly, loop body (run + rc capture), done,
# and closing brace are missing from this excerpt.
326 test_connectathon() {
327 print_opts cnt_DIR cnt_NRUN
330 { skip_env "connectathon dir not found" && return; }
332 [ -e $cnt_DIR/runtests ] || \
333 { skip_env "No connectathon runtests found" && return; }
335 local testdir=$DIR/d0.connectathon
342 # cthon options (must be in this order)
344 # -N numpasses - will be passed to the runtests script. This argument
345 # is optional. It specifies the number of times to run
348 # One of these test types
353 # -a all of the above
355 # -f a quick functionality test
359 # Include lock tests unless we're running on nfsv4
# df -TP prints the fs type in column 2 of the second output line.
360 local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
361 echo "$testdir: $fstype"
362 if [[ $fstype != "nfs4" ]]; then
366 for test in $tests; do
367 local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
373 [ $rc = 0 ] || error "connectathon failed: $rc"
379 run_test connectathon "connectathon"
# IOR runner shared by the iorssf (single shared file) and iorfpp (file per
# process, adds -F) variants. Shrinks the per-task block size when the free
# space on $DIR cannot hold clients * threads * blockSize GB.
# NOTE(review): the enclosing function header, several fi/else lines, the
# test_iorssf/iorfpp wrappers, and the closing brace are missing from this
# excerpt.
382 local type=${1:="ssf"}
385 { skip_env "IOR not found" && return; }
# Free KB on $DIR; compare half of it against the aggregate write size in KB
# (blockSize GB * 1024 * 1024 per task).
387 local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
388 echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS "
389 if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then
390 echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
391 ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
392 [ $ior_blockSize = 0 ] && \
393 skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
395 echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
398 print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
400 local testdir=$DIR/d0.ior
402 # mpi_run uses mpiuser
# Stripe across all OSTs (-c -1); on NFS clients the stripe must be set on the
# server side.
404 if [ "$NFSCLIENT" ]; then
405 setstripe_nfsserver $testdir -c -1 ||
406 { error "setstripe on nfsserver failed" && return 1; }
408 $LFS setstripe $testdir -c -1 ||
409 { error "setstripe failed" && return 2; }
412 # -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
414 # -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
415 # -w writeFile -- write file"
416 # -r readFile -- read existing file"
417 # -T maxTimeDuration -- max time in minutes to run tests"
418 # -k keepFile -- keep testFile(s) on program exit
420 local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
421 [ $type = "fpp" ] && cmd="$cmd -F"
424 # find out if we need to use srun by checking $SRUN_PARTITION
425 if [ "$SRUN_PARTITION" ]; then
426 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
427 -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
429 mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
433 if [ $rc != 0 ] ; then
434 error "ior failed! $rc"
442 run_test iorssf "iorssf"
447 run_test iorfpp "iorfpp"
# test_mib: MPI I/O bandwidth benchmark, skipped on NFS clients. Stripes the
# test dir across all OSTs before the run.
# NOTE(review): the function header, fi/else lines, run_test registration, and
# closing brace are missing from this excerpt.
450 if [ "$NFSCLIENT" ]; then
451 skip "skipped for NFSCLIENT mode"
456 { skip_env "MIB not found" && return; }
458 print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit MACHINEFILE
460 local testdir=$DIR/d0.mib
462 # mpi_run uses mpiuser
464 $LFS setstripe $testdir -c -1 ||
465 { error "setstripe failed" && return 2; }
467 # -I Show intermediate values in output
468 # -H Show headers in output
469 # -L Do not issue new system calls after this many seconds
470 # -s Use system calls of this size
472 # -l Issue no more than this many system calls
473 local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
476 # find out if we need to use srun by checking $SRUN_PARTITION
477 if [ "$SRUN_PARTITION" ]; then
478 $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
479 -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
481 mpi_run -np $((num_clients * mib_THREADS)) -machinefile ${MACHINEFILE} $cmd
485 if [ $rc != 0 ] ; then
486 error "mib failed! $rc"
# test_cascading_rw: overlapping read/write consistency test via mpi_run,
# skipped on NFS clients.
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
492 test_cascading_rw() {
493 if [ "$NFSCLIENT" ]; then
494 skip "skipped for NFSCLIENT mode"
499 { skip_env "cascading_rw not found" && return; }
502 # Need space estimation here.
504 print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
506 local testdir=$DIR/d0.cascading_rw
508 # mpi_run uses mpiuser
512 # -n: repeat test # times
514 local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
517 mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd
520 if [ $rc != 0 ] ; then
521 error "cascading_rw failed! $rc"
525 run_test cascading_rw "cascading_rw"
# test_write_append_truncate: concurrent write/append/truncate consistency
# test on a single shared file; skipped on NFS clients. The binary is expected
# on PATH (built in lustre/tests).
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
527 test_write_append_truncate() {
528 if [ "$NFSCLIENT" ]; then
529 skip "skipped for NFSCLIENT mode"
533 # location is lustre/tests dir
534 if ! which write_append_truncate > /dev/null 2>&1 ; then
535 skip_env "write_append_truncate not found"
540 # Need space estimation here.
542 local testdir=$DIR/d0.write_append_truncate
543 local file=$testdir/f0.wat
545 print_opts clients write_REP write_THREADS MACHINEFILE
548 # mpi_run uses mpiuser
551 local cmd="write_append_truncate -n $write_REP $file"
554 mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd
557 if [ $rc != 0 ] ; then
558 error "write_append_truncate failed! $rc"
563 run_test write_append_truncate "write_append_truncate"
# test_write_disjoint: each task writes a disjoint region of one shared file;
# skipped on NFS clients.
# NOTE(review): interior fi lines, rc capture, and the closing brace are
# missing from this excerpt.
565 test_write_disjoint() {
566 if [ "$NFSCLIENT" ]; then
567 skip "skipped for NFSCLIENT mode"
571 [ x$WRITE_DISJOINT = x ] &&
572 { skip_env "write_disjoint not found" && return; }
575 # Need space estimation here.
577 print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
578 local testdir=$DIR/d0.write_disjoint
580 # mpi_run uses mpiuser
583 local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
586 mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd
589 if [ $rc != 0 ] ; then
590 error "write_disjoint failed! $rc"
594 run_test write_disjoint "write_disjoint"
# test_parallel_grouplock: run the 12 group-lock subtests, accumulating a
# status so every subtest is attempted before reporting; disables max_rw_chunk
# on all clients first. Skipped on NFS clients.
# NOTE(review): interior lines (subtest/status setup, rc capture, fi, done,
# closing brace) are missing from this excerpt.
596 test_parallel_grouplock() {
597 if [ "$NFSCLIENT" ]; then
598 skip "skipped for NFSCLIENT mode"
602 [ x$PARALLEL_GROUPLOCK = x ] &&
603 { skip "PARALLEL_GROUPLOCK not found" && return; }
605 print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
607 local testdir=$DIR/d0.parallel_grouplock
609 # mpi_run uses mpiuser
612 do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
613 error "set_param max_rw_chunk=0 failed "
618 for i in $(seq 12); do
620 local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
623 mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd
625 if [ $rc != 0 ] ; then
# error_noexit keeps iterating so all subtests run before the final check.
626 error_noexit "parallel_grouplock subtests $subtest failed! $rc"
628 echo "parallel_grouplock subtests $subtest PASS"
630 let status=$((status + rc))
631 # clear debug to collect one log per one test
632 do_nodes $(comma_list $(nodes_list)) lctl clear
634 [ $status -eq 0 ] || error "parallel_grouplock status: $status"
637 run_test parallel_grouplock "parallel_grouplock"
# statahead test: create many files with mdsrate, then ls them through several
# extra client mount points to exercise statahead; cleanup_statahead unmounts
# the extra mounts on exit/error.
# NOTE(review): the cleanup_statahead/test_statahead function headers, 'done'
# lines, fi lines, and closing braces are partly missing from this excerpt.
639 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
640 statahead_NUMFILES=${statahead_NUMFILES:-500000}
642 cleanup_statahead () {
649 for i in $(seq 0 $num_mntpts);do
650 zconf_umount_clients $clients ${mntpt_root}$i ||
651 error_exit "Failed to umount lustre on ${mntpt_root}$i"
656 if [[ -n $NFSCLIENT ]]; then
657 skip "Statahead testing is not supported on NFS clients."
662 { skip_env "mdsrate not found" && return; }
664 print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
668 # do not use default "d[0-9]*" dir name
669 # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
671 local testdir=$DIR/$dir
673 # cleanup only if dir exists
674 # cleanup only $statahead_NUMFILES number of files
675 # ignore the other files created by someone else
677 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
680 # mpi_run uses mpiuser
683 local num_files=$statahead_NUMFILES
685 local IFree=$(inodes_available)
686 if [ $IFree -lt $num_files ]; then
692 local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
695 mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
698 if [ $rc != 0 ] ; then
699 error "mdsrate failed to create $rc"
703 local num_mntpts=$statahead_NUMMNTPTS
704 local mntpt_root=$TMP/mntpt/lustre
705 local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
707 echo "Mounting $num_mntpts lustre clients starts on $clients"
# Ensure the extra mounts are torn down on any exit path.
708 trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
709 for i in $(seq 0 $num_mntpts); do
710 zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
711 error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
# Drop cached MDC locks so the ls actually triggers statahead RPCs.
714 do_rpc_nodes $clients cancel_lru_locks mdc
716 do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
718 mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
720 # use rm instead of rmdir because of
721 # testdir could contain the files created by someone else,
722 # or by previous run where is num_files prev > num_files current
724 cleanup_statahead $clients $mntpt_root $num_mntpts
727 run_test statahead "statahead test, multiple clients"
# Record completion time and clean up the Lustre setup.
729 complete $(basename $0) $SECONDS
730 check_and_cleanup_lustre