lustre/tests/parallel-scale.sh

   1 #!/bin/bash
   2 #
   3 #set -vx
   4
   5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
   6 . $LUSTRE/tests/test-framework.sh
   7 init_test_env $@
   8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
   9 init_logging
  10
  11 # bug number:
  12 ALWAYS_EXCEPT="$PARALLEL_SCALE_EXCEPT"
  13
  14 # common setup
  15 #
  16 MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
  17 clients=${CLIENTS:-$HOSTNAME}
  18 generate_machine_file $clients $MACHINEFILE || return $?
  19 num_clients=$(get_node_count ${clients//,/ })
  20
  21 #
  22 # compilbench
  23 #
  24 cbench_DIR=${cbench_DIR:-""}
  25 cbench_IDIRS=${cbench_IDIRS:-4}
  26 # FIXME: wiki page requirements is 30, do we really need 30 ?
  27 cbench_RUNS=${cbench_RUNS:-4}
  28
  29 if [ "$SLOW" = "no" ]; then
  30     cbench_IDIRS=2
  31     cbench_RUNS=2
  32 fi
  33
  34 #
  35 # metabench
  36 #
  37 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
  38 mbench_NFILES=${mbench_NFILES:-30400}
  39 [ "$SLOW" = "no" ] && mbench_NFILES=10000
  40 # threads per client
  41 mbench_THREADS=${mbench_THREADS:-4}
  42
  43 #
  44 # simul
  45 #
  46 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
  47 # threads per client
  48 simul_THREADS=${simul_THREADS:-2}
  49 simul_REP=${simul_REP:-20}
  50 [ "$SLOW" = "no" ] && simul_REP=2
  51
  52 #
  53 # mib
  54 #
  55 MIB=${MIB:=$(which mib 2> /dev/null || true)}
  56 # threads per client
  57 mib_THREADS=${mib_THREADS:-2}
  58 mib_xferSize=${mib_xferSize:-1m}
  59 mib_xferLimit=${mib_xferLimit:-5000}
  60 mib_timeLimit=${mib_timeLimit:-300}
  61
  62 #
  63 # MDTEST
  64 #
  65 MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
  66 # threads per client
  67 mdtest_THREADS=${mdtest_THREADS:-2}
  68 mdtest_nFiles=${mdtest_nFiles:-"100000"}
  69 # We devide the files by number of core
  70 mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
  71 mdtest_iteration=${mdtest_iteration:-1}
  72
  73 #
  74 # connectathon
  75 #
  76 cnt_DIR=${cnt_DIR:-""}
  77 cnt_NRUN=${cnt_NRUN:-10}
  78 [ "$SLOW" = "no" ] && cnt_NRUN=2
  79
  80 #
  81 # cascading rw
  82 #
  83 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
  84 # threads per client
  85 casc_THREADS=${casc_THREADS:-2}
  86 casc_REP=${casc_REP:-300}
  87 [ "$SLOW" = "no" ] && casc_REP=10
  88
  89 #
  90 # IOR
  91 #
  92 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
  93 # threads per client
  94 ior_THREADS=${ior_THREADS:-2}
  95 ior_iteration=${ior_iteration:-1}
  96 ior_blockSize=${ior_blockSize:-6}       # Gb
  97 ior_xferSize=${ior_xferSize:-2m}
  98 ior_type=${ior_type:-POSIX}
  99 ior_DURATION=${ior_DURATION:-30}        # minutes
 100 [ "$SLOW" = "no" ] && ior_DURATION=5
 101
 102 #
 103 # write_append_truncate
 104 #
 105 # threads per client
 106 write_THREADS=${write_THREADS:-8}
 107 write_REP=${write_REP:-10000}
 108 [ "$SLOW" = "no" ] && write_REP=100
 109
 110 #
 111 # write_disjoint
 112 #
 113 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null || true)}
 114 # threads per client
 115 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
 116 wdisjoint_REP=${wdisjoint_REP:-10000}
 117 [ "$SLOW" = "no" ] && wdisjoint_REP=100
 118
 119 #
 120 # parallel_grouplock
 121 #
 122 #
 123 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-\
 124     $(which parallel_grouplock 2> /dev/null || true)}
 125 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
 126
 127 build_test_filter
 128 check_and_setup_lustre
 129
 130 print_opts () {
 131     local var
 132
 133     echo OPTIONS:
 134
 135     for i in $@; do
 136         var=$i
 137         echo "${var}=${!var}"
 138     done
 139     [ -e $MACHINEFILE ] && cat $MACHINEFILE
 140 }
 141
 142 # Takes:
 143 # 5 min * cbench_RUNS
 144 #        SLOW=no     10 mins
 145 #        SLOW=yes    50 mins
 146 # Space estimation:
 147 #        compile dir kernel-1 680MB
 148 #        required space       680MB * cbench_IDIRS = ~7 Gb
 149
 150 test_compilebench() {
 151     print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 152
 153     [ x$cbench_DIR = x ] &&
 154         { skip_env "compilebench not found" && return; }
 155
 156     [ -e $cbench_DIR/compilebench ] || \
 157         { skip_env "No compilebench build" && return; }
 158
 159     local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
 160     if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
 161         cbench_IDIRS=$(( space / 680 / 1024))
 162         [ $cbench_IDIRS = 0 ] && \
 163             skip_env "Need free space atleast 680 Mb, have $space" && return
 164
 165         log free space=$space, reducing initial dirs to $cbench_IDIRS
 166     fi
 167     # FIXME:
 168     # t-f _base needs to be modifyed to set properly tdir
 169     # for new "test_foo" functions names
 170     # local testdir=$DIR/$tdir
 171     local testdir=$DIR/d0.compilebench
 172     mkdir -p $testdir
 173
 174     local savePWD=$PWD
 175     cd $cbench_DIR
 176     local cmd="./compilebench -D $testdir -i $cbench_IDIRS \
 177         -r $cbench_RUNS --makej"
 178
 179     log "$cmd"
 180
 181     local rc=0
 182     eval $cmd
 183     rc=$?
 184
 185     cd $savePWD
 186     [ $rc = 0 ] || error "compilebench failed: $rc"
 187     rm -rf $testdir
 188 }
 189 run_test compilebench "compilebench"
 190
 191 test_metabench() {
 192     [ x$METABENCH = x ] &&
 193         { skip_env "metabench not found" && return; }
 194
 195     # FIXME
 196     # Need space estimation here.
 197
 198     print_opts METABENCH clients mbench_NFILES mbench_THREADS
 199
 200     local testdir=$DIR/d0.metabench
 201     mkdir -p $testdir
 202     # mpi_run uses mpiuser
 203     chmod 0777 $testdir
 204
 205     # -C             Run the file creation tests.
 206     # -S             Run the file stat tests.
 207     # -c nfile       Number of files to be used in each test.
 208     # -k             Cleanup.  Remove the test directories.
 209     local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
 210     echo "+ $cmd"
 211
 212     # find out if we need to use srun by checking $SRUN_PARTITION
 213     if [ "$SRUN_PARTITION" ]; then
 214         $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 215             -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
 216     else
 217         mpi_run -np $((num_clients * $mbench_THREADS)) \
 218             -machinefile ${MACHINEFILE} $cmd
 219     fi
 220
 221     local rc=$?
 222     if [ $rc != 0 ] ; then
 223         error "metabench failed! $rc"
 224     fi
 225     rm -rf $testdir
 226 }
 227 run_test metabench "metabench"
 228
 229 test_simul() {
 230     if [ "$NFSCLIENT" ]; then
 231         skip "skipped for NFSCLIENT mode"
 232         return
 233     fi
 234
 235     [ x$SIMUL = x ] &&
 236         { skip_env "simul not found" && return; }
 237
 238     # FIXME
 239     # Need space estimation here.
 240
 241     print_opts SIMUL clients simul_REP simul_THREADS
 242
 243     local testdir=$DIR/d0.simul
 244     mkdir -p $testdir
 245     # mpi_run uses mpiuser
 246     chmod 0777 $testdir
 247
 248     # -n # : repeat each test # times
 249     # -N # : repeat the entire set of tests # times
 250
 251     local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
 252
 253     echo "+ $cmd"
 254
 255     # find out if we need to use srun by checking $SRUN_PARTITION
 256     if [ "$SRUN_PARTITION" ]; then
 257         $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 258             -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
 259     else
 260         mpi_run -np $((num_clients * $simul_THREADS)) \
 261             -machinefile ${MACHINEFILE} $cmd
 262     fi
 263
 264     local rc=$?
 265     if [ $rc != 0 ] ; then
 266         error "simul failed! $rc"
 267     fi
 268     rm -rf $testdir
 269 }
 270 run_test simul "simul"
 271
 272 test_mdtest() {
 273     local type=${1:-"ssf"}
 274
 275     if [ "$NFSCLIENT" ]; then
 276         skip "skipped for NFSCLIENT mode"
 277         return
 278     fi
 279
 280     [ x$MDTEST = x ] &&
 281         { skip_env "mdtest not found" && return; }
 282
 283     # FIXME
 284     # Need space estimation here.
 285
 286     print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
 287
 288     local testdir=$DIR/d0.mdtest
 289     mkdir -p $testdir
 290     # mpi_run uses mpiuser
 291     chmod 0777 $testdir
 292
 293     # -i # : repeat each test # times
 294     # -d   : test dir
 295     # -n # : number of file/dir to create/stat/remove
 296     # -u   : each process create/stat/remove individually
 297
 298     local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
 299     [ $type = "fpp" ] && cmd="$cmd -u"
 300
 301     echo "+ $cmd"
 302     # find out if we need to use srun by checking $SRUN_PARTITION
 303     if [ "$SRUN_PARTITION" ]; then
 304         $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 305             -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
 306     else
 307         mpi_run -np $((num_clients * mdtest_THREADS)) \
 308             -machinefile ${MACHINEFILE} $cmd
 309     fi
 310
 311     local rc=$?
 312     if [ $rc != 0 ] ; then
 313         error "mdtest failed! $rc"
 314     fi
 315     rm -rf $testdir
 316 }
 317
 318 test_mdtestssf() {
 319     test_mdtest "ssf"
 320 }
 321 run_test mdtestssf "mdtestssf"
 322
 323 test_mdtestfpp() {
 324     test_mdtest "fpp"
 325 }
 326 run_test mdtestfpp "mdtestfpp"
 327
 328 test_connectathon() {
 329     print_opts cnt_DIR cnt_NRUN
 330
 331     [ x$cnt_DIR = x ] &&
 332         { skip_env "connectathon dir not found" && return; }
 333
 334     [ -e $cnt_DIR/runtests ] || \
 335         { skip_env "No connectathon runtests found" && return; }
 336
 337     local testdir=$DIR/d0.connectathon
 338     mkdir -p $testdir
 339
 340     local savePWD=$PWD
 341     cd $cnt_DIR
 342
 343     #
 344     # cthon options (must be in this order)
 345     #
 346     # -N numpasses - will be passed to the runtests script.  This argument
 347     #         is optional.  It specifies the number of times to run
 348     #         through the tests.
 349     #
 350     # One of these test types
 351     #    -b  basic
 352     #    -g  general
 353     #    -s  special
 354     #    -l  lock
 355     #    -a  all of the above
 356     #
 357     # -f      a quick functionality test
 358     #
 359
 360     tests="-b -g -s"
 361     # Include lock tests unless we're running on nfsv4
 362     local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
 363     echo "$testdir: $fstype"
 364     if [[ $fstype != "nfs4" ]]; then
 365         tests="$tests -l"
 366     fi
 367     echo "tests: $tests"
 368     for test in $tests; do
 369         local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
 370         local rc=0
 371
 372         log "$cmd"
 373         eval $cmd
 374         rc=$?
 375         [ $rc = 0 ] || error "connectathon failed: $rc"
 376     done
 377
 378     cd $savePWD
 379     rm -rf $testdir
 380 }
 381 run_test connectathon "connectathon"
 382
 383 test_ior() {
 384     local type=${1:="ssf"}
 385
 386     [ x$IOR = x ] &&
 387         { skip_env "IOR not found" && return; }
 388
 389     local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
 390     local total_threads=$(( num_clients * ior_THREADS ))
 391     echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
 392     if [ $((space / 2)) -le \
 393         $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
 394         echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
 395         ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
 396         [ $ior_blockSize = 0 ] && \
 397             skip_env "Need free space more than $((2 * total_threads))GB: \
 398                 $((total_threads *1024 *1024*2)), have $space" && return
 399
 400         local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
 401         echo "free space=$space, Need: $reduced_size GB"
 402         echo "(blockSize reduced to $ior_blockSize GB)"
 403     fi
 404
 405     print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 406
 407     local testdir=$DIR/d0.ior
 408     mkdir -p $testdir
 409     # mpi_run uses mpiuser
 410     chmod 0777 $testdir
 411     if [ "$NFSCLIENT" ]; then
 412         setstripe_nfsserver $testdir -c -1 ||
 413             { error "setstripe on nfsserver failed" && return 1; }
 414     else
 415         $LFS setstripe $testdir -c -1 ||
 416             { error "setstripe failed" && return 2; }
 417     fi
 418     #
 419     # -b N  blockSize --
 420     #       contiguous bytes to write per task  (e.g.: 8, 4k, 2m, 1g)"
 421     # -o S  testFileName
 422     # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
 423     # -w    writeFile -- write file"
 424     # -r    readFile -- read existing file"
 425     # -T    maxTimeDuration -- max time in minutes to run tests"
 426     # -k    keepFile -- keep testFile(s) on program exit
 427
 428     local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
 429 -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
 430     [ $type = "fpp" ] && cmd="$cmd -F"
 431
 432     echo "+ $cmd"
 433
 434     # find out if we need to use srun by checking $SRUN_PARTITION
 435     if [ "$SRUN_PARTITION" ]; then
 436         $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 437             -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
 438     else
 439         mpi_run -np $((num_clients * $ior_THREADS)) \
 440             -machinefile ${MACHINEFILE} $cmd
 441     fi
 442
 443     local rc=$?
 444     if [ $rc != 0 ] ; then
 445         error "ior failed! $rc"
 446     fi
 447     rm -rf $testdir
 448 }
 449
 450 test_iorssf() {
 451     test_ior "ssf"
 452 }
 453 run_test iorssf "iorssf"
 454
 455 test_iorfpp() {
 456     test_ior "fpp"
 457 }
 458 run_test iorfpp "iorfpp"
 459
 460 test_mib() {
 461     if [ "$NFSCLIENT" ]; then
 462         skip "skipped for NFSCLIENT mode"
 463         return
 464     fi
 465
 466     [ x$MIB = x ] &&
 467         { skip_env "MIB not found" && return; }
 468
 469     print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit \
 470         MACHINEFILE
 471
 472     local testdir=$DIR/d0.mib
 473     mkdir -p $testdir
 474     # mpi_run uses mpiuser
 475     chmod 0777 $testdir
 476     $LFS setstripe $testdir -c -1 ||
 477         { error "setstripe failed" && return 2; }
 478     #
 479     # -I    Show intermediate values in output
 480     # -H    Show headers in output
 481     # -L    Do not issue new system calls after this many seconds
 482     # -s    Use system calls of this size
 483     # -t    test dir
 484     # -l    Issue no more than this many system calls
 485     local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
 486 -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
 487
 488     echo "+ $cmd"
 489     # find out if we need to use srun by checking $SRUN_PARTITION
 490     if [ "$SRUN_PARTITION" ]; then
 491         $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
 492             -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
 493     else
 494         mpi_run -np $((num_clients * mib_THREADS)) \
 495             -machinefile ${MACHINEFILE} $cmd
 496     fi
 497
 498     local rc=$?
 499     if [ $rc != 0 ] ; then
 500         error "mib failed! $rc"
 501     fi
 502     rm -rf $testdir
 503 }
 504 run_test mib "mib"
 505
 506 test_cascading_rw() {
 507     if [ "$NFSCLIENT" ]; then
 508         skip "skipped for NFSCLIENT mode"
 509         return
 510     fi
 511
 512     [ x$CASC_RW = x ] &&
 513         { skip_env "cascading_rw not found" && return; }
 514
 515     # FIXME
 516     # Need space estimation here.
 517
 518     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 519
 520     local testdir=$DIR/d0.cascading_rw
 521     mkdir -p $testdir
 522     # mpi_run uses mpiuser
 523     chmod 0777 $testdir
 524
 525     # -g: debug mode
 526     # -n: repeat test # times
 527
 528     local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
 529
 530     echo "+ $cmd"
 531     mpi_run -np $((num_clients * $casc_THREADS)) \
 532         -machinefile ${MACHINEFILE} $cmd
 533
 534     local rc=$?
 535     if [ $rc != 0 ] ; then
 536         error "cascading_rw failed! $rc"
 537     fi
 538     rm -rf $testdir
 539 }
 540 run_test cascading_rw "cascading_rw"
 541
 542 test_write_append_truncate() {
 543     if [ "$NFSCLIENT" ]; then
 544         skip "skipped for NFSCLIENT mode"
 545         return
 546     fi
 547
 548     # location is lustre/tests dir
 549     if ! which write_append_truncate > /dev/null 2>&1 ; then
 550         skip_env "write_append_truncate not found"
 551         return
 552     fi
 553
 554     # FIXME
 555     # Need space estimation here.
 556
 557     local testdir=$DIR/d0.write_append_truncate
 558     local file=$testdir/f0.wat
 559
 560     print_opts clients write_REP write_THREADS MACHINEFILE
 561
 562     mkdir -p $testdir
 563     # mpi_run uses mpiuser
 564     chmod 0777 $testdir
 565
 566     local cmd="write_append_truncate -n $write_REP $file"
 567
 568     echo "+ $cmd"
 569     mpi_run -np $((num_clients * $write_THREADS)) \
 570         -machinefile ${MACHINEFILE} $cmd
 571
 572     local rc=$?
 573     if [ $rc != 0 ] ; then
 574         error "write_append_truncate failed! $rc"
 575         return $rc
 576     fi
 577     rm -rf $testdir
 578 }
 579 run_test write_append_truncate "write_append_truncate"
 580
 581 test_write_disjoint() {
 582     if [ "$NFSCLIENT" ]; then
 583         skip "skipped for NFSCLIENT mode"
 584         return
 585     fi
 586
 587     [ x$WRITE_DISJOINT = x ] &&
 588         { skip_env "write_disjoint not found" && return; }
 589
 590     # FIXME
 591     # Need space estimation here.
 592
 593     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP \
 594         MACHINEFILE
 595     local testdir=$DIR/d0.write_disjoint
 596     mkdir -p $testdir
 597     # mpi_run uses mpiuser
 598     chmod 0777 $testdir
 599
 600     local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
 601
 602     echo "+ $cmd"
 603     mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
 604         -machinefile ${MACHINEFILE} $cmd
 605
 606     local rc=$?
 607     if [ $rc != 0 ] ; then
 608         error "write_disjoint failed! $rc"
 609     fi
 610     rm -rf $testdir
 611 }
 612 run_test write_disjoint "write_disjoint"
 613
 614 test_parallel_grouplock() {
 615     if [ "$NFSCLIENT" ]; then
 616         skip "skipped for NFSCLIENT mode"
 617         return
 618     fi
 619
 620     [ x$PARALLEL_GROUPLOCK = x ] &&
 621         { skip "PARALLEL_GROUPLOCK not found" && return; }
 622
 623     print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
 624
 625     local testdir=$DIR/d0.parallel_grouplock
 626     mkdir -p $testdir
 627     # mpi_run uses mpiuser
 628     chmod 0777 $testdir
 629
 630     do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
 631         error "set_param max_rw_chunk=0 failed "
 632
 633     local cmd
 634     local status=0
 635     local subtest
 636     for i in $(seq 12); do
 637         subtest="-t $i"
 638         local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
 639         echo "+ $cmd"
 640
 641         mpi_run -np $parallel_grouplock_MINTASKS \
 642             -machinefile ${MACHINEFILE} $cmd
 643         local rc=$?
 644         if [ $rc != 0 ] ; then
 645             error_noexit "parallel_grouplock subtests $subtest failed! $rc"
 646         else
 647             echo "parallel_grouplock subtests $subtest PASS"
 648         fi
 649         let status=$((status + rc))
 650         # clear debug to collect one log per one test
 651         do_nodes $(comma_list $(nodes_list)) lctl clear
 652      done
 653     [ $status -eq 0 ] || error "parallel_grouplock status: $status"
 654     rm -rf $testdir
 655 }
 656 run_test parallel_grouplock "parallel_grouplock"
 657
 658 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
 659 statahead_NUMFILES=${statahead_NUMFILES:-500000}
 660
 661 cleanup_statahead () {
 662     trap 0
 663
 664     local clients=$1
 665     local mntpt_root=$2
 666     local num_mntpts=$3
 667
 668     for i in $(seq 0 $num_mntpts);do
 669         zconf_umount_clients $clients ${mntpt_root}$i ||
 670             error_exit "Failed to umount lustre on ${mntpt_root}$i"
 671     done
 672 }
 673
 674 test_statahead () {
 675     if [ "$NFSCLIENT" ]; then
 676         skip "skipped for NFSCLIENT mode"
 677         return
 678     fi
 679
 680     [ x$MDSRATE = x ] &&
 681         { skip_env "mdsrate not found" && return; }
 682
 683     print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
 684
 685     # create large dir
 686
 687     # do not use default "d[0-9]*" dir name
 688     # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
 689     local dir=dstatahead
 690     local testdir=$DIR/$dir
 691
 692     # cleanup only if dir exists
 693     # cleanup only $statahead_NUMFILES number of files
 694     # ignore the other files created by someone else
 695     [ -d $testdir ] &&
 696         mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 697             $statahead_NUMFILES $testdir 'f%%d' --ignore
 698
 699     mkdir -p $testdir
 700     # mpi_run uses mpiuser
 701     chmod 0777 $testdir
 702
 703     local num_files=$statahead_NUMFILES
 704
 705     local IFree=$(inodes_available)
 706     if [ $IFree -lt $num_files ]; then
 707       num_files=$IFree
 708     fi
 709
 710     cancel_lru_locks mdc
 711
 712     local cmd1="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir"
 713     local cmd2="--nfiles $num_files --filefmt 'f%%d'"
 714     local cmd="$cmd1 $cmd2"
 715     echo "+ $cmd"
 716
 717     mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
 718
 719     local rc=$?
 720     if [ $rc != 0 ] ; then
 721         error "mdsrate failed to create $rc"
 722         return $rc
 723     fi
 724
 725     local num_mntpts=$statahead_NUMMNTPTS
 726     local mntpt_root=$TMP/mntpt/lustre
 727     local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
 728
 729     echo "Mounting $num_mntpts lustre clients starts on $clients"
 730     trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
 731     for i in $(seq 0 $num_mntpts); do
 732         zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
 733             error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
 734     done
 735
 736     do_rpc_nodes $clients cancel_lru_locks mdc
 737
 738     do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
 739
 740     mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE \
 741         $num_files $testdir 'f%%d' --ignore
 742
 743     # use rm instead of rmdir because of
 744     # testdir could contain the files created by someone else,
 745     # or by previous run where is num_files prev > num_files current
 746     rm -rf $testdir
 747     cleanup_statahead $clients $mntpt_root $num_mntpts
 748 }
 749 run_test statahead "statahead test, multiple clients"
 750
 751 # bug 17764 accessing files via nfs,
 752 # ASSERTION(!mds_inode_is_orphan(dchild->d_inode)) failed
 753 test_nfsread_orphan_file() {
 754     if [ ! "$NFSCLIENT" ]; then
 755         skip "not NFSCLIENT mode, skipped"
 756         return
 757     fi
 758
 759     # copy file to lustre server
 760     local nfsserver=$(nfs_server $MOUNT)
 761     do_nodev $nfsserver cp /etc/passwd $DIR/$tfile
 762     zconf_mount $nfsserver $MOUNT2
 763
 764     # open, wait, unlink and close
 765     rmultiop_start --uniq unlink $nfsserver $DIR/$tfile o_uc
 766     echo "1. unlinker on NFS server $nfsserver opened the file $DIR/$tfile"
 767     sleep 1
 768
 769     # open $DIR2/$tfile and wait
 770     rmultiop_start --uniq open $nfsserver $DIR2/$tfile o_c
 771     echo "2. open on NFS server $nfsserver opened the file $DIR2/$tfile"
 772     sleep 1
 773
 774     # open $DIR/$tfile on nfs client, wait, read
 775     multiop_bg_pause $DIR/$tfile o_r10c
 776     NFSREADPID=$!
 777     echo "3. NFS client readder opened the file $DIR/$tfile"
 778     sleep 1
 779
 780     # let unlink to go
 781     rmultiop_stop --uniq unlink $nfsserver
 782     echo "4. unlink, close completed"
 783     sleep 1
 784
 785     # let nfs read to go
 786     kill -USR1 $NFSREADPID
 787     echo "5. NFS client read completed"
 788
 789     wait $NFSREADPID
 790
 791     rmultiop_stop --uniq open $nfsserver
 792     zconf_umount $nfsserver $MOUNT2
 793 }
 794 run_test nfsread_orphan_file "accessing files via nfs, bug 17764"
 795
 796 complete $(basename $0) $SECONDS
 797 check_and_cleanup_lustre
 798 exit_status