lustre/tests/parallel-scale.sh

   1 #!/bin/bash
   2 #
   3 #set -vx
   4
   5 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
   6 . $LUSTRE/tests/test-framework.sh
   7 init_test_env $@
   8 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
   9 init_logging
  10
  11 #              bug 20670
  12 ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
  13
  14 #
  15 # compilbench
  16 #
  17 cbench_DIR=${cbench_DIR:-""}
  18 cbench_IDIRS=${cbench_IDIRS:-4}
  19 cbench_RUNS=${cbench_RUNS:-4}   # FIXME: wiki page requirements is 30, do we really need 30 ?
  20
  21 if [ "$SLOW" = "no" ]; then
  22     cbench_IDIRS=2
  23     cbench_RUNS=2
  24 fi
  25
  26 #
  27 # metabench
  28 #
  29 METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
  30 mbench_NFILES=${mbench_NFILES:-30400}
  31 [ "$SLOW" = "no" ] && mbench_NFILES=10000
  32 MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
  33 # threads per client
  34 mbench_THREADS=${mbench_THREADS:-4}
  35
  36 #
  37 # simul
  38 #
  39 SIMUL=${SIMUL:=$(which simul 2> /dev/null || true)}
  40 # threads per client
  41 simul_THREADS=${simul_THREADS:-2}
  42 simul_REP=${simul_REP:-20}
  43 [ "$SLOW" = "no" ] && simul_REP=2
  44
  45 #
  46 # connectathon
  47 #
  48 cnt_DIR=${cnt_DIR:-""}
  49 cnt_NRUN=${cnt_NRUN:-10}
  50 [ "$SLOW" = "no" ] && cnt_NRUN=2
  51
  52 #
  53 # cascading rw
  54 #
  55 CASC_RW=${CASC_RW:-$(which cascading_rw 2> /dev/null || true)}
  56 # threads per client
  57 casc_THREADS=${casc_THREADS:-2}
  58 casc_REP=${casc_REP:-300}
  59 [ "$SLOW" = "no" ] && casc_REP=10
  60
  61 #
  62 # IOR
  63 #
  64 IOR=${IOR:-$(which IOR 2> /dev/null || true)}
  65 # threads per client
  66 ior_THREADS=${ior_THREADS:-2}
  67 ior_blockSize=${ior_blockSize:-6}       # Gb
  68 ior_DURATION=${ior_DURATION:-30}        # minutes
  69 [ "$SLOW" = "no" ] && ior_DURATION=5
  70
  71 #
  72 # write_append_truncate
  73 #
  74 # threads per client
  75 write_THREADS=${write_THREADS:-8}
  76 write_REP=${write_REP:-10000}
  77 [ "$SLOW" = "no" ] && write_REP=100
  78
  79 #
  80 # write_disjoint
  81 #
  82 WRITE_DISJOINT=${WRITE_DISJOINT:-$(which write_disjoint 2> /dev/null || true)}
  83 # threads per client
  84 wdisjoint_THREADS=${wdisjoint_THREADS:-4}
  85 wdisjoint_REP=${wdisjoint_REP:-10000}
  86 [ "$SLOW" = "no" ] && wdisjoint_REP=100
  87
  88 #
  89 # parallel_grouplock
  90 #
  91 #
  92 PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 2> /dev/null || true)}
  93 parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
  94
  95 build_test_filter
  96 check_and_setup_lustre
  97
  98 get_mpiuser_id $MPI_USER
  99 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
 100 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
 101
 102 print_opts () {
 103     local var
 104
 105     echo OPTIONS:
 106
 107     for i in $@; do
 108         var=$i
 109         echo "${var}=${!var}"
 110     done
 111     [ -e $MACHINEFILE ] && cat $MACHINEFILE
 112 }
 113
 114 # Takes:
 115 # 5 min * cbench_RUNS
 116 #        SLOW=no     10 mins
 117 #        SLOW=yes    50 mins
 118 # Space estimation:
 119 #        compile dir kernel-1 680MB
 120 #        required space       680MB * cbench_IDIRS = ~7 Gb
 121
 122 test_compilebench() {
 123     print_opts cbench_DIR cbench_IDIRS cbench_RUNS
 124
 125     [ x$cbench_DIR = x ] &&
 126         { skip_env "compilebench not found" && return; }
 127
 128     [ -e $cbench_DIR/compilebench ] || \
 129         { skip_env "No compilebench build" && return; }
 130
 131     local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
 132     if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
 133         cbench_IDIRS=$(( space / 680 / 1024))
 134         [ $cbench_IDIRS = 0 ] && \
 135             skip_env "Need free space atleast 680 Mb, have $space" && return
 136
 137         log free space=$space, reducing initial dirs to $cbench_IDIRS
 138     fi
 139     # FIXME:
 140     # t-f _base needs to be modifyed to set properly tdir
 141     # for new "test_foo" functions names
 142     # local testdir=$DIR/$tdir
 143     local testdir=$DIR/d0.compilebench
 144     mkdir -p $testdir
 145
 146     local savePWD=$PWD
 147     cd $cbench_DIR
 148     local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
 149
 150     log "$cmd"
 151
 152     local rc=0
 153     eval $cmd
 154     rc=$?
 155
 156     cd $savePWD
 157     [ $rc = 0 ] || error "compilebench failed: $rc"
 158     rm -rf $testdir
 159 }
 160 run_test compilebench "compilebench"
 161
 162 test_metabench() {
 163     [ x$METABENCH = x ] &&
 164         { skip_env "metabench not found" && return; }
 165
 166     local clients=$CLIENTS
 167     [ -z $clients ] && clients=$(hostname)
 168
 169     num_clients=$(get_node_count ${clients//,/ })
 170
 171     # FIXME
 172     # Need space estimation here.
 173
 174     generate_machine_file $clients $MACHINEFILE || return $?
 175
 176     print_opts METABENCH clients mbench_NFILES mbench_THREADS
 177
 178     local testdir=$DIR/d0.metabench
 179     mkdir -p $testdir
 180     # mpi_run uses mpiuser
 181     chmod 0777 $testdir
 182
 183     # -C             Run the file creation tests.
 184     # -S             Run the file stat tests.
 185     # -c nfile       Number of files to be used in each test.
 186     # -k             Cleanup.  Remove the test directories.
 187     local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
 188     echo "+ $cmd"
 189     mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
 190     local rc=$?
 191     if [ $rc != 0 ] ; then
 192         error "metabench failed! $rc"
 193     fi
 194     rm -rf $testdir
 195 }
 196 run_test metabench "metabench"
 197
 198 test_simul() {
 199     if [ "$NFSCLIENT" ]; then
 200         skip "skipped for NFSCLIENT mode"
 201         return
 202     fi
 203
 204     [ x$SIMUL = x ] &&
 205         { skip_env "simul not found" && return; }
 206
 207     local clients=$CLIENTS
 208     [ -z $clients ] && clients=$(hostname)
 209
 210     local num_clients=$(get_node_count ${clients//,/ })
 211
 212     # FIXME
 213     # Need space estimation here.
 214
 215     generate_machine_file $clients $MACHINEFILE || return $?
 216
 217     print_opts SIMUL clients simul_REP simul_THREADS
 218
 219     local testdir=$DIR/d0.simul
 220     mkdir -p $testdir
 221     # mpi_run uses mpiuser
 222     chmod 0777 $testdir
 223
 224     # -n # : repeat each test # times
 225     # -N # : repeat the entire set of tests # times
 226
 227     local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
 228
 229     echo "+ $cmd"
 230     mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
 231
 232     local rc=$?
 233     if [ $rc != 0 ] ; then
 234         error "simul failed! $rc"
 235     fi
 236     rm -rf $testdir
 237 }
 238 run_test simul "simul"
 239
 240 test_connectathon() {
 241     print_opts cnt_DIR cnt_NRUN
 242
 243     [ x$cnt_DIR = x ] &&
 244         { skip_env "connectathon dir not found" && return; }
 245
 246     [ -e $cnt_DIR/runtests ] || \
 247         { skip_env "No connectathon runtests found" && return; }
 248
 249     local testdir=$DIR/d0.connectathon
 250     mkdir -p $testdir
 251
 252     local savePWD=$PWD
 253     cd $cnt_DIR
 254
 255     #
 256     # cthon options (must be in this order)
 257     #
 258     # -N numpasses - will be passed to the runtests script.  This argument
 259     #         is optional.  It specifies the number of times to run
 260     #         through the tests.
 261     #
 262     # One of these test types
 263     #    -b  basic
 264     #    -g  general
 265     #    -s  special
 266     #    -l  lock
 267     #    -a  all of the above
 268     #
 269     # -f      a quick functionality test
 270     #
 271
 272     tests="-b -g -s"
 273     # Include lock tests unless we're running on nfsv4
 274     local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
 275     echo "$testdir: $fstype"
 276     if [[ $fstype != "nfs4" ]]; then
 277         tests="$tests -l"
 278     fi
 279     echo "tests: $tests"
 280     for test in $tests; do
 281         local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
 282         local rc=0
 283
 284         log "$cmd"
 285         eval $cmd
 286         rc=$?
 287         [ $rc = 0 ] || error "connectathon failed: $rc"
 288     done
 289
 290     cd $savePWD
 291     rm -rf $testdir
 292 }
 293 run_test connectathon "connectathon"
 294
 295 test_ior() {
 296     [ x$IOR = x ] &&
 297         { skip_env "IOR not found" && return; }
 298
 299     local clients=$CLIENTS
 300     [ -z $clients ] && clients=$(hostname)
 301
 302     local num_clients=$(get_node_count ${clients//,/ })
 303
 304     local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
 305     echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS "
 306     if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then
 307         echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
 308         ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
 309         [ $ior_blockSize = 0 ] && \
 310             skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
 311
 312         echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
 313     fi
 314
 315     generate_machine_file $clients $MACHINEFILE || return $?
 316
 317     print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
 318
 319     local testdir=$DIR/d0.ior
 320     mkdir -p $testdir
 321     # mpi_run uses mpiuser
 322     chmod 0777 $testdir
 323     if [ "$NFSCLIENT" ]; then
 324         setstripe_nfsserver $testdir -c -1 ||
 325             { error "setstripe on nfsserver failed" && return 1; }
 326     else
 327         $LFS setstripe $testdir -c -1 ||
 328             { error "setstripe failed" && return 2; }
 329     fi
 330     #
 331     # -b N  blockSize -- contiguous bytes to write per task  (e.g.: 8, 4k, 2m, 1g)"
 332     # -o S  testFileName
 333     # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
 334     # -w    writeFile -- write file"
 335     # -r    readFile -- read existing file"
 336     # -T    maxTimeDuration -- max time in minutes to run tests"
 337     # -k    keepFile -- keep testFile(s) on program exit
 338     local cmd="$IOR -a POSIX -b ${ior_blockSize}g -o $testdir/iorData -t 2m -v -w -r -T $ior_DURATION -k"
 339
 340     echo "+ $cmd"
 341     mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
 342
 343     local rc=$?
 344     if [ $rc != 0 ] ; then
 345         error "ior failed! $rc"
 346     fi
 347     rm -rf $testdir
 348 }
 349 run_test ior "ior"
 350
 351 test_cascading_rw() {
 352     if [ "$NFSCLIENT" ]; then
 353         skip "skipped for NFSCLIENT mode"
 354         return
 355     fi
 356
 357     [ x$CASC_RW = x ] &&
 358         { skip_env "cascading_rw not found" && return; }
 359
 360     local clients=$CLIENTS
 361     [ -z $clients ] && clients=$(hostname)
 362
 363     num_clients=$(get_node_count ${clients//,/ })
 364
 365     # FIXME
 366     # Need space estimation here.
 367
 368     generate_machine_file $clients $MACHINEFILE || return $?
 369
 370     print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
 371
 372     local testdir=$DIR/d0.cascading_rw
 373     mkdir -p $testdir
 374     # mpi_run uses mpiuser
 375     chmod 0777 $testdir
 376
 377     # -g: debug mode
 378     # -n: repeat test # times
 379
 380     local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
 381
 382     echo "+ $cmd"
 383     mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd
 384
 385     local rc=$?
 386     if [ $rc != 0 ] ; then
 387         error "cascading_rw failed! $rc"
 388     fi
 389     rm -rf $testdir
 390 }
 391 run_test cascading_rw "cascading_rw"
 392
 393 test_write_append_truncate() {
 394     if [ "$NFSCLIENT" ]; then
 395         skip "skipped for NFSCLIENT mode"
 396         return
 397     fi
 398
 399     # location is lustre/tests dir
 400     if ! which write_append_truncate > /dev/null 2>&1 ; then
 401         skip_env "write_append_truncate not found"
 402         return
 403     fi
 404
 405     local clients=$CLIENTS
 406     [ -z $clients ] && clients=$(hostname)
 407
 408     local num_clients=$(get_node_count ${clients//,/ })
 409
 410     # FIXME
 411     # Need space estimation here.
 412
 413     generate_machine_file $clients $MACHINEFILE || return $?
 414
 415     local testdir=$DIR/d0.write_append_truncate
 416     local file=$testdir/f0.wat
 417
 418     print_opts clients write_REP write_THREADS MACHINEFILE
 419
 420     mkdir -p $testdir
 421     # mpi_run uses mpiuser
 422     chmod 0777 $testdir
 423
 424     local cmd="write_append_truncate -n $write_REP $file"
 425
 426     echo "+ $cmd"
 427     mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd
 428
 429     local rc=$?
 430     if [ $rc != 0 ] ; then
 431         error "write_append_truncate failed! $rc"
 432         return $rc
 433     fi
 434     rm -rf $testdir
 435 }
 436 run_test write_append_truncate "write_append_truncate"
 437
 438 test_write_disjoint() {
 439     if [ "$NFSCLIENT" ]; then
 440         skip "skipped for NFSCLIENT mode"
 441         return
 442     fi
 443
 444     [ x$WRITE_DISJOINT = x ] &&
 445         { skip_env "write_disjoint not found" && return; }
 446
 447     local clients=$CLIENTS
 448     [ -z $clients ] && clients=$(hostname)
 449
 450     local num_clients=$(get_node_count ${clients//,/ })
 451
 452     # FIXME
 453     # Need space estimation here.
 454
 455     generate_machine_file $clients $MACHINEFILE || return $?
 456
 457     print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
 458     local testdir=$DIR/d0.write_disjoint
 459     mkdir -p $testdir
 460     # mpi_run uses mpiuser
 461     chmod 0777 $testdir
 462
 463     local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
 464
 465     echo "+ $cmd"
 466     mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd
 467
 468     local rc=$?
 469     if [ $rc != 0 ] ; then
 470         error "write_disjoint failed! $rc"
 471     fi
 472     rm -rf $testdir
 473 }
 474 run_test write_disjoint "write_disjoint"
 475
 476 test_parallel_grouplock() {
 477     if [ "$NFSCLIENT" ]; then
 478         skip "skipped for NFSCLIENT mode"
 479         return
 480     fi
 481
 482     [ x$PARALLEL_GROUPLOCK = x ] &&
 483         { skip "PARALLEL_GROUPLOCK not found" && return; }
 484
 485     local clients=$CLIENTS
 486     [ -z $clients ] && clients=$(hostname)
 487
 488     local num_clients=$(get_node_count ${clients//,/ })
 489
 490     generate_machine_file $clients $MACHINEFILE || return $?
 491
 492     print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
 493
 494     local testdir=$DIR/d0.parallel_grouplock
 495     mkdir -p $testdir
 496     # mpi_run uses mpiuser
 497     chmod 0777 $testdir
 498
 499     do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
 500         error "set_param max_rw_chunk=0 failed "
 501
 502     local cmd
 503     local status=0
 504     local subtest
 505     for i in $(seq 12); do
 506         subtest="-t $i"
 507         local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
 508         echo "+ $cmd"
 509
 510         mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd
 511         local rc=$?
 512         if [ $rc != 0 ] ; then
 513             error_noexit "parallel_grouplock subtests $subtest failed! $rc"
 514         else
 515             echo "parallel_grouplock subtests $subtest PASS"
 516         fi
 517         let status=$((status + rc))
 518         # clear debug to collect one log per one test
 519         do_nodes $(comma_list $(nodes_list)) lctl clear
 520      done
 521     [ $status -eq 0 ] || error "parallel_grouplock status: $status"
 522     rm -rf $testdir
 523 }
 524 run_test parallel_grouplock "parallel_grouplock"
 525
 526 statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
 527 statahead_NUMFILES=${statahead_NUMFILES:-500000}
 528
 529 cleanup_statahead () {
 530     trap 0
 531
 532     local clients=$1
 533     local mntpt_root=$2
 534     local num_mntpts=$3
 535
 536     for i in $(seq 0 $num_mntpts);do
 537         zconf_umount_clients $clients ${mntpt_root}$i ||
 538             error_exit "Failed to umount lustre on ${mntpt_root}$i"
 539     done
 540 }
 541
 542 test_statahead () {
 543     if [[ -n $NFSCLIENT ]]; then
 544         skip "Statahead testing is not supported on NFS clients."
 545         return 0
 546     fi
 547
 548     [ x$MDSRATE = x ] &&
 549         { skip_env "mdsrate not found" && return; }
 550
 551     local clients=$CLIENTS
 552     [ -z $clients ] && clients=$(hostname)
 553
 554     local num_clients=$(get_node_count ${clients//,/ })
 555
 556     generate_machine_file $clients $MACHINEFILE || return $?
 557
 558     print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
 559
 560     # create large dir
 561
 562     # do not use default "d[0-9]*" dir name
 563     # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
 564     local dir=dstatahead
 565     local testdir=$DIR/$dir
 566
 567     # cleanup only if dir exists
 568     # cleanup only $statahead_NUMFILES number of files
 569     # ignore the other files created by someone else
 570     [ -d $testdir ] &&
 571         mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
 572
 573     mkdir -p $testdir
 574     # mpi_run uses mpiuser
 575     chmod 0777 $testdir
 576
 577     local num_files=$statahead_NUMFILES
 578
 579     local IFree=$(inodes_available)
 580     if [ $IFree -lt $num_files ]; then
 581       num_files=$IFree
 582     fi
 583
 584     cancel_lru_locks mdc
 585
 586     local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
 587     echo "+ $cmd"
 588
 589     mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
 590
 591     local rc=$?
 592     if [ $rc != 0 ] ; then
 593         error "mdsrate failed to create $rc"
 594         return $rc
 595     fi
 596
 597     local num_mntpts=$statahead_NUMMNTPTS
 598     local mntpt_root=$TMP/mntpt/lustre
 599     mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
 600
 601     echo "Mounting $num_mntpts lustre clients starts on $clients"
 602     trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
 603     for i in $(seq 0 $num_mntpts); do
 604         zconf_mount_clients $clients ${mntpt_root}$i $mntopts ||
 605             error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
 606     done
 607
 608     do_rpc_nodes $clients cancel_lru_locks mdc
 609
 610     do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
 611
 612     mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
 613
 614     # use rm instead of rmdir because of
 615     # testdir could contain the files created by someone else,
 616     # or by previous run where is num_files prev > num_files current
 617     rm -rf $testdir
 618     cleanup_statahead $clients $mntpt_root $num_mntpts
 619 }
 620
 621 run_test statahead "statahead test, multiple clients"
 622
 623 equals_msg `basename $0`: test complete, cleaning up
 624 check_and_cleanup_lustre
 625 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true