LU-814 test: automate NFS over Lustre testing

[fs/lustre-release.git] / lustre / tests / parallel-scale.sh
diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh
index 34f320e..66b6d50 100644
--- a/lustre/tests/parallel-scale.sh
+++ b/lustre/tests/parallel-scale.sh
@@ -15,7 +15,8 @@ ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
  #
  MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
  clients=${CLIENTS:-$HOSTNAME}
-generate_machine_file $clients $MACHINEFILE || error "Failed to generate machine file"
+generate_machine_file $clients $MACHINEFILE || \
+    error "Failed to generate machine file"
  num_clients=$(get_node_count ${clients//,/ })
  
  
@@ -23,7 +24,8 @@ num_clients=$(get_node_count ${clients//,/ })
  #
  cbench_DIR=${cbench_DIR:-""}
  cbench_IDIRS=${cbench_IDIRS:-4}
-cbench_RUNS=${cbench_RUNS:-4}  # FIXME: wiki page requirements is 30, do we really need 30 ?
+# FIXME: the wiki page calls for 30 runs; do we really need 30?
+cbench_RUNS=${cbench_RUNS:-4}
  
  if [ "$SLOW" = "no" ]; then
      cbench_IDIRS=2
@@ -119,9 +121,12 @@ wdisjoint_REP=${wdisjoint_REP:-10000}
  # parallel_grouplock
  #
  #
-PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-$(which parallel_grouplock 2> /dev/null || true)}
+PARALLEL_GROUPLOCK=${PARALLEL_GROUPLOCK:-\
+    $(which parallel_grouplock 2> /dev/null || true)}
  parallel_grouplock_MINTASKS=${parallel_grouplock_MINTASKS:-5}
  
+. $LUSTRE/tests/functions.sh
+
  build_test_filter
  check_and_setup_lustre
  
@@ -129,601 +134,77 @@ get_mpiuser_id $MPI_USER
  MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
  $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
  
-
-print_opts () {
-    local var
-
-    echo OPTIONS:
-
-    for i in $@; do
-        var=$i
-        echo "${var}=${!var}"
-    done
-    [ -e $MACHINEFILE ] && cat $MACHINEFILE
-}
-
-# Takes:
-# 5 min * cbench_RUNS
-#        SLOW=no     10 mins
-#        SLOW=yes    50 mins
-# Space estimation:
-#        compile dir kernel-1 680MB
-#        required space       680MB * cbench_IDIRS = ~7 Gb
-
  test_compilebench() {
-    print_opts cbench_DIR cbench_IDIRS cbench_RUNS
-
-    [ x$cbench_DIR = x ] &&
-        { skip_env "compilebench not found" && return; }
-
-    [ -e $cbench_DIR/compilebench ] || \
-        { skip_env "No compilebench build" && return; }
-
-    local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
-    if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
-        cbench_IDIRS=$(( space / 680 / 1024))
-        [ $cbench_IDIRS = 0 ] && \
-            skip_env "Need free space atleast 680 Mb, have $space" && return
-
-        log free space=$space, reducing initial dirs to $cbench_IDIRS
-    fi
-    # FIXME:
-    # t-f _base needs to be modifyed to set properly tdir
-    # for new "test_foo" functions names
-    # local testdir=$DIR/$tdir
-    local testdir=$DIR/d0.compilebench
-    mkdir -p $testdir
-
-    local savePWD=$PWD
-    cd $cbench_DIR
-    local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
-
-    log "$cmd"
-
-    local rc=0
-    eval $cmd
-    rc=$?
-
-    cd $savePWD
-    [ $rc = 0 ] || error "compilebench failed: $rc"
-    rm -rf $testdir
+    run_compilebench
  }
  run_test compilebench "compilebench"
  
  test_metabench() {
-    [ x$METABENCH = x ] &&
-        { skip_env "metabench not found" && return; }
-
-    # FIXME
-    # Need space estimation here.
-
-    print_opts METABENCH clients mbench_NFILES mbench_THREADS
-
-    local testdir=$DIR/d0.metabench
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    # -C             Run the file creation tests.
-    # -S             Run the file stat tests.
-    # -c nfile       Number of files to be used in each test.
-    # -k             Cleanup.  Remove the test directories.
-    local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
-    echo "+ $cmd"
-
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
-    fi
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "metabench failed! $rc"
-    fi
-    rm -rf $testdir
+    run_metabench
  }
  run_test metabench "metabench"
  
  test_simul() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$SIMUL = x ] &&
-        { skip_env "simul not found" && return; }
-
-    # FIXME
-    # Need space estimation here.
-
-    print_opts SIMUL clients simul_REP simul_THREADS
-
-    local testdir=$DIR/d0.simul
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    # -n # : repeat each test # times
-    # -N # : repeat the entire set of tests # times
-
-    local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
-
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
-    fi
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "simul failed! $rc"
-    fi
-    rm -rf $testdir
+    run_simul
  }
  run_test simul "simul"
  
-test_mdtest() {
-    local type=${1:-"ssf"}
-
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$MDTEST = x ] &&
-        { skip_env "mdtest not found" && return; }
-
-    # FIXME
-    # Need space estimation here.
-
-    print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
-
-    local testdir=$DIR/d0.mdtest
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    # -i # : repeat each test # times
-    # -d   : test dir
-    # -n # : number of file/dir to create/stat/remove
-    # -u   : each process create/stat/remove individually
-
-    local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
-    [ $type = "fpp" ] && cmd="$cmd -u"
-
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * mdtest_THREADS)) -machinefile ${MACHINEFILE} $cmd
-    fi
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "mdtest failed! $rc"
-    fi
-    rm -rf $testdir
-}
-
  test_mdtestssf() {
-    test_mdtest "ssf"
+    run_mdtest "ssf"
  }
  run_test mdtestssf "mdtestssf"
  
  test_mdtestfpp() {
-    test_mdtest "fpp"
+    run_mdtest "fpp"
  }
  run_test mdtestfpp "mdtestfpp"
  
  test_connectathon() {
-    print_opts cnt_DIR cnt_NRUN
-
-    [ x$cnt_DIR = x ] &&
-        { skip_env "connectathon dir not found" && return; }
-
-    [ -e $cnt_DIR/runtests ] || \
-        { skip_env "No connectathon runtests found" && return; }
-
-    local testdir=$DIR/d0.connectathon
-    mkdir -p $testdir
-
-    local savePWD=$PWD
-    cd $cnt_DIR
-
-    #
-    # cthon options (must be in this order)
-    #
-    # -N numpasses - will be passed to the runtests script.  This argument
-    #         is optional.  It specifies the number of times to run
-    #         through the tests.
-    #
-    # One of these test types
-    #    -b  basic
-    #    -g  general
-    #    -s  special
-    #    -l  lock
-    #    -a  all of the above
-    #
-    # -f      a quick functionality test
-    #
-
-    tests="-b -g -s"
-    # Include lock tests unless we're running on nfsv4
-    local fstype=$(df -TP $testdir | awk 'NR==2  {print $2}')
-    echo "$testdir: $fstype"
-    if [[ $fstype != "nfs4" ]]; then
-        tests="$tests -l"
-    fi
-    echo "tests: $tests"
-    for test in $tests; do
-        local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
-        local rc=0
-
-        log "$cmd"
-        eval $cmd
-        rc=$?
-        [ $rc = 0 ] || error "connectathon failed: $rc"
-    done
-
-    cd $savePWD
-    rm -rf $testdir
+    run_connectathon
  }
  run_test connectathon "connectathon"
  
-test_ior() {
-    local type=${1:="ssf"}
-
-    [ x$IOR = x ] &&
-        { skip_env "IOR not found" && return; }
-
-    local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
-    echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS "
-    if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then
-        echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
-        ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
-        [ $ior_blockSize = 0 ] && \
-            skip_env "Need free space more than ($num_clients * $ior_THREADS )Gb: $((num_clients*ior_THREADS *1024 *1024*2)), have $space" && return
-
-        echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
-    fi
-
-    print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
-
-    local testdir=$DIR/d0.ior
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-    if [ "$NFSCLIENT" ]; then
-        setstripe_nfsserver $testdir -c -1 ||
-            { error "setstripe on nfsserver failed" && return 1; }
-    else
-        $LFS setstripe $testdir -c -1 ||
-            { error "setstripe failed" && return 2; }
-    fi
-    #
-    # -b N  blockSize -- contiguous bytes to write per task  (e.g.: 8, 4k, 2m, 1g)"
-    # -o S  testFileName
-    # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
-    # -w    writeFile -- write file"
-    # -r    readFile -- read existing file"
-    # -T    maxTimeDuration -- max time in minutes to run tests"
-    # -k    keepFile -- keep testFile(s) on program exit
-
-    local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
-    [ $type = "fpp" ] && cmd="$cmd -F"
-
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
-    fi
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "ior failed! $rc"
-    fi
-    rm -rf $testdir
-}
-
  test_iorssf() {
-    test_ior "ssf"
+    run_ior "ssf"
  }
  run_test iorssf "iorssf"
  
  test_iorfpp() {
-    test_ior "fpp"
+    run_ior "fpp"
  }
  run_test iorfpp "iorfpp"
  
  test_mib() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$MIB = x ] &&
-        { skip_env "MIB not found" && return; }
-
-    print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit MACHINEFILE
-
-    local testdir=$DIR/d0.mib
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-    $LFS setstripe $testdir -c -1 ||
-        { error "setstripe failed" && return 2; }
-    #
-    # -I    Show intermediate values in output
-    # -H    Show headers in output
-    # -L    Do not issue new system calls after this many seconds
-    # -s    Use system calls of this size
-    # -t    test dir
-    # -l    Issue no more than this many system calls
-    local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
-
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * mib_THREADS)) -machinefile ${MACHINEFILE} $cmd
-    fi
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "mib failed! $rc"
-    fi
-    rm -rf $testdir
+    run_mib
  }
  run_test mib "mib"
  
  test_cascading_rw() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$CASC_RW = x ] &&
-        { skip_env "cascading_rw not found" && return; }
-
-    # FIXME
-    # Need space estimation here.
-
-    print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
-
-    local testdir=$DIR/d0.cascading_rw
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    # -g: debug mode
-    # -n: repeat test # times
-
-    local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
-
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $casc_THREADS)) -machinefile ${MACHINEFILE} $cmd
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "cascading_rw failed! $rc"
-    fi
-    rm -rf $testdir
+    run_cascading_rw
  }
  run_test cascading_rw "cascading_rw"
  
  test_write_append_truncate() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    # location is lustre/tests dir
-    if ! which write_append_truncate > /dev/null 2>&1 ; then
-        skip_env "write_append_truncate not found"
-        return
-    fi
-
-    # FIXME
-    # Need space estimation here.
-
-    local testdir=$DIR/d0.write_append_truncate
-    local file=$testdir/f0.wat
-
-    print_opts clients write_REP write_THREADS MACHINEFILE
-
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    local cmd="write_append_truncate -n $write_REP $file"
-
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $write_THREADS)) -machinefile ${MACHINEFILE} $cmd
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "write_append_truncate failed! $rc"
-        return $rc
-    fi
-    rm -rf $testdir
+    run_write_append_truncate
  }
  run_test write_append_truncate "write_append_truncate"
  
  test_write_disjoint() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$WRITE_DISJOINT = x ] &&
-        { skip_env "write_disjoint not found" && return; }
-
-    # FIXME
-    # Need space estimation here.
-
-    print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
-    local testdir=$DIR/d0.write_disjoint
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
-
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $wdisjoint_THREADS)) -machinefile ${MACHINEFILE} $cmd
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "write_disjoint failed! $rc"
-    fi
-    rm -rf $testdir
+    run_write_disjoint
  }
  run_test write_disjoint "write_disjoint"
  
  test_parallel_grouplock() {
-    if [ "$NFSCLIENT" ]; then
-        skip "skipped for NFSCLIENT mode"
-        return
-    fi
-
-    [ x$PARALLEL_GROUPLOCK = x ] &&
-        { skip "PARALLEL_GROUPLOCK not found" && return; }
-
-    print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
-
-    local testdir=$DIR/d0.parallel_grouplock
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
-        error "set_param max_rw_chunk=0 failed "
-
-    local cmd
-    local status=0
-    local subtest
-    for i in $(seq 12); do
-        subtest="-t $i"
-        local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
-        echo "+ $cmd"
-
-        mpi_run -np $parallel_grouplock_MINTASKS -machinefile ${MACHINEFILE} $cmd
-        local rc=$?
-        if [ $rc != 0 ] ; then
-            error_noexit "parallel_grouplock subtests $subtest failed! $rc"
-        else
-            echo "parallel_grouplock subtests $subtest PASS"
-        fi
-        let status=$((status + rc))
-        # clear debug to collect one log per one test
-        do_nodes $(comma_list $(nodes_list)) lctl clear
-     done
-    [ $status -eq 0 ] || error "parallel_grouplock status: $status"
-    rm -rf $testdir
+    run_parallel_grouplock
  }
  run_test parallel_grouplock "parallel_grouplock"
  
  statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
  statahead_NUMFILES=${statahead_NUMFILES:-500000}
  
-cleanup_statahead () {
-    trap 0
-
-    local clients=$1
-    local mntpt_root=$2
-    local num_mntpts=$3
-
-    for i in $(seq 0 $num_mntpts);do
-        zconf_umount_clients $clients ${mntpt_root}$i ||
-            error_exit "Failed to umount lustre on ${mntpt_root}$i"
-    done
-}
-
  test_statahead () {
-    if [[ -n $NFSCLIENT ]]; then
-        skip "Statahead testing is not supported on NFS clients."
-        return 0
-    fi
-
-    [ x$MDSRATE = x ] &&
-        { skip_env "mdsrate not found" && return; }
-
-    print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
-
-    # create large dir
-
-    # do not use default "d[0-9]*" dir name
-    # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
-    local dir=dstatahead
-    local testdir=$DIR/$dir
-
-    # cleanup only if dir exists
-    # cleanup only $statahead_NUMFILES number of files
-    # ignore the other files created by someone else
-    [ -d $testdir ] &&
-        mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
-
-    mkdir -p $testdir
-    # mpi_run uses mpiuser
-    chmod 0777 $testdir
-
-    local num_files=$statahead_NUMFILES
-
-    local IFree=$(inodes_available)
-    if [ $IFree -lt $num_files ]; then
-      num_files=$IFree
-    fi
-
-    cancel_lru_locks mdc
-
-    local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
-    echo "+ $cmd"
-
-    mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
-
-    local rc=$?
-    if [ $rc != 0 ] ; then
-        error "mdsrate failed to create $rc"
-        return $rc
-    fi
-
-    local num_mntpts=$statahead_NUMMNTPTS
-    local mntpt_root=$TMP/mntpt/lustre
-    local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
-
-    echo "Mounting $num_mntpts lustre clients starts on $clients"
-    trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
-    for i in $(seq 0 $num_mntpts); do
-        zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
-            error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
-    done
-
-    do_rpc_nodes $clients cancel_lru_locks mdc
-
-    do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
-
-    mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
-
-    # use rm instead of rmdir because of
-    # testdir could contain the files created by someone else,
-    # or by previous run where is num_files prev > num_files current
-    rm -rf $testdir
-    cleanup_statahead $clients $mntpt_root $num_mntpts
+    run_statahead
  }
-
  run_test statahead "statahead test, multiple clients"
  
  complete $(basename $0) $SECONDS