. $LUSTRE/tests/test-framework.sh
init_test_env $@
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+init_logging
-# bug 20670
-ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
+# bug 20670
+ALWAYS_EXCEPT="parallel_grouplock $PARALLEL_SCALE_EXCEPT"
+# common setup
#
+MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
+clients=${CLIENTS:-$HOSTNAME}
+generate_machine_file $clients $MACHINEFILE || error "Failed to generate machine file"
+num_clients=$(get_node_count ${clients//,/ })
+
+
# compilebench
#
cbench_DIR=${cbench_DIR:-""}
METABENCH=${METABENCH:-$(which metabench 2> /dev/null || true)}
mbench_NFILES=${mbench_NFILES:-30400}
[ "$SLOW" = "no" ] && mbench_NFILES=10000
-MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
# threads per client
mbench_THREADS=${mbench_THREADS:-4}
[ "$SLOW" = "no" ] && simul_REP=2
#
+# mib
+#
+MIB=${MIB:=$(which mib 2> /dev/null || true)}
+# threads per client
+mib_THREADS=${mib_THREADS:-2}
+mib_xferSize=${mib_xferSize:-1m}
+mib_xferLimit=${mib_xferLimit:-5000}
+mib_timeLimit=${mib_timeLimit:-300}
+
+#
+# MDTEST
+#
+MDTEST=${MDTEST:=$(which mdtest 2> /dev/null || true)}
+# threads per client
+mdtest_THREADS=${mdtest_THREADS:-2}
+mdtest_nFiles=${mdtest_nFiles:-"100000"}
+# We divide the total file count by threads per client and by client count
+mdtest_nFiles=$((mdtest_nFiles/mdtest_THREADS/num_clients))
+mdtest_iteration=${mdtest_iteration:-1}
+
+#
# connectathon
#
cnt_DIR=${cnt_DIR:-""}
IOR=${IOR:-$(which IOR 2> /dev/null || true)}
# threads per client
ior_THREADS=${ior_THREADS:-2}
+ior_iteration=${ior_iteration:-1}
ior_blockSize=${ior_blockSize:-6} # Gb
+ior_xferSize=${ior_xferSize:-2m}
+ior_type=${ior_type:-POSIX}
ior_DURATION=${ior_DURATION:-30} # minutes
[ "$SLOW" = "no" ] && ior_DURATION=5
MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
$GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
+
print_opts () {
local var
mkdir -p $testdir
local savePWD=$PWD
- cd $cbench_DIR
+ cd $cbench_DIR
local cmd="./compilebench -D $testdir -i $cbench_IDIRS -r $cbench_RUNS --makej"
log "$cmd"
local rc=0
eval $cmd
rc=$?
-
+
cd $savePWD
[ $rc = 0 ] || error "compilebench failed: $rc"
rm -rf $testdir
[ x$METABENCH = x ] &&
{ skip_env "metabench not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- num_clients=$(get_node_count ${clients//,/ })
-
# FIXME
# Need space estimation here.
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
print_opts METABENCH clients mbench_NFILES mbench_THREADS
local testdir=$DIR/d0.metabench
# -k Cleanup. Remove the test directories.
local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
echo "+ $cmd"
- mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
+
+ # find out if we need to use srun by checking $SRUN_PARTITION
+ if [ "$SRUN_PARTITION" ]; then
+ $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+ -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
+ else
+ mpi_run -np $((num_clients * $mbench_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ fi
+
local rc=$?
if [ $rc != 0 ] ; then
error "metabench failed! $rc"
run_test metabench "metabench"
test_simul() {
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
[ x$SIMUL = x ] &&
{ skip_env "simul not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- local num_clients=$(get_node_count ${clients//,/ })
-
# FIXME
# Need space estimation here.
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
print_opts SIMUL clients simul_REP simul_THREADS
local testdir=$DIR/d0.simul
local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
echo "+ $cmd"
- mpi_run -np $((num_clients * $simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ # find out if we need to use srun by checking $SRUN_PARTITION
+ if [ "$SRUN_PARTITION" ]; then
+ $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+ -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
+ else
+ mpi_run -np $((num_clients * simul_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ fi
local rc=$?
if [ $rc != 0 ] ; then
}
run_test simul "simul"
+# Run the mdtest metadata benchmark across all clients.
+# $1: "ssf" (default, all processes share one directory) or
+#     "fpp" (file per process, adds mdtest -u).
+# Skipped on NFS clients and when the mdtest binary is not installed.
+# Launches via srun when SRUN_PARTITION is set, otherwise via mpi_run.
+test_mdtest() {
+ local type=${1:-"ssf"}
+
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
+ [ x$MDTEST = x ] &&
+ { skip_env "mdtest not found" && return; }
+
+ # FIXME
+ # Need space estimation here.
+
+ print_opts MDTEST mdtest_iteration mdtest_THREADS mdtest_nFiles
+
+ local testdir=$DIR/d0.mdtest
+ mkdir -p $testdir
+ # mpi_run uses mpiuser
+ chmod 0777 $testdir
+
+ # -i # : repeat each test # times
+ # -d : test dir
+ # -n # : number of file/dir to create/stat/remove
+ # -u : each process create/stat/remove individually
+
+ local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
+ [ $type = "fpp" ] && cmd="$cmd -u"
+
+ echo "+ $cmd"
+ # find out if we need to use srun by checking $SRUN_PARTITION
+ if [ "$SRUN_PARTITION" ]; then
+ $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+ -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
+ else
+ mpi_run -np $((num_clients * mdtest_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ fi
+
+ # $? here is the exit status of the srun/mpi_run launch above
+ local rc=$?
+ if [ $rc != 0 ] ; then
+ error "mdtest failed! $rc"
+ fi
+ rm -rf $testdir
+}
+
+# ssf wrapper: mdtest with all processes operating in a shared directory.
+test_mdtestssf() {
+ test_mdtest "ssf"
+}
+run_test mdtestssf "mdtestssf"
+
+# fpp wrapper: mdtest in file-per-process mode (passes -u to mdtest).
+test_mdtestfpp() {
+ test_mdtest "fpp"
+}
+run_test mdtestfpp "mdtestfpp"
+
test_connectathon() {
print_opts cnt_DIR cnt_NRUN
local savePWD=$PWD
cd $cnt_DIR
- # -f a quick functionality test
- # -a run basic, general, special, and lock tests
+ #
+ # cthon options (must be in this order)
+ #
# -N numpasses - will be passed to the runtests script. This argument
# is optional. It specifies the number of times to run
# through the tests.
-
- local cmd="./runtests -N $cnt_NRUN -a -f $testdir"
-
- log "$cmd"
-
- local rc=0
- eval $cmd
- rc=$?
+ #
+ # One of these test types
+ # -b basic
+ # -g general
+ # -s special
+ # -l lock
+ # -a all of the above
+ #
+ # -f a quick functionality test
+ #
+
+ tests="-b -g -s"
+ # Include lock tests unless we're running on nfsv4
+ local fstype=$(df -TP $testdir | awk 'NR==2 {print $2}')
+ echo "$testdir: $fstype"
+ if [[ $fstype != "nfs4" ]]; then
+ tests="$tests -l"
+ fi
+ echo "tests: $tests"
+ for test in $tests; do
+ local cmd="./runtests -N $cnt_NRUN $test -f $testdir"
+ local rc=0
+
+ log "$cmd"
+ eval $cmd
+ rc=$?
+ [ $rc = 0 ] || error "connectathon failed: $rc"
+ done
cd $savePWD
- [ $rc = 0 ] || error "connectathon failed: $rc"
rm -rf $testdir
}
run_test connectathon "connectathon"
test_ior() {
+ local type=${1:="ssf"}
+
[ x$IOR = x ] &&
{ skip_env "IOR not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- local num_clients=$(get_node_count ${clients//,/ })
-
local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
echo "+ $ior_blockSize * 1024 * 1024 * $num_clients * $ior_THREADS "
if [ $((space / 2)) -le $(( ior_blockSize * 1024 * 1024 * num_clients * ior_THREADS)) ]; then
echo "free space=$space, Need: $num_clients x $ior_THREADS x $ior_blockSize Gb (blockSize reduced to $ior_blockSize Gb)"
fi
-
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
mkdir -p $testdir
# mpi_run uses mpiuser
chmod 0777 $testdir
- $LFS setstripe $testdir -c -1
-
- #
+ if [ "$NFSCLIENT" ]; then
+ setstripe_nfsserver $testdir -c -1 ||
+ { error "setstripe on nfsserver failed" && return 1; }
+ else
+ $LFS setstripe $testdir -c -1 ||
+ { error "setstripe failed" && return 2; }
+ fi
+ #
# -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
# -o S testFileName
# -t N transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
# -r readFile -- read existing file"
# -T maxTimeDuration -- max time in minutes to run tests"
# -k keepFile -- keep testFile(s) on program exit
- local cmd="$IOR -a POSIX -b ${ior_blockSize}g -o $testdir/iorData -t 2m -v -w -r -T $ior_DURATION -k"
+
+ local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
+ [ $type = "fpp" ] && cmd="$cmd -F"
echo "+ $cmd"
- mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ # find out if we need to use srun by checking $SRUN_PARTITION
+ if [ "$SRUN_PARTITION" ]; then
+ $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+ -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
+ else
+ mpi_run -np $((num_clients * $ior_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ fi
local rc=$?
if [ $rc != 0 ] ; then
fi
rm -rf $testdir
}
-run_test ior "ior"
-
+
+# ssf wrapper: IOR against a single shared file.
+test_iorssf() {
+ test_ior "ssf"
+}
+run_test iorssf "iorssf"
+
+# fpp wrapper: IOR in file-per-process mode (passes -F to IOR).
+test_iorfpp() {
+ test_ior "fpp"
+}
+run_test iorfpp "iorfpp"
+
+# Run the mib parallel I/O benchmark across all clients.
+# Skipped on NFS clients and when the mib binary is not installed.
+# Stripes the test dir across all OSTs, then launches mib via srun
+# (when SRUN_PARTITION is set) or mpi_run.
+test_mib() {
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
+ [ x$MIB = x ] &&
+ { skip_env "MIB not found" && return; }
+
+ print_opts MIB mib_THREADS mib_xferSize mib_xferLimit mib_timeLimit MACHINEFILE
+
+ local testdir=$DIR/d0.mib
+ mkdir -p $testdir
+ # mpi_run uses mpiuser
+ chmod 0777 $testdir
+ # stripe over all OSTs (-c -1) so the whole filesystem is exercised
+ $LFS setstripe $testdir -c -1 ||
+ { error "setstripe failed" && return 2; }
+ #
+ # -I Show intermediate values in output
+ # -H Show headers in output
+ # -L Do not issue new system calls after this many seconds
+ # -s Use system calls of this size
+ # -t test dir
+ # -l Issue no more than this many system calls
+ # NOTE(review): -p is not covered by the notes above; the timestamped
+ # argument suggests a per-run output prefix — confirm against mib docs
+ local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
+
+ echo "+ $cmd"
+ # find out if we need to use srun by checking $SRUN_PARTITION
+ if [ "$SRUN_PARTITION" ]; then
+ $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+ -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
+ else
+ mpi_run -np $((num_clients * mib_THREADS)) -machinefile ${MACHINEFILE} $cmd
+ fi
+
+ # $? here is the exit status of the srun/mpi_run launch above
+ local rc=$?
+ if [ $rc != 0 ] ; then
+ error "mib failed! $rc"
+ fi
+ rm -rf $testdir
+}
+run_test mib "mib"
+
test_cascading_rw() {
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
[ x$CASC_RW = x ] &&
{ skip_env "cascading_rw not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- num_clients=$(get_node_count ${clients//,/ })
-
# FIXME
# Need space estimation here.
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
print_opts CASC_RW clients casc_THREADS casc_REP MACHINEFILE
local testdir=$DIR/d0.cascading_rw
# mpi_run uses mpiuser
chmod 0777 $testdir
- # -g: debug mode
+ # -g: debug mode
# -n: repeat test # times
local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
run_test cascading_rw "cascading_rw"
test_write_append_truncate() {
- # location is lustre/tests dir
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
+ # location is lustre/tests dir
if ! which write_append_truncate > /dev/null 2>&1 ; then
skip_env "write_append_truncate not found"
return
fi
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- local num_clients=$(get_node_count ${clients//,/ })
-
# FIXME
# Need space estimation here.
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
local testdir=$DIR/d0.write_append_truncate
local file=$testdir/f0.wat
run_test write_append_truncate "write_append_truncate"
test_write_disjoint() {
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
[ x$WRITE_DISJOINT = x ] &&
{ skip_env "write_disjoint not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- local num_clients=$(get_node_count ${clients//,/ })
-
# FIXME
# Need space estimation here.
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
print_opts WRITE_DISJOINT clients wdisjoint_THREADS wdisjoint_REP MACHINEFILE
local testdir=$DIR/d0.write_disjoint
mkdir -p $testdir
run_test write_disjoint "write_disjoint"
test_parallel_grouplock() {
+ if [ "$NFSCLIENT" ]; then
+ skip "skipped for NFSCLIENT mode"
+ return
+ fi
+
[ x$PARALLEL_GROUPLOCK = x ] &&
{ skip "PARALLEL_GROUPLOCK not found" && return; }
- local clients=$CLIENTS
- [ -z $clients ] && clients=$(hostname)
-
- local num_clients=$(get_node_count ${clients//,/ })
-
- generate_machine_file $clients $MACHINEFILE || \
- error "can not generate machinefile $MACHINEFILE"
-
print_opts clients parallel_grouplock_MINTASKS MACHINEFILE
local testdir=$DIR/d0.parallel_grouplock
}
run_test parallel_grouplock "parallel_grouplock"
-equals_msg `basename $0`: test complete, cleaning up
+statahead_NUMMNTPTS=${statahead_NUMMNTPTS:-5}
+statahead_NUMFILES=${statahead_NUMFILES:-500000}
+
+# Unmount the extra statahead client mounts on every listed client.
+# $1: comma-separated client list
+# $2: mountpoint root (index is appended per mount)
+# $3: highest mount index (seq 0 N covers N+1 mountpoints, matching the
+#     mount loop in test_statahead)
+cleanup_statahead () {
+ # clear the EXIT/ERR trap so this cleanup cannot re-trigger itself
+ trap 0
+
+ local clients=$1
+ local mntpt_root=$2
+ local num_mntpts=$3
+
+ for i in $(seq 0 $num_mntpts);do
+ zconf_umount_clients $clients ${mntpt_root}$i ||
+ error_exit "Failed to umount lustre on ${mntpt_root}$i"
+ done
+}
+
+# Statahead stress test: create a large directory with mdsrate, then have
+# every client run ls over it from $statahead_NUMMNTPTS extra mounts.
+# Skipped on NFS clients and when the mdsrate binary is not installed.
+test_statahead () {
+ if [[ -n $NFSCLIENT ]]; then
+ skip "Statahead testing is not supported on NFS clients."
+ return 0
+ fi
+
+ [ x$MDSRATE = x ] &&
+ { skip_env "mdsrate not found" && return; }
+
+ print_opts MDSRATE clients statahead_NUMMNTPTS statahead_NUMFILES
+
+ # create large dir
+
+ # do not use default "d[0-9]*" dir name
+ # to avoid of rm $statahead_NUMFILES (500k) files in t-f cleanup
+ local dir=dstatahead
+ local testdir=$DIR/$dir
+
+ # cleanup only if dir exists
+ # cleanup only $statahead_NUMFILES number of files
+ # ignore the other files created by someone else
+ [ -d $testdir ] &&
+ mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $statahead_NUMFILES $testdir 'f%%d' --ignore
+
+ mkdir -p $testdir
+ # mpi_run uses mpiuser
+ chmod 0777 $testdir
+
+ local num_files=$statahead_NUMFILES
+
+ # cap the file count by the number of free inodes
+ local IFree=$(inodes_available)
+ if [ $IFree -lt $num_files ]; then
+ num_files=$IFree
+ fi
+
+ # drop cached MDC locks so the test starts from a cold state
+ cancel_lru_locks mdc
+
+ local cmd="${MDSRATE} ${MDSRATE_DEBUG} --mknod --dir $testdir --nfiles $num_files --filefmt 'f%%d'"
+ echo "+ $cmd"
+
+ # 32 mdsrate processes per client, same fan-out as the cleanup calls
+ mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
+
+ local rc=$?
+ if [ $rc != 0 ] ; then
+ error "mdsrate failed to create $rc"
+ return $rc
+ fi
+
+ local num_mntpts=$statahead_NUMMNTPTS
+ local mntpt_root=$TMP/mntpt/lustre
+ local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
+
+ echo "Mounting $num_mntpts lustre clients starts on $clients"
+ # ensure the extra mounts are torn down on any exit path
+ trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR
+ for i in $(seq 0 $num_mntpts); do
+ zconf_mount_clients $clients ${mntpt_root}$i "$mntopts" ||
+ error_exit "Failed to mount lustre on ${mntpt_root}$i on $clients"
+ done
+
+ do_rpc_nodes $clients cancel_lru_locks mdc
+
+ do_rpc_nodes $clients do_ls $mntpt_root $num_mntpts $dir
+
+ mdsrate_cleanup $((num_clients * 32)) $MACHINEFILE $num_files $testdir 'f%%d' --ignore
+
+ # use rm instead of rmdir because of
+ # testdir could contain the files created by someone else,
+ # or by previous run where is num_files prev > num_files current
+ rm -rf $testdir
+ # cleanup_statahead does "trap 0" first, so this explicit call
+ # will not run a second time via the EXIT trap
+ cleanup_statahead $clients $mntpt_root $num_mntpts
+}
+
+run_test statahead "statahead test, multiple clients"
+
+complete $(basename $0) $SECONDS
check_and_cleanup_lustre
-[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true
+exit_status