LU-4433 tests: fix mds-survey.sh to support multiple MDTs

[fs/lustre-release.git] / lustre / tests / functions.sh
diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh

index b176b7b..c76ba56 100644 (file)
--- a/lustre/tests/functions.sh
+++ b/lustre/tests/functions.sh
@@ -194,11 +194,16 @@ mpi_run () {
  }
  
  nids_list () {
-   local list
-   for i in ${1//,/ }; do
-       list="$list $i@$NETTYPE"
-   done
-   echo $list
+       local list
+       local escape="$2"
+       for i in ${1//,/ }; do
+               if [ "$list" = "" ]; then
+                       list="$i@$NETTYPE"
+               else
+                       list="$list$escape $i@$NETTYPE"
+               fi
+       done
+       echo $list
  }
  
  # FIXME: all setup/cleanup can be done without rpc.sh
@@ -209,10 +214,10 @@ lst_end_session () {
      export LST_SESSION=`$LST show_session 2>/dev/null | awk -F " " '{print $5}'`
      [ "$LST_SESSION" == "" ] && return
  
+       $LST stop b
      if $verbose; then
          $LST show_error c s
      fi
-    $LST stop b
      $LST end_session
  }
  
@@ -254,6 +259,19 @@ short_hostname() {
    echo $(sed 's/\..*//' <<< $1)
  }
  
+###
+# short_nodename
+#
+# Print the node name of remote node $1 with any domain suffix stripped.
+# Uses 'uname -n' because 'hostname -s' is not implemented on all systems;
+# falls back to $1 itself when the remote query fails or returns nothing.
+short_nodename() {
+       local rname
+       # assign separately: 'local x=$(cmd)' would hide cmd's exit status
+       rname=$(do_node "$1" "uname -n") || rname=$1
+       short_hostname "${rname:-$1}"
+}
+
  print_opts () {
      local var
  
@@ -267,18 +285,15 @@ print_opts () {
  }
  
  run_compilebench() {
+       # Space estimation:
+       # compile dir kernel-0  ~1GB
+       # required space        ~1GB * cbench_IDIRS
  
-# Takes:
-# 5 min * cbench_RUNS
-#        SLOW=no     10 mins
-#        SLOW=yes    50 mins
-# Space estimation:
-#        compile dir kernel-1 680MB
-#        required space       680MB * cbench_IDIRS = ~7 Gb
+       local dir=${1:-$DIR}
  
      cbench_DIR=${cbench_DIR:-""}
-    cbench_IDIRS=${cbench_IDIRS:-4}
-    cbench_RUNS=${cbench_RUNS:-4}
+    cbench_IDIRS=${cbench_IDIRS:-2}
+    cbench_RUNS=${cbench_RUNS:-2}
  
      print_opts cbench_DIR cbench_IDIRS cbench_RUNS
  
@@ -288,19 +303,21 @@ run_compilebench() {
      [ -e $cbench_DIR/compilebench ] || \
          { skip_env "No compilebench build" && return; }
  
-    local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
-    if [ $space -le $((680 * 1024 * cbench_IDIRS)) ]; then
-        cbench_IDIRS=$(( space / 680 / 1024))
-        [ $cbench_IDIRS = 0 ] && \
-            skip_env "Need free space atleast 680 Mb, have $space" && return
+       local space=$(df -P $dir | tail -n 1 | awk '{ print $4 }')
+       if [[ $space -le $((1024 * 1024 * cbench_IDIRS)) ]]; then
+               cbench_IDIRS=$((space / 1024 / 1024))
+               [[ $cbench_IDIRS -eq 0 ]] &&
+                       skip_env "Need free space at least 1GB, have $space" &&
+                       return
+
+               echo "free space=$space, reducing initial dirs to $cbench_IDIRS"
+       fi
  
-        log free space=$space, reducing initial dirs to $cbench_IDIRS
-    fi
      # FIXME:
      # t-f _base needs to be modifyed to set properly tdir
      # for new "test_foo" functions names
      # local testdir=$DIR/$tdir
-    local testdir=$DIR/d0.compilebench
+    local testdir=$dir/d0.compilebench
      mkdir -p $testdir
  
      local savePWD=$PWD
@@ -325,6 +342,7 @@ run_metabench() {
      mbench_NFILES=${mbench_NFILES:-30400}
      # threads per client
      mbench_THREADS=${mbench_THREADS:-4}
+       mbench_OPTIONS=${mbench_OPTIONS:-}
  
      [ x$METABENCH = x ] &&
          { skip_env "metabench not found" && return; }
@@ -343,17 +361,18 @@ run_metabench() {
      # -S             Run the file stat tests.
      # -c nfile       Number of files to be used in each test.
      # -k             Cleanup.  Remove the test directories.
-    local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k"
+       local cmd="$METABENCH -w $testdir -c $mbench_NFILES -C -S -k $mbench_OPTIONS"
      echo "+ $cmd"
  
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mbench_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * $mbench_THREADS)) \
-            -machinefile ${MACHINEFILE} $cmd
-    fi
+       # find out if we need to use srun by checking $SRUN_PARTITION
+       if [ "$SRUN_PARTITION" ]; then
+               $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+                       -n $((num_clients * mbench_THREADS)) \
+                       -p $SRUN_PARTITION -- $cmd
+       else
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $((num_clients * $mbench_THREADS)) $cmd
+       fi
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -392,15 +411,16 @@ run_simul() {
  
      local cmd="$SIMUL -d $testdir -n $simul_REP -N $simul_REP"
  
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * simul_THREADS)) \
-            -machinefile ${MACHINEFILE} $cmd
-    fi
+       echo "+ $cmd"
+       # find out if we need to use srun by checking $SRUN_PARTITION
+       if [ "$SRUN_PARTITION" ]; then
+               $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+                       -n $((num_clients * simul_THREADS)) -p $SRUN_PARTITION \
+                       -- $cmd
+       else
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $((num_clients * simul_THREADS)) $cmd
+       fi
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -447,15 +467,16 @@ run_mdtest() {
      local cmd="$MDTEST -d $testdir -i $mdtest_iteration -n $mdtest_nFiles"
      [ $type = "fpp" ] && cmd="$cmd -u"
  
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mdtest_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * mdtest_THREADS)) \
-            -machinefile ${MACHINEFILE} $cmd
-    fi
+       echo "+ $cmd"
+       # find out if we need to use srun by checking $SRUN_PARTITION
+       if [ "$SRUN_PARTITION" ]; then
+               $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+                       -n $((num_clients * mdtest_THREADS)) \
+                       -p $SRUN_PARTITION -- $cmd
+       else
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $((num_clients * mdtest_THREADS)) $cmd
+       fi
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -523,35 +544,50 @@ run_connectathon() {
  }
  
  run_ior() {
-    local type=${1:="ssf"}
-
-    IOR=${IOR:-$(which IOR 2> /dev/null || true)}
-    # threads per client
-    ior_THREADS=${ior_THREADS:-2}
-    ior_iteration=${ior_iteration:-1}
-    ior_blockSize=${ior_blockSize:-6}  # GB
-    ior_xferSize=${ior_xferSize:-2m}
-    ior_type=${ior_type:-POSIX}
-    ior_DURATION=${ior_DURATION:-30}   # minutes
-
-    [ x$IOR = x ] &&
+       local type=${1:="ssf"}
+
+       IOR=${IOR:-$(which IOR 2> /dev/null || true)}
+       # threads per client
+       ior_THREADS=${ior_THREADS:-2}
+       ior_iteration=${ior_iteration:-1}
+       ior_blockSize=${ior_blockSize:-6}
+       ior_blockUnit=${ior_blockUnit:-M}   # K, M, G
+       ior_xferSize=${ior_xferSize:-1M}
+       ior_type=${ior_type:-POSIX}
+       ior_DURATION=${ior_DURATION:-30}        # minutes
+       local multiplier=1
+       case ${ior_blockUnit} in
+               [G])
+                       multiplier=$((1024 * 1024 * 1024))
+                       ;;
+               [M])
+                       multiplier=$((1024 * 1024))
+                       ;;
+               [K])
+                       multiplier=1024
+                       ;;
+               *)      error "Incorrect block unit should be one of [KMG]"
+                       ;;
+       esac
+
+       [ x$IOR = x ] &&
          { skip_env "IOR not found" && return; }
  
-    local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }')
-    local total_threads=$(( num_clients * ior_THREADS ))
-    echo "+ $ior_blockSize * 1024 * 1024 * $total_threads "
-    if [ $((space / 2)) -le \
-        $(( ior_blockSize * 1024 * 1024 * total_threads)) ]; then
-        echo "+ $space * 9/10 / 1024 / 1024 / $num_clients / $ior_THREADS"
-        ior_blockSize=$(( space /2 /1024 /1024 / num_clients / ior_THREADS ))
-        [ $ior_blockSize = 0 ] && \
-            skip_env "Need free space more than $((2 * total_threads))GB: \
-                $((total_threads *1024 *1024*2)), have $space" && return
-
-        local reduced_size="$num_clients x $ior_THREADS x $ior_blockSize"
-        echo "free space=$space, Need: $reduced_size GB"
-        echo "(blockSize reduced to $ior_blockSize Gb)"
-    fi
+       # calculate the space in bytes
+       local space=$(df -B 1 -P $DIR | tail -n 1 | awk '{ print $4 }')
+       local total_threads=$((num_clients * ior_THREADS))
+       echo "+ $ior_blockSize * $multiplier * $total_threads "
+       if [ $((space / 2)) -le \
+            $((ior_blockSize * multiplier * total_threads)) ]; then
+               ior_blockSize=$((space / 2 / multiplier / total_threads))
+               [ $ior_blockSize -eq 0 ] && \
+               skip_env "Need free space more than $((2 * total_threads)) \
+                        ${ior_blockUnit}: have $((space / multiplier))" &&
+                        return
+
+               echo "(reduced blockSize to $ior_blockSize \
+                    ${ior_blockUnit} bytes)"
+       fi
  
      print_opts IOR ior_THREADS ior_DURATION MACHINEFILE
  
@@ -566,29 +602,34 @@ run_ior() {
          $LFS setstripe $testdir -c -1 ||
              { error "setstripe failed" && return 2; }
      fi
-    #
-    # -b N  blockSize --
-    #       contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)"
-    # -o S  testFileName
-    # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)"
-    # -w    writeFile -- write file"
-    # -r    readFile -- read existing file"
-    # -T    maxTimeDuration -- max time in minutes to run tests"
-    # -k    keepFile -- keep testFile(s) on program exit
-
-    local cmd="$IOR -a $ior_type -b ${ior_blockSize}g -o $testdir/iorData \
-         -t $ior_xferSize -v -w -r -i $ior_iteration -T $ior_DURATION -k"
-    [ $type = "fpp" ] && cmd="$cmd -F"
-
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * $ior_THREADS)) \
-            -machinefile ${MACHINEFILE} $cmd
-    fi
+       #
+       # -b N  blockSize --
+       #       contiguous bytes to write per task (e.g.: 8, 4K, 2M, 1G)
+       # -o S  testFileName
+       # -t N  transferSize -- size of transfer in bytes (e.g.: 8, 4K, 2M, 1G)
+       # -w    writeFile -- write file
+       # -r    readFile -- read existing file
+       # -W    checkWrite -- check read after write
+       # -C    reorderTasks -- change task ordering to n+1 for readback
+       # -T    maxTimeDuration -- max time in minutes to run tests
+       # -k    keepFile -- keep testFile(s) on program exit
+
+       local cmd="$IOR -a $ior_type -b ${ior_blockSize}${ior_blockUnit} \
+               -o $testdir/iorData -t $ior_xferSize -v -C -w -r -W \
+               -i $ior_iteration -T $ior_DURATION -k"
+
+       [ $type = "fpp" ] && cmd="$cmd -F"
+
+       echo "+ $cmd"
+       # find out if we need to use srun by checking $SRUN_PARTITION
+       if [ "$SRUN_PARTITION" ]; then
+               $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+                       -n $((num_clients * ior_THREADS)) -p $SRUN_PARTITION \
+                       -- $cmd
+       else
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $((num_clients * $ior_THREADS)) $cmd
+       fi
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -633,15 +674,16 @@ run_mib() {
      local cmd="$MIB -t $testdir -s $mib_xferSize -l $mib_xferLimit \
          -L $mib_timeLimit -HI -p mib.$(date +%Y%m%d%H%M%S)"
  
-    echo "+ $cmd"
-    # find out if we need to use srun by checking $SRUN_PARTITION
-    if [ "$SRUN_PARTITION" ]; then
-        $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
-            -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION -- $cmd
-    else
-        mpi_run -np $((num_clients * mib_THREADS)) \
-            -machinefile ${MACHINEFILE} $cmd
-    fi
+       echo "+ $cmd"
+       # find out if we need to use srun by checking $SRUN_PARTITION
+       if [ "$SRUN_PARTITION" ]; then
+               $SRUN $SRUN_OPTIONS -D $testdir -w $clients -N $num_clients \
+                       -n $((num_clients * mib_THREADS)) -p $SRUN_PARTITION \
+                       -- $cmd
+       else
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $((num_clients * mib_THREADS)) $cmd
+       fi
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -680,9 +722,9 @@ run_cascading_rw() {
  
      local cmd="$CASC_RW -g -d $testdir -n $casc_REP"
  
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $casc_THREADS)) \
-        -machinefile ${MACHINEFILE} $cmd
+       echo "+ $cmd"
+       mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+               -np $((num_clients * $casc_THREADS)) $cmd
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -722,9 +764,9 @@ run_write_append_truncate() {
  
      local cmd="write_append_truncate -n $write_REP $file"
  
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $write_THREADS)) \
-        -machinefile ${MACHINEFILE} $cmd
+       echo "+ $cmd"
+       mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+               -np $((num_clients * $write_THREADS)) $cmd
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -762,9 +804,9 @@ run_write_disjoint() {
  
      local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
  
-    echo "+ $cmd"
-    mpi_run -np $((num_clients * $wdisjoint_THREADS)) \
-        -machinefile ${MACHINEFILE} $cmd
+       echo "+ $cmd"
+       mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+               -np $((num_clients * $wdisjoint_THREADS)) $cmd
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -794,31 +836,29 @@ run_parallel_grouplock() {
      # mpi_run uses mpiuser
      chmod 0777 $testdir
  
-    do_nodes $clients "lctl set_param llite.*.max_rw_chunk=0" ||
-        error "set_param max_rw_chunk=0 failed "
-
      local cmd
      local status=0
      local subtest
-    for i in $(seq 12); do
-        subtest="-t $i"
-        local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
-        echo "+ $cmd"
-
-        mpi_run -np $parallel_grouplock_MINTASKS \
-            -machinefile ${MACHINEFILE} $cmd
-        local rc=$?
-        if [ $rc != 0 ] ; then
-            error_noexit "parallel_grouplock subtests $subtest failed! $rc"
-        else
-            echo "parallel_grouplock subtests $subtest PASS"
-        fi
-        let status=$((status + rc))
-        # clear debug to collect one log per one test
-        do_nodes $(comma_list $(nodes_list)) lctl clear
-     done
-    [ $status -eq 0 ] || error "parallel_grouplock status: $status"
-    rm -rf $testdir
+       for i in $(seq 12); do
+               subtest="-t $i"
+               local cmd="$PARALLEL_GROUPLOCK -g -v -d $testdir $subtest"
+               echo "+ $cmd"
+
+               mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+                       -np $parallel_grouplock_MINTASKS $cmd
+               local rc=$?
+               if [ $rc != 0 ] ; then
+                       error_noexit "parallel_grouplock subtests $subtest " \
+                                    "failed! $rc"
+               else
+                       echo "parallel_grouplock subtests $subtest PASS"
+               fi
+               let status=$((status + rc))
+               # clear debug to collect one log per one test
+               do_nodes $(comma_list $(nodes_list)) lctl clear
+       done
+       [ $status -eq 0 ] || error "parallel_grouplock status: $status"
+       rm -rf $testdir
  }
  
  cleanup_statahead () {
@@ -881,7 +921,8 @@ run_statahead () {
      local cmd="$cmd1 $cmd2"
      echo "+ $cmd"
  
-    mpi_run -np $((num_clients * 32)) -machinefile ${MACHINEFILE} $cmd
+       mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
+               -np $((num_clients * 32)) $cmd
  
      local rc=$?
      if [ $rc != 0 ] ; then
@@ -891,7 +932,7 @@ run_statahead () {
  
      local num_mntpts=$statahead_NUMMNTPTS
      local mntpt_root=$TMP/mntpt/lustre
-    local mntopts=${MNTOPTSTATAHEAD:-$MOUNTOPT}
+    local mntopts=$MNTOPTSTATAHEAD
  
      echo "Mounting $num_mntpts lustre clients starts on $clients"
      trap "cleanup_statahead $clients $mntpt_root $num_mntpts" EXIT ERR