Whamcloud - gitweb
LU-13776 tests: make sure pjdfstest.sh writes to tmp
[fs/lustre-release.git] / lustre / tests / dom-performance.sh
index a75e849..bf5085e 100644 (file)
@@ -7,74 +7,91 @@
 set -e
 
 ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT=""
-[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$DOM_PERFORMANCE_EXCEPT"} # preset value wins
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
+[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
+
 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
 
 . $LUSTRE/tests/test-framework.sh
-CLEANUP=${CLEANUP:-:}
-SETUP=${SETUP:-:}
+
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
+SAVED_FAIL_ON_ERROR=$FAIL_ON_ERROR # put back at the end of the script
 FAIL_ON_ERROR=false
 
-check_and_setup_lustre
+SAVED_DEBUG=$($LCTL get_param -n debug 2> /dev/null) # put back at the end
 
-# $RUNAS_ID may get set incorrectly somewhere else
-if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then
-       skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit
-fi
-check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
 
+. $LUSTRE/tests/functions.sh
 build_test_filter
+check_and_setup_lustre
 
-DOM="yes"
-DOM_SIZE=${DOM_SIZE:-"1M"}
-OSC="mdc"
+# if MACHINEFILE set and exists -- use it
+MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh)-$(hostname).machines}
+clients=${CLIENTS:-$HOSTNAME}
+generate_machine_file $clients $MACHINEFILE ||
+       error "Failed to generate machine file"
 
-rm -rf $DIR/*
+DP_DIO=${DP_DIO:-"no"} # "yes": IOR and FIO are run with direct I/O flags
 
-NORM=$DIR/norm
-DOM=$DIR/dom
-STATS=${STATS:-"yes"}
+DOM_SIZE=${DOM_SIZE:-"1M"} # end of the "-L mdt" component in setstripe
+DP_OSC="mdc" # client type handed to the stats helpers (mdc or osc)
 
-# 1 stripe for normal files
-mkdir -p $NORM
-lfs setstripe -c 1 $NORM
+DP_NORM=$DIR/dp_norm # directory for ordinary (OST) files
+DP_DOM=$DIR/dp_dom # directory with the DoM layout
+DP_DOM_DNE=$DIR/dp_dne # striped DoM directory, created if MDSCOUNT > 1
+DP_STATS=${DP_STATS:-"no"} # "yes": print detailed stats / full FIO output
 
-if [ "x$DNE" == "xyes" ] ; then
-       lfs setdirstripe -i 0 -c 2 $DOM
+if $DO_CLEANUP; then # wipe everything under $DIR only when asked to
+       rm -rf $DIR/*
 else
-       mkdir -p $DOM
+       rm -rf $DP_NORM $DP_DOM $DP_DOM_DNE
 fi
 
-lfs setstripe -E ${DOM_SIZE} -L mdt -E EOF $DOM
-
 # total number of files
-FNUM=16384
+DP_FNUM=${DP_FNUM:-16384}
 # number of threads
-NUM=4
+DP_NUM=${DP_NUM:-4}
+
+# 2 stripes for normal files
+mkdir -p $DP_NORM
+$LFS setstripe -c 2 $DP_NORM ||
+       error "Cannot create test directory for ordinary files"
+
+if [[ $MDSCOUNT -gt 1 ]] ; then # several MDTs: add a striped DoM directory
+       $LFS setdirstripe -i 0 -c $MDSCOUNT $DP_DOM_DNE ||
+               error_noexit "Cannot create striped directory"
+       $LFS setstripe -E ${DOM_SIZE} -L mdt -E EOF $DP_DOM_DNE ||
+               error_noexit "Cannot create test directory for dom files"
+fi
 
-clear_stats() {
+mkdir -p $DP_DOM
+$LFS setstripe -E ${DOM_SIZE} -L mdt -E EOF $DP_DOM ||
+       error "Cannot create test directory for dom files"
+
+dp_clear_stats() { # zero client-side counters before a run; $1: mdc or osc
        local cli=$1
 
+       $LCTL set_param -n osc.*.stats=0 # both types are reported afterwards
+       $LCTL set_param -n mdc.*.stats=0
        $LCTL set_param -n ${cli}.*.${cli}_stats=0
        $LCTL set_param -n ${cli}.*.rpc_stats=0
-       $LCTL set_param -n ${cli}.*.stats=0
        $LCTL set_param -n llite.*.read_ahead_stats=0
        $LCTL set_param -n llite.*.unstable_stats=0
 }
 
-collect_stats() {
+dp_collect_stats() {
        local cli=$1
 
        sync;sync
+       echo ----- MDC RPCs: $(calc_stats mdc.*.stats req_active)
+       echo ----- OSC RPCs: $(calc_stats osc.*.stats req_active)
 
-       if [ "x$STATS" != "xyes" ] ; then
+       if [ "x$DP_STATS" != "xyes" ] ; then
                return 0
        fi
 
@@ -90,234 +107,334 @@ collect_stats() {
        $LCTL get_param llite.*.read_ahead_stats
 }
 
-setup_test() {
+dp_setup_test() { # cancel LRU locks, trim debug mask, zero stats; $1: mdc|osc
        local cli=$1
 
        cancel_lru_locks $cli
-       ### drop all debug
-       $LCTL set_param -n debug=0
-       clear_stats $cli
+       ### drop all debug except critical
+       $LCTL set_param -n debug="error warning console emerg"
+       dp_clear_stats $cli
 }
 
-run_cmd() {
+dp_run_cmd() { # eval "$1" with stats around it; returns the command's rc
        local cmd=$1
+       local cmdlog=$TMP/dp_cmd.log # copy of the output, scanned below
+       local rc
 
-       setup_test $OSC
+       dp_setup_test $DP_OSC
        if ! grep -qw "$MOUNT" /proc/mounts ; then
                echo "!!!!! Lustre is not mounted !!!!!, aborting"
                return 0
        fi
 
-       echo "##### $cmd #####"
-       echo "##### $(date +'%F %H:%M:%S'): START"
-       eval $cmd
-       echo "##### $(date +'%F %H:%M:%S'): GETSTATS"
-       collect_stats $OSC
-       echo "##### $(date +'%F %H:%M:%S'): STOP"
-       remount_client $DIR
+       echo "## $cmd" | awk '{ if (NR==1) {gsub(/[ \t\r\n]+/, " "); \
+                               gsub(/\|.*$/, ""); print }}'
+       echo "## $(date +'%F %H:%M:%S'): START"
+       # take PIPESTATUS inside the same list: "|| true" would reset it
+       eval $cmd 2>&1 | tee $cmdlog && rc=${PIPESTATUS[0]} ||
+               rc=${PIPESTATUS[0]}
+       if [ $rc -eq 0 ] && grep -q "p4_error:" $cmdlog ; then
+               rc=1 # output reports p4_error despite exit status 0
+       fi
 
+       dp_collect_stats $DP_OSC
+       remount_client $DIR > /dev/null
+       return $rc
 }
 
 run_MDtest() {
        if ! which mdtest > /dev/null 2>&1 ; then
-               echo "Mdtest is not installed, skipping"
-               return 0
+               skip_env "Mdtest is not installed, skipping"
        fi
 
        local mdtest=$(which mdtest)
 
        local TDIR=${1:-$MOUNT}
-       local th_num=$((FNUM * 2 / NUM))
+       local th_num=$((DP_FNUM * 2 / DP_NUM)) # passed to mdtest as -I
+       local bsizes="8192" # mdtest -w values; replaced when SLOW=yes
 
-       for bsize in 4096 ; do
-               run_cmd "mpirun -np $NUM $mdtest \
-                        -i 3 -I $th_num -F -z 1 -b 1 -L -u -w $bsize -d $TDIR"
+       chmod 0777 $TDIR
+
+       [ "$SLOW" = "yes" ] && bsizes="4096 32768"
+
+       for bsize in $bsizes ; do
+               dp_run_cmd "mpi_run -np $DP_NUM $mdtest -i 3 -I $th_num -F \
+                       -z 1 -b 1 -L -u -w $bsize -R -d $TDIR"
+               if [ ${PIPESTATUS[0]} != 0 ]; then # status of dp_run_cmd
+                       error "MDtest failed, aborting"
+               fi
        done
+
        rm -rf $TDIR/*
        return 0
 }
 
-run_smalliomany() {
-       if [ ! -f createmany ] ; then
-               echo "Createmany is not installed, skipping"
-               return 0
-       fi
-
-       if [ ! -f smalliomany ] ; then
-               echo "Smalliomany is not installed, skipping"
-               return 0
-       fi
-
+run_SmallIO() { # create/stat/small-IO/unlink cycle in $1 (default $DIR)
        local TDIR=${1:-$DIR}
-       local count=$FNUM
+       local count=$DP_FNUM
 
        local MIN=$((count * 16))
-       [ $MDSSIZE -le $MIN ] && count=$((MDSSIZE / 16))
+       local mdssize=$(mdssize_from_index $TDIR 0)
+       [ $mdssize -le $MIN ] && count=$((mdssize / 16)) # cap the file count
 
-       run_cmd "./createmany -o $TDIR/file- $count | grep 'total'"
+       dp_run_cmd "createmany -o $TDIR/file- $count | grep 'total:'"
+       if [ ${PIPESTATUS[0]} != 0 ]; then # status of dp_run_cmd
+               error "File creation failed, aborting"
+       fi
 
-       if [ -f statmany ]; then
-               run_cmd "./statmany -s $TDIR/file- $count $((count * 5)) | \
-                       grep 'total'"
+       dp_run_cmd "statmany -s $TDIR/file- $count $((count * 5)) |
+               grep 'total:'"
+       if [ ${PIPESTATUS[0]} != 0 ]; then
+               error "File stat failed, aborting"
        fi
 
        for opc in w a r ; do
-               run_cmd "./smalliomany -${opc} $TDIR/file- $count 300 | \
-                       grep 'total'"
+               dp_run_cmd "smalliomany -${opc} $TDIR/file- $count 300 |
+                       grep 'total:'"
+               if [ ${PIPESTATUS[0]} != 0 ]; then
+                       error "SmallIO -${opc} failed, aborting"
+               fi
+
        done
 
-       run_cmd "./unlinkmany $TDIR/file- $count | grep 'total'"
+       dp_run_cmd "unlinkmany $TDIR/file- $count | grep 'total:'"
+       if [ ${PIPESTATUS[0]} != 0 ]; then
+               error "SmallIO failed, aborting"
+       fi
+
        return 0
 }
 
 run_IOR() {
        if ! which IOR > /dev/null 2>&1 ; then
-               echo "IOR is not installed, skipping"
-               return 0
+               skip_env "IOR is not installed, skipping"
        fi
 
        local IOR=$(which IOR)
-       local iter=$((FNUM / NUM))
+       local iter=$((DP_FNUM / DP_NUM)) # passed to IOR as -i
+       local direct="" # becomes "-B" when DP_DIO=yes
 
-       if [ "x$DIO" == "xyes" ] ; then
+       if [ "x$DP_DIO" == "xyes" ] ; then
                direct="-B"
-       else
-               direct=""
        fi
 
        local TDIR=${1:-$MOUNT}
 
-       for bsize in 4 ; do
-               segments=$((128 / bsize))
+       chmod 0777 $TDIR
+
+       # for DoM large files (beyond the DoM size) use
+       # DOM_SIZE=1M :
+       #     bsize="4096 " - 4Mb
+       #     nsegments=$((128 * 1024))
+       # DOM_SIZE=64k :
+       #     bsize="1024 " - 1Mb
+       #     nsegments=$((32 * 1024))
+       local bsizes=${BSIZES:-"4 32"} # each is used as -b/-t ${bsize}K
+       local nsegments=${NSEGMENTS:-128}
+       [ "$SLOW" = "no" ] && bsizes="8" # SLOW=no overrides BSIZES
+
+       for bsize in $bsizes ; do
+               segments=$((nsegments / bsize))
 
-               run_cmd "mpirun -np $NUM $IOR \
+               dp_run_cmd "mpi_run -np $DP_NUM $IOR \
                        -a POSIX -b ${bsize}K -t ${bsize}K -o $TDIR/ -k \
-                       -s $segments -w -r -i $iter -F -E -z -m -Z $direct"
+                       -s $segments -w -r -i $iter -F -E -z -m -Z $direct" |
+                       awk '($1 !~ /^(write|read|access)$/) || NF>12 {print}'
+               if [ ${PIPESTATUS[0]} != 0 ]; then # dp_run_cmd, not awk
+                       error "IOR write test for ${bsize}K failed, aborting"
+               fi
+
                # check READ performance only (no cache)
-               run_cmd "mpirun -np $NUM $IOR \
+               dp_run_cmd "mpi_run -np $DP_NUM $IOR \
                        -a POSIX -b ${bsize}K -t ${bsize}K -o $TDIR/ -X 42\
-                       -s $segments -r -i $iter -F -E -z -m -Z $direct"
+                       -s $segments -r -i $iter -F -E -z -m -Z $direct" |
+                       awk '($1 !~ /^(read|access|remove)$/) || NF>12 {print}'
+               if [ ${PIPESTATUS[0]} != 0 ]; then # dp_run_cmd, not awk
+                       error "IOR read test for ${bsize}K failed, aborting"
+               fi
+
        done
        rm -rf $TDIR/*
        return 0
 }
 
-run_dbench() {
+run_Dbench() { # run dbench in $1 (default $MOUNT); skipped for the DNE dir
        if ! which dbench > /dev/null 2>&1 ; then
-               echo "Dbench is not installed, skipping"
-               return 0
+               skip_env "Dbench is not installed, skipping"
        fi
 
-       if [ "x$DNE" == "xyes" ] ; then
-               echo "dbench uses subdirs, skipping for DNE setup"
+       local TDIR=${1:-$MOUNT}
+
+       if [ "x$DP_DOM_DNE" == "x$TDIR" ] ; then
+               echo "dbench uses subdirs, skipping for DNE dir"
                return 0
        fi
 
-       local TDIR=${1:-$MOUNT}
+       dp_run_cmd "dbench -D $TDIR $DP_NUM | egrep -v 'warmup|execute'"
+       if [ ${PIPESTATUS[0]} != 0 ]; then # status of dp_run_cmd
+               error "Dbench failed, aborting"
+       fi
 
-       run_cmd "dbench -D $TDIR $NUM | egrep -v 'warmup|execute'"
        rm -rf $TDIR/*
        return 0
 }
 
-run_smallfile() {
-       if ! which unzip > /dev/null 2>&1 ; then
-               echo "No unzip is installed, skipping"
-               return 0;
+run_FIO() { # fio random write/read jobs in $1 (default $MOUNT)
+       # https://github.com/axboe/fio/archive/fio-2.8.zip
+       if ! which fio > /dev/null 2>&1 ; then
+               skip_env "No FIO installed, skipping"
        fi
 
-       if [ "x$DIO" == "xyes" ] ; then
-               echo "smallfile has no DIRECT IO mode, skipping"
-               return 0
+       local fnum=128 # per thread
+       local total=$((fnum * DP_NUM)) # files in all threads
+       local loops=$((DP_FNUM / total)) # number of loops
+       local direct=""
+       local output=""
+
+       if [ $loops -eq 0 ] ; then # DP_FNUM below one full pass: run once
+               loops=1
        fi
 
-       if [ "x$DNE" == "xyes" ] ; then
-               echo "smallfile uses subdirs, skipping for DNE setup"
-               return 0
+       if [ "x$DP_DIO" == "xyes" ] ; then
+               direct="--direct=1"
+       else
+               direct="--buffered=1 --bs_unaligned=1"
        fi
 
-       local host_set=$(hostname)
+       if [ "x$DP_STATS" != "xyes" ] ; then
+               output="--minimal" # ';'-separated output, parsed by awk below
+       fi
 
-       ### since smallfile is not installed system wide, get it right now
-       [ -f master.zip ] || \
-               wget https://github.com/bengland2/smallfile/archive/master.zip
-       unzip -uo master.zip
-       cd ./smallfile-master
+       local TDIR=${1:-$MOUNT}
+       base_cmd="fio --name=smallio --ioengine=posixaio $output \
+                 --iodepth=$((DP_NUM * 4)) --directory=$TDIR \
+                 --nrfiles=$fnum --openfiles=10000 \
+                 --numjobs=$DP_NUM --filesize=64k --lockfile=readwrite"
+
+       dp_run_cmd "$base_cmd --create_only=1" > /dev/null
+       if [ ${PIPESTATUS[0]} != 0 ]; then # status of dp_run_cmd
+               error "FIO file creation failed, aborting"
+       fi
 
-       if ! ls ./smallfile_cli.py > /dev/null 2>&1 ; then
-               echo "No smallfile test found, skipping"
-               cd ..
-               return 0
+       local bsizes="8" # block sizes in k; replaced when SLOW=yes
+       [ "$SLOW" = "yes" ] && bsizes="4 32"
+
+       for bsize in $bsizes ; do
+               local write_cmd="$base_cmd --bs=${bsize}k --rw=randwrite \
+                       $direct --file_service_type=random --randrepeat=1 \
+                        --norandommap --group_reporting=1 --loops=$loops"
+               if [ "x$DP_STATS" != "xyes" ] ; then
+                       dp_run_cmd "$write_cmd | awk -F\; '{printf \"WRITE: \
+                               BW %dKiB/sec, IOPS %d, lat (%d/%d/%d)usec\n\", \
+                               \$48, \$49, \$53, \$57, \$81}'"
+               else
+                       dp_run_cmd "$write_cmd"
+               fi
+               if [ ${PIPESTATUS[0]} != 0 ]; then
+                       error "FIO write test with ${bsize}k failed, aborting"
+               fi
+
+               local read_cmd="$base_cmd --bs=${bsize}k --rw=randread \
+                       $direct --file_service_type=random --randrepeat=1 \
+                        --norandommap --group_reporting=1 --loops=$loops"
+               if [ "x$DP_STATS" != "xyes" ] ; then
+                       dp_run_cmd "$read_cmd | awk -F\; '{printf \"READ : \
+                               BW %dKiB/sec, IOPS %d, lat (%d/%d/%d)usec\n\", \
+                               \$7, \$8, \$12, \$16, \$40}'"
+               else
+                       dp_run_cmd "$read_cmd"
+               fi
+               if [ ${PIPESTATUS[0]} != 0 ]; then
+                       error "FIO read test with ${bsize}k failed, aborting"
+               fi
+       done
+       rm -rf $TDIR/*
+       return 0
+}
+
+run_compbench() { # run compilebench in $1 (default $MOUNT)
+       local compilebench
+       if [ x$cbench_DIR = x ]; then
+               compilebench=$(which compilebench 2> /dev/null)
+       else
+               cd $cbench_DIR # NOTE: the cwd is not restored afterwards
+               [ -x compilebench ] ||
+                       skip_env "compilebench is missing in $cbench_DIR"
+               compilebench=./compilebench # the file checked above, not PATH
        fi
 
+       [ x$compilebench != x ] ||
+               skip_env "Compilebench is not installed, skipping"
+
        local TDIR=${1:-$MOUNT}
-       local thrds=$NUM
-       local fsize=64 # in Kbytes
-       local total=$FNUM # files in test
-       local fnum=$((total / NUM))
 
-       SYNC_DIR=${MOUNT}/sync
-       mkdir -p $SYNC_DIR
+       dp_run_cmd "$compilebench -D $TDIR -i 2 -r 2 --makej"
+       if [ ${PIPESTATUS[0]} != 0 ]; then # status of dp_run_cmd
+               error "Compilebench failed, aborting"
+       fi
 
-       SMF="./smallfile_cli.py --pause 10 --host-set $host_set \
-            --response-times Y --threads $thrds --file-size $fsize \
-            --files $fnum --top $TDIR --network-sync-dir $SYNC_DIR \
-            --file-size-distribution exponential"
+       rm -rf $TDIR/*
+}
 
-       run_cmd "$SMF --operation create"
+dp_test_run() { # run_<$1> on the DoM, DoM+DNE (if present) and OST dirs
+       local test=$1
+       local facets=$(get_facets MDS)
+       local nodes=$(comma_list $(mdts_nodes))
+       local p="$TMP/$TESTSUITE-$TESTNAME.parameters" # saved dom_lock
 
-       for oper in read append overwrite ; do
-               for bsize in 8 ; do
-                       run_cmd "$SMF --record-size $bsize --operation $oper"
-               done
-       done
-       run_cmd "$SMF --operation delete"
+       save_lustre_params $facets "mdt.*.dom_lock" >> $p
 
-       rm -rf $TDIR/*
-       cd ..
-       return 0
+       printf "\n##### $test: DoM files\n"
+       do_nodes $nodes "lctl set_param -n mdt.*.dom_lock=1"
+       DP_OSC="mdc" # stats helpers look at mdc.* for DoM runs
+       run_${test} $DP_DOM
+
+       if [ -d $DP_DOM_DNE ] ; then # only created when MDSCOUNT > 1
+               printf "\n##### $test: DoM files + DNE\n"
+               DP_OSC="mdc"
+               run_${test} $DP_DOM_DNE
+       fi
+
+       printf "\n##### $test: OST files\n"
+       DP_OSC="osc" # stats helpers look at osc.* for OST runs
+       run_${test} $DP_NORM
+
+       restore_lustre_params < $p # undo the dom_lock=1 set above
+       rm -f $p
 }
 
 test_smallio() {
-       OSC="mdc"
-       run_smalliomany $DOM
-       OSC="osc"
-       run_smalliomany $NORM
+       dp_test_run SmallIO # dispatches to run_SmallIO per directory
 }
 run_test smallio "Performance comparision: smallio"
 
 test_mdtest() {
-       OSC="mdc"
-       run_MDtest $DOM
-       OSC="osc"
-       run_MDtest $NORM
+       dp_test_run MDtest # dispatches to run_MDtest per directory
 }
 run_test mdtest "Performance comparision: mdtest"
 
 test_IOR() {
-       OSC="mdc"
-       run_IOR $DOM
-       OSC="osc"
-       run_IOR $NORM
+       dp_test_run IOR # dispatches to run_IOR per directory
 }
 run_test IOR "Performance comparision: IOR"
 
 test_dbench() {
-       OSC="mdc"
-       run_dbench $DOM
-       OSC="osc"
-       run_dbench $NORM
+       dp_test_run Dbench # dispatches to run_Dbench per directory
 }
 run_test dbench "Performance comparision: dbench"
 
-test_smf() {
-       OSC="mdc"
-       run_smallfile $DOM
-       OSC="osc"
-       run_smallfile $NORM
+test_fio() {
+       dp_test_run FIO # dispatches to run_FIO per directory
+}
+run_test fio "Performance comparision: FIO"
 
+test_compbench() {
+       dp_test_run compbench # dispatches to run_compbench per directory
 }
-run_test smf "Performance comparision: smallfile"
+run_test compbench "Performance comparision: compilebench"
+
+FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR # undo the override made during setup
+$LCTL set_param -n debug="$SAVED_DEBUG" # undo the mask set by dp_setup_test
 
 complete $SECONDS
 check_and_cleanup_lustre