Whamcloud - gitweb
LU-12219 obdfilter: changes PAGE_SIZE variable
[fs/lustre-release.git] / lustre-iokit / obdfilter-survey / obdfilter-survey
index 2060542..adb5b6b 100755 (executable)
@@ -1,5 +1,4 @@
 #!/bin/bash
-
 ######################################################################
 # customize per survey
 
 # ...or...
 # echo_client instances (set 'client_names')
 # ... use 'host:name' for obd instances on other nodes.
-
 # allow these to be passed in via string...
-ost_names_str=${ost_names_str:-""}
-if [ -n "$ost_names_str" ]; then
-    declare -a ost_names
-    count=0
-    for name in $ost_names_str; do
-        ost_names[$count]=$name
-        count=$((count+1))
-    done
-else
-    ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16})
-fi
-
-#client_names=(ns8:ECHO_ns8 ns9:ECHO_ns9)
-client_names_str=${client_names_str:-""}
-if [ -n "$client_names_str" ]; then
-    # make sure we unset ost_names so that our client_names get noticed... 
-    unset ost_names
-    declare -a client_names
-    count=0
-    for name in $client_names_str; do
-        client_names[$count]=$name
-        count=$((count+1))
-    done
-fi
-
+# OR
+# one can specify only case=disk or case=network or case=netdisk through
+# command line.
+
+# Prerequisite: For "disk" case and "netdisk" case you need to have lustre setup
+#             with one or more OSTs. For "network" case you need to have all
+#             modules (those llmount.sh loads) loaded in kernel. And the
+#             'lctl dl' output must be blank.
+
+# How to run test:
+# case 1 (local disk):
+#   $ nobjhi=2 thrhi=2 size=1024 case=disk sh obdfilter-survey
+#   one can also run test with user defined targets as follows,
+#   $ nobjhi=2 thrhi=2 size=1024 targets="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey
+# case 2 (network):
+#   $ nobjhi=2 thrhi=2 size=1024 targets="<name/ip_of_server>" case=network sh obdfilter-survey
+#   where, targets is name or ip address of system, which you want to
+#   set as server.
+# case 3 (network and disk):
+#   $ nobjhi=2 thrhi=2 size=1024 case=netdisk sh obdfilter-survey
+#   one can also run test with user defined targets as follows,
+#   $ nobjhi=2 thrhi=2 size=1024 targets="<osc_name> ..." sh obdfilter-survey
+#[ NOTE: It is advised to have automated login (passwordless entry) between server and
+#  client systems on which this test runs.]
+
+# include library
+source $(dirname $0)/iokit-libecho
+
+# The following variables can be set in the environment, or on the
+# command line
 # result file prefix (date/time + hostname makes unique)
 # NB ensure path to it exists
-rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"}
-
-# lustre root (if running with own source tree)
-# lustre_root=${lustre_root:-"/my/directory/lustre"}
-
-# what tests to run (first must be write)
-tests_str=${tests_str:-""}
-if [ -n "$tests_str" ]; then
-    declare -a tests
-    count=0
-    for name in $tests_str; do
-        tests[$count]=$name
-        count=$((count+1))
-    done
-else
-    #tests=(write rewrite read reread rewrite_again)
-    tests=(write rewrite read)
-fi
+rslt_loc=${rslt_loc:-"/tmp"}
+rslt=${rslt:-"$rslt_loc/obdfilter_survey_$(date +%F@%R)_$(uname -n)"}
 
 # Set this true to check file contents
-verify=0
+verify=${verify:-0}
+
+# test targets
+targets=${targets:-""}
+# test case
+case=${case:-"disk"}
 
 # total size (MBytes) per obd instance
-# large enough to avoid cache effects 
+# large enough to avoid cache effects
 # and to make test startup/shutdown overhead insignificant
 size=${size:-16384}
 
-# record size (KBytes)
+# record size (KBytes) ( 7168 max)
 rszlo=${rszlo:-1024}
 rszhi=${rszhi:-1024}
+rszmax=${rszmax:-4096}
 
 # number of objects per OST
 nobjlo=${nobjlo:-1}
-nobjhi=${nobjhi:-512}
+#was nobjhi=${nobjhi:-512}
+nobjhi=${nobjhi:-16}
 
 # threads per OST (1024 max)
 thrlo=${thrlo:-1}
-thrhi=${thrhi:-64}
-
-# restart from here iff all are defined
-restart_rsz=
-restart_thr=1
-restart_nobj=1
-
-# machine's page size (K)
-if [ -z "$PAGE_SIZE" ]; then
-    if which python >/dev/null; then
-       PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python`
-    fi
-fi
-PAGE_SIZE=${PAGE_SIZE:-4}
-
-# max buffer_mem (total_threads * buffer size)
-# (to avoid lctl ENOMEM problems)
-max_buffer_mem=$((1024*1024))
-
-# how to run commands on other nodes
-# You need to make this work on your cluster if you have specified
-# non-local obd instances above
-custom_remote_shell () {
-    host=$1
-    shift
-    cmds="$*"
-    here=`pwd`
-    # Hop on to the remote node, chdir to 'here' and run the given
-    # commands. One of the following will probably work.
-    ssh $host "cd $here; $cmds"
-    #rsh $host "cd $here; $cmds"
-    # we have to remove the leading `uname -n`: from pdsh output lines
-    #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
-}
-
-#####################################################################
-# leave the rest of this alone unless you know what you're doing...
-
-# binaries
-lsmod="/sbin/lsmod"
-modprobe="/sbin/modprobe"
-insmod="/sbin/insmod"
-rmmod="/sbin/rmmod"
-
-# lctl::test_brw bandwidth snapshot interval (seconds)
-snap=1
-
-
-if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
-    echo "tests: ${tests[@]}"
-    echo "First test must be 'write'" 1>&2
-    exit 1
-fi
-
-rsltf="${rslt}.summary"
-workf="${rslt}.detail"
-cmdsf="${rslt}.script"
-vmstatf="${rslt}.vmstat"
-echo -n > $rsltf
-echo -n > $workf
+thrhi=${thrhi:-16}
 
-declare -a vmstatpids
-
-# hide a little trick to unset this from the command line
-if [ "$lustre_root" == " " ]; then
-    unset lustre_root
-fi
+export LC_ALL=POSIX
 
-if [ -z "$lustre_root" ]; then
-    lctl=lctl
-else
-    lctl=${lustre_root}/utils/lctl
-fi
+# End of variables
 
-remote_shell () {
-    host=$1
-    shift
-    cmds="$*"
-    if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then
-       eval "$cmds"
-    else
-       custom_remote_shell $host "$cmds"
-    fi
+# create a set of objects, check there are 'n' contiguous ones and
+# echo out the first or 'ERROR'
+# parameter: 1. hostname
+#           2. device number
+#           3. number of object to be created (specified by user)
+#           4. tempfile name
+# output:    first object id on stdout, or "ERROR" (details go to stderr)
+# return:    0 on success, 1 if objects are missing or not contiguous
+create_objects () {
+       local host=$1
+       local devno=$2
+       local nobj=$3
+       local rfile=$4
+       # keep the bookkeeping local so that script-level variables of the
+       # same name (e.g. 'count') are not clobbered by a direct call
+       local first=0
+       local prev=0
+       local count=0
+       local error=0
+       local line
+
+       remote_shell $host $lctl --device $devno create $nobj > "$rfile" 2>&1
+
+       # Count number of objects (lines containing " is object id "), and
+       # ensure that the objects numbers are sequential.
+       #
+       exec 3< "$rfile"
+       while read -r -u3 line; do
+               case "$line" in
+               ( *' is object id '* )
+               # split the line into words; word 6 is used as the object id
+               set -- $line
+               if test $(( count += 1 )) -gt 1 ; then
+                       (( $6 != prev + 1 )) && error=1
+               else
+                       # arithmetic normalises the id to a decimal number
+                       first=$(( $6 + 0 ))
+               fi
+               prev=$6
+               ;;
+               esac
+       done
+       exec 3<&-
+
+       if [ "$nobj" -ne "$count" ]; then
+               echo "ERROR: $nobj != $count" >&2
+               cat "$rfile" >&2
+               echo "ERROR"
+               # a short object count is a failure too; report it in the
+               # exit status as well as on stdout
+               error=1
+       elif [ $error -ne 0 ]; then
+               echo "ERROR: non contiguous objs found" >&2
+               echo ERROR
+       else
+               echo $first
+       fi
+       return $error
 }
 
-obdecho_loaded() {
-    local host=$1
-    remote_shell $host $lsmod | grep obdecho > /dev/null 2>&1
-}
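+# destroys all objects created in create_objects routine
+# parameter: 1. hostname
+#           2. device number
+#           3. start obj id.
+#           4. number of objects to destroy
+#           5. file receiving the command output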
+destroy_objects () {
+       local host=$1
+       local devno=$2
+       local obj0=$3
+       local nobj=$4
+       local rfile=$5
 
-load_obdecho () {
-    local host=$1
-    if [ -z "$lustre_root" ]; then
-       remote_shell $host $modprobe obdecho
-    elif [ -f ${lustre_root}/obdecho/obdecho.ko ]; then
-       remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko
-    else
-       remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o
-    fi
+       remote_shell $host $lctl --device $devno destroy $obj0 $nobj > $rfile 2>&1
 }
 
-unload_obdecho () {
-    local host=$1
-    remote_shell $host $rmmod obdecho
+# Summarise the per-interval bandwidth lines found in one output file.
+# parameter: 1. file to scan
+# output:    "<n> <min> <max>" on stdout, where n is the number of
+#            "<a>/<b> Total: <x.y>/second" lines seen and min/max are the
+#            smallest/largest numeric value of field 3 on those lines.
+#            n is -1 if a line contains "error", or if an unrecognised line
+#            shows up after at least one sample was counted.
+# A "Selected device <N>" line resets the sample counter to 0; lines
+# starting with "Total" are skipped.
+# Needs gawk: strtonum() is a gawk extension.
+get_stats () {
+       local rfile=$1
+
+       gawk < $rfile                                                   \
+       '/^Selected device [0-9]+$/ {                                   \
+               n = 0;                                                  \
+               next;                                                   \
+       }                                                               \
+       /error/ {                                                       \
+               n = -1;                                                 \
+               exit;                                                   \
+       }                                                               \
+       /^Total/ {                                                      \
+               next;                                                   \
+       }                                                               \
+       /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {              \
+               n++;                                                    \
+               v=strtonum($3);                                         \
+               if (n == 1 || v < min)                                  \
+                       min = v;                                        \
+               if (n == 1 || v > max)                                  \
+                       max = v;                                        \
+               next;                                                   \
+       }                                                               \
+       {                                                               \
+               if (n != 0) {                                           \
+                       n = -1;                                         \
+                       exit;                                           \
+               }                                                       \
+       }                                                               \
+       END {                                                           \
+               printf "%d %f %f\n", n, min, max                        \
+       }'
 }
 
-get_devno () {
-    local host=$1
-    local type=$2
-    local name=$3
-    remote_shell $host $lctl device_list | \
-       awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\
-                 print \$1; exit}}"
+# Fold several "<status> <min> <max>" lines into a single global line.
+# parameter: 1. file holding one such line per device
+#              (presumably the lines printed by get_stats - confirm at caller)
+# output:    "<status> <min> <max>" on stdout: the smallest value seen in
+#            column 1, the smallest in column 2 and the largest in column 3.
+#            status is forced to 0 when the file has no lines.
+get_global_stats () {
+       local rfile=$1
+
+       awk < $rfile                                                    \
+       'BEGIN {                                                        \
+               n = 0;                                                  \
+       }                                                               \
+       {                                                               \
+               n++;                                                    \
+               if (n == 1) {                                           \
+                       err = $1;                                       \
+                       min = $2;                                       \
+                       max = $3;                                       \
+               } else {                                                \
+                       if ($1 < err)                                   \
+                               err = $1;                               \
+                       if ($2 < min)                                   \
+                               min = $2;                               \
+                       if ($3 > max)                                   \
+                               max = $3;                               \
+               }                                                       \
+       }                                                               \
+       END {                                                           \
+               if (n == 0)                                             \
+                       err = 0;                                        \
+               printf "%d %f %f\n", err, min, max                      \
+       }'
 }
 
-get_ec_devno () {
-    local host=$1
-    local client_name="$2"
-    local ost_name="$3"
-    if [ -z "$client_name" ]; then
-       if [ -z "$ost_name" ]; then
-           echo "client and ost name both null" 1>&2
-           return
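+# map a test name to an I/O type letter, with data check enabled or disabled.
+# parameter: 1. test name; names containing "write" give "w", all others "r"
+# An "x" suffix (data check disabled) is appended unless 'verify' is non-zero.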
+testname2type () {
+       # 'x' disables data check
+       if ((verify)); then
+               x=""
+       else
+               x="x"
        fi
-       client_name=${ost_name}_echo_client
-    fi
-    ec=`get_devno $host echo_client $client_name`
-    if [ -n "$ec" ]; then
-       echo $ec $client_name 0
-       return
-    fi
-    if [ -z "$ost_name" ]; then
-       echo "no echo client and ost_name not set, client: $client_name, host: $host" 1>&2
-       return
-    fi
-    ost=`get_devno $host obdfilter $ost_name`
-    if [ -z "$ost" ]; then
-       echo "OST $ost_name not setup" 1>&2
-       return
-    fi
-    remote_shell $host "$lctl <<EOF
-        attach echo_client $client_name ${client_name}_UUID
-       setup $ost_name
-EOF"
-    ec=`get_devno $host echo_client $client_name`
-    if [ -z "$ec" ]; then
-       echo "Can't setup echo client" 1>&2
-       return
-    fi
-    echo $ec $client_name 1
+       case $1 in
+       *write*)  echo "w$x";;
+       *)        echo "r$x";;
+       esac
 }
 
-teardown_ec_devno () {
-    local host=$1
-    local client_name=$2
-    remote_shell $host "$lctl <<EOF
-       cfg $client_name
-       cleanup
-       detach
-EOF"
+# for "echo_client + obdfilter" case, "prep + commit" mode should be used
+# for "echo_client + osc" case, "BRW" mode should be used
+# Echoes the mode letter for the global $case: "p" when case is "disk",
+# "g" for every other value (network, netdisk, or anything unrecognised).
+testcase2mode() {
+       case $case in
+       disk)   echo "p";;
+       *)      echo "g";;
+       esac
 }
 
-create_objects () {
-    # create a set of objects, check there are 'n' contiguous ones and
-    # return the first or 'ERROR'
-    local host=$1
-    local devno=$2
-    local nobj=$3
-    local rfile=$4
-    remote_shell $host $lctl --device $devno create $nobj > $rfile 2>&1
-    first=0
-    prev=0
-    count=0
-    error=0
-    while read line; do
-        echo "$line" | grep -q 'is object id'
-        if [ $?  -ne 0 ]; then
-            continue
-        fi
-        if [ $first -eq 0 ]; then
-            first=$(echo $line | awk '{print $6}')
-            first=$(printf "%d" $first)
-            prev=$first
-            count=1
-        else
-            obj=$(echo $line | awk '{print $6}') 
-            obj=$(printf "%d" $obj) 
-            diff=$((obj - (prev+1))) 
-            if [ $diff -ne 0 ]; then 
-                error=1 
-            fi 
-            prev=$obj 
-            count=$((count+1)) 
-        fi 
-    done < $rfile 
-    if [ $nobj -ne $count ]; then 
-        echo "ERROR: $nobj != $count" >&2 
-        cat $rfile >&2 
-        echo "ERROR" 
-    elif [ $error -ne 0 ]; then 
-        echo "ERROR: non contiguous objs found" >&2 
-        echo "ERROR" 
-    else 
-        echo $first 
-    fi
+# Append text to the summary file ($rsltf) and echo the same text to stdout.
+# parameter: 1. optional "-n": suppress the trailing newline
+#            remaining arguments are joined with single spaces
+print_summary () {
+       # local, so the flag does not leak into the caller's scope
+       local minusn=""
+
+       if [ "$1" = "-n" ]; then
+               minusn=$1; shift
+       fi
+       # $minusn stays unquoted on purpose: when empty it must vanish
+       # instead of being printed as an empty argument
+       echo $minusn "$*" >> "$rsltf"
+       echo $minusn "$*"
 }
 
-destroy_objects () {
-    local host=$1
-    local devno=$2
-    local obj0=$3
-    local nobj=$4
-    local rfile=$5
-    remote_shell $host $lctl --device $devno destroy $obj0 $nobj > $rfile 2>&1
+# Return a numeric version code based on a version string.  The version
+# code is useful for comparing two version strings to see which is newer.
+# parameter: version string such as "2.3.61"
+# output:    (first << 16) | (second << 8) | third component on stdout,
+#            without a trailing newline; further components are ignored
+# NOTE(review): with fewer than three components $3 is empty and the
+# arithmetic expansion below is invalid - callers should pass x.y.z
+version_code() {
+       # split arguments like "2.3.61" into "2", "3", "61"
+       eval set -- $(tr "[:punct:]" " " <<< $*)
+       echo -n "$((($1 << 16) | ($2 << 8) | $3))"
 }
 
-get_stats () {
-    local rfile=$1
-    awk < $rfile \
-       '/^Selected device [0-9]+$/ {n = 0; next}\
-       /error/ {n = -1; exit}\
-       /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \
-                                                         if (n == 1 || v < min) min = v;\
-                                                         if (n == 1 || v > max) max = v;\
-                                                         next}\
-        {if (n != 0) {n = -1; exit}}\
-       END {printf "%d %f %f\n", n, min, max}'
-}
+# inspired by the lustre_build_version routine in test-framework.sh
+get_lustre_version() {
+       local host=${1:-${unique_hosts[0]}}
+       local ver
 
-get_global_stats () {
-    local rfile=$1
-    awk < $rfile 'BEGIN {n = 0;}\
-                 {n++; if (n == 1) {err = $1; min = $2; max = $3} else\
-                                   {if ($1 < err) err = $1;\
-                                     if ($2 < min) min = $2;\
-                                    if ($3 > max) max = $3}}\
-                 END {if (n == 0) err = 0;\
-                      printf "%d %f %f\n", err, min, max}'
-}
+       ver=$(remote_shell $host "$lctl get_param -n version 2>/dev/null ||
+                               $lctl lustre_build_version 2>/dev/null ||
+                               $lctl --version 2>/dev/null | cut -d' ' -f2")
+       local lver=$(egrep -i "lustre: |version: " <<<$ver | head -n 1)
+       [ -n "$lver" ] && ver="$lver"
 
-testname2type () {
-    # 'x' disables data check
-    if ((verify)); then
-       x=""
-    else
-       x="x"
-    fi
-    case $1 in
-       *write*)  echo "w$x";;
-       *)        echo "r$x";;
-    esac
+       sed -e 's/.*: //' -e 's/^v//' -e 's/-.*//' -e 's/_/./g' <<<$ver |
+               cut -d. -f1-4
 }
 
-print_summary () {
-    if [ "$1" = "-n" ]; then
-       minusn=$1; shift
-    else
-       minusn=""
-    fi
-    echo $minusn "$*" >> $rsltf
-    echo $minusn "$*"
-}
+# Check whether the record size (KBytes) exceeds the maximum bulk I/O RPC size
+# or not.
+check_record_size() {
+       [ $(version_code $(get_lustre_version)) -lt $(version_code 2.3.61) ] &&
+               rszmax=1024
 
-unique () {
-    echo "$@" | xargs -n1 echo | sort -u
+       if [ "$rszhi" -gt "$rszmax" ]; then
+               echo "Test disk case support maximum ${rszmax}KB IO data" \
+                    "(rszhi=$rszhi is too big), please use a smaller value."
+               return 1
+       fi
+       return 0
 }
 
-split_hostname () {
-    local name=$1
-    case $name in
-    *:*) host=`echo $name | sed 's/:.*$//'`
-        name=`echo $name | sed 's/[^:]*://'`
-        ;;
-    *)   host=localhost
-        ;;
-    esac
-    echo "$host $name"
-}
+# Customisation variables
+#####################################################################
+# One can change variable values in this section as per requirements
+
+if [ -n "$targets" ]; then
+       declare -a ost_names
+       declare -a client_names
+       count=0
+       for name in $targets; do
+               if [ $case == "disk" ]; then
+               ost_names[$count]=$name
+               else
+               client_names[$count]=$name
+               fi
+               count=$((count + 1))
+       done
+fi
 
-# split out hostnames from client/ost names
-ndevs=${#client_names[@]}
-if ((ndevs != 0)); then
-    if ((${#ost_names[@]} != 0)); then
-        echo "Please specify client_names or ost_names, but not both" 1>&2
-       exit 1
-    fi
-    for ((i=0; i<ndevs;i++)); do
-       str=(`split_hostname ${client_names[$i]}`)
-       host_names[$i]=${str[0]}
-       client_names[$i]=${str[1]}
-    done
+# what tests to run (first must be write)
+tests_str=${tests_str:-""}
+if [ -n "$tests_str" ]; then
+       declare -a tests
+       count=0
+       for name in $tests_str; do
+               tests[$count]=$name
+               count=$((count + 1))
+       done
 else
-    ndevs=${#ost_names[@]}
-    if ((ndevs == 0)); then
-        echo "Please specify either client_names or ost_names" 1>&2
+       #tests=(write rewrite read reread rewrite_again)
+       tests=(write rewrite read)
+fi
+
+# restart from here iff all are defined
+restart_rsz=
+restart_thr=1
+restart_nobj=1
+
+# machine's page size (KB)
+PAGE_SIZE_KB=${PAGE_SIZE_KB:-$(( ${PAGE_SIZE:-$(getconf PAGE_SIZE)} / 1024))}
+PAGE_SIZE_KB=${PAGE_SIZE_KB:-4}
+
+snap=1
+clean_srv_OSS=0
+# Customisation variables ends here.
+#####################################################################
+# leave the rest of this alone unless you know what you're doing...
+
+# check and insert obdecho module
+if ! lsmod | grep obdecho > /dev/null; then
+       modprobe obdecho
+fi
+if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
+       echo "tests: ${tests[@]}"
+       echo "First test must be 'write'" 1>&2
        exit 1
-    fi
-    for ((i=0; i<ndevs;i++)); do
-       str=(`split_hostname ${ost_names[$i]}`)
-       host_names[$i]=${str[0]}
-       ost_names[$i]=${str[1]}
-    done
 fi
 
-# get vmstat started
-# disable LNET debug and get obdecho loaded on all relevant hosts
-unique_hosts=(`unique ${host_names[@]}`)
-pidcount=0
-for host in ${unique_hosts[@]}; do
-    remote_shell $host "echo 0 > /proc/sys/lnet/debug"
-    host_vmstatf=${vmstatf}_${host}
-    echo -n > $host_vmstatf
-    remote_shell $host "vmstat 5 >> $host_vmstatf" &
-    pid=$!
-    vmstatpids[$pidcount]=$pid
-    pidcount=$((pidcount+1))
-    do_unload_obdecho[$host]=0
-    if obdecho_loaded $host; then
-        continue
-    fi
-    load_obdecho $host
-    if obdecho_loaded $host; then
-        do_unload_obdecho[$host]=1
-        continue
-        fi
-    echo "Can't load obdecho on $host" 1>&2
-    exit 1
-done
+rsltf="${rslt}.summary"
+workf="${rslt}.detail"
+cmdsf="${rslt}.script"
+vmstatf="${rslt}.vmstat"
+echo -n > $rsltf
+echo -n > $workf
 
-# get all the echo_client device numbers and names
-for ((i=0; i<ndevs; i++)); do
-    host=${host_names[$i]}
-    devno=(`get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"`)
-    if ((${#devno[@]} != 3)); then
-        exit 1
-    fi
-    devnos[$i]=${devno[0]}
-    client_names[$i]=${devno[1]}
-    do_teardown_ec[$i]=${devno[2]}
+# hide a little trick to unset this from the command line
+if [ "$lustre_root" == " " ]; then
+       unset lustre_root
+fi
+
+if [ -z "$lustre_root" ]; then
+       lctl=lctl
+else
+       lctl=${lustre_root}/utils/lctl
+fi
+
+# split out hostnames from client/ost names
+ndevs=0
+for trgt in $targets; do
+       str=($(split_hostname $trgt))
+       host_names[$ndevs]=${str[0]}
+       client_names[$ndevs]=${str[1]}
+       ndevs=$((ndevs + 1))
 done
+if [ $case == "disk" ]; then
+       for ((i = 0; i < $ndevs; i++)); do
+               ost_names[$i]=${client_names[$i]}
+       done
+fi
+if [ $case == "netdisk" ]; then
+       if [ "$targets" ]; then
+               for ((i = 0; i < $ndevs; i++)); do
+                       setup_osc_for_remote_ost ${host_names[$i]} \
+                                                ${client_names[$i]} $i
+                       osc_name=${client_names[$i]}_osc
+                       ec_using_osc $osc_name
+                       cleanup_oscs="$cleanup_oscs $osc_name"
+               done
+       else
+               client_names_str=$($lctl dl | grep -v mdt |
+                       awk '{if ($2 == "UP" && $3 == "osc") {print $4} }')
+               count=0;
+               for name in $client_names_str; do
+                       client_names[$count]=$(echo $name | sed 's/-osc-.*$//')
+                       count=$((count + 1))
+               done
 
-for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
-    for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do 
-       for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
-           if ((thr % nobj)); then
-               continue
-           fi
-           # restart?
-           if [ -n "$restart_rsz" -a\
-                -n "$restart_nobj" -a\
-                -n "$restart_thr" ]; then
-               if ((rsz < restart_rsz ||\
-                    (rsz == restart_rsz &&\
-                     (nobj < restart_nobj ||\
-                      (nobj == restart_nobj &&\
-                       thr < restart_thr))))); then
-                   continue;
-               fi
-           fi
-           # compute parameters
-           total_thr=$((ndevs*thr))
-           total_nobj=$((ndevs*nobj))
-           pages=$((rsz/PAGE_SIZE))
-           actual_rsz=$((pages*PAGE_SIZE))
-           count=$((size*1024/(actual_rsz*thr)))
-           actual_size=$((actual_rsz*count*thr))
-            total_size=$((actual_size*ndevs))
-           # show computed parameters
-           str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \
-                    $ndevs $total_size $actual_rsz $total_nobj $total_thr`
-           echo "=======================> $str" >> $workf
-           print_summary -n "$str"
-           if ((total_thr * actual_rsz > max_buffer_mem)); then
-               print_summary "Too much buffer space"
-               continue
-           fi
-           # create the objects
-           tmpf="${workf}_tmp"
-           for ((idx=0; idx < ndevs; idx++)); do
-               host=${host_names[$idx]}
-               devno=${devnos[$idx]}
-               client_name="${host}:${client_names[$idx]}"
-               echo "=============> Create $nobj on $client_name" >> $workf
-               first_obj=`create_objects $host $devno $nobj $tmpf`
-               cat $tmpf >> $workf
-               rm $tmpf
-               if [ $first_obj = "ERROR" ]; then
-                   print_summary "created object #s on $client_name not contiguous"
-                   exit 1
-               fi
-               first_objs[$idx]=$first_obj
-           done
-           # run tests
-           for test in ${tests[@]}; do
-                declare -a pidarray
-                for host in ${unique_hosts[@]}; do
-                    echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host}
-                done
-               print_summary -n "$test "
-               # create per-host script files
-               for host in ${unique_hosts[@]}; do
-                   echo -n > ${cmdsf}_${host}
+               host_names_str=$($lctl dl -t | grep -v mdt |
+                       awk '{if ($2 == "UP" && $3 == "osc") {print $7} }')
+               count=0;
+               for name in $host_names_str; do
+                       host_names[$count]=$(echo $name | sed 's/@.*$//')
+                       count=$((count + 1))
                done
-               for ((idx=0; idx < ndevs; idx++)); do
-                   host=${host_names[$idx]}
-                   devno=${devnos[$idx]}
-                   tmpfi="${tmpf}_$idx"
-                   first_obj=${first_objs[$idx]}
-                   thr_per_obj=$((${thr}/${nobj}))
-                   echo >> ${cmdsf}_${host} \
-                       "$lctl > $tmpfi 2>&1 \\
-                         --threads $thr -$snap $devno \\
-                        test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &"
-                done
-                pidcount=0
-                for host in ${unique_hosts[@]}; do
-                    echo "wait" >> ${cmdsf}_${host}
-                    pidarray[$pidcount]=0
-                    pidcount=$((pidcount+1))
-                done
-                # timed run of all the per-host script files
-                t0=`date +%s.%N`
-                pidcount=0
-                for host in ${unique_hosts[@]}; do
-                   # brutal hack to deal with a non-shared /tmp
-                   scp -q ${cmdsf}_${host} ${host}:/tmp > /dev/null
-                    remote_shell $host bash ${cmdsf}_${host} &
-                    pidarray[$pidcount]=$!
-                    pidcount=$((pidcount+1))
-                done
-                pidcount=0
-                for host in ${unique_hosts[@]}; do
-                    wait ${pidarray[$pidcount]}
-                    pidcount=$((pidcount+1))
-                done
-                #wait
-                t1=`date +%s.%N`
-                # clean up per-host script files
-                for host in ${unique_hosts[@]}; do
-                    rm ${cmdsf}_${host}
-                done
-               # compute bandwidth from total data / elapsed time
-               str=`awk "BEGIN {printf \"%7.2f \",\
-                        $total_size / (( $t1 - $t0 ) * 1024)}"`
-               print_summary -n "$str"
-               # collect/check individual OST stats
-               echo -n > $tmpf
-               for ((idx=0; idx < ndevs; idx++)); do
-                   client_name="${host_names[$idx]}:${client_names[$idx]}"
-                   tmpfi="${tmpf}_$idx"
-                   echo "=============> $test $client_name" >> $workf
-                   host="${host_names[$idx]}"
-                   scp -q ${host}:tmpfi $tmpfi > /dev/null
-                   cat $tmpfi >> $workf
-                   get_stats $tmpfi >> $tmpf
-                   rm $tmpfi
+
+               for (( i = 0; i < $count; i++ )) do
+                       setup_osc_for_remote_ost ${host_names[$i]} \
+                                                ${client_names[$i]} $i
+                       osc_name=${client_names[$i]}_osc
+                       ec_using_osc $osc_name
+                       cleanup_oscs="$cleanup_oscs $osc_name"
                done
-               # compute/display global min/max stats
-               echo "=============> $test global" >> $workf
-               cat $tmpf >> $workf
-               stats=(`get_global_stats $tmpf`)
-               rm $tmpf
-               if ((stats[0] <= 0)); then
-                   if ((stats[0] < 0)); then
-                       str=`printf "%17s " ERROR`
-                   else
-                       str=`printf "%17s " SHORT`
-                   fi
-               else
-                   str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\
-                            (${stats[1]} * $actual_rsz)/1024,\
-                            (${stats[2]} * $actual_rsz)/1024; exit}"`
-               fi
-               print_summary -n "$str"
-           done
-           print_summary ""
-           # destroy objects we created
-           for ((idx=0; idx < ndevs; idx++)); do
-               host=${host_names[$idx]}
-               devno=${devnos[$idx]}
-               client_name="${host}:${client_names[$idx]}"
-               first_obj=${first_objs[$idx]}
-               echo "=============> Destroy $nobj on $client_name" >> $workf
-               destroy_objects $host $devno $first_obj $nobj $tmpf
-               cat $tmpf >> $workf
-               rm $tmpf
-           done
+       fi
+
+       echo_clients=$($lctl dl | grep echo_client |
+                      awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") { \
+                               print \$4} }")
+       cnt=0;
+       for name in $echo_clients; do
+               client_names[$cnt]=$name
+               host_names[$cnt]=localhost
+               cnt=$((cnt + 1))
        done
-    done
-done
+       ndevs=${#client_names[@]}
+fi
+# "network" case: $targets names the server; set up obdecho and an OSS on
+# it if needed, then attach a local echo client to the server.
+if [ "$case" == "network" ]; then
+       server_nid=$targets
+       if [ -z "$server_nid" ]; then
+               echo "Specify hostname or ip-address of server"
+               exit 1;
+       fi
+       # check for obdecho module on server
+       if ! dsh $server_nid root "lsmod | grep obdecho > /dev/null"; then
+               dsh $server_nid root "modprobe obdecho"
+       fi
+       # Now do the server setup
+       setup_srv_obd $server_nid "echo_srv"
+       oss_on_srv=$(dsh $server_nid root "$lctl dl | grep OSS" |
+                    awk '{ print $4 }')
+       # quoted: unquoted, an empty value only passed by accident
+       # ("[ -z ]" is true) and a multi-word value made the test error out
+       if [ -z "$oss_on_srv" ]; then
+               setup_OSS $server_nid
+               # remember that the OSS was set up here
+               clean_srv_OSS=1
+       fi
+       if ! dsh $server_nid root "$lctl dl | grep obdecho > /dev/null 2>&1"; then
+               echo "obdecho not setup on server"
+               exit 1
+       fi
+       if ! dsh $server_nid root "$lctl dl | grep ost > /dev/null 2>&1"; then
+               echo "ost not setup on server"
+               exit 1
+       fi
+       # Now start client setup
+       osc_names_str=$($lctl dl| grep osc | grep -v mdt | grep UP || true)
+       if [ -n "$osc_names_str" ]; then
+               echo "The existing setup must be cleaned";
+               # NOTE(review): this refusal exits with status 0 - confirm
+               # that callers are not expected to see a failure here
+               exit 0;
+       fi
+       ec_using_srv_nid $server_nid "echotmp" "echotmp_UUID"
+       client_names[0]="echotmp_ecc"
+fi
+if [ -z "$targets" ]; then
+       if [ $case == "disk" ]; then
+               get_targets
+               ndevs=${#ost_names[@]}
+       fi
+fi
+# get vmstat started
+# disable portals debug and get obdecho loaded on all relevant hosts
+unique_hosts=($(unique ${host_names[@]}))
+load_obdechos
 
-# tear down any echo clients we created
-for ((i=0; i<ndevs; i++)); do
-    host=${host_names[$i]}
-    if ((${do_teardown_ec[$i]})); then
-       teardown_ec_devno $host ${client_names[$i]}
-    fi
-done
+if [ $case == "disk" ]; then
+       check_record_size || cleanup ${PIPESTATUS[0]}
+fi
 
-# unload any obdecho modules we loaded
 pidcount=0
 for host in ${unique_hosts[@]}; do
-    remote_shell $host "killall vmstat" &
-    pid=$!
-    kill -term ${vmstatpids[$pidcount]}
-    kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
-    wait $pid
-    pidcount=$((pidcount+1))
-    if ((${do_unload_obdecho[$host]})); then
-        unload_obdecho $host
-    fi
+       host_vmstatf=${vmstatf}_${host}
+       echo -n > $host_vmstatf
+       remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
+       pid=$!
+       vmstatpids[$pidcount]=$pid
+       pidcount=$((pidcount + 1))
 done
+# get all the echo_client device numbers and names
+for ((i=0; i < $ndevs; i++)); do
+       host=${host_names[$i]}
+       devno=($(get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"))
+       if ((${#devno[@]} != 3)); then
+               cleanup 2
+       fi
+       devnos[$i]=${devno[0]}
+       client_names[$i]=${devno[1]}
+       do_teardown_ec[$i]=${devno[2]}
+done
+if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then
+       echo "no devices or hosts specified"
+       cleanup 0 $clean_srv_OSS $cleanup_oscs
+fi
 
+print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)"
+for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do
+       for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do
+               for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do
+                       if ((thr % nobj)); then
+                               continue
+                       fi
+                       # restart?
+                       if [ -n "$restart_rsz" -a \
+                            -n "$restart_nobj" -a \
+                            -n "$restart_thr" ]; then
+                               if ((rsz < restart_rsz ||
+                                    (rsz == restart_rsz &&
+                                     (nobj < restart_nobj ||
+                                      (nobj == restart_nobj &&
+                                       thr < restart_thr))))); then
+                                       continue;
+                               fi
+                       fi
+
+                       # compute parameters
+                       total_thr=$((ndevs * thr))
+                       total_nobj=$((ndevs * nobj))
+                       pages=$(( (rsz + PAGE_SIZE_KB - 1) / PAGE_SIZE_KB))
+                       actual_rsz=$((pages * PAGE_SIZE_KB))
+                       count=$((size * 1024 / (actual_rsz * thr)))
+                       actual_size=$((actual_rsz * count * thr))
+                       total_size=$((actual_size * ndevs))
+
+                       # show computed parameters
+                       str=$(printf 'ost %2d sz %8dK rsz %4dK obj %4d thr %4d ' \
+                             $ndevs $total_size $actual_rsz $total_nobj $total_thr)
+                       echo "=======================> $str" >> $workf
+                       print_summary -n "$str"
+
+                       # create the objects
+                       tmpf="${workf}_tmp"
+                       for ((idx = 0; idx < $ndevs; idx++)); do
+                               host=${host_names[$idx]}
+                               devno=${devnos[$idx]}
+                               client_name="${host}:${client_names[$idx]}"
+                               echo "=============> Create $nobj on $client_name" >> $workf
+                               first_obj=$(create_objects $host $devno $nobj $tmpf)
+                               cat $tmpf >> $workf
+                               rm $tmpf
+                               if [ $first_obj = "ERROR" ]; then
+                                       print_summary "created object #s on $client_name not contiguous"
+                                       cleanup 3
+                               fi
+                               first_objs[$idx]=$first_obj
+                       done # $ndevs
+
+                       # run tests
+                       for test in ${tests[@]}; do
+                               declare -a pidarray
+                               for host in ${unique_hosts[@]}; do
+                                       remote_shell $host \
+                                           "lctl set_param -n osd*.*.force_sync 1 &>/dev/null || true"
+                                       echo "starting run for test: $test rsz: $rsz " \
+                                       "threads: $thr objects: $nobj" >> ${vmstatf}_${host}
+                               done
+                               print_summary -n "$test "
+
+                               # create per-host script files
+                               for host in ${unique_hosts[@]}; do
+                                       echo -n > ${cmdsf}_${host}
+                               done
+                               for ((idx = 0; idx < $ndevs; idx++)); do
+                                       host=${host_names[$idx]}
+                                       devno=${devnos[$idx]}
+                                       tmpfi="${tmpf}_$idx"
+                                       first_obj=${first_objs[$idx]}
+                                       thr_per_obj=$((${thr}/${nobj}))
+                                       echo >> ${cmdsf}_${host} \
+                                       "$lctl > $tmpfi 2>&1 \\
+                                       --threads $thr -$snap $devno \\
+                                       test_brw $count $(testname2type $test) q $pages \\
+                                       ${thr_per_obj}t${first_obj} $(testcase2mode)$pages &"
+                               done # $ndevs
+                               pidcount=0
+                               for host in ${unique_hosts[@]}; do
+                                       echo "wait" >> ${cmdsf}_${host}
+                                       pidarray[$pidcount]=0
+                                       pidcount=$((pidcount + 1))
+                               done
+                               # timed run of all the per-host script files
+                               t0=$(date +%s.%N)
+                               pidcount=0
+                               for host in ${unique_hosts[@]}; do
+                                       remote_shell $host bash < ${cmdsf}_${host} &
+                                       pidarray[$pidcount]=$!
+                                       pidcount=$((pidcount + 1))
+                               done
+                               pidcount=0
+                               for host in ${unique_hosts[@]}; do
+                                       wait ${pidarray[$pidcount]}
+                                       pidcount=$((pidcount + 1))
+                               done
+                               #wait
+                               t1=$(date +%s.%N)
+                               # clean up per-host script files
+                               for host in ${unique_hosts[@]}; do
+                                       rm ${cmdsf}_${host}
+                               done
+
+                               # compute bandwidth from total data / elapsed time
+                               str=$(awk "BEGIN {printf \"%7.2f \",\
+                               $total_size / (( $t1 - $t0 ) * 1024)}")
+                               print_summary -n "$str"
+                               # collect/check individual OST stats
+                               echo -n > $tmpf
+                               for ((idx = 0; idx < $ndevs; idx++)); do
+                                       client_name="${host_names[$idx]}:${client_names[$idx]}"
+                                       tmpfi="${tmpf}_$idx"
+                                       echo "=============> $test $client_name" >> $workf
+                                       host="${host_names[$idx]}"
+                                       remote_shell $host cat $tmpfi > ${tmpfi}_local
+                                       cat ${tmpfi}_local >> $workf
+                                       get_stats ${tmpfi}_local >> $tmpf
+                                       rm -f $tmpfi ${tmpfi}_local
+                               done # $ndevs
+
+                               # compute/display global min/max stats
+                               echo "=============> $test global" >> $workf
+                               cat $tmpf >> $workf
+                               stats=($(get_global_stats $tmpf))
+                               rm $tmpf
+                               if ((stats[0] <= 0)); then
+                                       if ((stats[0] < 0)); then
+                                               str=$(printf "%17s " ERROR)
+                                       else
+                                               str=$(printf "%17s " SHORT)
+                                       fi
+                               else
+                                       str=$(awk "BEGIN {printf \"[%7.2f, %7.2f] \",\
+                                       (${stats[1]} * $actual_rsz)/1024,\
+                                       (${stats[2]} * $actual_rsz)/1024; exit}")
+                               fi
+                               print_summary -n "$str"
+                       done # $tests[]
+                       print_summary ""
+
+                       # destroy objects we created
+                       for ((idx = 0; idx < $ndevs; idx++)); do
+                               host=${host_names[$idx]}
+                               devno=${devnos[$idx]}
+                               client_name="${host}:${client_names[$idx]}"
+                               first_obj=${first_objs[$idx]}
+                               echo "=============> Destroy $nobj on $client_name" >> $workf
+                               destroy_objects $host $devno $first_obj $nobj $tmpf
+                               cat $tmpf >> $workf
+                               rm $tmpf
+                       done # $ndevs
+               done # $thr
+       done # $nobj
+done # $rsz
+cleanup 0 $clean_srv_OSS $cleanup_oscs
 exit 0