# ...or...
# echo_client instances (set 'client_names')
# ... use 'host:name' for obd instances on other nodes.
-ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16})
+
# Patch intent: ost_names may now be supplied by the caller as a
# whitespace-separated string in ost_names_str; the 16-entry list removed
# above survives only as the fallback in the else branch.
+# allow these to be passed in via string...
+ost_names_str=${ost_names_str:-""}
+if [ -n "$ost_names_str" ]; then
+ declare -a ost_names
# NOTE(review): 'count' is a plain global used here as the array index; the
# name 'count' is also expanded in the test_brw command line built further
# down in this patch -- confirm it is reassigned before that use.
+ count=0
# Unquoted expansion: each word of the string becomes one array element.
+ for name in $ost_names_str; do
+ ost_names[$count]=$name
+ count=$((count+1))
+ done
+else
+ ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16})
+fi
+
#client_names=(ns8:ECHO_ns8 ns9:ECHO_ns9)
+client_names_str=${client_names_str:-""}
# Patch intent: client_names_str, when non-empty, is split on whitespace
# into the client_names array. ost_names is unset first, discarding
# whatever the ost_names section above assigned.
+if [ -n "$client_names_str" ]; then
+ # make sure we unset ost_names so that our client_names get noticed...
+ unset ost_names
+ declare -a client_names
+ count=0
# Unquoted expansion: one array element per word of the string.
+ for name in $client_names_str; do
+ client_names[$count]=$name
+ count=$((count+1))
+ done
+fi
# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
-rslt=/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`
# Patch intent: rslt and lustre_root keep a built-in default but honour a
# non-empty value already set by the caller. The default result prefix
# moves from /home_nfs/eeb to /tmp.
+rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"}
# lustre root (if running with own source tree)
-lustre_root=/home_nfs/eeb/lustre
# ':-' also replaces an EMPTY value with the default, so an empty
# lustre_root cannot be passed in; a single space is recognised further
# down in this script as the way to request "no lustre_root".
+lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"}
# what tests to run (first must be write)
-#tests=(write rewrite read reread rewrite_again)
-tests=(write rewrite read)
# Patch intent: the list of tests can be passed in as a whitespace-separated
# string in tests_str; if it is empty the previous fixed list
# (write rewrite read) is used.
+tests_str=${tests_str:-""}
+if [ -n "$tests_str" ]; then
+ declare -a tests
+ count=0
# Unquoted expansion: one array element per word of the string.
+ for name in $tests_str; do
+ tests[$count]=$name
+ count=$((count+1))
+ done
+else
+ #tests=(write rewrite read reread rewrite_again)
+ tests=(write rewrite read)
+fi
# total size (MBytes) per obd instance
# large enough to avoid cache effects
# and to make test startup/shutdown overhead insignificant
-size=16384
# Patch intent: every tunable below keeps its previous literal as the
# default but can be preset by the caller (an empty value also selects the
# default because ':-' is used).
+size=${size:-16384}
# record size (KBytes)
-rszlo=1024
-rszhi=1024
+rszlo=${rszlo:-1024}
+rszhi=${rszhi:-1024}
# number of objects per OST
-nobjlo=1
-nobjhi=512
+nobjlo=${nobjlo:-1}
+nobjhi=${nobjhi:-512}
# threads per OST (1024 max)
-thrlo=1
-thrhi=64
+thrlo=${thrlo:-1}
+thrhi=${thrhi:-64}
# restart from here iff all are defined
# (these two are not overridable: they are assigned unconditionally)
restart_rsz=
restart_nobj=1
# machine's page size (K)
# A PAGE_SIZE preset by the caller wins. Otherwise getconf is asked for the
# page size in bytes and the result is converted to KBytes; if getconf is
# missing or prints nothing, the value falls back to 4.
# getconf is used rather than a python one-liner because "print x" written
# as a statement only parses under Python 2.
-PAGE_SIZE=64
+if [ -z "$PAGE_SIZE" ]; then
+ page_bytes=$(getconf PAGESIZE 2>/dev/null)
+ if [ -n "$page_bytes" ]; then
+ PAGE_SIZE=$((page_bytes / 1024))
+ fi
+fi
+PAGE_SIZE=${PAGE_SIZE:-4}
# max buffer_mem (total_threads * buffer size)
# (to avoid lctl ENOMEM problems)
verify=1
# The first configured test must be "write"; otherwise report the offending
# list and abort. Both diagnostic lines go to stderr.
if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
+ echo "tests: ${tests[*]}" 1>&2
echo "First test must be 'write'" 1>&2
exit 1
fi
# Output files all share the $rslt prefix.
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
+vmstatf="${rslt}.vmstat"
# Truncate (or create) the summary and detail files.
echo -n > $rsltf
echo -n > $workf
# PIDs of the background vmstat collectors, one per host.
+declare -a vmstatpids
+
+# hide a little trick to unset this from the command line
+if [ "$lustre_root" == " " ]; then
+ unset lustre_root
+fi
+
# Pick the lctl binary: from PATH when no lustre_root is set, otherwise from
# the source tree. If the PATH lookup fails, fall back to the bare name (as
# the pre-patch code did) rather than leaving $lctl empty; $lctl is written
# into the per-host command scripts later on.
if [ -z "$lustre_root" ]; then
- lctl=lctl
+ lctl=$(command -v lctl) || lctl=lctl
else
lctl=${lustre_root}/utils/lctl
fi
done
fi
+# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=(`unique ${host_names[@]}`)
# pidcount indexes vmstatpids in the same order as unique_hosts is walked.
+pidcount=0
for host in ${unique_hosts[@]}; do
remote_shell $host "echo 0 > /proc/sys/portals/debug"
# Per-host vmstat log: truncated here by this shell, then appended to by
# the "vmstat 5" command handed to remote_shell, which is left running in
# the background and its PID recorded.
# NOTE(review): the truncation runs locally while the append is part of the
# remote_shell command -- presumably $rslt must name a path visible on both
# sides; confirm against remote_shell.
+ host_vmstatf=${vmstatf}_${host}
+ echo -n > $host_vmstatf
+ remote_shell $host "vmstat 5 >> $host_vmstatf" &
+ pid=$!
+ vmstatpids[$pidcount]=$pid
+ pidcount=$((pidcount+1))
# do_unload_obdecho[host] is set to 1 only when this script loaded obdecho
# itself; it is consulted again in the final cleanup loop.
do_unload_obdecho[$host]=0
if obdecho_loaded $host; then
- continue
+ continue
fi
load_obdecho $host
if obdecho_loaded $host; then
- do_unload_obdecho[$host]=1
- continue
- fi
+ do_unload_obdecho[$host]=1
+ continue
+ fi
# Reached only if obdecho is still not loaded after load_obdecho.
echo "Can't load obdecho on $host" 1>&2
exit 1
done
# Sweep record size, objects per OST and threads per OST; each loop variable
# starts at its *lo bound and doubles until it exceeds its *hi bound.
for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do
for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
# Patch: skip unless thr is a whole multiple of nobj (previously: skip only
# when thr < nobj). thr is later divided by nobj to get threads per object.
- if ((thr < nobj)); then
+ if ((thr % nobj)); then
continue
fi
# restart?
done
# run tests
for test in ${tests[@]}; do
# pidarray holds one background PID per host for the timed run below.
+ declare -a pidarray
# Write a marker line into each host's vmstat log naming the run that is
# about to start.
+ for host in ${unique_hosts[@]}; do
+ echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host}
+ done
print_summary -n "$test "
# create per-host script files
for host in ${unique_hosts[@]}; do
devno=${devnos[$idx]}
tmpfi="${tmpf}_$idx"
first_obj=${first_objs[$idx]}
# Integer division; exact because thr % nobj == 0 was enforced above.
+ thr_per_obj=$((${thr}/${nobj}))
echo >> ${cmdsf}_${host} \
"$lctl > $tmpfi 2>&1 \\
--threads $thr -$snap $devno \\
- test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &"
- done
- for host in ${unique_hosts[@]}; do
- echo "wait" >> ${cmdsf}_${host}
- done
- # timed run of all the per-host script files
- t0=`date +%s.%N`
- for host in ${unique_hosts[@]}; do
- remote_shell $host bash ${cmdsf}_${host}&
- done
- wait
- t1=`date +%s.%N`
- # clean up per-host script files
- for host in ${unique_hosts[@]}; do
- rm ${cmdsf}_${host}
- done
+ test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &"
+ done
# Terminate every per-host script with "wait" and zero one pidarray slot
# per host.
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ echo "wait" >> ${cmdsf}_${host}
+ pidarray[$pidcount]=0
+ pidcount=$((pidcount+1))
+ done
+ # timed run of all the per-host script files
+ t0=`date +%s.%N`
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ remote_shell $host bash ${cmdsf}_${host} &
+ pidarray[$pidcount]=$!
+ pidcount=$((pidcount+1))
+ done
# Wait on each recorded PID individually. A bare "wait" would also wait for
# the background vmstat jobs started earlier, which are still running.
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ wait ${pidarray[$pidcount]}
+ pidcount=$((pidcount+1))
+ done
+ #wait
+ t1=`date +%s.%N`
+ # clean up per-host script files
+ for host in ${unique_hosts[@]}; do
+ rm ${cmdsf}_${host}
+ done
# compute bandwidth from total data / elapsed time
str=`awk "BEGIN {printf \"%7.2f \",\
$total_size / (( $t1 - $t0 ) * 1024)}"`
done
# unload any obdecho modules we loaded
# Patch: the same loop now also stops the vmstat collectors. pidcount walks
# vmstatpids in the order in which the collectors were started.
+pidcount=0
for host in ${unique_hosts[@]}; do
# Ask the host to kill its vmstat processes (in the background), then
# signal the local background job that was running vmstat for this host,
# and finally wait for the killall request to finish.
# NOTE(review): SIGKILL follows SIGTERM with no delay, so the job gets no
# time to act on TERM; only the second kill has its stderr discarded.
+ remote_shell $host "killall vmstat" &
+ pid=$!
+ kill -term ${vmstatpids[$pidcount]}
+ kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
+ wait $pid
+ pidcount=$((pidcount+1))
# Unload obdecho only on hosts where this script loaded it.
if ((${do_unload_obdecho[$host]})); then
- unload_obdecho $host
+ unload_obdecho $host
fi
done
+
+exit 0