# result file prefix (date/time + hostname make it unique)
# NB: ensure the directory part of this path exists
-rslt=${rslt:-"/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`"}
+rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"}
# lustre root (if running with own source tree)
lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"}
restart_nobj=1
# machine's page size (K), i.e. in units of 1024 bytes.
# A PAGE_SIZE already set in the environment is kept. The removed line
# defaulted to 16; the added lines instead ask python for
# resource.getpagesize()/1024 when which finds a python, and fall back
# to 4 if PAGE_SIZE is still empty after that.
# NOTE(review): the print statement below uses Python 2 syntax; presumably
# a Python 3 interpreter rejects it, leaving the fallback of 4 in effect.
# Confirm on the target hosts.
-PAGE_SIZE=${PAGE_SIZE:-16}
+if [ -z "$PAGE_SIZE" ]; then
+ if which python >/dev/null; then
+ PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python`
+ fi
+fi
+PAGE_SIZE=${PAGE_SIZE:-4}
# max buffer_mem (total_threads * buffer size)
# (to avoid lctl ENOMEM problems)
here=`pwd`
# Hop on to the remote node, chdir to 'here' and run the given
# commands. One of the following will probably work.
- #ssh $host "cd $here; $cmds"
+ ssh $host "cd $here; $cmds"
#rsh $host "cd $here; $cmds"
# we have to remove the leading `uname -n`: from pdsh output lines
- pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
+ #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
}
#####################################################################
# Output files, all derived from the $rslt prefix.
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
# (added) prefix for the vmstat logs; _<host> is appended where it is used
+vmstatf="${rslt}.vmstat"
# start with empty summary and detail files
echo -n > $rsltf
echo -n > $workf
# (added) pids of the background vmstat jobs, filled in per unique host
+declare -a vmstatpids
# hide a little trick to unset this from the command line
if [ "$lustre_root" == " " ]; then
done
fi
+# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
# Per-host setup, run once for each distinct entry of host_names:
#  - write 0 to /proc/sys/portals/debug through remote_shell
#  - (added) truncate ${vmstatf}_<host>, start vmstat 5 through remote_shell
#    in the background and store that background pid in vmstatpids[]
#  - make sure obdecho is loaded; do_unload_obdecho[] is set to 1 only when
#    this script had to load it, and the script exits with status 1 if the
#    module is still not loaded after load_obdecho
# NOTE(review): the truncation below runs in this shell, while the >>
# redirection is part of the string handed to remote_shell, so it is
# evaluated by whatever shell remote_shell starts (presumably on the host).
# Both touch the same file only if that path is shared -- confirm.
# NOTE(review): do_unload_obdecho is subscripted with the host name; unless
# it is declared associative elsewhere, bash evaluates the subscript as an
# arithmetic expression -- verify against the declaration.
unique_hosts=(`unique ${host_names[@]}`)
+pidcount=0
for host in ${unique_hosts[@]}; do
remote_shell $host "echo 0 > /proc/sys/portals/debug"
+ host_vmstatf=${vmstatf}_${host}
+ echo -n > $host_vmstatf
+ remote_shell $host "vmstat 5 >> $host_vmstatf" &
+ pid=$!
+ vmstatpids[$pidcount]=$pid
+ pidcount=$((pidcount+1))
do_unload_obdecho[$host]=0
if obdecho_loaded $host; then
- continue
+ continue
fi
load_obdecho $host
if obdecho_loaded $host; then
- do_unload_obdecho[$host]=1
- continue
- fi
+ do_unload_obdecho[$host]=1
+ continue
+ fi
echo "Can't load obdecho on $host" 1>&2
exit 1
done
for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do
for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
- if ((thr < nobj)); then
+ if ((thr % nobj)); then
continue
fi
# restart?
done
# run tests
# Run every entry of tests for the current rsz / nobj / thr combination.
# For each test the visible lines:
#  - (added) append a "starting run ..." marker line to ${vmstatf}_<host>
#    for every unique host
#  - call print_summary -n with the test name
#  - append a backgrounded $lctl ... test_brw command line to
#    ${cmdsf}_<host>, then a final "wait" line to each host script
#  - start each host script through remote_shell in the background,
#    sampling date +%s.%N before the start and after all have finished
#  - remove the per-host script files and compute str with awk as
#    total_size / ((t1 - t0) * 1024)
# What the patch changes here:
#  - the thread spec passed to test_brw becomes thr / nobj (thr_per_obj)
#    instead of thr
#  - the bare wait is replaced by a wait on each pid saved in pidarray[];
#    presumably so the script does not also block on the background vmstat
#    jobs started earlier -- confirm
# NOTE(review): idx is not assigned in the lines visible here; the code
# that sets it is presumably outside this hunk.
for test in ${tests[@]}; do
+ declare -a pidarray
+ for host in ${unique_hosts[@]}; do
+ echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host}
+ done
print_summary -n "$test "
# create per-host script files
for host in ${unique_hosts[@]}; do
devno=${devnos[$idx]}
tmpfi="${tmpf}_$idx"
first_obj=${first_objs[$idx]}
# (added) threads per object; shell integer division of thr by nobj
+ thr_per_obj=$((${thr}/${nobj}))
echo >> ${cmdsf}_${host} \
"$lctl > $tmpfi 2>&1 \\
--threads $thr -$snap $devno \\
- test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &"
- done
- for host in ${unique_hosts[@]}; do
- echo "wait" >> ${cmdsf}_${host}
- done
- # timed run of all the per-host script files
- t0=`date +%s.%N`
- for host in ${unique_hosts[@]}; do
- remote_shell $host bash ${cmdsf}_${host}&
- done
- wait
- t1=`date +%s.%N`
- # clean up per-host script files
- for host in ${unique_hosts[@]}; do
- rm ${cmdsf}_${host}
- done
+ test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &"
+ done
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ echo "wait" >> ${cmdsf}_${host}
+ pidarray[$pidcount]=0
+ pidcount=$((pidcount+1))
+ done
+ # timed run of all the per-host script files
+ t0=`date +%s.%N`
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ remote_shell $host bash ${cmdsf}_${host} &
+ pidarray[$pidcount]=$!
+ pidcount=$((pidcount+1))
+ done
+ pidcount=0
+ for host in ${unique_hosts[@]}; do
+ wait ${pidarray[$pidcount]}
+ pidcount=$((pidcount+1))
+ done
+ #wait
+ t1=`date +%s.%N`
+ # clean up per-host script files
+ for host in ${unique_hosts[@]}; do
+ rm ${cmdsf}_${host}
+ done
# compute bandwidth from total data / elapsed time
str=`awk "BEGIN {printf \"%7.2f \",\
$total_size / (( $t1 - $t0 ) * 1024)}"`
done
# Final per-host cleanup.
# (added) stop the vmstat collection: run killall vmstat on the host through
# remote_shell in the background, send TERM and then KILL to the local
# background job saved in vmstatpids[] (errors from the KILL are discarded),
# and wait for the killall job to finish. pidcount walks vmstatpids[] in the
# same unique_hosts order in which the entries were stored.
# NOTE(review): KILL follows TERM with no delay in between -- confirm that
# is intended.
# unload any obdecho modules we loaded
+pidcount=0
for host in ${unique_hosts[@]}; do
+ remote_shell $host "killall vmstat" &
+ pid=$!
+ kill -term ${vmstatpids[$pidcount]}
+ kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
+ wait $pid
+ pidcount=$((pidcount+1))
# only unload where the flag recorded for this host is non-zero
if ((${do_unload_obdecho[$host]})); then
- unload_obdecho $host
+ unload_obdecho $host
fi
done
+
+exit 0