--- /dev/null
+This bundle includes four tools, listed in order of preference:
+
+sgpdd-survey - a test of the 'bare metal' performance, bypassing
+as much of the kernel as we can. Does not require Lustre, but does
+require the sgp_dd package.
+
+obdsurvey - a test of Lustre performance with three modes:
+Requires Python > 2.2
+Requires Lustre
+
+- local disk test - requires one OST
+- network performance test - requires two Lustre machines
+- network + disk test - requires Lustre filesystem and client
+
+ior-survey:
+A script to run the IOR benchmark. Version 2.8.6 of IOR is included.
+
+obdfilter-survey:
+Obsoleted by obdsurvey, included for historical reasons, and for
+systems that do not have a recent version of Python.
+
-DEPENDENCIES
-------------
-
- IOR - from LLNL: ftp://ftp.llnl.gov/pub/siop/ior
-
*************** Survey still being developed ***********************
#!/bin/bash
# cluster name (all node names are this followed by the node number)
-cluster=nid000
+cluster=mdev
# client node numbers (individual numbers or inclusive ranges)
-clients=(1-64)
+clients=(7-8)
# numbers of clients to survey
clients_lo=1
-clients_hi=64
-clients_iterator="*=2"
+clients_hi=2
+clients_iterator="+=1"
# numbers of tasks per client to survey
tasks_per_client_lo=1
-tasks_per_client_hi=1
+tasks_per_client_hi=8
tasks_per_client_iterator="*=2"
# record sizes to survey
rsize_hi=1M
rsize_iterator="*=2"
-# This line contains all of the possible tests.
-# IMPORTANT:::if you want to remount, put it in your tests array
-# in the order you want to remount to clear the cache. For ex:
-# tests=(write rewrite remount read reread rewrite_again) has a
-# remount between the rewrite and remount tests
-tests=(write read)
+## which tests to run (first must be write)
+# remount) not really a test; just remount to uncache everything
+# *write*) write
+# *) read
+#tests=(write rewrite read reread rewrite_again)
+tests=(write rewrite remount read reread)
# total # bytes written/read by any client node
-min_per_client_size=1G
-min_total_size=1G
+min_per_client_size=4G
+min_total_size=8G
# should each task do I/O to its own file?
file_per_task=1
# the binaries
-IOR="/spin/home/henken/IOR-2.8.4/src/C/IOR"
-llmount=llmount
-
-#Command to run IOR (pdsh,mpirun,yod)
-runior="yod"
-#Path to binary for program specified in runior
-pathtobin="$(which yod)"
-#location of machines file for mpirun, this file
-#will be built from the cluster and client ranges
-#above
-machines=machines
+IOR="/home/ericb/ior/src/C/IOR"
+llmount=/home/ericb/lustre/utils/llmount
+pdsh=pdsh
# the result file prefix (date/time + hostname makes unique)
-rslt=/spin/home/henken/ior_survey/ior_survey_`date +%F@%R`_`uname -n`
-#rslt=/home/larry/ior_survey
+#rslt=/home/ericb/ior_survey_`date +%F@%R`_`uname -n`
+rslt=/home/ericb/ior_survey
# where lustre is mounted on the clients
-lustre=/lustre/fs1/nic/
+lustre=/mnt/lustre
# basename of the test file(s)
testfile=${lustre}/ior_survey_testfile
-
# how to unmount and remount the F/S on a client (to clear the cache)
# change this depending on lustre config (network type, MDS etc)
-unmount="umount $lustre"
-remount="llmount pegasus:/mds1/client $lustre"
+remount="umount $lustre && $llmount -o nettype=elan mdev6:/ll_mds/client $lustre"
# pdsh args required to instantiate all instances of IOR in parallel
# the chosen module must support '-n <procs-per-node>'
echo $minusn "$*"
}
-mpi_client_file() {
- echo -n > $machines
- local base=`base_range $1`
- echo $base
- local chunk=`count_range $1`
- echo $chunk
- local high=$((base+chunk-1))
- echo $high
- for ((nmpi=$base; nmpi<=$high; nmpi++)); do
- echo $cluster$nmpi >> $machines
- done
-}
-
-parse_cmdline() {
- case $runior in
- 'mpirun')
- #echo "this"
- $pathtobin -np $((ntask*nclnt)) -machinefile $machines >> $tmpf 2>1 \
- "${cmdline[@]}";;
- 'pdsh')
- $pathtobin -S -b $pdsh_mpiargs -w "$test_clients" -n $ntask \
- >> $tmpf 2>&1 "${cmdline[@]}";;
- 'yod')
- $pathtobin -np $((ntask*nclnt)) >> $tmpf 2>&1 "${cmdline[@]}";;
- esac
-}
-
-if [ $runior = "mpirun" ]; then
- mpi_client_file ${clients[@]}
-fi
-
# convert params to actual numbers
min_per_client_size=`parse_number $min_per_client_size`
min_total_size=`parse_number $min_total_size`
for ((nclnt=clients_lo; nclnt<=clients_hi; nclnt$clients_iterator)); do
test_clients="${cluster}`n2noderange $nclnt ${clients[@]}`"
-
+
per_client_size=$((min_total_size/nclnt))
if ((per_client_size < min_per_client_size)); then
per_client_size=$min_per_client_size
if [ "$test" = "remount" ]; then
echo "=> $remount" >> $tmpf
- if [ "$runior" = "pdsh" ]; then
- $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
- "$unmount"
- $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
- "$remount"
- else
- $unmount
- $remount
- fi
+ $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
+ "$remount"
status=$?
echo "Completion Status: $status" >> $tmpf
# check lustre is mounted everywhere it's needed
cmd="(mount -t lustre; mount -t lustre_lite) | grep $lustre"
echo "=> Mount Check: $cmd" >> $tmpf
- if [ "$runior" = "pdsh" ]; then
- $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
- "$cmd"
- fi
+ $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \
+ "$cmd"
status=$?
echo "Completion Status: $status" >> $tmpf
if ((status)); then
esac
echo "=> ${cmdline[@]}" >> $tmpf
-
- parse_cmdline
+
+ $pdsh -S -b $pdsh_mpiargs -w "$test_clients" -n $ntask >> $tmpf 2>&1 \
+ "${cmdline[@]}"
status=$?
echo "Completion Status: $status" >> $tmpf
# ...or...
# echo_client instances (set 'client_names')
# ... use 'host:name' for obd instances on other nodes.
-
-# allow these to be passed in via string...
-ost_names_str=${ost_names_str:-""}
-if [ -n "$ost_names_str" ]; then
- declare -a ost_names
- count=0
- for name in $ost_names_str; do
- ost_names[$count]=$name
- count=$((count+1))
- done
-else
- ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16})
-fi
-
+ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16})
#client_names=(ns8:ECHO_ns8 ns9:ECHO_ns9)
-client_names_str=${client_names_str:-""}
-if [ -n "$client_names_str" ]; then
- # make sure we unset ost_names so that our client_names get noticed...
- unset ost_names
- declare -a client_names
- count=0
- for name in $client_names_str; do
- client_names[$count]=$name
- count=$((count+1))
- done
-fi
# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
-rslt=${rslt:-"/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`"}
+rslt=/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`
# lustre root (if running with own source tree)
-lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"}
+lustre_root=/home_nfs/eeb/lustre
# what tests to run (first must be write)
-tests_str=${tests_str:-""}
-if [ -n "$tests_str" ]; then
- declare -a tests
- count=0
- for name in $tests_str; do
- tests[$count]=$name
- count=$((count+1))
- done
-else
- #tests=(write rewrite read reread rewrite_again)
- tests=(write rewrite read)
-fi
+#tests=(write rewrite read reread rewrite_again)
+tests=(write rewrite read)
# total size (MBytes) per obd instance
# large enough to avoid cache effects
# and to make test startup/shutdown overhead insignificant
-size=${size:-16384}
+size=16384
# record size (KBytes)
-rszlo=${rszlo:-1024}
-rszhi=${rszhi:-1024}
+rszlo=1024
+rszhi=1024
# number of objects per OST
-nobjlo=${nobjlo:-1}
-nobjhi=${nobjhi:-512}
+nobjlo=1
+nobjhi=512
# threads per OST (1024 max)
-thrlo=${thrlo:-1}
-thrhi=${thrhi:-64}
+thrlo=1
+thrhi=64
# restart from here iff all are defined
restart_rsz=
restart_nobj=1
# machine's page size (K)
-PAGE_SIZE=${PAGE_SIZE:-16}
+PAGE_SIZE=64
# max buffer_mem (total_threads * buffer size)
# (to avoid lctl ENOMEM problems)
here=`pwd`
# Hop on to the remote node, chdir to 'here' and run the given
# commands. One of the following will probably work.
- #ssh $host "cd $here; $cmds"
+ ssh $host "cd $here; $cmds"
#rsh $host "cd $here; $cmds"
# we have to remove the leading `uname -n`: from pdsh output lines
- pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
+ #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://'
}
#####################################################################
verify=1
if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then
- echo "tests: ${tests[@]}"
echo "First test must be 'write'" 1>&2
exit 1
fi
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
-vmstatf="${rslt}.vmstat"
echo -n > $rsltf
echo -n > $workf
-declare -a vmstatpids
-
-# hide a little trick to unset this from the command line
-if [ "$lustre_root" == " " ]; then
- unset lustre_root
-fi
-
if [ -z "$lustre_root" ]; then
- lctl=$(which lctl)
+ lctl=lctl
else
lctl=${lustre_root}/utils/lctl
fi
shift
cmds="$*"
if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then
- eval "$cmds"
+ eval "$cmds"
else
- custom_remote_shell $host "$cmds"
+ custom_remote_shell $host "$cmds"
fi
}
load_obdecho () {
local host=$1
if [ -z "$lustre_root" ]; then
- remote_shell $host $modprobe obdecho
+ remote_shell $host $modprobe obdecho
elif [ -f ${lustre_root}/obdecho/obdecho.ko ]; then
- remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko
+ remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko
else
- remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o
+ remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o
fi
}
local type=$2
local name=$3
remote_shell $host $lctl device_list | \
- awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\
- print \$1; exit}}"
+ awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\
+ print \$1; exit}}"
}
get_ec_devno () {
local client_name="$2"
local ost_name="$3"
if [ -z "$client_name" ]; then
- if [ -z "$ost_name" ]; then
- echo "client and ost name both null" 1>&2
- return
- fi
- client_name=${ost_name}_echo_client
+ if [ -z "$ost_name" ]; then
+ echo "client and ost name both null" 1>&2
+ return
+ fi
+ client_name=${ost_name}_echo_client
fi
ec=`get_devno $host echo_client $client_name`
if [ -n "$ec" ]; then
- echo $ec $client_name 0
- return
+ echo $ec $client_name 0
+ return
fi
if [ -z "$ost_name" ]; then
- echo "no echo client and ost_name not set" 1>&2
- return
+ echo "no echo client and ost_name not set" 1>&2
+ return
fi
ost=`get_devno $host obdfilter $ost_name`
if [ -z "$ost" ]; then
- echo "OST $ost_name not setup" 1>&2
- return
+ echo "OST $ost_name not setup" 1>&2
+ return
fi
remote_shell $host "$lctl <<EOF
attach echo_client $client_name ${client_name}_UUID
-setup $ost_name
+ setup $ost_name
EOF"
ec=`get_devno $host echo_client $client_name`
if [ -z "$ec" ]; then
- echo "Can't setup echo client" 1>&2
- return
+ echo "Can't setup echo client" 1>&2
+ return
fi
echo $ec $client_name 1
}
local host=$1
local client_name=$2
remote_shell $host "$lctl <<EOF
-cfg $client_name
-cleanup
-detach
+ cfg $client_name
+ cleanup
+ detach
EOF"
}
local nobj=$3
local rfile=$4
remote_shell $host $lctl --device $devno create $nobj > $rfile 2>&1
- first=0
- prev=0
- count=0
- error=0
- while read line; do
- echo "$line" | grep -q 'is object id'
- if [ $? -ne 0 ]; then
- continue
- fi
- if [ $first -eq 0 ]; then
- first=$(echo $line | awk '{print $6}')
- first=$(printf "%d" $first)
- prev=$first
- count=1
- else
- obj=$(echo $line | awk '{print $6}')
- obj=$(printf "%d" $obj)
- diff=$((obj - (prev+1)))
- if [ $diff -ne 0 ]; then
- error=1
- fi
- prev=$obj
- count=$((count+1))
- fi
- done < $rfile
- if [ $nobj -ne $count ]; then
- echo "ERROR: $nobj != $count" >&2
- cat $rfile >&2
- echo "ERROR"
- elif [ $error -ne 0 ]; then
- echo "ERROR: non contiguous objs found" >&2
- echo "ERROR"
- else
- echo $first
+ n=(`awk < $rfile \
+ '/is object id/ {obj=strtonum($6);\
+ first=!not_first; not_first=1;\
+ if (first) first_obj=obj;
+ else if (obj != prev + 1) exit;\
+ prev=obj; n++}\
+ END {printf "%d %d\n", first_obj, n}'`)
+ if ((n[1] != nobj)); then
+ echo "ERROR"
+ else
+ echo ${n[0]}
fi
}
testname2type () {
# 'x' disables data check
if ((verify)); then
- x=""
+ x=""
else
- x="x"
+ x="x"
fi
case $1 in
- *write*) echo "w$x";;
- *) echo "r$x";;
+ *write*) echo "w$x";;
+ *) echo "r$x";;
esac
}
done
fi
-# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=(`unique ${host_names[@]}`)
-pidcount=0
for host in ${unique_hosts[@]}; do
remote_shell $host "echo 0 > /proc/sys/portals/debug"
- host_vmstatf=${vmstatf}_${host}
- echo -n > $host_vmstatf
- remote_shell $host "vmstat 5 >> $host_vmstatf" &
- pid=$!
- vmstatpids[$pidcount]=$pid
- pidcount=$((pidcount+1))
do_unload_obdecho[$host]=0
if obdecho_loaded $host; then
- continue
+ continue
fi
load_obdecho $host
if obdecho_loaded $host; then
- do_unload_obdecho[$host]=1
- continue
- fi
+ do_unload_obdecho[$host]=1
+ continue
+ fi
echo "Can't load obdecho on $host" 1>&2
exit 1
done
for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do
- for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
- if ((thr < nobj)); then
- continue
- fi
- # restart?
- if [ -n "$restart_rsz" -a\
- -n "$restart_nobj" -a\
- -n "$restart_thr" ]; then
- if ((rsz < restart_rsz ||\
- (rsz == restart_rsz &&\
- (nobj < restart_nobj ||\
- (nobj == restart_nobj &&\
- thr < restart_thr))))); then
- continue;
- fi
- fi
- # compute parameters
- total_thr=$((ndevs*thr))
- total_nobj=$((ndevs*nobj))
- pages=$((rsz/PAGE_SIZE))
- actual_rsz=$((pages*PAGE_SIZE))
- count=$((size*1024/(actual_rsz*thr)))
- actual_size=$((actual_rsz*count*thr))
- total_size=$((actual_size*ndevs))
- # show computed parameters
- str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \
- $ndevs $total_size $actual_rsz $total_nobj $total_thr`
- echo "=======================> $str" >> $workf
- print_summary -n "$str"
- if ((total_thr * actual_rsz > max_buffer_mem)); then
- print_summary "Too much buffer space"
- continue
- fi
- # create the objects
- tmpf="${workf}_tmp"
- for ((idx=0; idx < ndevs; idx++)); do
- host=${host_names[$idx]}
- devno=${devnos[$idx]}
- client_name="${host}:${client_names[$idx]}"
- echo "=============> Create $nobj on $client_name" >> $workf
- first_obj=`create_objects $host $devno $nobj $tmpf`
- cat $tmpf >> $workf
- rm $tmpf
- if [ $first_obj = "ERROR" ]; then
- print_summary "created object #s on $client_name not contiguous"
- exit 1
- fi
- first_objs[$idx]=$first_obj
- done
- # run tests
- for test in ${tests[@]}; do
- declare -a pidarray
- for host in ${unique_hosts[@]}; do
- echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host}
- done
- print_summary -n "$test "
- # create per-host script files
- for host in ${unique_hosts[@]}; do
- echo -n > ${cmdsf}_${host}
- done
- for ((idx=0; idx < ndevs; idx++)); do
- host=${host_names[$idx]}
- devno=${devnos[$idx]}
- tmpfi="${tmpf}_$idx"
- first_obj=${first_objs[$idx]}
- echo >> ${cmdsf}_${host} \
- "$lctl > $tmpfi 2>&1 \\
- --threads $thr -$snap $devno \\
- test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &"
-
- done
- pidcount=0
- for host in ${unique_hosts[@]}; do
- echo "wait" >> ${cmdsf}_${host}
- pidarray[$pidcount]=0
- pidcount=$((pidcount+1))
- done
- # timed run of all the per-host script files
- t0=`date +%s.%N`
- pidcount=0
- for host in ${unique_hosts[@]}; do
- remote_shell $host bash ${cmdsf}_${host} &
- pidarray[$pidcount]=$!
- pidcount=$((pidcount+1))
- done
- pidcount=0
- for host in ${unique_hosts[@]}; do
- wait ${pidarray[$pidcount]}
- pidcount=$((pidcount+1))
- done
- #wait
- t1=`date +%s.%N`
- # clean up per-host script files
- for host in ${unique_hosts[@]}; do
- rm ${cmdsf}_${host}
- done
- # compute bandwidth from total data / elapsed time
- str=`awk "BEGIN {printf \"%7.2f \",\
- $total_size / (( $t1 - $t0 ) * 1024)}"`
- print_summary -n "$str"
- # collect/check individual OST stats
- echo -n > $tmpf
- for ((idx=0; idx < ndevs; idx++)); do
- client_name="${host_names[$idx]}:${client_names[$idx]}"
- tmpfi="${tmpf}_$idx"
- echo "=============> $test $client_name" >> $workf
- cat $tmpfi >> $workf
- get_stats $tmpfi >> $tmpf
- rm $tmpfi
- done
- # compute/display global min/max stats
- echo "=============> $test global" >> $workf
- cat $tmpf >> $workf
- stats=(`get_global_stats $tmpf`)
- rm $tmpf
- if ((stats[0] <= 0)); then
- if ((stats[0] < 0)); then
- str=`printf "%17s " ERROR`
- else
- str=`printf "%17s " SHORT`
- fi
- else
- str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\
- (${stats[1]} * $actual_rsz)/1024,\
- (${stats[2]} * $actual_rsz)/1024; exit}"`
- fi
- print_summary -n "$str"
- done
- print_summary ""
- # destroy objects we created
- for ((idx=0; idx < ndevs; idx++)); do
- host=${host_names[$idx]}
- devno=${devnos[$idx]}
- client_name="${host}:${client_names[$idx]}"
- first_obj=${first_objs[$idx]}
- echo "=============> Destroy $nobj on $client_name" >> $workf
- destroy_objects $host $devno $first_obj $nobj $tmpf
- cat $tmpf >> $workf
- rm $tmpf
- done
- done
+ for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do
+ if ((thr < nobj)); then
+ continue
+ fi
+ # restart?
+ if [ -n "$restart_rsz" -a\
+ -n "$restart_nobj" -a\
+ -n "$restart_thr" ]; then
+ if ((rsz < restart_rsz ||\
+ (rsz == restart_rsz &&\
+ (nobj < restart_nobj ||\
+ (nobj == restart_nobj &&\
+ thr < restart_thr))))); then
+ continue;
+ fi
+ fi
+ # compute parameters
+ total_thr=$((ndevs*thr))
+ total_nobj=$((ndevs*nobj))
+ pages=$((rsz/PAGE_SIZE))
+ actual_rsz=$((pages*PAGE_SIZE))
+ count=$((size*1024/(actual_rsz*thr)))
+ actual_size=$((actual_rsz*count*thr))
+ total_size=$((actual_size*ndevs))
+ # show computed parameters
+ str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \
+ $ndevs $total_size $actual_rsz $total_nobj $total_thr`
+ echo "=======================> $str" >> $workf
+ print_summary -n "$str"
+ if ((total_thr * actual_rsz > max_buffer_mem)); then
+ print_summary "Too much buffer space"
+ continue
+ fi
+ # create the objects
+ tmpf="${workf}_tmp"
+ for ((idx=0; idx < ndevs; idx++)); do
+ host=${host_names[$idx]}
+ devno=${devnos[$idx]}
+ client_name="${host}:${client_names[$idx]}"
+ echo "=============> Create $nobj on $client_name" >> $workf
+ first_obj=`create_objects $host $devno $nobj $tmpf`
+ cat $tmpf >> $workf
+ rm $tmpf
+ if [ $first_obj = "ERROR" ]; then
+ print_summary "created object #s on $client_name not contiguous"
+ exit 1
+ fi
+ first_objs[$idx]=$first_obj
+ done
+ # run tests
+ for test in ${tests[@]}; do
+ print_summary -n "$test "
+ # create per-host script files
+ for host in ${unique_hosts[@]}; do
+ echo -n > ${cmdsf}_${host}
+ done
+ for ((idx=0; idx < ndevs; idx++)); do
+ host=${host_names[$idx]}
+ devno=${devnos[$idx]}
+ tmpfi="${tmpf}_$idx"
+ first_obj=${first_objs[$idx]}
+ echo >> ${cmdsf}_${host} \
+ "$lctl > $tmpfi 2>&1 \\
+ --threads $thr -$snap $devno \\
+ test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &"
+ done
+ for host in ${unique_hosts[@]}; do
+ echo "wait" >> ${cmdsf}_${host}
+ done
+ # timed run of all the per-host script files
+ t0=`date +%s.%N`
+ for host in ${unique_hosts[@]}; do
+ remote_shell $host bash ${cmdsf}_${host}&
+ done
+ wait
+ t1=`date +%s.%N`
+ # clean up per-host script files
+ for host in ${unique_hosts[@]}; do
+ rm ${cmdsf}_${host}
+ done
+ # compute bandwidth from total data / elapsed time
+ str=`awk "BEGIN {printf \"%7.2f \",\
+ $total_size / (( $t1 - $t0 ) * 1024)}"`
+ print_summary -n "$str"
+ # collect/check individual OST stats
+ echo -n > $tmpf
+ for ((idx=0; idx < ndevs; idx++)); do
+ client_name="${host_names[$idx]}:${client_names[$idx]}"
+ tmpfi="${tmpf}_$idx"
+ echo "=============> $test $client_name" >> $workf
+ cat $tmpfi >> $workf
+ get_stats $tmpfi >> $tmpf
+ rm $tmpfi
+ done
+ # compute/display global min/max stats
+ echo "=============> $test global" >> $workf
+ cat $tmpf >> $workf
+ stats=(`get_global_stats $tmpf`)
+ rm $tmpf
+ if ((stats[0] <= 0)); then
+ if ((stats[0] < 0)); then
+ str=`printf "%17s " ERROR`
+ else
+ str=`printf "%17s " SHORT`
+ fi
+ else
+ str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\
+ (${stats[1]} * $actual_rsz)/1024,\
+ (${stats[2]} * $actual_rsz)/1024; exit}"`
+ fi
+ print_summary -n "$str"
+ done
+ print_summary ""
+ # destroy objects we created
+ for ((idx=0; idx < ndevs; idx++)); do
+ host=${host_names[$idx]}
+ devno=${devnos[$idx]}
+ client_name="${host}:${client_names[$idx]}"
+ first_obj=${first_objs[$idx]}
+ echo "=============> Destroy $nobj on $client_name" >> $workf
+ destroy_objects $host $devno $first_obj $nobj $tmpf
+ cat $tmpf >> $workf
+ rm $tmpf
+ done
+ done
done
done
for ((i=0; i<ndevs; i++)); do
host=${host_names[$i]}
if ((${do_teardown_ec[$i]})); then
- teardown_ec_devno $host ${client_names[$i]}
+ teardown_ec_devno $host ${client_names[$i]}
fi
done
# unload any obdecho modules we loaded
-pidcount=0
for host in ${unique_hosts[@]}; do
- remote_shell $host "killall vmstat" &
- pid=$!
- kill -term ${vmstatpids[$pidcount]}
- kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
- wait $pid
- pidcount=$((pidcount+1))
if ((${do_unload_obdecho[$host]})); then
- unload_obdecho $host
+ unload_obdecho $host
fi
done
-
-exit 0
--- /dev/null
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+\f
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+\f
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+\f
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+\f
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+\f
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+\f
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+\f
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+\f
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+\f
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
--- /dev/null
+Lustre IO-Kit is a collection of benchmark-tools for a cluster with
+the lustre filesystem.
+
+Currently only an Object Block Device survey is included, but the kit may
+be extended with block-device and filesystem surveys in the future.
+
+Copyright (c) 2005 Scali AS. All Rights Reserved.
+
+= Prerequisites
+
+- python2.2 or newer, available at /usr/bin/python2
+- the "logging"-module from python2.3
+- Passwordless remote access to nodes in the system, through
+ ScaSH (The Scali parallel shell) or SSH.
+
+
+= Interfaces
+
+Two interfaces to OBD-survey; a python library interface and a command
+line interface.
+
+The python library interface is documented in pydoc. Use
+"pydoc lustre_obdsurveylib" for information about the interface.
+
+The command line interface is accessed through the "lustre_obdsurvey"
+application. Running the application with "--help" will list the
+various options.
+
+lustre_obdsurvey implements 3 separate test-modes:
+
+- Disk IO
+
+This testmode will set up an echo_client connected to existing obdfilters,
+and test the local disk-io performance on the fileservers. You need to list
+the names of the obdfilters you wish to test, and what nodes they are on,
+like this:
+ lustre_obdsurvey --diskio server1:test_ost00 server2:test_ost01 ....
+
+- Network IO
+
+This testmode will set up pairs of obdecho and echo_clients, to test the
+network bandwidth between pairs of nodes in the cluster. You need to list the
+pairs of nodes to test, like this:
+ lustre_obdsurvey --networkio server1:client1 server2:client2 ....
+
+- Remote Disk IO
+
+This testmode will set up an echo_client that accesses a remote obdfilter-
+device, basically combining the two previous tests. The test assumes that the
+client already has an OSC-device configured for the remote obdfilter. You
+need to specify the names of the osc-devices and the nodes they are on like
+this:
+ lustre_obdsurvey --networkdiskio client1:osc_server1_ost00 client2:osc_server2_ost00 ...
+
+
+lustre_obdsurvey will then run a benchmark in parallel across all listed
+devices, and list the results. The following options apply to all test-modes:
+
+--pagesize=PAGESIZE: Set the pagesize for your nodes. Defaults to 4 KB.
+
+--size=SIZE: Set the size of the dataset to use for the tests. Defaults to 100 MB.
+ Make sure the size of the dataset is larger than the cache of your storage-devices.
+
+--minrecordsize=MINRECORDSIZE
+--maxrecordsize=MAXRECORDSIZE
+ Test performance with recordsizes in the range [MINRECORDSIZE, MAXRECORDSIZE]. By
+ default only 1 MB recordsizes are used.
+
+--minthreads=MINTHREADS
+--maxthreads=MAXTHREADS
+ Test performance with [MINTHREADS, MAXTHREADS] number of threads for each client.
+ By default the range [1, 16] is tested.
--- /dev/null
+Summary: Lustre IO test kit
+Name: lustre-iokit
+Vendor: Scali AS
+URL: http://www.scali.com/
+Version: 0.12.0
+Release: 1
+License: LGPL
+Group: Applications/System
+Source0: %{name}-%{version}-%{release}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+%description
+Lustre IO-Kit is a collection of benchmark-tools for a cluster with
+the lustre filesystem.
+
+Currently only an Object Block Device survey is included, but the kit may
+be extended with block-device and filesystem surveys in the future.
+
+Copyright (c) 2005 Scali AS. All Rights Reserved.
+
+
+Contact :
+Scali AS
+Olaf Helsets vei 6, P.O Box 150, Oppsal
+N-0619 Oslo
+NORWAY
+
+Technical support : support@scali.com
+Licensing support : license@scali.com
+http://www.scali.com
+
+
+%prep
+%setup -q -n lustre-iokit
+
+%build
+python setup.py build
+
+%install
+rm -rf $RPM_BUILD_ROOT
+python setup.py install --root $RPM_BUILD_ROOT
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+
+%files
+%defattr(-,root,root,-)
+/usr/bin/*
+/usr/lib*/python*/site-packages/*
+%doc /usr/share/lustre-iokit*
+
+
+%changelog
+* Mon Sep 19 2005 Ragnar Kjorstad <rk@scali.com> v0.12
+- Build fixes
+* Wed Apr 6 2005 Ragnar Kjorstad <rk@scali.com> v0.11-1
+- Compatibility fixes with python2.2 (rhel3)
+* Fri Apr 1 2005 Ragnar Kjorstad <rk@scali.com> v0.10-2
+- Makefile-fixes for rhel3-x86_64
+* Mon Feb 21 2005 Ragnar Kjorstad <rk@scali.com>
+- Initial build.
+
--- /dev/null
+Summary: Lustre IO test kit
+Name: lustre-iokit
+Vendor: Scali AS
+URL: http://www.scali.com/
+Version: @VERSION@
+Release: @RELEASE@
+License: LGPL
+Group: Applications/System
+Source0: %{name}-%{version}-%{release}.tar.gz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+%description
+Lustre IO-Kit is a collection of benchmark-tools for a cluster with
+the lustre filesystem.
+
+Currently only an Object Block Device survey is included, but the kit may
+be extended with block-device and filesystem surveys in the future.
+
+Copyright (c) 2005 Scali AS. All Rights Reserved.
+
+
+Contact :
+Scali AS
+Olaf Helsets vei 6, P.O Box 150, Oppsal
+N-0619 Oslo
+NORWAY
+
+Technical support : support@scali.com
+Licensing support : license@scali.com
+http://www.scali.com
+
+
+%prep
+%setup -q -n lustre-iokit
+
+%build
+python setup.py build
+
+%install
+rm -rf $RPM_BUILD_ROOT
+python setup.py install --root $RPM_BUILD_ROOT
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+
+%files
+%defattr(-,root,root,-)
+/usr/bin/*
+/usr/lib*/python*/site-packages/*
+%doc /usr/share/lustre-iokit*
+
+
+%changelog
+* Mon Sep 19 2005 Ragnar Kjorstad <rk@scali.com> v0.12
+- Build fixes
+* Wed Apr 6 2005 Ragnar Kjorstad <rk@scali.com> v0.11-1
+- Compatibility fixes with python2.2 (rhel3)
+* Fri Apr 1 2005 Ragnar Kjorstad <rk@scali.com> v0.10-2
+- Makefile-fixes for rhel3-x86_64
+* Mon Feb 21 2005 Ragnar Kjorstad <rk@scali.com>
+- Initial build.
+
--- /dev/null
+#!/usr/bin/python2
+"""
+Run the application with "--help" for documentation of how to run
+it as an application.
+
+Copyright (c) 2005 Scali AS. All Rights Reserved.
+"""
+
+import logging
+import string
+import lustre_obdsurveylib
+try:
+ import optparse
+except ImportError:
+    # Compatibility with older python-distributions:
+ import optik as optparse
+
+
+
def main():
    """
    Command-line entry point for the OBD survey.

    Parses the command-line, builds one EchoClient per target for the
    selected test-mode (--diskio, --networkio or --networkdiskio), then
    sweeps record-size and thread-count over the requested ranges,
    printing one summary line of bandwidth figures per combination.
    """
    # NOTE(review): the usage string advertises "--network" but the
    # option defined below is "--networkio" -- confirm which is intended.
    parser = optparse.OptionParser(version="%prog 1.0", usage="%prog <--diskio | --network | --networkdiskio> <targetlist>")
    parser.add_option("--pagesize", type="int", default=4, help="Set the pagesize (KB)")
    parser.add_option("--size", type="int", default=100, help="Set the dataset-size (MB)")
    parser.add_option("--minrecordsize", type="int", default=1024, help="Minimum record size (KB)")
    parser.add_option("--maxrecordsize", type="int", default=1024, help="Maximum record size (KB)")
    parser.add_option("--minthreads", type="int", default=1, help="Minimum number of threads")
    parser.add_option("--maxthreads", type="int", default=16, help="Maximum number of threads")
    # The three modes are mutually exclusive; whichever flag is given last
    # wins, since they all store into the same dest ("mode").
    parser.add_option("--diskio", action="store_const", const="diskio", dest="mode",
                      help="Test local IO-performance on a set of OSTs. List OSTs as a space-seperated list of node:ostname.")
    parser.add_option("--networkio", action="store_const", const="networkio", dest="mode",
                      help="Test network-performance. List network-connections as a space-seperated list of server:client pairs.")
    parser.add_option("--networkdiskio", action="store_const", const="networkdiskio", dest="mode",
                      help="Test IO-performance over network. Assumes existing OSC-devices. List OSCs as a space-seperated list of"
                      "node:oscname pairs.")
    (options, args) = parser.parse_args()
    # Each positional argument is "a:b"; split into (a, b) tuples.
    # (Python 2: map() returns a list, string.split is the module function.)
    args = map(lambda arg: tuple(string.split(arg, ":")), args)
    # Set up lustre-devices according to mode:
    clients = []
    if options.mode == "diskio":
        # Local disk test: echo_client on top of each existing obdfilter.
        for node, device in args:
            obdfilter = lustre_obdsurveylib.ExistingOBDFilter(node, device)
            echo_client = lustre_obdsurveylib.EchoClient(node, device+"_client", obdfilter)
            clients.append(echo_client)
    elif options.mode == "networkio":
        # Network test: obdecho on the server, osc + echo_client on the client.
        for servername, clientname in args:
            obdecho = lustre_obdsurveylib.OBDEcho(servername, "test_obdecho")
            osc = lustre_obdsurveylib.OSC(clientname, "test_osc", obdecho)
            echo_client = lustre_obdsurveylib.EchoClient(clientname, "test_client", osc)
            clients.append(echo_client)
    elif options.mode == "networkdiskio":
        # Network + disk test: echo_client on top of each pre-configured OSC.
        for clientname, oscname in args:
            osc = lustre_obdsurveylib.ExistingOSC(clientname, oscname)
            echo_client = lustre_obdsurveylib.EchoClient(clientname, oscname+"_client", osc)
            clients.append(echo_client)
    else:
        parser.error("You need to specify either --diskio, --networkio or --networkdiskio")
    # Sweep record-size and thread-count geometrically (doubling each step).
    rsz = options.minrecordsize
    while rsz <= options.maxrecordsize:
        threads = options.minthreads
        while threads <= options.maxthreads:
            results = lustre_obdsurveylib.ParallelTestBRW(clients, threads, options.size, ('w', 'r'), rsz, options.pagesize)
            # Python 2 print with trailing comma: keep every result for this
            # parameter combination on a single output line.
            print "ost %2d sz %7dK rsz %4d thr %2d" % (len(clients), results[0].getTotalSize(), rsz, threads),
            for result in results:
                try:
                    result.verifyExitCodes()
                except:
                    print "%30s" % "ERROR",
                else:
                    print "%s %8.2f [%8.2f,%8.2f]" % (result.getTestType(), result.getTotalBandwidth(), result.getMinBandwidth(), result.getMaxBandwidth()),
            print
            threads *= 2
        rsz *= 2
+
+
if __name__ == '__main__':
    # Route log records from the root logger (and therefore from
    # lustre_obdsurveylib's loggers) to a StreamHandler -- stderr by
    # default -- at INFO level, then run the survey.
    log = logging.getLogger()
    log.addHandler(logging.StreamHandler())
    log.setLevel(logging.INFO)
    main()
--- /dev/null
+#!/usr/bin/python2
+"""
+This module implements lustre-specific IO- and network-tests.
+It is based on the 'obdfilter-survey'-script distributed with lustre-iokit.
+
+To use it as a library, the caller should first create a set of
+EchoClient-object. The EchoClient-class will automatically create the
+echo_client-device, and set it up to communicate with the device
+given as the target to the EchoClient-constructor. See main() for
+an example of how to set up EchoClient-objects and the objects it
+depends on.
+
+Next, run ParallelTestBRW to run benchmarks in parallel over all
+the EchoClients with a specific number of threads++.
+
+ParallelTestBRW returns a list of ParallelTestBRWResult-objects
+(one for each type of test ('w' and 'r') performed).
+See the documentation for ParallelTestBRWResult for how to extract
+the data from this object.
+
+Some notes about the implementation:
+The core-functionality is implemented as python-classes wrapping lustre-devices
+such as obdecho-, osc-, and echo_client-devices. The constructors for these
+classes automatically create the lustre-device, and the destructor removes the
+devices. High-level devices keep references to low-level devices, ensuring that
+the low-level devices are not removed as long as they are in use. The
+garbage-collector will clean everything up in the right order. However, there
+are two corner-cases that users of the library must be aware of:
+
+1. You cannot create two lustre-devices of the same type with the same name on
+the same node at the same time. Replacing one object with a conflicting object
+like this:
+
+foo = OBDEcho("nodename", "test_obdecho")
+foo = OBDEcho("nodename", "test_obdecho")
+
+will fail because the second obdecho-object's constructor will run before the old
+object has been removed. To replace an object with a conflicting new object,
+the first one has to be removed explicitly first:
+
+foo = OBDEcho("nodename", "test_obdecho")
+del foo
+foo = OBDEcho("nodename", "test_obdecho")
+
+
+2. When python exits it will remove all remaining objects without following
+the dependency-rules between objects. This may cause lustre-devices to not
+be removed properly. Make sure to delete all references to the lustre-device
+objects _before_ exiting python to make sure this doesn't happen.
+
+
+
+Copyright (c) 2005 Scali AS. All Rights Reserved.
+"""
+
+import string
+import time
+import random
+import re
+import logging
+import os
+import popen2
+
+
+
+# Classes that implement remote execution using different tools/protocols:
+# These should subclass Popen3, and implement the same interface.
+
class scashPopen3(popen2.Popen3):
    """
    popen2.Popen3 look-alike that executes the command on a remote
    machine via ScaSH (the Scali parallel shell).
    """
    def __init__(self, node, cmd, *args, **kwargs):
        """
        Same signature as popen2.Popen3.__init__, except:
        @node - hostname where to execute cmd
        @cmd - the command to execute. (Needs to be a string!)
        """
        wrapped_cmd = ["scash", "-n", node, cmd]
        popen2.Popen3.__init__(self, wrapped_cmd, *args, **kwargs)
+
class sshPopen3(popen2.Popen3):
    """
    popen2.Popen3 look-alike that executes the command on a remote
    machine via SSH.
    """
    def __init__(self, node, cmd, *args, **kwargs):
        """
        Same signature as popen2.Popen3.__init__, except:
        @node - hostname where to execute cmd
        @cmd - the command to execute. (Needs to be a string!)
        """
        wrapped_cmd = ["ssh", node, cmd]
        popen2.Popen3.__init__(self, wrapped_cmd, *args, **kwargs)
+
+
# Select remote execution tool/protocol based on what is actually available:
# ScaSH is preferred when installed; otherwise fall back to SSH.  The
# chosen class is bound to the module-level name remotePopen3, which all
# remote-execution helpers below use.
if os.path.isfile("/opt/scali/bin/scash"):
    remotePopen3 = scashPopen3
elif os.path.isfile("/usr/bin/ssh"):
    remotePopen3 = sshPopen3
else:
    raise Exception("No remote-execution environment found!")
+
+
def remoteCommand(node, command):
    """
    Run an external command on a remote node, and return the output as a
    list of strings (one string per line).  Raise an exception if the
    command fails (returns a non-zero exit-code).
    @node - nodename where to run the command
    @command - the command to run
    """
    remote = remotePopen3(node, command, True)  # True: capture stderr too
    status = remote.wait()
    if status != 0:
        # Bug fix: Popen3.wait() returns the raw status word from
        # os.waitpid(), so reporting it directly shows e.g. 256 instead
        # of 1.  Decode it to the real exit code when possible.
        if os.WIFEXITED(status):
            exit_code = os.WEXITSTATUS(status)
        else:
            exit_code = status
        raise Exception("Remote command %s failed with exit-code: %d" %
                        (repr(command), exit_code))
    return remote.fromchild.readlines()
+
def genUUID():
    """
    Generate a random UUID-like string in the standard 8-4-4-4-12
    lowercase-hex format (36 characters including hyphens).
    """
    r = random.Random(time.time())
    # Bug fix: random.randint is inclusive at BOTH ends, so the upper
    # bound must be 16**4 - 1.  The old bound of 16**4 could yield
    # 0x10000, which "%04x" renders as five digits, breaking the
    # fixed-width UUID format.
    fields = [r.randint(0, 16**4 - 1) for i in range(8)]
    return "%04x%04x-%04x-%04x-%04x-%04x%04x%04x" % tuple(fields)
+
class KernelModule:
    """
    Object to keep track of the usage of a kernel-module on one node, and
    unload it when it's no longer needed. The constructor will check if the
    module is already loaded. If it is, the use_count will be preset to 1
    and the module will never be automatically unloaded. (Assuming no
    object will call decUse without first having called incUse)
    """
    def __init__(self, node, name):
        """
        KernelModule constructor.
        Does _not_ increase the usage-counter or load the module!
        @node - the node the module lives on
        @name - the name of the kernel-module
        """
        self.node = node
        self.name = name
        self.use_count = self.__isLoaded()
    def __isLoaded(self):
        """
        Check if the module is currently loaded; returns 1 or 0.
        """
        for line in remoteCommand(self.node, "/sbin/lsmod"):
            if line.split()[0] == self.name:
                return 1
        return 0
    def __load(self):
        """
        Load the module now.
        Don't call this directly - call incUse.
        """
        remoteCommand(self.node, "modprobe %s" % self.name)
    def __unload(self):
        """
        Unload the module now.
        Don't call this directly - call decUse.
        """
        # Bug fix: this used to run "rmmod obdecho" unconditionally,
        # which unloaded the wrong module for any KernelModule tracking
        # something other than obdecho.
        remoteCommand(self.node, "rmmod %s" % self.name)
    def incUse(self):
        """
        Call this method before using the module.  Loads the module on
        the first use.
        """
        self.use_count += 1
        if self.use_count == 1:
            self.__load()
    def decUse(self):
        """
        Call this method when you're done using the module.  Unloads the
        module when the last user is gone.
        """
        self.use_count -= 1
        if self.use_count == 0:
            self.__unload()
+
+
class KernelModules:
    """
    Registry of KernelModule-objects for multiple kernel-modules on
    multiple nodes.
    """
    def __init__(self):
        # The KernelModule-objects are stored in self.data.
        # The key in self.data is the nodename; the value is a nested
        # dictionary with module-names as keys and KernelModule objects
        # as values.
        self.data = {}
    def getKernelModule(self, nodename, modulename):
        """
        Lookup (or create) a KernelModule object
        @nodename - the node where the kernel-module should be loaded
        @modulename - the name of the kernel-module
        """
        # Idiom fix: dict.has_key() is deprecated (and removed in
        # Python 3); the "in" operator works on Python 2.2+ as well.
        if nodename not in self.data:
            self.data[nodename] = {}
        if modulename not in self.data[nodename]:
            self.data[nodename][modulename] = KernelModule(nodename, modulename)
        # And then return it:
        return self.data[nodename][modulename]
+
# Module-level singleton used by all device wrappers below to share
# KernelModule reference counts across the whole process:
modules = KernelModules()
+
def lctl(node, commands):
    """
    Execute a batch of lctl-commands on a node.
    @node - node where to run the commands
    @commands - list of command-strings
    Returns the output from lctl as a list of strings (one string per line)
    """
    # Join the commands into one newline-separated script and feed it to
    # lctl on the remote side via echo.
    # NOTE(review): the script is interpolated into double quotes, so
    # commands containing quotes or shell metacharacters would break --
    # presumably all callers pass plain lctl syntax; verify if reused.
    script = string.join(commands, '\n')
    logging.getLogger("lctl").debug("lctl: %s" % repr(script))
    return remoteCommand(node, 'echo -e "%s" | lctl' % script)
+
def find_device(node, search_type, search_name):
    """
    Find the device-number for a device.
    @node - the node where the device lives
    @search_type - the device-type to search for
    @search_name - the device-name to search for
    Returns the device-number (int).
    Raises ValueError when no matching device exists (or when a
    device_list line does not have exactly six fields).
    """
    for entry in lctl(node, ['device_list']):
        # Each line: id, state, type, name, uuid, refcount.
        dev_id, dev_state, dev_type, dev_name, dev_uuid, dev_refcnt = entry.split()
        if dev_type == search_type and dev_name == search_name:
            return int(dev_id)
    raise ValueError("device not found: %s:%s" % (search_type, search_name))
+
+
class OBDEcho:
    """
    Create an obdecho-device (a device that can simulate an OST).
    The device exists for the lifetime of this object.
    """
    def __init__(self, node, name):
        """
        The constructor will create the device.
        @node - the node where to run the obdecho-device
        @name - the name of the new device
        """
        self.node = node
        self.name = name
        self.uuid = genUUID()
        # Hold a use-count on the obdecho kernel-module while the
        # device exists (loads the module on first use):
        self.module = modules.getKernelModule(self.node, "obdecho")
        self.module.incUse()
        lctl(self.node, ['attach obdecho %s %s' % (self.name, self.uuid), 'setup n'])
    def __del__(self):
        """
        The destructor will remove the device.
        """
        # Tear down the device first, then drop the module use-count
        # (which may rmmod obdecho) - this order matters.
        lctl(self.node, ['cfg_device %s' % self.name, 'cleanup', 'detach'])
        self.module.decUse()
+
+
class ExistingOSC:
    """
    Reference to an osc-device that already exists.

    The device itself is not manipulated in any way - this class is
    only used to refer to the device (e.g. as an EchoClient target).
    """
    def __init__(self, node, name):
        """
        Create a reference to the device.
        @node - the node where the device lives
        @name - the name of the device
        """
        self.node, self.name = node, name
+
class OSC:
    """
    Create an osc-device (a device that connects to a remote
    ost/obdecho-device and looks like a local obdfilter).
    The device exists for the lifetime of this object.
    """
    def __init__(self, node, name, ost):
        """
        Create the device.
        @node - the node where to run the OSC
        @name - the name of the new device
        @ost - the object that the osc should be connected to. This should
            be an OBDEcho-object (its .uuid and .node are used below).
        """
        self.node = node
        self.name = name
        self.ost = ost
        # Hold a use-count on the obdecho kernel-module while the
        # device exists (loads the module on first use):
        self.module = modules.getKernelModule(self.node, "obdecho")
        self.module.incUse()
        self.uuid = genUUID()
        # FIXME: "NID_%s_UUID" should probably not be hardcoded? Retrieve uuid from node-object?
        lctl(self.node, ['attach osc %s %s' % (self.name, self.uuid), 'setup %s "NID_%s_UUID"' % (self.ost.uuid, self.ost.node)])
    def __del__(self):
        """
        The destructor will remove the device.
        """
        # Tear down the device first, then drop the module use-count
        # (which may rmmod obdecho) - this order matters.
        lctl(self.node, ['cfg_device %s' % self.name, 'cleanup', 'detach'])
        self.module.decUse()
+
class ExistingOBDFilter:
    """
    Reference to an obdfilter-device that already exists.

    The device itself is not manipulated in any way - this class is
    only used to refer to the device (e.g. as an EchoClient target).
    """
    def __init__(self, node, name):
        """
        Create a reference to the device.
        @node - the node where the device lives
        @name - the name of the device
        """
        self.node, self.name = node, name
+
class EchoClient:
    """
    Class wrapping echo_client functionality.

    An echo_client device attaches to an obdfilter/osc device and is used
    to create/destroy objects on it and to run bulk I/O (test_brw) tests.
    """
    def __init__(self, node, name, target):
        """
        Create a new echo_client.
        @node - the node to run the echo_client on
        @name - the name of the new echo_client
        @target - The obdfilter / osc device to connect to. This should
            be an OSC, ExistingOSC or ExistingOBDFilter-object on the same node.
        """
        self.node = node
        self.name = name
        self.target = target
        self.objects = []  # object-ids created and not yet destroyed
        self.log = logging.getLogger("EchoClient")
        self.module = modules.getKernelModule(self.node, "obdecho")
        self.module.incUse()
        self.uuid = genUUID()
        lctl(self.node, ['attach echo_client %s %s' % (self.name, self.uuid), 'setup %s' % self.target.name])
        self.devicenum = find_device(self.node, 'echo_client', self.name)
        self.log.debug("EchoClient created: %s" % self.name)

    def __del__(self):
        """
        Remove the echo_client, and unload the obdecho module if it is
        no longer in use.  Destroys all objects that have been created
        and not yet destroyed.
        """
        self.log.debug("EchoClient destructor: destroying objects")
        # Pass a copy: destroyObjects mutates self.objects while iterating.
        self.destroyObjects(self.objects[:])
        self.log.debug("EchoClient destructor: detach echo_client:")
        lctl(self.node, ['cfg_device %s' % self.name, 'cleanup', 'detach'])
        self.log.debug("EchoClient destructor: Unload modules:")
        self.module.decUse()
        self.log.debug("EchoClient destructor: Done")

    def createObjects(self, num):
        """
        Create new objects on this device.
        @num - the number of objects to create
        Returns a list of object-ids.
        Raises Exception if the lctl output cannot be parsed.
        """
        oids = []
        lines = lctl(self.node, ['device %d' % self.devicenum, 'create %d' % num])
        # First output line is a summary; the following lines name the
        # new objects.  Bug fix: on a bad summary, report the summary
        # line itself (lines[0]) - the old code indexed lines[1], which
        # showed the wrong line and raised IndexError when lctl printed
        # only a single line.
        if lines[0].strip() != 'create: %d objects' % num:
            raise Exception("Invalid output from lctl(2): %s" % repr(lines[0]))
        pattern = re.compile('create: #(.*) is object id 0x(.*)')
        for line in lines[1:]:
            match = pattern.match(line)
            if match is None:
                # Explicit diagnostic instead of an AttributeError on None.
                raise Exception("Invalid output from lctl(2): %s" % repr(line))
            i, oid = match.groups()
            if int(i) != len(oids)+1:
                raise Exception("Expected to find object nr %d - found object nr %d:" % ( len(oids)+1, int(i)))
            # int() auto-promotes to long on Python 2 for large ids, and is
            # forward-compatible with Python 3 (where long() is gone).
            oids.append(int(oid, 16))
        self.objects += oids
        return oids

    def destroyObjects(self, objects):
        """
        Destroy a set of objects.
        @objects - list of object ids
        """
        for oid in objects:
            lctl(self.node, ['device %d' % self.devicenum, 'destroy %d' % oid])
            self.objects.remove(oid)

    def startTestBRW(self, oid, threads=1, num=1, test='w', pages=1):
        """
        Start a test_brw, and return a remotePopen3-object for the
        running test-process.
        Do <num> bulk read/writes on OST object <objid> (<npages> per I/O).
        @oid - objectid for the first object to use.
            (each thread will use one object)
        @threads - number of threads to use
        @num - number of io-operations to perform
        @test - what test to perform ('w' or 'r', for write or read-tests)
        @pages - number of pages to use in each io-request. (4KB on ia32)
        """
        cmd = 'lctl --threads %d q %d test_brw %d %s q %d %d' % \
              (threads, self.devicenum, num, test, pages, oid)
        self.log.debug("startTestBRW: %s:%s" % (self.node, cmd))
        remote = remotePopen3(self.node, cmd, True)
        return remote

    def testBRW(self, oid, threads=1, num=1, test='w', pages=1):
        """
        Do <num> bulk read/writes on OST object <objid> (<npages> per I/O),
        waiting for completion.
        @oid - objectid for the first object to use.
            (each thread will use one object)
        @threads - number of threads to use
        @num - number of io-operations to perform
        @test - what test to perform ('w' or 'r', for write or read-tests)
        @pages - number of pages to use in each io-request. (4KB on ia32)
        Raises Exception if the test exits non-zero.
        """
        # Use a distinct name for the process handle; the old code
        # reassigned the 'test' parameter, which was confusing.
        remote = self.startTestBRW(oid, threads, num, test, pages)
        exit_code = remote.wait()
        if exit_code != 0:
            raise Exception("test_brw failed with exitcode %d." % exit_code)
+
class ParallelTestBRWResult:
    """
    Class to hold the result of one ParallelTestBRW run and compute
    aggregate bandwidth figures from the per-client timings.
    """
    def __init__(self, threads, num, testtype, pages, pagesize, numclients):
        """
        Prepare the result-object with the constants for the test.
        threads -- number of threads (per client)
        num -- number of io-operations for each thread
        testtype -- what kind of test ('w' for write-test or 'r' for read-test)
        pages -- number of pages in each request
        pagesize -- pagesize in KB (assumes same page-size across all clients)
        numclients -- number of clients used in the tests
        """
        self.threads = threads
        self.num = num
        self.testtype = testtype
        self.pages = pages
        self.pagesize = pagesize
        self.numclients = numclients
        self.starttimes = {}   # clientid -> starttime (seconds since epoch)
        self.finishtimes = {}  # clientid -> finishtime
        self.exitcodes = {}    # clientid -> exit-code
        self.runtimes = {}     # clientid -> runtime (seconds)
        self.stdout = {}       # clientid -> output
        self.stderr = {}       # clientid -> errors

    def registerStart(self, clientid):
        """
        Register that this client is about to start.
        clientid -- the id of the client
        """
        self.starttimes[clientid] = time.time()

    def registerFinish(self, clientid, exitcode, stdout, stderr):
        """
        Register that this client just finished.
        clientid -- the id of the client
        exitcode -- the exitcode of this test
        stdout -- the output from the test
        stderr -- the errors from the test
        """
        self.finishtimes[clientid] = time.time()
        self.exitcodes[clientid] = exitcode
        self.stdout[clientid] = stdout
        self.stderr[clientid] = stderr
        self.runtimes[clientid] = self.finishtimes[clientid] - self.starttimes[clientid]

    def getTestType(self):
        """
        Return the name of the test-type ('w' for write-tests and 'r' for read-tests).
        """
        return self.testtype

    def verifyExitCodes(self):
        """
        Verify that all tests finished successfully. Raise exception if they didn't.
        """
        # list() keeps this working on Python 3, where dict.values() is a
        # view without a count() method (it is a no-op on Python 2).
        if list(self.exitcodes.values()).count(0) != self.numclients:
            raise Exception("test_brw failed!")

    def getTotalTime(self):
        """
        Return the number of seconds used for the test
        (earliest start to latest finish across all clients).
        """
        return max(self.finishtimes.values()) - min(self.starttimes.values())

    def getTotalSize(self):
        """
        Return total amount of data transfered by all clients (in KB).
        """
        return self.numclients * self.num * self.pages * self.threads * self.pagesize

    def getTotalBandwidth(self):
        """
        Return the total (aggregate) bandwidth for the test, in KB/s.
        """
        return self.getTotalSize() / self.getTotalTime()

    def getMaxBandwidth(self):
        """
        Return the bandwidth of the fastest client, in KB/s.
        """
        # Renamed the local from 'time' to avoid shadowing the time module.
        fastest = min(self.runtimes.values())
        return self.num * self.pages * self.threads * self.pagesize / fastest

    def getMinBandwidth(self):
        """
        Return the bandwidth of the slowest client, in KB/s.
        (Docstring corrected: the old one claimed "fastest".)
        """
        slowest = max(self.runtimes.values())
        return self.num * self.pages * self.threads * self.pagesize / slowest
+
+
+
def ParallelTestBRW(echo_clients, threads=1, size=100, tests=('w', 'r'), rsz=1024, pagesize=4):
    """
    Run a test_brw in parallel on a set of echo_clients.
    @echo_clients -- list of EchoClient-objects to run tests on
    @threads -- number of threads to use per client
    @size -- amount of data to transfer for each thread (MB)
    @tests -- list of tests to perform ('w' or 'r', for write or read-tests)
    @rsz -- Amount of data (in KB) for each request. Default, 1024.
    @pagesize - Size of each page (KB)
    Returns a list of ParallelTestBRWResult-objects, one per entry in tests.
    """
    # NOTE(review): integer division (Python 2) - assumes rsz is a multiple
    # of pagesize and size*1024 a multiple of rsz*threads; confirm callers.
    pages = rsz / pagesize
    num = size * 1024 / rsz / threads
    # Create objects:
    objects = {}
    for client in echo_clients:
        objects[client] = client.createObjects(threads)
        # Verify that the objectids are consecutive: each thread uses
        # first-oid + thread-index, so the ids must form a contiguous range.
        for i in range(len(objects[client])-1):
            if objects[client][i+1] != objects[client][i] + 1:
                raise Exception("Non-consequative objectids on client %s: %s" % (client, objects[client]))
    # Run tests:
    results = []
    for test in tests:
        result = ParallelTestBRWResult(threads, num, test, pages, pagesize, len(echo_clients))
        pids = {} # pid to clientid mapping
        remotes = {} # clientid to RemotePopen3-objects
        # Start tests on every client before waiting on any of them:
        clientid = 0
        for client in echo_clients:
            first_obj = objects[client][0]
            result.registerStart(clientid)
            remote = client.startTestBRW(first_obj, threads, num, test, pages)
            remotes[clientid] = remote
            pids[remote.pid] = clientid
            clientid += 1
        # Wait for tests to finish:
        # NOTE(review): os.wait() reaps ANY child process; assumes no
        # unrelated children exist, otherwise the pids lookup would fail.
        # The raw wait-status (not a plain exit code) is stored in the
        # result; verifyExitCodes only checks it against 0 - confirm.
        while pids:
            pid, status = os.wait()
            clientid = pids[pid]
            remote = remotes[clientid]
            # Workaround for leak in popen2, see patch #816059 at python.sf.net:
            popen2._active.remove(remote)
            result.registerFinish(clientid, status, remote.fromchild.read(), remote.childerr.read())
            del pids[pid]
        results.append(result)
    # Clean up objects:
    for client in echo_clients:
        client.destroyObjects(objects[client])
    return results
+
+
+
def timeit(func, *args, **kwargs):
    """
    Helper-function to easily time the execution of a function.
    @func - the function to run
    @*args - positional arguments
    @**kwargs - keyword arguments
    Returns the number of seconds used executing the function
    (the function's own return value is discarded).

    Example:
    timeit(max, 1, 2, 5, 2) - will time how long it takes to run max(1,2,5,2)
    """
    started = time.time()
    func(*args, **kwargs)
    return time.time() - started
+
--- /dev/null
+<?xml version="1.0" ?>
+
+<scamodule>
+ <build>
+ <step type="setup.py" />
+ </build>
+
+ <packaging>
+ <package />
+ </packaging>
+</scamodule>
\ No newline at end of file
--- /dev/null
from distutils.core import setup

# Packaging for the lustre-iokit obdsurvey tool: installs the support
# library as a plain module, the survey script, and the docs.
# NOTE(review): distutils is deprecated (removed in Python 3.12);
# consider migrating to setuptools when the codebase is modernized.
setup(name='lustre-iokit',
      py_modules=['lustre_obdsurveylib'],
      scripts=['lustre_obdsurvey.py'],
      data_files=[
          ('/usr/share/lustre-iokit', ['README', 'LICENSE'])
      ]
      )
-WARNING: Running sgp_dd will ERASE the contents of the disk devices.
- This is NOT to be run on any OST where you care about any data
- or you are not expecting to reformat the filesystem afterward.
Requirements
------------
# customize per survey
# the SCSI devices to measure
-scsidevs=${scsidevs:-"/dev/sde /dev/sdh"}
+scsidevs="/dev/sde /dev/sdh"
# result file prefix. date/time+hostname makes unique
# NB ensure the path exists if it includes subdirs
-rslt=${rslt:-"/tmp/sgpdd_survey_`date +%F@%R`_`uname -n`"}
+rslt=/tmp/sgpdd_survey_`date +%F@%R`_`uname -n`
# what to do (read or write)
actions="write read"
rszhi=1024
# Concurrent regions per device
-crglo=${crglo:-1}
-crghi=${crghi:-256}
+crglo=1
+crghi=256
# threads to share between concurrent regions per device
# NB survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE
-thrlo=${thrlo:-1}
-thrhi=${thrhi:-4096}
+thrlo=1
+thrhi=4096
#####################################################################
# leave the rest of this alone unless you know what you're doing...
devs=()
for d in $scsidevs; do
devs[$i]=`sg_map | awk "{if ($ 2 == \"$d\") print $ 1}"`
- if [ -z "${devs[$i]}" ]; then
+ if [ -z "${devs[$i]}" ]; then
echo "Can't find SG device for $d"
exit 1
fi