X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=4f8991336528ce16853ae83d9048ef8133454748;hp=ac967debcd76e77d9b64a36c73f9d3dbdc442364;hb=d8d0244bce0afe54e18d43608f2f34b52a58dd7e;hpb=d715c24475bf1717d272efdf11d8df619a1acf8c diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index ac967de..4f89913 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -38,7 +38,7 @@ fi # result file prefix (date/time + hostname makes unique) # NB ensure path to it exists -rslt=${rslt:-"/home_nfs/eeb/obdfilter_survey_`date +%F@%R`_`uname -n`"} +rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} # lustre root (if running with own source tree) lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"} @@ -80,7 +80,12 @@ restart_thr=1 restart_nobj=1 # machine's page size (K) -PAGE_SIZE=${PAGE_SIZE:-16} +if [ -z "$PAGE_SIZE" ]; then + if which python >/dev/null; then + PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python` + fi +fi +PAGE_SIZE=${PAGE_SIZE:-4} # max buffer_mem (total_threads * buffer size) # (to avoid lctl ENOMEM problems) @@ -96,10 +101,10 @@ custom_remote_shell () { here=`pwd` # Hop on to the remote node, chdir to 'here' and run the given # commands. One of the following will probably work. - #ssh $host "cd $here; $cmds" + ssh $host "cd $here; $cmds" #rsh $host "cd $here; $cmds" # we have to remove the leading `uname -n`: from pdsh output lines - pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://' + #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://' } ##################################################################### @@ -147,9 +152,9 @@ remote_shell () { shift cmds="$*" if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then - eval "$cmds" + eval "$cmds" else - custom_remote_shell $host "$cmds" + custom_remote_shell $host "$cmds" fi } @@ -161,11 +166,11 @@ obdecho_loaded() { load_obdecho () { local host=$1 if [ -z "$lustre_root" ]; then - remote_shell $host $modprobe obdecho + remote_shell $host $modprobe obdecho elif [ -f ${lustre_root}/obdecho/obdecho.ko ]; then - remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko + remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko else - remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o + remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o fi } @@ -179,8 +184,8 @@ get_devno () { local type=$2 local name=$3 remote_shell $host $lctl device_list | \ - awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\ - print \$1; exit}}" + awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\ + print \$1; exit}}" } get_ec_devno () { @@ -188,34 +193,34 @@ get_ec_devno () { local client_name="$2" local ost_name="$3" if [ -z "$client_name" ]; then - if [ -z "$ost_name" ]; then - echo "client and ost name both null" 1>&2 - return - fi - client_name=${ost_name}_echo_client + if [ -z "$ost_name" ]; then + echo "client and ost name both null" 1>&2 + return + fi + client_name=${ost_name}_echo_client fi ec=`get_devno $host echo_client $client_name` if [ -n "$ec" ]; then - echo $ec $client_name 0 - return + echo $ec $client_name 0 + return fi if [ -z "$ost_name" ]; then - echo "no echo client and ost_name not set" 1>&2 - return + echo "no echo client and ost_name not set" 1>&2 + return fi ost=`get_devno $host obdfilter $ost_name` if [ -z "$ost" ]; then - echo "OST $ost_name not setup" 1>&2 - return + echo "OST $ost_name not setup" 1>&2 + return fi remote_shell $host "$lctl <&2 - return + echo "Can't setup echo client" 1>&2 + return fi echo $ec $client_name 1 } @@ -224,9 +229,9 @@ teardown_ec_devno () { local host=$1 local client_name=$2 remote_shell $host "$lctl < $str" >> $workf - print_summary -n "$str" - if ((total_thr * actual_rsz > max_buffer_mem)); then - print_summary "Too much buffer space" - continue - fi - # create the objects - tmpf="${workf}_tmp" - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - client_name="${host}:${client_names[$idx]}" - echo "=============> Create $nobj on $client_name" >> $workf - first_obj=`create_objects $host $devno $nobj $tmpf` - cat $tmpf >> $workf - rm $tmpf - if [ $first_obj = "ERROR" ]; then - print_summary "created object #s on $client_name not contiguous" - exit 1 - fi - first_objs[$idx]=$first_obj - done - # run tests - for test in ${tests[@]}; do + for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do + if ((thr % nobj)); then + continue + fi + # restart? + if [ -n "$restart_rsz" -a\ + -n "$restart_nobj" -a\ + -n "$restart_thr" ]; then + if ((rsz < restart_rsz ||\ + (rsz == restart_rsz &&\ + (nobj < restart_nobj ||\ + (nobj == restart_nobj &&\ + thr < restart_thr))))); then + continue; + fi + fi + # compute parameters + total_thr=$((ndevs*thr)) + total_nobj=$((ndevs*nobj)) + pages=$((rsz/PAGE_SIZE)) + actual_rsz=$((pages*PAGE_SIZE)) + count=$((size*1024/(actual_rsz*thr))) + actual_size=$((actual_rsz*count*thr)) + total_size=$((actual_size*ndevs)) + # show computed parameters + str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \ + $ndevs $total_size $actual_rsz $total_nobj $total_thr` + echo "=======================> $str" >> $workf + print_summary -n "$str" + if ((total_thr * actual_rsz > max_buffer_mem)); then + print_summary "Too much buffer space" + continue + fi + # create the objects + tmpf="${workf}_tmp" + for ((idx=0; idx < ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + client_name="${host}:${client_names[$idx]}" + echo "=============> Create $nobj on $client_name" >> $workf + first_obj=`create_objects $host $devno $nobj $tmpf` + cat $tmpf >> $workf + rm $tmpf + if [ $first_obj = "ERROR" ]; then + print_summary "created object #s on $client_name not contiguous" + exit 1 + fi + first_objs[$idx]=$first_obj + done + # run tests + for test in ${tests[@]}; do declare -a pidarray for host in ${unique_hosts[@]}; do echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host} done - print_summary -n "$test " - # create per-host script files - for host in ${unique_hosts[@]}; do - echo -n > ${cmdsf}_${host} - done - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - tmpfi="${tmpf}_$idx" - first_obj=${first_objs[$idx]} - echo >> ${cmdsf}_${host} \ - "$lctl > $tmpfi 2>&1 \\ - --threads $thr -$snap $devno \\ - test_brw $count `testname2type $test` q $pages ${thr}t${first_obj} &" - + print_summary -n "$test " + # create per-host script files + for host in ${unique_hosts[@]}; do + echo -n > ${cmdsf}_${host} + done + for ((idx=0; idx < ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + tmpfi="${tmpf}_$idx" + first_obj=${first_objs[$idx]} + thr_per_obj=$((${thr}/${nobj})) + echo >> ${cmdsf}_${host} \ + "$lctl > $tmpfi 2>&1 \\ + --threads $thr -$snap $devno \\ + test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &" done pidcount=0 for host in ${unique_hosts[@]}; do @@ -507,51 +512,51 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do for host in ${unique_hosts[@]}; do rm ${cmdsf}_${host} done - # compute bandwidth from total data / elapsed time - str=`awk "BEGIN {printf \"%7.2f \",\ - $total_size / (( $t1 - $t0 ) * 1024)}"` - print_summary -n "$str" - # collect/check individual OST stats - echo -n > $tmpf - for ((idx=0; idx < ndevs; idx++)); do - client_name="${host_names[$idx]}:${client_names[$idx]}" - tmpfi="${tmpf}_$idx" - echo "=============> $test $client_name" >> $workf - cat $tmpfi >> $workf - get_stats $tmpfi >> $tmpf - rm $tmpfi - done - # compute/display global min/max stats - echo "=============> $test global" >> $workf - cat $tmpf >> $workf - stats=(`get_global_stats $tmpf`) - rm $tmpf - if ((stats[0] <= 0)); then - if ((stats[0] < 0)); then - str=`printf "%17s " ERROR` - else - str=`printf "%17s " SHORT` - fi - else - str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\ - (${stats[1]} * $actual_rsz)/1024,\ - (${stats[2]} * $actual_rsz)/1024; exit}"` - fi - print_summary -n "$str" - done - print_summary "" - # destroy objects we created - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - client_name="${host}:${client_names[$idx]}" - first_obj=${first_objs[$idx]} - echo "=============> Destroy $nobj on $client_name" >> $workf - destroy_objects $host $devno $first_obj $nobj $tmpf - cat $tmpf >> $workf - rm $tmpf - done - done + # compute bandwidth from total data / elapsed time + str=`awk "BEGIN {printf \"%7.2f \",\ + $total_size / (( $t1 - $t0 ) * 1024)}"` + print_summary -n "$str" + # collect/check individual OST stats + echo -n > $tmpf + for ((idx=0; idx < ndevs; idx++)); do + client_name="${host_names[$idx]}:${client_names[$idx]}" + tmpfi="${tmpf}_$idx" + echo "=============> $test $client_name" >> $workf + cat $tmpfi >> $workf + get_stats $tmpfi >> $tmpf + rm $tmpfi + done + # compute/display global min/max stats + echo "=============> $test global" >> $workf + cat $tmpf >> $workf + stats=(`get_global_stats $tmpf`) + rm $tmpf + if ((stats[0] <= 0)); then + if ((stats[0] < 0)); then + str=`printf "%17s " ERROR` + else + str=`printf "%17s " SHORT` + fi + else + str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\ + (${stats[1]} * $actual_rsz)/1024,\ + (${stats[2]} * $actual_rsz)/1024; exit}"` + fi + print_summary -n "$str" + done + print_summary "" + # destroy objects we created + for ((idx=0; idx < ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + client_name="${host}:${client_names[$idx]}" + first_obj=${first_objs[$idx]} + echo "=============> Destroy $nobj on $client_name" >> $workf + destroy_objects $host $devno $first_obj $nobj $tmpf + cat $tmpf >> $workf + rm $tmpf + done + done done done @@ -559,7 +564,7 @@ done for ((i=0; i