X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=c4980c980287622929b4c7501dcece63eb2794a4;hb=784bbd385b22b6578bb98691f3bb130a1ed84dc6;hp=7ce397ff1f9dbb7a3207bafed64e1f0bace1d73d;hpb=e41b9977627f6082d060091f7eabcd55ce45e9ec;p=fs%2Flustre-release.git

diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey
index 7ce397f..c4980c9 100755
--- a/lustre-iokit/obdfilter-survey/obdfilter-survey
+++ b/lustre-iokit/obdfilter-survey/obdfilter-survey
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+set -e
 ######################################################################
 # customize per survey
 
@@ -36,7 +36,7 @@
 # client systems on which this test runs.]
 
 # include library
-source libecho
+source $(dirname $0)/libecho
 
 # The following variables can be set in the environment, or on the
 # command line
@@ -48,6 +48,11 @@ rslt=${rslt:-"$rslt_loc/obdfilter_survey_`date +%F@%R`_`uname -n`"}
 # Set this true to check file contents
 verify=${verify:-0}
 
+# test targets
+targets=${targets:-""}
+# test case
+case=${case:-"disk"}
+
 # total size (MBytes) per obd instance
 # large enough to avoid cache effects
 # and to make test startup/shutdown overhead insignificant
@@ -56,6 +61,7 @@ size=${size:-16384}
 # record size (KBytes) ( 7168 max)
 rszlo=${rszlo:-1024}
 rszhi=${rszhi:-1024}
+rszmax=${rszmax:-4096}
 
 # number of objects per OST
 nobjlo=${nobjlo:-1}
@@ -66,10 +72,12 @@ nobjhi=${nobjhi:-16}
 thrlo=${thrlo:-1}
 thrhi=${thrhi:-16}
 
+export LC_ALL=POSIX
+
 # End of variables
 
 # create a set of objects, check there are 'n' contiguous ones and
-# return the first or 'ERROR'
+# echo out the first or 'ERROR'
 # parameter: 1. hostname
 #            2. device number
 #            3. number of object to be created (specified by user)
@@ -84,36 +92,37 @@ create_objects () {
 	prev=0
 	count=0
 	error=0
-	while read line; do
-		echo "$line" | grep -q 'is object id'
-		if [ $? -ne 0 ]; then
-			continue
-		fi
-		if [ $first -eq 0 ]; then
-			first=$(echo $line | awk '{print $6}')
-			first=$(printf "%d" $first)
-			prev=$first
-			count=1
-		else
-			obj=$(echo $line | awk '{print $6}')
-			obj=$(printf "%d" $obj)
-			diff=$((obj - (prev+1)))
-			if [ $diff -ne 0 ]; then
-				error=1
-			fi
-			prev=$obj
-			count=$((count+1))
-		fi
-	done < $rfile
+
+	# Count number of objects (lines containing " is object id "), and
+	# ensure that the objects numbers are sequential.
+	#
+	exec 3< $rfile
+	while read -u3 line; do
+		case "$line" in
+		( *' is object id '* )
+			set -- $line
+			if test $(( count += 1 )) -gt 1 ; then
+				(( $6 != prev + 1 )) && error=1
+			else
+				first=$(( $6 + 0 ))
+			fi
+			prev=$6
+			;;
+		esac
+	done
+	exec 3<&-
+
 	if [ $nobj -ne $count ]; then
 		echo "ERROR: $nobj != $count" >&2
 		cat $rfile >&2
 		echo "ERROR"
 	elif [ $error -ne 0 ]; then
-                echo "ERROR: non contiguous objs found" >&2
+		echo "ERROR: non contiguous objs found" >&2
+		echo ERROR
 	else
 		echo $first
 	fi
+	return $error
 }
 
 # destroys all objects created in create_objects routine
@@ -132,6 +141,7 @@ get_stats () {
 	gawk < $rfile \
 	'/^Selected device [0-9]+$/ {n = 0; next}\
 	/error/ {n = -1; exit}\
+	/^Total/ {next}\
 	/^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \
 	if (n == 1 || v < min) min = v;\
 	if (n == 1 || v > max) max = v;\
@@ -166,6 +176,15 @@ testname2type () {
 	esac
 }
 
+# for "echo_client + obdfilter" case, "prep + commit" mode should be used
+# for "echo_client + osc" case, "BRW" mode should be used
+testcase2mode() {
+	case $case in
+	disk)	echo "p$1";;
+	*)	echo "g";;
+	esac
+}
+
 print_summary () {
 	if [ "$1" = "-n" ]; then
 		minusn=$1; shift
@@ -176,12 +195,36 @@ print_summary () {
 	echo $minusn "$*"
 }
 
+version_code() {
+	# split arguments like "2.3.61" into "2", "3", "61"
+	eval set -- $(tr "[:punct:]" " " <<< $*)
+	echo -n "$((($1 << 16) | ($2 << 8) | $3))"
+}
+
+get_lustre_version() {
+	local host=${1:-${unique_hosts[0]}}
+	remote_shell $host $lctl get_param -n version |
+		awk '/^lustre:/ {print $2}'
+}
+
+# Check whether the record size (KBytes) exceeds the maximum bulk I/O RPC size
+# or not.
+check_record_size() {
+	[ $(version_code $(get_lustre_version)) -lt $(version_code 2.3.61) ] &&
+		rszmax=1024
+
+	if [ "$rszhi" -gt "$rszmax" ]; then
+		echo "Test disk case support maximum ${rszmax}KB IO data" \
+		     "(rszhi=$rszhi is too big), please use a smaller value."
+		return 1
+	fi
+	return 0
+}
+
 # Customisation variables
 #####################################################################
 # One can change variable values in this section as per requirements
 
-targets=${targets:-""}
-case=${case:-"disk"}
 if [ -n "$targets" ]; then
 	declare -a ost_names
 	declare -a client_names
@@ -268,6 +311,11 @@ for trgt in $targets; do
 	client_names[$ndevs]=${str[1]}
 	ndevs=$((ndevs+1))
 done
+if [ $case == "disk" ]; then
+	for ((i = 0; i < $ndevs; i++)); do
+		ost_names[$i]=${client_names[$i]}
+	done
+fi
 if [ $case == "netdisk" ]; then
 	if [ "$targets" ]; then
 		for ((i = 0; i < $ndevs; i++)); do
@@ -355,25 +403,20 @@ fi
 # get vmstat started
 # disable portals debug and get obdecho loaded on all relevant hosts
 unique_hosts=(`unique ${host_names[@]}`)
+load_obdechos
+
+if [ $case == "disk" ]; then
+	check_record_size || cleanup ${PIPESTATUS[0]}
+fi
+
 pidcount=0
 for host in ${unique_hosts[@]}; do
 	host_vmstatf=${vmstatf}_${host}
 	echo -n > $host_vmstatf
-	remote_shell $host "vmstat 5 >> $host_vmstatf" &
+	remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
 	pid=$!
 	vmstatpids[$pidcount]=$pid
 	pidcount=$((pidcount+1))
-	do_unload_obdecho[$host]=0
-	if obdecho_loaded $host; then
-		continue
-	fi
-	load_obdecho $host
-	if obdecho_loaded $host; then
-		do_unload_obdecho[$host]=1
-		continue
-	fi
-	echo "Can't load obdecho on $host" 1>&2
-	exit 1
 done
 # get all the echo_client device numbers and names
 for ((i=0; i < $ndevs; i++)); do
@@ -390,6 +433,8 @@ if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then
 	echo "no devices or hosts specified"
 	cleanup 0 $clean_srv_OSS $cleanup_oscs
 fi
+# Buffers will be spread out among all hosts, so allow for that
+max_buffer_mem=$(( ${max_buffer_mem} * ${#unique_hosts[@]} ))
 print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)"
 for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do
 	for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do
@@ -462,7 +507,8 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do
 					echo >> ${cmdsf}_${host} \
 						"$lctl > $tmpfi 2>&1 \\
 						--threads $thr -$snap $devno \\
-						test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &"
+						test_brw $count `testname2type $test` q $pages \\
+						${thr_per_obj}t${first_obj} `testcase2mode $pages` &"
 				done
 				pidcount=0
 				for host in ${unique_hosts[@]}; do
@@ -501,9 +547,10 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do
 					tmpfi="${tmpf}_$idx"
 					echo "=============> $test $client_name" >> $workf
 					host="${host_names[$idx]}"
-					remote_shell $host cat $tmpfi >> $workf
-					get_stats $tmpfi >> $tmpf
-					rm $tmpfi
+					remote_shell $host cat $tmpfi > ${tmpfi}_local
+					cat ${tmpfi}_local >> $workf
+					get_stats ${tmpfi}_local >> $tmpf
+					rm -f $tmpfi ${tmpfi}_local
 				done
 				# compute/display global min/max stats
 				echo "=============> $test global" >> $workf