X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=c4980c980287622929b4c7501dcece63eb2794a4;hb=784bbd385b22b6578bb98691f3bb130a1ed84dc6;hp=7b83a8e6fcb338d2548f9c3b6bc02dd69dc0e8e8;hpb=df045894c39be909fd2627eee0ed92b76a64a1dd;p=fs%2Flustre-release.git diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index 7b83a8e..c4980c9 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -1,5 +1,5 @@ #!/bin/bash - +set -e ###################################################################### # customize per survey @@ -36,7 +36,7 @@ # client systems on which this test runs.] # include library -source libecho +source $(dirname $0)/libecho # The following variables can be set in the environment, or on the # command line @@ -48,6 +48,11 @@ rslt=${rslt:-"$rslt_loc/obdfilter_survey_`date +%F@%R`_`uname -n`"} # Set this true to check file contents verify=${verify:-0} +# test targets +targets=${targets:-""} +# test case +case=${case:-"disk"} + # total size (MBytes) per obd instance # large enough to avoid cache effects # and to make test startup/shutdown overhead insignificant @@ -56,6 +61,7 @@ size=${size:-16384} # record size (KBytes) ( 7168 max) rszlo=${rszlo:-1024} rszhi=${rszhi:-1024} +rszmax=${rszmax:-4096} # number of objects per OST nobjlo=${nobjlo:-1} @@ -66,10 +72,12 @@ nobjhi=${nobjhi:-16} thrlo=${thrlo:-1} thrhi=${thrhi:-16} +export LC_ALL=POSIX + # End of variables # create a set of objects, check there are 'n' contiguous ones and -# return the first or 'ERROR' +# echo out the first or 'ERROR' # parameter: 1. hostname # 2. device number # 3. number of object to be created (specified by user) @@ -84,36 +92,37 @@ create_objects () { prev=0 count=0 error=0 - while read line; do - echo "$line" | grep -q 'is object id' - if [ $? -ne 0 ]; then - continue - fi - if [ $first -eq 0 ]; then - first=$(echo $line | awk '{print $6}') - first=$(printf "%d" $first) - prev=$first - count=1 - else - obj=$(echo $line | awk '{print $6}') - obj=$(printf "%d" $obj) - diff=$((obj - (prev+1))) - if [ $diff -ne 0 ]; then - error=1 - fi - prev=$obj - count=$((count+1)) - fi - done < $rfile + + # Count number of objects (lines containing " is object id "), and + # ensure that the objects numbers are sequential. + # + exec 3< $rfile + while read -u3 line; do + case "$line" in + ( *' is object id '* ) + set -- $line + if test $(( count += 1 )) -gt 1 ; then + (( $6 != prev + 1 )) && error=1 + else + first=$(( $6 + 0 )) + fi + prev=$6 + ;; + esac + done + exec 3<&- + if [ $nobj -ne $count ]; then echo "ERROR: $nobj != $count" >&2 cat $rfile >&2 echo "ERROR" elif [ $error -ne 0 ]; then - echo "ERROR: non contiguous objs found" >&2 + echo "ERROR: non contiguous objs found" >&2 + echo ERROR else echo $first fi + return $error } # destroys all objects created in create_objects routine @@ -132,6 +141,7 @@ get_stats () { gawk < $rfile \ '/^Selected device [0-9]+$/ {n = 0; next}\ /error/ {n = -1; exit}\ + /^Total/ {next}\ /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \ if (n == 1 || v < min) min = v;\ if (n == 1 || v > max) max = v;\ @@ -166,6 +176,15 @@ testname2type () { esac } +# for "echo_client + obdfilter" case, "prep + commit" mode should be used +# for "echo_client + osc" case, "BRW" mode should be used +testcase2mode() { + case $case in + disk) echo "p$1";; + *) echo "g";; + esac +} + print_summary () { if [ "$1" = "-n" ]; then minusn=$1; shift @@ -176,12 +195,36 @@ print_summary () { echo $minusn "$*" } +version_code() { + # split arguments like "2.3.61" into "2", "3", "61" + eval set -- $(tr "[:punct:]" " " <<< $*) + echo -n "$((($1 << 16) | ($2 << 8) | $3))" +} + +get_lustre_version() { + local host=${1:-${unique_hosts[0]}} + remote_shell $host $lctl get_param -n version | + awk '/^lustre:/ {print $2}' +} + +# Check whether the record size (KBytes) exceeds the maximum bulk I/O RPC size +# or not. +check_record_size() { + [ $(version_code $(get_lustre_version)) -lt $(version_code 2.3.61) ] && + rszmax=1024 + + if [ "$rszhi" -gt "$rszmax" ]; then + echo "Test disk case support maximum ${rszmax}KB IO data" \ + "(rszhi=$rszhi is too big), please use a smaller value." + return 1 + fi + return 0 +} + # Customisation variables ##################################################################### # One can change variable values in this section as per requirements -targets=${targets:-""} -case=${case:-"disk"} if [ -n "$targets" ]; then declare -a ost_names declare -a client_names @@ -268,33 +311,47 @@ for trgt in $targets; do client_names[$ndevs]=${str[1]} ndevs=$((ndevs+1)) done +if [ $case == "disk" ]; then + for ((i = 0; i < $ndevs; i++)); do + ost_names[$i]=${client_names[$i]} + done +fi if [ $case == "netdisk" ]; then - if [ "$targets" ]; then - for ((i = 0; i < $ndevs; i++)); do - setup_osc_for_remote_ost ${host_names[$i]} ${client_names[$i]} $i - cleanup_oscs="$cleanup_oscs ${client_names[$i]}_osc" - host_names[$i]=localhost - done - fi - declare -a osc_names - declare -a osc_uuids - osc_names_str=$(lctl dl |grep osc | awk "{if (\$2 == \"UP\" && \$3 == \"osc\") {print \$4} }") - count=0; - for name in $osc_names_str; do - osc_names[$count]=$name - count=$((count+1)) - done - osc_uuid_str=$(lctl dl |grep osc | awk "{if (\$2 == \"UP\" && \$3 == \"osc\") {print \$5} }") - count=0; - for uuid in $osc_uuid_str; do - osc_uuids[$count]=$uuid - count=$((count+1)) - done - for (( i = 0; i < $count; i++ )) - do - ec_using_osc ${osc_names[$i]} - done - echo_clients=$(lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") + if [ "$targets" ]; then + for ((i = 0; i < $ndevs; i++)); do + setup_osc_for_remote_ost ${host_names[$i]} \ + ${client_names[$i]} $i + osc_name=${client_names[$i]}_osc + ec_using_osc $osc_name + cleanup_oscs="$cleanup_oscs $osc_name" + done + else + client_names_str=$($lctl dl | grep -v mdt | \ + awk '{if ($2 == "UP" && $3 == "osc") {print $4} }') + count=0; + for name in $client_names_str; do + client_names[$count]=`echo $name | sed 's/-osc-.*$//'` + count=$((count+1)) + done + + host_names_str=$($lctl dl -t | grep -v mdt | \ + awk '{if ($2 == "UP" && $3 == "osc") {print $7} }') + count=0; + for name in $host_names_str; do + host_names[$count]=`echo $name | sed 's/@.*$//'` + count=$((count+1)) + done + + for (( i = 0; i < $count; i++ )) do + setup_osc_for_remote_ost ${host_names[$i]} \ + ${client_names[$i]} $i + osc_name=${client_names[$i]}_osc + ec_using_osc $osc_name + cleanup_oscs="$cleanup_oscs $osc_name" + done + fi + + echo_clients=$($lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") cnt=0; for name in $echo_clients; do client_names[$cnt]=$name @@ -315,21 +372,21 @@ if [ $case == "network" ]; then fi # Now do the server setup setup_srv_obd $server_nid "echo_srv" - oss_on_srv=`dsh $server_nid root "lctl dl | grep OSS" | awk '{ print $4 }'` + oss_on_srv=`dsh $server_nid root "$lctl dl | grep OSS" | awk '{ print $4 }'` if [ -z $oss_on_srv ]; then setup_OSS $server_nid clean_srv_OSS=1 fi - if ! dsh $server_nid root "lctl dl | grep obdecho > /dev/null 2>&1"; then + if ! dsh $server_nid root "$lctl dl | grep obdecho > /dev/null 2>&1"; then echo "obdecho not setup on server" exit 1 fi - if ! dsh $server_nid root "lctl dl | grep ost > /dev/null 2>&1"; then + if ! dsh $server_nid root "$lctl dl | grep ost > /dev/null 2>&1"; then echo "ost not setup on server" exit 1 fi # Now start client setup - osc_names_str=$(lctl dl) + osc_names_str=$($lctl dl| grep osc | grep -v mdt | grep UP) if [ -n "$osc_names_str" ]; then echo "The existing setup must be cleaned"; exit 0; @@ -346,25 +403,20 @@ fi # get vmstat started # disable portals debug and get obdecho loaded on all relevant hosts unique_hosts=(`unique ${host_names[@]}`) +load_obdechos + +if [ $case == "disk" ]; then + check_record_size || cleanup ${PIPESTATUS[0]} +fi + pidcount=0 for host in ${unique_hosts[@]}; do host_vmstatf=${vmstatf}_${host} echo -n > $host_vmstatf - remote_shell $host "vmstat 5 >> $host_vmstatf" & + remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null & pid=$! vmstatpids[$pidcount]=$pid pidcount=$((pidcount+1)) - do_unload_obdecho[$host]=0 - if obdecho_loaded $host; then - continue - fi - load_obdecho $host - if obdecho_loaded $host; then - do_unload_obdecho[$host]=1 - continue - fi - echo "Can't load obdecho on $host" 1>&2 - exit 1 done # get all the echo_client device numbers and names for ((i=0; i < $ndevs; i++)); do @@ -381,6 +433,8 @@ if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then echo "no devices or hosts specified" cleanup 0 $clean_srv_OSS $cleanup_oscs fi +# Buffers will be spread out among all hosts, so allow for that +max_buffer_mem=$(( ${max_buffer_mem} * ${#unique_hosts[@]} )) print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)" for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do @@ -409,7 +463,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do actual_size=$((actual_rsz*count*thr)) total_size=$((actual_size*ndevs)) # show computed parameters - str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \ + str=`printf 'ost %2d sz %8dK rsz %4dK obj %4d thr %4d ' \ $ndevs $total_size $actual_rsz $total_nobj $total_thr` echo "=======================> $str" >> $workf print_summary -n "$str" @@ -453,7 +507,8 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do echo >> ${cmdsf}_${host} \ "$lctl > $tmpfi 2>&1 \\ --threads $thr -$snap $devno \\ - test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &" + test_brw $count `testname2type $test` q $pages \\ + ${thr_per_obj}t${first_obj} `testcase2mode $pages` &" done pidcount=0 for host in ${unique_hosts[@]}; do @@ -480,6 +535,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for host in ${unique_hosts[@]}; do rm ${cmdsf}_${host} done + # compute bandwidth from total data / elapsed time str=`awk "BEGIN {printf \"%7.2f \",\ $total_size / (( $t1 - $t0 ) * 1024)}"` @@ -491,9 +547,10 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do tmpfi="${tmpf}_$idx" echo "=============> $test $client_name" >> $workf host="${host_names[$idx]}" - remote_shell $host cat $tmpfi >> $workf - get_stats $tmpfi >> $tmpf - rm $tmpfi + remote_shell $host cat $tmpfi > ${tmpfi}_local + cat ${tmpfi}_local >> $workf + get_stats ${tmpfi}_local >> $tmpf + rm -f $tmpfi ${tmpfi}_local done # compute/display global min/max stats echo "=============> $test global" >> $workf