#!/bin/bash

######################################################################
# customize per survey

# specify obd instances to exercise
# these can be either...
# obdfilter instances (set 'ost_names')
# ...or...
# echo_client instances (set 'client_names')
# ... use 'host:name' for obd instances on other nodes.
# allow these to be passed in via string...
# OR
# one can specify only case=disk or case=network or case=netdisk through
# command line.

# Prerequisite: For "disk" case and "netdisk" case you need to have lustre setup
#             with one or more OSTs. For "network" case you need to have all
#             modules (those llmount.sh loads) loaded in kernel. And the
#             'lctl dl' output must be blank.

# How to run test:
# case 1 (local disk):
#   $ nobjhi=2 thrhi=2 size=1024 case=disk sh obdfilter-survey
#   one can also run test with user defined targets as follows,
#   $ nobjhi=2 thrhi=2 size=1024 targets="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey
# case 2 (network):
#   $ nobjhi=2 thrhi=2 size=1024 targets="<name/ip_address>" case=network sh obdfilter-survey
#   where, targets is name or ip address of system, which you want to
#   set as server.
# case 3 (network and disk):
#   $ nobjhi=2 thrhi=2 size=1024 case=netdisk sh obdfilter-survey
#   one can also run test with user defined targets as follows,
#   $ nobjhi=2 thrhi=2 size=1024 targets="<host:name> ..." sh obdfilter-survey
#[ NOTE: It is advised to have automated login (passwordless entry) between server and
#  client systems on which this test runs.]
# include library
source "$(dirname "$0")/iokit-libecho"

# The following variables can be set in the environment, or on the
# command line

# result file prefix (date/time + hostname makes unique)
# NB ensure path to it exists
rslt_loc=${rslt_loc:-"/tmp"}
rslt=${rslt:-"$rslt_loc/obdfilter_survey_$(date +%F@%R)_$(uname -n)"}

# Set this to a non-zero number to check file contents
# (it is evaluated arithmetically by testname2type)
verify=${verify:-0}

# test targets
targets=${targets:-""}
# test case
case=${case:-"disk"}

# total size (MBytes) per obd instance
# large enough to avoid cache effects
# and to make test startup/shutdown overhead insignificant
size=${size:-16384}

# record size (KBytes) ( 7168 max)
rszlo=${rszlo:-1024}
rszhi=${rszhi:-1024}
rszmax=${rszmax:-4096}

# number of objects per OST
nobjlo=${nobjlo:-1}
#was nobjhi=${nobjhi:-512}
nobjhi=${nobjhi:-16}

# threads per OST (1024 max)
thrlo=${thrlo:-1}
thrhi=${thrhi:-16}

export LC_ALL=POSIX

# End of variables

# create a set of objects, check there are 'n' contiguous ones and
# echo out the first or 'ERROR'
# parameter: 1. hostname
#	     2. device number
#	     3. number of object to be created (specified by user)
#	     4. tempfile name (receives the raw output of the create command)
# output:    the first object id on stdout, or the word ERROR;
#	     diagnostics go to stderr
# return:    1 if the object ids were not contiguous, 0 otherwise.  A wrong
#	     object count is reported through the ERROR output only.
create_objects () {
	local host=$1
	local devno=$2
	local nobj=$3
	local rfile=$4
	# keep the bookkeeping local so that a direct (non-subshell) call
	# cannot clobber same-named variables of the caller
	local first=0
	local prev=0
	local count=0
	local error=0
	local line
	local -a fields

	remote_shell $host $lctl --device $devno create $nobj > "$rfile" 2>&1

	# Count number of objects (lines containing " is object id "), and
	# ensure that the objects numbers are sequential.
	while read -r line; do
		case "$line" in
		*' is object id '*)
			# the object id is the 6th whitespace separated word
			read -r -a fields <<< "$line"
			count=$((count + 1))
			if [ "$count" -gt 1 ]; then
				(( ${fields[5]} != prev + 1 )) && error=1
			else
				first=$(( ${fields[5]} + 0 ))
			fi
			prev=${fields[5]}
			;;
		esac
	done < "$rfile"

	if [ "$nobj" -ne "$count" ]; then
		echo "ERROR: $nobj != $count" >&2
		cat "$rfile" >&2
		echo "ERROR"
	elif [ "$error" -ne 0 ]; then
		echo "ERROR: non contiguous objs found" >&2
		echo ERROR
	else
		echo "$first"
	fi
	return $error
}

# destroys all objects created in create_objects routine
# parameter: 3. start obj id.
destroy_objects () {
	# arguments: host, device number, first object id, object count,
	# and the file that receives the output of the destroy command
	local host=$1
	local devno=$2
	local obj0=$3
	local nobj=$4
	local rfile=$5

	remote_shell $host $lctl --device $devno destroy $obj0 $nobj > "$rfile" 2>&1
}

# Summarise the per-thread rate lines of one result file.
# parameter: 1. file holding the captured output of one test run
# output:    "<n> <min> <max>" where n is the number of rate lines seen
#	     (-1 if the output contains "error" or an unexpected line
#	     once rate lines have started)
# NB requires gawk (strtonum)
get_stats () {
	local rfile=$1

	gawk < "$rfile" '
	/^Selected device [0-9]+$/ {
		n = 0
		next
	}
	/error/ {
		n = -1
		exit
	}
	/^Total/ {
		next
	}
	/^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
		n++
		v = strtonum($3)
		if (n == 1 || v < min)
			min = v
		if (n == 1 || v > max)
			max = v
		next
	}
	{
		# anything else after the first rate line is unexpected
		if (n != 0) {
			n = -1
			exit
		}
	}
	END {
		printf "%d %f %f\n", n, min, max
	}'
}

# Fold the "<n> <min> <max>" lines written by get_stats into one line:
# the smallest first column, the smallest min and the largest max.
# parameter: 1. file with one get_stats line per device
# output:    "<err> <min> <max>" ("0 0.000000 0.000000" for an empty file)
get_global_stats () {
	local rfile=$1

	awk < "$rfile" '
	NR == 1 {
		err = $1
		min = $2
		max = $3
		next
	}
	{
		if ($1 < err)
			err = $1
		if ($2 < min)
			min = $2
		if ($3 > max)
			max = $3
	}
	END {
		if (NR == 0)
			err = 0
		printf "%d %f %f\n", err, min, max
	}'
}

# enable or disable data check.
# parameter: 1. read/write
# output:    "w" or "r", followed by "x" unless $verify is non-zero
testname2type () {
	# 'x' disables data check
	local nocheck="x"

	if ((verify)); then
		nocheck=""
	fi
	case $1 in
	*write*)	echo "w$nocheck";;
	*)		echo "r$nocheck";;
	esac
}

# for "echo_client + obdfilter" case, "prep + commit" mode should be used
# for "echo_client + osc" case, "BRW" mode should be used
testcase2mode() {
	case $case in
	disk)	echo "p";;
	*)	echo "g";;
	esac
}

# Print a message both to stdout and to the summary file ($rsltf).
# parameter: an optional leading "-n" suppresses the trailing newline;
#	     the remaining arguments are joined with spaces
print_summary () {
	local fmt='%s\n'

	if [ "$1" = "-n" ]; then
		fmt='%s'
		shift
	fi
	# printf rather than echo: the text is never mistaken for an option
	printf "$fmt" "$*" >> "$rsltf"
	printf "$fmt" "$*"
}

# Return a numeric version code based on a version string.  The version
# code is useful for comparing two version strings to see which is newer.
version_code() {
	local -a parts
	local i

	# split arguments like "2.3.61" into "2", "3", "61"; done with
	# parameter expansion instead of eval so that text obtained from a
	# remote node is never executed
	read -r -a parts <<< "${*//[[:punct:][:space:]]/ }"

	# a missing or non-numeric component counts as 0
	for i in 0 1 2; do
		[[ ${parts[i]:-} =~ ^[0-9]+$ ]] || parts[i]=0
	done

	# 10# forces base 10 so that components like "08" are not rejected
	# as invalid octal numbers
	echo -n "$(( (10#${parts[0]} << 16) | (10#${parts[1]} << 8) | 10#${parts[2]} ))"
}

# inspired from the lustre_build_version routine in test-framework.sh
# parameter: 1. host to query (defaults to the first entry of unique_hosts)
# output:    the version string reduced to at most four dot separated fields
get_lustre_version() {
	local host=${1:-${unique_hosts[0]}}
	local cmd
	local ver
	local lver

	# try the known ways of asking lctl for its version, in order
	cmd="$lctl get_param -n version 2>/dev/null"
	cmd+=" || $lctl lustre_build_version 2>/dev/null"
	cmd+=" || $lctl --version 2>/dev/null | cut -d' ' -f2"
	ver=$(remote_shell $host "$cmd")

	# prefer the first "lustre: " / "version: " line when there is one
	lver=$(grep -E -i "lustre: |version: " <<< "$ver" | head -n 1)
	if [ -n "$lver" ]; then
		ver="$lver"
	fi

	# drop the "label: " prefix, a leading "v" and any "-suffix";
	# turn "_" into "."
	sed -e 's/.*: //' -e 's/^v//' -e 's/-.*//' -e 's/_/./g' <<< "$ver" |
		cut -d. -f1-4
}

# Check whether the record size (KBytes) exceeds the maximum bulk I/O RPC size
# or not.
# return: 0 if rszhi fits within rszmax, 1 (after printing a message) if not.
# side effect: rszmax is lowered to 1024 for versions older than 2.3.61
check_record_size() {
	local ver

	ver=$(get_lustre_version)
	# an undetermined version leaves rszmax untouched
	if [ -n "$ver" ] &&
	   [ "$(version_code "$ver")" -lt "$(version_code 2.3.61)" ]; then
		rszmax=1024
	fi

	if [ "$rszhi" -gt "$rszmax" ]; then
		echo "Test disk case support maximum ${rszmax}KB IO data" \
		     "(rszhi=$rszhi is too big), please use a smaller value."
		return 1
	fi
	return 0
}

# Customisation variables
#####################################################################
# One can change variable values in this section as per requirements

# file each target name under ost_names (disk case) or client_names (others)
if [ -n "$targets" ]; then
	declare -a ost_names
	declare -a client_names
	count=0
	for name in $targets; do
		if [ "$case" = "disk" ]; then
			ost_names[$count]=$name
		else
			client_names[$count]=$name
		fi
		count=$((count + 1))
	done
fi

# what tests to run (first must be write)
tests_str=${tests_str:-""}
if [ -n "$tests_str" ]; then
	declare -a tests
	count=0
	for name in $tests_str; do
		tests[$count]=$name
		count=$((count + 1))
	done
else
	#tests=(write rewrite read reread rewrite_again)
	tests=(write rewrite read)
fi

# restart from here iff all are defined
restart_rsz=
restart_thr=1
restart_nobj=1

# machine's page size (KB)
PAGE_SIZE_KB=${PAGE_SIZE_KB:-$(( ${PAGE_SIZE:-$(getconf PAGE_SIZE)} / 1024))}
# fallback in case the computation above did not produce a value
PAGE_SIZE_KB=${PAGE_SIZE_KB:-4}

snap=1
clean_srv_OSS=0
# Customisation variables ends here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...

# check and insert obdecho module
if ! lsmod | grep obdecho > /dev/null; then
	modprobe obdecho
fi
if [ ${#tests[@]} -eq 0 ] || [ "${tests[0]}" != "write" ]; then
	echo "tests: ${tests[@]}"
	echo "First test must be 'write'" 1>&2
	exit 1
fi

# output files, all derived from the result prefix
rsltf="${rslt}.summary"
workf="${rslt}.detail"
cmdsf="${rslt}.script"
vmstatf="${rslt}.vmstat"
echo -n > "$rsltf"
echo -n > "$workf"

# hide a little trick to unset this from the command line
if [ "$lustre_root" == " " ]; then
	unset lustre_root
fi

if [ -z "$lustre_root" ]; then
	lctl=lctl
else
	lctl=${lustre_root}/utils/lctl
fi

# split out hostnames from client/ost names
ndevs=0
for trgt in $targets; do
	str=($(split_hostname $trgt))
	host_names[$ndevs]=${str[0]}
	client_names[$ndevs]=${str[1]}
	ndevs=$((ndevs + 1))
done
if [ "$case" == "disk" ]; then
	for ((i = 0; i < $ndevs; i++)); do
		ost_names[$i]=${client_names[$i]}
	done
fi
if [ "$case" == "netdisk" ]; then
	if [ "$targets" ]; then
		for ((i = 0; i < $ndevs; i++)); do
			setup_osc_for_remote_ost ${host_names[$i]} \
						 ${client_names[$i]} $i
			osc_name=${client_names[$i]}_osc
			ec_using_osc $osc_name
			cleanup_oscs="$cleanup_oscs $osc_name"
		done
	else
		# no targets given: derive names and hosts from the osc
		# devices that 'lctl dl' reports as UP
		client_names_str=$($lctl dl | grep -v mdt |
			awk '{if ($2 == "UP" && $3 == "osc") {print $4} }')
		count=0
		for name in $client_names_str; do
			client_names[$count]=$(echo $name | sed 's/-osc-.*$//')
			count=$((count + 1))
		done

		host_names_str=$($lctl dl -t | grep -v mdt |
			awk '{if ($2 == "UP" && $3 == "osc") {print $7} }')
		count=0
		for name in $host_names_str; do
			host_names[$count]=$(echo $name | sed 's/@.*$//')
			count=$((count + 1))
		done

		for ((i = 0; i < $count; i++)); do
			setup_osc_for_remote_ost ${host_names[$i]} \
						 ${client_names[$i]} $i
			osc_name=${client_names[$i]}_osc
			ec_using_osc $osc_name
			cleanup_oscs="$cleanup_oscs $osc_name"
		done
	fi

	# the survey itself runs against the local echo_client devices
	echo_clients=$($lctl dl | grep echo_client |
		awk '{if ($2 == "UP" && $3 == "echo_client") {print $4} }')
	cnt=0
	for name in $echo_clients; do
		client_names[$cnt]=$name
		host_names[$cnt]=localhost
		cnt=$((cnt + 1))
	done
	ndevs=${#client_names[@]}
fi

if [ "$case" == "network" ]; then
	server_nid=$targets
	if [ -z "$server_nid" ]; then
		echo "Specify hostname or ip-address of server"
		exit 1
	fi
	# check for obdecho module on server
	if ! dsh $server_nid root "lsmod | grep obdecho > /dev/null"; then
		dsh $server_nid root "modprobe obdecho"
	fi
	# Now do the server setup
	setup_srv_obd $server_nid "echo_srv"
	oss_on_srv=$(dsh $server_nid root "$lctl dl | grep OSS" |
		awk '{ print $4 }')
	# quoted: the value may be empty or hold several words
	if [ -z "$oss_on_srv" ]; then
		setup_OSS $server_nid
		clean_srv_OSS=1
	fi
	if ! dsh $server_nid root "$lctl dl | grep obdecho > /dev/null 2>&1"; then
		echo "obdecho not setup on server"
		exit 1
	fi
	if ! dsh $server_nid root "$lctl dl | grep ost > /dev/null 2>&1"; then
		echo "ost not setup on server"
		exit 1
	fi
	# Now start client setup
	osc_names_str=$($lctl dl | grep osc | grep -v mdt | grep UP || true)
	if [ -n "$osc_names_str" ]; then
		echo "The existing setup must be cleaned"
		# NOTE(review): exits with status 0 although the survey did
		# not run - confirm whether callers rely on this
		exit 0
	fi
	ec_using_srv_nid $server_nid "echotmp" "echotmp_UUID"
	client_names[0]="echotmp_ecc"
fi

if [ -z "$targets" ]; then
	if [ "$case" == "disk" ]; then
		get_targets
		ndevs=${#ost_names[@]}
	fi
fi

# get vmstat started
# disable portals debug and get obdecho loaded on all relevant hosts
unique_hosts=($(unique ${host_names[@]}))
load_obdechos

if [ "$case" == "disk" ]; then
	check_record_size || cleanup ${PIPESTATUS[0]}
fi

pidcount=0
for host in ${unique_hosts[@]}; do
	host_vmstatf=${vmstatf}_${host}
	echo -n > "$host_vmstatf"
	remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null &
	pid=$!
	vmstatpids[$pidcount]=$pid
	pidcount=$((pidcount + 1))
done
# get all the echo_client device numbers and names
for ((i = 0; i < $ndevs; i++)); do
	host=${host_names[$i]}
	# three words are expected back; they are stored below as the
	# device number, the client name and the teardown flag
	devno=($(get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"))
	if ((${#devno[@]} != 3)); then
		cleanup 2
	fi
	devnos[$i]=${devno[0]}
	client_names[$i]=${devno[1]}
	do_teardown_ec[$i]=${devno[2]}
done
if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then
	echo "no devices or hosts specified"
	cleanup 0 $clean_srv_OSS $cleanup_oscs
fi

print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)"
for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do
	for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do
		for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do
			# threads are spread evenly over the objects
			if ((thr % nobj)); then
				continue
			fi
			# restart?
			if [ -n "$restart_rsz" ] &&
			   [ -n "$restart_nobj" ] &&
			   [ -n "$restart_thr" ]; then
				if ((rsz < restart_rsz || (rsz == restart_rsz && (nobj < restart_nobj || (nobj == restart_nobj && thr < restart_thr))))); then
					continue
				fi
			fi

			# compute parameters
			total_thr=$((ndevs * thr))
			total_nobj=$((ndevs * nobj))
			pages=$(( (rsz + PAGE_SIZE_KB - 1) / PAGE_SIZE_KB))
			actual_rsz=$((pages * PAGE_SIZE_KB))
			count=$((size * 1024 / (actual_rsz * thr)))
			actual_size=$((actual_rsz * count * thr))
			total_size=$((actual_size * ndevs))

			# show computed parameters
			str=$(printf 'ost %2d sz %8dK rsz %4dK obj %4d thr %4d ' \
			      $ndevs $total_size $actual_rsz $total_nobj $total_thr)
			echo "=======================> $str" >> "$workf"
			print_summary -n "$str"

			# create the objects
			tmpf="${workf}_tmp"
			for ((idx = 0; idx < $ndevs; idx++)); do
				host=${host_names[$idx]}
				devno=${devnos[$idx]}
				client_name="${host}:${client_names[$idx]}"
				echo "=============> Create $nobj on $client_name" >> "$workf"
				first_obj=$(create_objects $host $devno $nobj $tmpf)
				cat "$tmpf" >> "$workf"
				rm "$tmpf"
				# quoted so that an empty result cannot break the test
				if [ "$first_obj" = "ERROR" ]; then
					print_summary "created object #s on $client_name not contiguous"
					cleanup 3
				fi
				first_objs[$idx]=$first_obj
			done # $ndevs

			# run tests
			for test in ${tests[@]}; do
				declare -a pidarray
				for host in ${unique_hosts[@]}; do
					# use the configured lctl, like every
					# other remote invocation in this script
					remote_shell $host \
					    "$lctl set_param -n osd*.*.force_sync 1 &>/dev/null || true"
					echo "starting run for test: $test rsz: $rsz " \
					     "threads: $thr objects: $nobj" >> "${vmstatf}_${host}"
				done
				print_summary -n "$test "

				# create per-host script files
				for host in ${unique_hosts[@]}; do
					echo -n > "${cmdsf}_${host}"
				done
				for ((idx = 0; idx < $ndevs; idx++)); do
					host=${host_names[$idx]}
					devno=${devnos[$idx]}
					tmpfi="${tmpf}_$idx"
					first_obj=${first_objs[$idx]}
					thr_per_obj=$((${thr}/${nobj}))
					# one background lctl command per device,
					# emitted as a single line
					echo >> "${cmdsf}_${host}" \
					     "$lctl > $tmpfi 2>&1 --threads $thr -$snap $devno test_brw $count $(testname2type $test) q $pages ${thr_per_obj}t${first_obj} $(testcase2mode)$pages &"
				done # $ndevs
				pidcount=0
				for host in ${unique_hosts[@]}; do
					echo "wait" >> "${cmdsf}_${host}"
					pidarray[$pidcount]=0
					pidcount=$((pidcount + 1))
				done
				# timed run of all the per-host script files
				t0=$(date +%s.%N)
				pidcount=0
				for host in ${unique_hosts[@]}; do
					remote_shell $host bash < "${cmdsf}_${host}" &
					pidarray[$pidcount]=$!
					pidcount=$((pidcount + 1))
				done
				pidcount=0
				for host in ${unique_hosts[@]}; do
					wait ${pidarray[$pidcount]}
					pidcount=$((pidcount + 1))
				done
				#wait
				t1=$(date +%s.%N)
				# clean up per-host script files
				for host in ${unique_hosts[@]}; do
					rm "${cmdsf}_${host}"
				done

				# compute bandwidth from total data / elapsed time
				str=$(awk "BEGIN {printf \"%7.2f \", $total_size / (( $t1 - $t0 ) * 1024)}")
				print_summary -n "$str"
				# collect/check individual OST stats
				echo -n > "$tmpf"
				for ((idx = 0; idx < $ndevs; idx++)); do
					client_name="${host_names[$idx]}:${client_names[$idx]}"
					tmpfi="${tmpf}_$idx"
					echo "=============> $test $client_name" >> "$workf"
					host="${host_names[$idx]}"
					remote_shell $host cat $tmpfi > "${tmpfi}_local"
					cat "${tmpfi}_local" >> "$workf"
					get_stats "${tmpfi}_local" >> "$tmpf"
					rm -f "$tmpfi" "${tmpfi}_local"
				done # $ndevs

				# compute/display global min/max stats
				echo "=============> $test global" >> "$workf"
				cat "$tmpf" >> "$workf"
				stats=($(get_global_stats "$tmpf"))
				rm "$tmpf"
				# stats[0] < 0: a device reported an error;
				# stats[0] == 0: a device produced no rate lines
				if ((stats[0] <= 0)); then
					if ((stats[0] < 0)); then
						str=$(printf "%17s " ERROR)
					else
						str=$(printf "%17s " SHORT)
					fi
				else
					str=$(awk "BEGIN {printf \"[%7.2f, %7.2f] \", (${stats[1]} * $actual_rsz)/1024, (${stats[2]} * $actual_rsz)/1024; exit}")
				fi
				print_summary -n "$str"
			done # $tests[]
			print_summary ""

			# destroy objects we created
			for ((idx = 0; idx < $ndevs; idx++)); do
				host=${host_names[$idx]}
				devno=${devnos[$idx]}
				client_name="${host}:${client_names[$idx]}"
				first_obj=${first_objs[$idx]}
				echo "=============> Destroy $nobj on $client_name" >> "$workf"
				destroy_objects $host $devno $first_obj $nobj $tmpf
				cat "$tmpf" >> "$workf"
				rm "$tmpf"
			done # $ndevs
		done # $thr
	done # $nobj
done # $rsz
cleanup 0 $clean_srv_OSS $cleanup_oscs
exit 0