X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=19cdaf040be209272314d7d1eb64059c22f8d500;hb=2a7e1688e2c9bff8f18f5596c112c445d0039e94;hp=02a4298bb29460034613bf59328e7fe0dffab801;hpb=c2ec235e28383022a50c0a85c64844a491569d2e;p=fs%2Flustre-release.git diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index 02a4298..19cdaf0 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -1,5 +1,5 @@ #!/bin/bash - +set -e ###################################################################### # customize per survey @@ -22,24 +22,61 @@ # How to run test: # case 1 (local disk): # $ nobjhi=2 thrhi=2 size=1024 case=disk sh obdfilter-survey -# instead of case=disk one can also run test as follows -# $ nobjhi=2 thrhi=2 size=1024 OSTS="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey +# one can also run test with user defined targets as follows, +# $ nobjhi=2 thrhi=2 size=1024 targets="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey # case 2 (network): -# $ nobjhi=2 thrhi=2 size=1024 server_nid="" case=network sh obdfilter-survey -# where, server_nid is name or ip address of system, which you want to +# $ nobjhi=2 thrhi=2 size=1024 targets="" case=network sh obdfilter-survey +# where, targets is name or ip address of system, which you want to # set as server. # case 3 (network and disk): # $ nobjhi=2 thrhi=2 size=1024 case=netdisk sh obdfilter-survey -# instead of case="netdisk" one can also run test as follows -# $ nobjhi=2 thrhi=2 size=1024 ECHO_CLIENTS="ECHO_ ..." sh obdfilter-survey +# one can also run test with user defined targets as follows, +# $ nobjhi=2 thrhi=2 size=1024 targets=" ..." sh obdfilter-survey #[ NOTE: It is advised to have automated login (passwordless entry) between server and # client systems on which this test runs.] # include library -source libobd +source $(dirname $0)/libecho + +# The following variables can be set in the environment, or on the +# command line +# result file prefix (date/time + hostname makes unique) +# NB ensure path to it exists +rslt_loc=${rslt_loc:-"/tmp"} +rslt=${rslt:-"$rslt_loc/obdfilter_survey_`date +%F@%R`_`uname -n`"} + +# Set this true to check file contents +verify=${verify:-0} + +# test targets +targets=${targets:-""} +# test case +case=${case:-"disk"} + +# total size (MBytes) per obd instance +# large enough to avoid cache effects +# and to make test startup/shutdown overhead insignificant +size=${size:-16384} + +# record size (KBytes) ( 7168 max) +rszlo=${rszlo:-1024} +rszhi=${rszhi:-1024} + +# number of objects per OST +nobjlo=${nobjlo:-1} +#was nobjhi=${nobjhi:-512} +nobjhi=${nobjhi:-16} + +# threads per OST (1024 max) +thrlo=${thrlo:-1} +thrhi=${thrhi:-16} + +export LC_ALL=POSIX + +# End of variables # create a set of objects, check there are 'n' contiguous ones and -# return the first or 'ERROR' +# echo out the first or 'ERROR' # parameter: 1. hostname # 2. device number # 3. number of object to be created (specified by user) @@ -54,36 +91,37 @@ create_objects () { prev=0 count=0 error=0 - while read line; do - echo "$line" | grep -q 'is object id' - if [ $? -ne 0 ]; then - continue - fi - if [ $first -eq 0 ]; then - first=$(echo $line | awk '{print $6}') - first=$(printf "%d" $first) - prev=$first - count=1 - else - obj=$(echo $line | awk '{print $6}') - obj=$(printf "%d" $obj) - diff=$((obj - (prev+1))) - if [ $diff -ne 0 ]; then - error=1 - fi - prev=$obj - count=$((count+1)) - fi - done < $rfile + + # Count number of objects (lines containing " is object id "), and + # ensure that the objects numbers are sequential. + # + exec 3< $rfile + while read -u3 line; do + case "$line" in + ( *' is object id '* ) + set -- $line + if test $(( count += 1 )) -gt 1 ; then + (( $6 != prev + 1 )) && error=1 + else + first=$(( $6 + 0 )) + fi + prev=$6 + ;; + esac + done + exec 3<&- + if [ $nobj -ne $count ]; then echo "ERROR: $nobj != $count" >&2 cat $rfile >&2 echo "ERROR" elif [ $error -ne 0 ]; then - echo "ERROR: non contiguous objs found" >&2 + echo "ERROR: non contiguous objs found" >&2 + echo ERROR else echo $first fi + return $error } # destroys all objects created in create_objects routine @@ -99,9 +137,10 @@ destroy_objects () { get_stats () { local rfile=$1 - awk < $rfile \ + gawk < $rfile \ '/^Selected device [0-9]+$/ {n = 0; next}\ /error/ {n = -1; exit}\ + /^Total/ {next}\ /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \ if (n == 1 || v < min) min = v;\ if (n == 1 || v > max) max = v;\ @@ -136,6 +175,15 @@ testname2type () { esac } +# for "echo_client + obdfilter" case, "prep + commit" mode should be used +# for "echo_client + osc" case, "BRW" mode should be used +testcase2mode() { + case $case in + disk) echo "p$1";; + *) echo "g";; + esac +} + print_summary () { if [ "$1" = "-n" ]; then minusn=$1; shift @@ -150,33 +198,20 @@ print_summary () { ##################################################################### # One can change variable values in this section as per requirements -OSTS=${OSTS:-""} -server_nid=${server_nid:-""} -case=${case:-"disk"} -if [ -n "$OSTS" ]; then +if [ -n "$targets" ]; then declare -a ost_names - count=0 - for name in $OSTS; do - ost_names[$count]=$name - count=$((count+1)) - done -fi -ECHO_CLIENTS=${ECHO_CLIENTS:-""} -if [ -n "$ECHO_CLIENTS" ]; then - # make sure we unset ost_names so that our client_names get noticed... - unset ost_names declare -a client_names count=0 - for name in $ECHO_CLIENTS; do - client_names[$count]=$name + for name in $targets; do + if [ $case == "disk" ]; then + ost_names[$count]=$name + else + client_names[$count]=$name + fi count=$((count+1)) done fi -# result file prefix (date/time + hostname makes unique) -# NB ensure path to it exists -rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} - # what tests to run (first must be write) tests_str=${tests_str:-""} if [ -n "$tests_str" ]; then @@ -191,27 +226,6 @@ else tests=(write rewrite read) fi -# Set this true to check file contents -verify=0 - -# total size (MBytes) per obd instance -# large enough to avoid cache effects -# and to make test startup/shutdown overhead insignificant -size=${size:-16384} - -# record size (KBytes) -rszlo=${rszlo:-1024} -rszhi=${rszhi:-1024} - -# number of objects per OST -nobjlo=${nobjlo:-1} -#was nobjhi=${nobjhi:-512} -nobjhi=${nobjhi:-16} - -# threads per OST (1024 max) -thrlo=${thrlo:-1} -thrhi=${thrhi:-16} - # restart from here iff all are defined restart_rsz= restart_thr=1 @@ -229,11 +243,15 @@ PAGE_SIZE=${PAGE_SIZE:-4} # (to avoid lctl ENOMEM problems) max_buffer_mem=$((1024 * 1024)) snap=1 - +clean_srv_OSS=0 # Customisation variables ends here. ##################################################################### # leave the rest of this alone unless you know what you're doing... +# check and insert obdecho module +if ! lsmod | grep obdecho > /dev/null; then + modprobe obdecho +fi if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then echo "tests: ${tests[@]}" echo "First test must be 'write'" 1>&2 @@ -258,137 +276,123 @@ else lctl=${lustre_root}/utils/lctl fi +# split out hostnames from client/ost names +ndevs=0 +for trgt in $targets; do + str=(`split_hostname $trgt`) + host_names[$ndevs]=${str[0]} + client_names[$ndevs]=${str[1]} + ndevs=$((ndevs+1)) +done +if [ $case == "disk" ]; then + if [ $rszhi -gt 1024 ]; then + echo "Test disk case support maximum 1024KB IO data" \ + "(rszhi=$rszhi is too big) please use a smaller value." + exit 1 + fi + for ((i = 0; i < $ndevs; i++)); do + ost_names[$i]=${client_names[$i]} + done +fi +if [ $case == "netdisk" ]; then + if [ "$targets" ]; then + for ((i = 0; i < $ndevs; i++)); do + setup_osc_for_remote_ost ${host_names[$i]} \ + ${client_names[$i]} $i + osc_name=${client_names[$i]}_osc + ec_using_osc $osc_name + cleanup_oscs="$cleanup_oscs $osc_name" + done + else + client_names_str=$($lctl dl | grep -v mdt | \ + awk '{if ($2 == "UP" && $3 == "osc") {print $4} }') + count=0; + for name in $client_names_str; do + client_names[$count]=`echo $name | sed 's/-osc-.*$//'` + count=$((count+1)) + done + + host_names_str=$($lctl dl -t | grep -v mdt | \ + awk '{if ($2 == "UP" && $3 == "osc") {print $7} }') + count=0; + for name in $host_names_str; do + host_names[$count]=`echo $name | sed 's/@.*$//'` + count=$((count+1)) + done + + for (( i = 0; i < $count; i++ )) do + setup_osc_for_remote_ost ${host_names[$i]} \ + ${client_names[$i]} $i + osc_name=${client_names[$i]}_osc + ec_using_osc $osc_name + cleanup_oscs="$cleanup_oscs $osc_name" + done + fi + + echo_clients=$($lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") + cnt=0; + for name in $echo_clients; do + client_names[$cnt]=$name + host_names[$cnt]=localhost + cnt=$((cnt+1)) + done + ndevs=${#client_names[@]} +fi if [ $case == "network" ]; then + server_nid=$targets if [ -z "$server_nid" ]; then - echo "Specify the server NID" + echo "Specify hostname or ip-address of server" exit 1; fi - osc_names_string=`ssh root@"$server_nid" lctl dl` - count=0; - for name in $osc_names_str; do - count=$((count+1)) - done - - if [ $count != 0 ]; then - echo "The existing setup must be cleaned"; - exit 0; + # check for obdecho module on server + if ! dsh $server_nid root "lsmod | grep obdecho > /dev/null"; then + dsh $server_nid root "modprobe obdecho" fi # Now do the server setup - setup_srv_obd $server_nid "ost_testfs" - op_string=`ssh root@"$server_nid" lctl dl` - - obdecho=0 - ost=0 - for name in $op_string; do - if [ "$name" = "obdecho" ]; then - obdecho=1 - fi - if [ "$name" = "ost" ]; then - ost=1 - fi - done - - if (( $obdecho == 0 || $ost == 0 )); then - echo "Server setup not done properly" - exit 1 + setup_srv_obd $server_nid "echo_srv" + oss_on_srv=`dsh $server_nid root "$lctl dl | grep OSS" | awk '{ print $4 }'` + if [ -z $oss_on_srv ]; then + setup_OSS $server_nid + clean_srv_OSS=1 + fi + if ! dsh $server_nid root "$lctl dl | grep obdecho > /dev/null 2>&1"; then + echo "obdecho not setup on server" + exit 1 + fi + if ! dsh $server_nid root "$lctl dl | grep ost > /dev/null 2>&1"; then + echo "ost not setup on server" + exit 1 fi # Now start client setup - osc_names_str=$(lctl dl) + osc_names_str=$($lctl dl| grep osc | grep -v mdt | grep UP) if [ -n "$osc_names_str" ]; then echo "The existing setup must be cleaned"; exit 0; fi - ec_using_srv_nid $server_nid "osc_testfs" "test_obdfs" - declare -a client_names - client_names[0]="ECHO_osc_testfs" + ec_using_srv_nid $server_nid "echotmp" "echotmp_UUID" + client_names[0]="echotmp_ecc" fi - -if [ -z "$ECHO_CLIENTS" ]; then - if [ $case == "netdisk" ]; then - declare -a osc_names - declare -a osc_uuids - osc_names_str=$(lctl dl |grep osc | awk "{if (\$2 == \"UP\" && \$3 == \"osc\") {print \$4} }") - count=0; - for name in $osc_names_str; do - osc_names[$count]=$name - count=$((count+1)) - done - osc_uuid_str=$(lctl dl |grep osc | awk "{if (\$2 == \"UP\" && \$3 == \"osc\") {print \$5} }") - count=0; - for uuid in $osc_uuid_str; do - osc_uuids[$count]=$uuid - count=$((count+1)) - done - for (( i = 0 ; i < $count; i++ )) - do - ec_using_osc ${osc_names[$i]} ${osc_uuids[$i]} - done - ECHO_CLIENTS=$(lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") - cnt=0; - for name in $ECHO_CLIENTS; do - client_names[$cnt]=$name - cnt=$((cnt+1)) - done - fi -fi - -if [ -z "$OSTS" ]; then +if [ -z "$targets" ]; then if [ $case == "disk" ]; then - get_targets + get_targets + ndevs=${#ost_names[@]} fi fi - -# split out hostnames from client/ost names -ndevs=${#client_names[@]} -if ((ndevs != 0)); then - if ((${#ost_names[@]} != 0)); then - echo "Please specify client_names or ost_names, but not both" 1>&2 - exit 1 - fi - for ((i = 0; i < ndevs; i++)); do - str=(`split_hostname ${client_names[$i]}`) - host_names[$i]=${str[0]} - client_names[$i]=${str[1]} - done -else - ndevs=${#ost_names[@]} - if ((ndevs == 0)); then - echo "Please specify either client_names or ost_names" 1>&2 - exit 1 - fi - for ((i = 0; i < ndevs; i++)); do - str=(`split_hostname ${ost_names[$i]}`) - host_names[$i]=${str[0]} - ost_names[$i]=${str[1]} - done -fi - # get vmstat started # disable portals debug and get obdecho loaded on all relevant hosts unique_hosts=(`unique ${host_names[@]}`) +load_obdechos pidcount=0 for host in ${unique_hosts[@]}; do host_vmstatf=${vmstatf}_${host} echo -n > $host_vmstatf - remote_shell $host "vmstat 5 >> $host_vmstatf" & + remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null & pid=$! vmstatpids[$pidcount]=$pid pidcount=$((pidcount+1)) - do_unload_obdecho[$host]=0 - if obdecho_loaded $host; then - continue - fi - load_obdecho $host - if obdecho_loaded $host; then - do_unload_obdecho[$host]=1 - continue - fi - echo "Can't load obdecho on $host" 1>&2 - exit 1 done - # get all the echo_client device numbers and names -for ((i=0; i < ndevs; i++)); do +for ((i=0; i < $ndevs; i++)); do host=${host_names[$i]} devno=(`get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"`) if ((${#devno[@]} != 3)); then @@ -398,6 +402,13 @@ for ((i=0; i < ndevs; i++)); do client_names[$i]=${devno[1]} do_teardown_ec[$i]=${devno[2]} done +if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then + echo "no devices or hosts specified" + cleanup 0 $clean_srv_OSS $cleanup_oscs +fi +# Buffers will be spread out among all hosts, so allow for that +max_buffer_mem=$(( ${max_buffer_mem} * ${#unique_hosts[@]} )) +print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)" for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do @@ -425,7 +436,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do actual_size=$((actual_rsz*count*thr)) total_size=$((actual_size*ndevs)) # show computed parameters - str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \ + str=`printf 'ost %2d sz %8dK rsz %4dK obj %4d thr %4d ' \ $ndevs $total_size $actual_rsz $total_nobj $total_thr` echo "=======================> $str" >> $workf print_summary -n "$str" @@ -435,7 +446,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do fi # create the objects tmpf="${workf}_tmp" - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} client_name="${host}:${client_names[$idx]}" @@ -460,7 +471,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for host in ${unique_hosts[@]}; do echo -n > ${cmdsf}_${host} done - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} tmpfi="${tmpf}_$idx" @@ -469,7 +480,8 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do echo >> ${cmdsf}_${host} \ "$lctl > $tmpfi 2>&1 \\ --threads $thr -$snap $devno \\ - test_brw $count `testname2type $test` q $pages ${thr_per_obj}t${first_obj} &" + test_brw $count `testname2type $test` q $pages \\ + ${thr_per_obj}t${first_obj} `testcase2mode $pages` &" done pidcount=0 for host in ${unique_hosts[@]}; do @@ -481,9 +493,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do t0=`date +%s.%N` pidcount=0 for host in ${unique_hosts[@]}; do - # brutal hack to deal with a non-shared /tmp - scp -q ${cmdsf}_${host} ${host}:/tmp > /dev/null - remote_shell $host bash ${cmdsf}_${host} & + remote_shell $host bash < ${cmdsf}_${host} & pidarray[$pidcount]=$! pidcount=$((pidcount+1)) done @@ -498,21 +508,22 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for host in ${unique_hosts[@]}; do rm ${cmdsf}_${host} done + # compute bandwidth from total data / elapsed time str=`awk "BEGIN {printf \"%7.2f \",\ $total_size / (( $t1 - $t0 ) * 1024)}"` print_summary -n "$str" # collect/check individual OST stats echo -n > $tmpf - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do client_name="${host_names[$idx]}:${client_names[$idx]}" tmpfi="${tmpf}_$idx" echo "=============> $test $client_name" >> $workf host="${host_names[$idx]}" - scp -q ${host}:$tmpfi $tmpfi > /dev/null - cat $tmpfi >> $workf - get_stats $tmpfi >> $tmpf - rm $tmpfi + remote_shell $host cat $tmpfi > ${tmpfi}_local + cat ${tmpfi}_local >> $workf + get_stats ${tmpfi}_local >> $tmpf + rm -f $tmpfi ${tmpfi}_local done # compute/display global min/max stats echo "=============> $test global" >> $workf @@ -534,7 +545,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do done print_summary "" # destroy objects we created - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} client_name="${host}:${client_names[$idx]}" @@ -547,6 +558,5 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do done done done - -cleanup 0 +cleanup 0 $clean_srv_OSS $cleanup_oscs exit 0