X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre-iokit%2Fobdfilter-survey%2Fobdfilter-survey;h=9e8eb33409feea72e84c94f7c73d8eeb4965d7a6;hb=180c2dd3915383bbfdcf6cad6a8058ca045c8b61;hp=02a4298bb29460034613bf59328e7fe0dffab801;hpb=c2ec235e28383022a50c0a85c64844a491569d2e;p=fs%2Flustre-release.git diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index 02a4298..9e8eb33 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -22,21 +22,51 @@ # How to run test: # case 1 (local disk): # $ nobjhi=2 thrhi=2 size=1024 case=disk sh obdfilter-survey -# instead of case=disk one can also run test as follows -# $ nobjhi=2 thrhi=2 size=1024 OSTS="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey +# one can also run test with user defined targets as follows, +# $ nobjhi=2 thrhi=2 size=1024 targets="lustre-OST0000 lustre-OST0001 ..." sh obdfilter-survey # case 2 (network): -# $ nobjhi=2 thrhi=2 size=1024 server_nid="" case=network sh obdfilter-survey -# where, server_nid is name or ip address of system, which you want to +# $ nobjhi=2 thrhi=2 size=1024 targets="" case=network sh obdfilter-survey +# where, targets is name or ip address of system, which you want to # set as server. # case 3 (network and disk): # $ nobjhi=2 thrhi=2 size=1024 case=netdisk sh obdfilter-survey -# instead of case="netdisk" one can also run test as follows -# $ nobjhi=2 thrhi=2 size=1024 ECHO_CLIENTS="ECHO_ ..." sh obdfilter-survey +# one can also run test with user defined targets as follows, +# $ nobjhi=2 thrhi=2 size=1024 targets=" ..." sh obdfilter-survey #[ NOTE: It is advised to have automated login (passwordless entry) between server and # client systems on which this test runs.] # include library -source libobd +source libecho + +# The following variables can be set in the environment, or on the +# command line +# result file prefix (date/time + hostname makes unique) +# NB ensure path to it exists +rslt_loc=${rslt_loc:-"/tmp"} +rslt=${rslt:-"$rslt_loc/obdfilter_survey_`date +%F@%R`_`uname -n`"} + +# Set this true to check file contents +verify=${verify:-0} + +# total size (MBytes) per obd instance +# large enough to avoid cache effects +# and to make test startup/shutdown overhead insignificant +size=${size:-16384} + +# record size (KBytes) ( 7168 max) +rszlo=${rszlo:-1024} +rszhi=${rszhi:-1024} + +# number of objects per OST +nobjlo=${nobjlo:-1} +#was nobjhi=${nobjhi:-512} +nobjhi=${nobjhi:-16} + +# threads per OST (1024 max) +thrlo=${thrlo:-1} +thrhi=${thrhi:-16} + +# End of variables # create a set of objects, check there are 'n' contiguous ones and # return the first or 'ERROR' @@ -99,7 +129,7 @@ destroy_objects () { get_stats () { local rfile=$1 - awk < $rfile \ + gawk < $rfile \ '/^Selected device [0-9]+$/ {n = 0; next}\ /error/ {n = -1; exit}\ /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \ @@ -150,33 +180,22 @@ print_summary () { ##################################################################### # One can change variable values in this section as per requirements -OSTS=${OSTS:-""} -server_nid=${server_nid:-""} +targets=${targets:-""} case=${case:-"disk"} -if [ -n "$OSTS" ]; then +if [ -n "$targets" ]; then declare -a ost_names - count=0 - for name in $OSTS; do - ost_names[$count]=$name - count=$((count+1)) - done -fi -ECHO_CLIENTS=${ECHO_CLIENTS:-""} -if [ -n "$ECHO_CLIENTS" ]; then - # make sure we unset ost_names so that our client_names get noticed... - unset ost_names declare -a client_names count=0 - for name in $ECHO_CLIENTS; do - client_names[$count]=$name + for name in $targets; do + if [ $case == "disk" ]; then + ost_names[$count]=$name + else + client_names[$count]=$name + fi count=$((count+1)) done fi -# result file prefix (date/time + hostname makes unique) -# NB ensure path to it exists -rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} - # what tests to run (first must be write) tests_str=${tests_str:-""} if [ -n "$tests_str" ]; then @@ -188,30 +207,10 @@ if [ -n "$tests_str" ]; then done else #tests=(write rewrite read reread rewrite_again) - tests=(write rewrite read) + #tests=(write rewrite read) + tests=(write) fi -# Set this true to check file contents -verify=0 - -# total size (MBytes) per obd instance -# large enough to avoid cache effects -# and to make test startup/shutdown overhead insignificant -size=${size:-16384} - -# record size (KBytes) -rszlo=${rszlo:-1024} -rszhi=${rszhi:-1024} - -# number of objects per OST -nobjlo=${nobjlo:-1} -#was nobjhi=${nobjhi:-512} -nobjhi=${nobjhi:-16} - -# threads per OST (1024 max) -thrlo=${thrlo:-1} -thrhi=${thrhi:-16} - # restart from here iff all are defined restart_rsz= restart_thr=1 @@ -229,11 +228,15 @@ PAGE_SIZE=${PAGE_SIZE:-4} # (to avoid lctl ENOMEM problems) max_buffer_mem=$((1024 * 1024)) snap=1 - +clean_srv_OSS=0 # Customisation variables ends here. ##################################################################### # leave the rest of this alone unless you know what you're doing... +# check and insert obdecho module +if ! lsmod | grep obdecho > /dev/null; then + modprobe obdecho +fi if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then echo "tests: ${tests[@]}" echo "First test must be 'write'" 1>&2 @@ -258,53 +261,22 @@ else lctl=${lustre_root}/utils/lctl fi -if [ $case == "network" ]; then - if [ -z "$server_nid" ]; then - echo "Specify the server NID" - exit 1; - fi - osc_names_string=`ssh root@"$server_nid" lctl dl` - count=0; - for name in $osc_names_str; do - count=$((count+1)) - done - - if [ $count != 0 ]; then - echo "The existing setup must be cleaned"; - exit 0; - fi - # Now do the server setup - setup_srv_obd $server_nid "ost_testfs" - op_string=`ssh root@"$server_nid" lctl dl` - - obdecho=0 - ost=0 - for name in $op_string; do - if [ "$name" = "obdecho" ]; then - obdecho=1 - fi - if [ "$name" = "ost" ]; then - ost=1 - fi - done - - if (( $obdecho == 0 || $ost == 0 )); then - echo "Server setup not done properly" - exit 1 - fi - # Now start client setup - osc_names_str=$(lctl dl) - if [ -n "$osc_names_str" ]; then - echo "The existing setup must be cleaned"; - exit 0; +# split out hostnames from client/ost names +ndevs=0 +for trgt in $targets; do + str=(`split_hostname $trgt`) + host_names[$ndevs]=${str[0]} + client_names[$ndevs]=${str[1]} + ndevs=$((ndevs+1)) +done +if [ $case == "netdisk" ]; then + if [ "$targets" ]; then + for ((i = 0; i < $ndevs; i++)); do + setup_osc_for_remote_ost ${host_names[$i]} ${client_names[$i]} $i + cleanup_oscs="$cleanup_oscs ${client_names[$i]}_osc" + host_names[$i]=localhost + done fi - ec_using_srv_nid $server_nid "osc_testfs" "test_obdfs" - declare -a client_names - client_names[0]="ECHO_osc_testfs" -fi - -if [ -z "$ECHO_CLIENTS" ]; then - if [ $case == "netdisk" ]; then declare -a osc_names declare -a osc_uuids osc_names_str=$(lctl dl |grep osc | awk "{if (\$2 == \"UP\" && \$3 == \"osc\") {print \$4} }") @@ -319,50 +291,59 @@ if [ -z "$ECHO_CLIENTS" ]; then osc_uuids[$count]=$uuid count=$((count+1)) done - for (( i = 0 ; i < $count; i++ )) + for (( i = 0; i < $count; i++ )) do - ec_using_osc ${osc_names[$i]} ${osc_uuids[$i]} + ec_using_osc ${osc_names[$i]} done - ECHO_CLIENTS=$(lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") + echo_clients=$(lctl dl | grep echo_client | awk "{if (\$2 == \"UP\" && \$3 == \"echo_client\") {print \$4} }") cnt=0; - for name in $ECHO_CLIENTS; do + for name in $echo_clients; do client_names[$cnt]=$name + host_names[$cnt]=localhost cnt=$((cnt+1)) done + ndevs=${#client_names[@]} +fi +if [ $case == "network" ]; then + server_nid=$targets + if [ -z "$server_nid" ]; then + echo "Specify hostname or ip-address of server" + exit 1; + fi + # check for obdecho module on server + if ! dsh $server_nid root "lsmod | grep obdecho > /dev/null"; then + dsh $server_nid root "modprobe obdecho" + fi + # Now do the server setup + setup_srv_obd $server_nid "echo_srv" + oss_on_srv=`dsh $server_nid root "lctl dl | grep OSS" | awk '{ print $4 }'` + if [ -z $oss_on_srv ]; then + setup_OSS $server_nid + clean_srv_OSS=1 + fi + if ! dsh $server_nid root "lctl dl | grep obdecho > /dev/null 2>&1"; then + echo "obdecho not setup on server" + exit 1 + fi + if ! dsh $server_nid root "lctl dl | grep ost > /dev/null 2>&1"; then + echo "ost not setup on server" + exit 1 + fi + # Now start client setup + osc_names_str=$(lctl dl) + if [ -n "$osc_names_str" ]; then + echo "The existing setup must be cleaned"; + exit 0; fi + ec_using_srv_nid $server_nid "echotmp" "echotmp_UUID" + client_names[0]="echotmp_ecc" fi - -if [ -z "$OSTS" ]; then +if [ -z "$targets" ]; then if [ $case == "disk" ]; then - get_targets + get_targets + ndevs=${#ost_names[@]} fi fi - -# split out hostnames from client/ost names -ndevs=${#client_names[@]} -if ((ndevs != 0)); then - if ((${#ost_names[@]} != 0)); then - echo "Please specify client_names or ost_names, but not both" 1>&2 - exit 1 - fi - for ((i = 0; i < ndevs; i++)); do - str=(`split_hostname ${client_names[$i]}`) - host_names[$i]=${str[0]} - client_names[$i]=${str[1]} - done -else - ndevs=${#ost_names[@]} - if ((ndevs == 0)); then - echo "Please specify either client_names or ost_names" 1>&2 - exit 1 - fi - for ((i = 0; i < ndevs; i++)); do - str=(`split_hostname ${ost_names[$i]}`) - host_names[$i]=${str[0]} - ost_names[$i]=${str[1]} - done -fi - # get vmstat started # disable portals debug and get obdecho loaded on all relevant hosts unique_hosts=(`unique ${host_names[@]}`) @@ -386,9 +367,8 @@ for host in ${unique_hosts[@]}; do echo "Can't load obdecho on $host" 1>&2 exit 1 done - # get all the echo_client device numbers and names -for ((i=0; i < ndevs; i++)); do +for ((i=0; i < $ndevs; i++)); do host=${host_names[$i]} devno=(`get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"`) if ((${#devno[@]} != 3)); then @@ -398,6 +378,11 @@ for ((i=0; i < ndevs; i++)); do client_names[$i]=${devno[1]} do_teardown_ec[$i]=${devno[2]} done +if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then + echo "no devices or hosts specified" + cleanup 0 $clean_srv_OSS $cleanup_oscs +fi +print_summary "$(date) Obdfilter-survey for case=$case from $(hostname)" for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for ((nobj = $nobjlo; nobj <= $nobjhi; nobj*=2)); do for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do @@ -435,7 +420,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do fi # create the objects tmpf="${workf}_tmp" - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} client_name="${host}:${client_names[$idx]}" @@ -460,7 +445,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do for host in ${unique_hosts[@]}; do echo -n > ${cmdsf}_${host} done - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} tmpfi="${tmpf}_$idx" @@ -481,9 +466,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do t0=`date +%s.%N` pidcount=0 for host in ${unique_hosts[@]}; do - # brutal hack to deal with a non-shared /tmp - scp -q ${cmdsf}_${host} ${host}:/tmp > /dev/null - remote_shell $host bash ${cmdsf}_${host} & + remote_shell $host bash < ${cmdsf}_${host} & pidarray[$pidcount]=$! pidcount=$((pidcount+1)) done @@ -504,13 +487,12 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do print_summary -n "$str" # collect/check individual OST stats echo -n > $tmpf - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do client_name="${host_names[$idx]}:${client_names[$idx]}" tmpfi="${tmpf}_$idx" echo "=============> $test $client_name" >> $workf host="${host_names[$idx]}" - scp -q ${host}:$tmpfi $tmpfi > /dev/null - cat $tmpfi >> $workf + remote_shell $host cat $tmpfi >> $workf get_stats $tmpfi >> $tmpf rm $tmpfi done @@ -534,7 +516,7 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do done print_summary "" # destroy objects we created - for ((idx = 0; idx < ndevs; idx++)); do + for ((idx = 0; idx < $ndevs; idx++)); do host=${host_names[$idx]} devno=${devnos[$idx]} client_name="${host}:${client_names[$idx]}" @@ -547,6 +529,5 @@ for ((rsz = $rszlo; rsz <= $rszhi; rsz*=2)); do done done done - -cleanup 0 +cleanup 0 $clean_srv_OSS $cleanup_oscs exit 0