From: Minh Diep Date: Mon, 12 Mar 2012 17:17:19 +0000 (-0700) Subject: LU-1082 test: create test for mds-survey X-Git-Tag: 2.2.51~60 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=a562648d49aca59a8f9af5c77a149a9fe69353e4 LU-1082 test: create test for mds-survey Create a mds-survey.sh test with two test cases 1. test without stripe count 2. test with stripe count Create test_224[a|b] for quick run on metadata survey Notice, we remove all clients before testing because the real client and echo_client share the same client object memory Signed-off-by: Minh Diep Change-Id: I453ac34d8b6019e109be5d89d482300d31571f93 Reviewed-on: http://review.whamcloud.com/2156 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre-iokit/mds-survey/mds-survey b/lustre-iokit/mds-survey/mds-survey index 5888491..e4d20c3 100755 --- a/lustre-iokit/mds-survey/mds-survey +++ b/lustre-iokit/mds-survey/mds-survey @@ -17,7 +17,7 @@ # [ NOTE: It is advised to have automated login (passwordless entry) on server ] # include library -source libecho +source $(dirname $0)/libecho # Customisation variables ##################################################################### @@ -54,7 +54,6 @@ layer=${layer:-"mdd"} # leave the rest of this alone unless you know what you're doing... export LC_ALL=POSIX basedir="tests" -case=disk create_directories () { local host=$1 @@ -102,34 +101,36 @@ destroy_directories () { get_stats () { local rfile=$1 - gawk < $rfile \ - '/start at/ { n=0; next } \ - /error at/ {n = -1; exit} \ - /end/ {exit} \ - /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ \ - { n++; v=strtonum($3); \ - if (n == 1 || v < min) min = v; \ - if (n == 1 || v > max) max = v; \ - next; \ - } \ - { if (n != 0) {n = -1; exit } } \ - END {printf "%d %f %f\n", n, min, max}' + gawk < $rfile \ + '/starting/ { n=0; next } \ + /error/ {n = -1; exit} \ + /^Total: total [0-9]+ threads [0-9]+ sec [0-9\.]+ [0-9]+\.[0-9]+\/second$/ \ + { ave = strtonum($8); n++; next} \ + /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ \ + { n++; v = strtonum($3); \ + if (n == 1 || v < min) min = v; \ + if (n == 1 || v > max) max = v; \ + next; \ + } \ + { if (n != 0) {n = -1; exit } } \ + END { if (n == 1) { min = ave; max = ave; } \ + printf "%d %f %f %f\n", n, ave, min, max}' } get_global_stats () { local rfile=$1 - awk < $rfile \ - 'BEGIN {n = 0;} \ - { n++; \ - if (n == 1) { err = $1; min = $2; max = $3}\ - else \ - { if ($1 < err) err = $1; \ - if ($2 < min) min = $2; \ - if ($3 > max) max = $3; \ - } \ - } \ - END { if (n == 0) err = 0; \ - printf "%d %f %f\n", err, min, max}' + awk < $rfile \ + 'BEGIN {n = 0;} \ + { n++; \ + if (n == 1) { err = $1; ave = $2; min = $3; max = $4} \ + else \ + { if ($1 < err) err = $1; \ + if ($2 < min) min = $2; \ + if ($3 > max) max = $3; \ + } \ + } \ + END { if (n == 0) err = 0; \ + printf "%d %f %f %f\n", err, ave, min, max}' } print_summary () { @@ -171,16 +172,6 @@ if [ -z "$targets" ]; then fi fi -# check for ost -if (( $stripe_count > 0 )); then - obd=$($lctl device_list | awk "{if (\$2 == \"UP\" && \ - \$3 == \"obdfilter\") {print \$4} }") - if [ -z "$obd" ]; then - echo "Need obdfilter to test stripe_count" - exit 1 - fi -fi - # split out hostnames from mdt names ndevs=0 for trgt in $targets; do @@ -190,6 +181,19 @@ for trgt in $targets; do ndevs=$((ndevs+1)) done +# check for ost +if (( $stripe_count > 0 )); then + for ((i=0; i < $ndevs; i++)); do + host=${host_names[$i]} + obd=$(remote_shell $host $lctl device_list | awk "{if (\$2 == \"UP\" && + \$3 == \"osc\") { print \$4 } }") + if [ -z "$obd" ]; then + echo "Need obdfilter to test stripe_count" + exit 1 + fi + done +fi + # check and insert obdecho module if ! lsmod | grep obdecho > /dev/null; then modprobe obdecho @@ -255,6 +259,7 @@ for ((idx = 0; idx < $ndevs; idx++)); do done snap=1 +status=0 for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do thr_per_dir=$((${thr}/${dir_count})) # skip if no enough thread @@ -293,8 +298,6 @@ for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do pidarray[$pidcount]=0 pidcount=$((pidcount+1)) done - # timed run of all the per-host script files - t0=`date +%s.%N` pidcount=0 for host in ${unique_hosts[@]}; do remote_shell $host bash < ${cmdsf}_${host} & @@ -307,16 +310,11 @@ for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do pidcount=$((pidcount+1)) done #wait - t1=`date +%s.%N` # clean up per-host script files for host in ${unique_hosts[@]}; do rm ${cmdsf}_${host} done - # compute bandwidth from total data / elapsed time - str=`awk "BEGIN {printf \"%7.2f \", \ - ( $file_count * $thr_per_dir ) / ( $t1 - $t0 )}"` - print_summary -n "$str" # collect/check individual MDT stats echo -n > $tmpf for ((idx = 0; idx < $ndevs; idx++)); do @@ -335,14 +333,11 @@ for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do stats=(`get_global_stats $tmpf`) rm $tmpf if ((stats[0] <= 0)); then - if ((stats[0] < 0)); then - str=`printf "%17s " ERROR` - else - str=`printf "%17s " SHORT` - fi + str=`printf "%17s " ERROR` + status=1 else - str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \", \ - ${stats[1]}, ${stats[2]}; exit}"` + str=`awk "BEGIN {printf \"%7.2f [%7.2f,%7.2f] \", \ + ${stats[1]}, ${stats[2]}, ${stats[3]}; exit}"` fi print_summary -n "$str" done @@ -358,5 +353,5 @@ for ((idx = 0; idx < $ndevs; idx++)); do destroy_directories $host $devno $dir_count $tmpf done -cleanup 0 -exit 0 +cleanup $status +exit $status diff --git a/lustre-iokit/obdfilter-survey/libecho b/lustre-iokit/obdfilter-survey/libecho index 8679157..edd61e2 100644 --- a/lustre-iokit/obdfilter-survey/libecho +++ b/lustre-iokit/obdfilter-survey/libecho @@ -143,10 +143,10 @@ unload_obdecho () { } # returns the device number which is displayed in "lctl device_list" -# +# # parameter: 1. hostname -# 2. type of device ex: echo_client -# 3. name of device ex: ECHO_matrix.linsyssoft.com +# 2. type of device ex: echo_client +# 3. name of device ex: ECHO_matrix.linsyssoft.com get_devno () { local host=$1 local type=$2 @@ -175,63 +175,68 @@ get_devnos () { # do cleanup for netdisk case. cleanup_netdisk () { for osc in $@; do - $lctl </dev/null 2>&1" & pid=$! kill -term ${vmstatpids[$pidcount]} 2>/dev/null kill -kill ${vmstatpids[$pidcount]} 2>/dev/null wait $pid pidcount=$((pidcount+1)) - if ((${do_unload_obdecho[$host]})); then - unload_obdecho $host + if ((${do_unload_echo[$i]})); then + unload_obdecho $i fi done if [ $case == "network" ]; then - cleanup_network $1 + cleanup_network $1 fi if [ $case == "netdisk" ]; then - shift + shift cleanup_netdisk $@ fi if [ $exit_status ]; then @@ -252,8 +257,8 @@ trap 'cleanup 0 $clean_srv_OSS $cleanup_oscs' SIGHUP SIGINT SIGTERM # This must run in a subshell. # # parameter: 1. hostname -# 2. client name, ex:- ns8:ECHO_ns8 -# 3. name of ost instances, ex:- lustre-OST0001 +# 2. client name, ex:- ns8:ECHO_ns8 +# 3. name of ost instances, ex:- lustre-OST0001 get_ec_devno () { exec 8>&1 1>&2 local host=$1 @@ -263,36 +268,37 @@ get_ec_devno () { local stack_type="${5:-}" if [ -z "$client_name" ]; then - if [ -z "$ost_name" ]; then - echo "client and ost name both null" - exit 1 - fi + if [ -z "$ost_name" ]; then + echo "client and ost name both null" + exit 1 + fi client_name=${ost_name}_ecc fi ec=`get_devno $host echo_client $client_name` if [ -n "$ec" ]; then - echo $ec $client_name $client_name >&8 - exit 0 + echo $ec $client_name $client_name >&8 + exit 0 fi if [ -z "$ost_name" ]; then - echo "no echo client and ost_name not set, client:" \ + echo "no echo client and ost_name not set, client:" \ "$client_name, host: $host" - exit 1 + exit 1 fi ost=`get_devno $host $dev_type $ost_name` if [ -z "$ost" ]; then - echo "OST $ost_name not setup" - exit 1 + echo "OST $ost_name not setup" + exit 1 fi client_name=${ost_name}_ecc - remote_shell $host "$lctl <&8 exit 0 @@ -302,11 +308,11 @@ EOF" # It creates echoclients for all osc listed using #lctl device_list command ec_using_osc () { local osc_name=$1 - $lctl </dev/null 2>&1 - $lctl </dev/null 2>&1 - $lctl </dev/null || true)} +if [ -z ${MDSSURVEY} ]; then + skip_env "mds-survey not found" && exit +fi + +# check for available inode, reduce to fit +inode_per_thr=$((dir_count * file_count)) +require_inode=$((inode_per_thr * thrhi * 11/10)) +avail_inode=$($LFS df -i $MOUNT | grep "filesystem summary:" | \ + awk '{print $5}') + +while [ $require_inode -ge $avail_inode ]; do + echo "Require $require_inode inode to run, only have $avail_inode" + # reduce 20% + file_count=$((file_count * 8 / 10)) + inode_per_thr=$((dir_count * file_count)) + require_inode=$((inode_per_thr * thrhi * 11 / 10)) +done + +if [ $require_inode -eq 0 ]; then + skip_env "Not enough inode to run" && exit +fi + +ost_count=$($LCTL dl | grep -c osc) + +# first unmount all the lustre clients +cleanup_mount $MOUNT +cleanup_mount $MOUNT2 + +get_target() { + local mds=$(facet_host $SINGLEMDS) + echo $(do_nodes $mds 'lctl dl' | \ + awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}") +} + +mds_survey_run() { + local layer=${1:-mdd} + local stripe_count=${2:-0} + local mds=$(facet_host $SINGLEMDS) + + rm -f ${TMP}/mds_survey* + + local target=$(get_target) + local cmd1="file_count=$file_count thrlo=$thrlo thrhi=$thrhi" + local cmd2="dir_count=$dir_count layer=$layer stripe_count=$stripe_count" + local cmd3="rslt_loc=${TMP} targets=\"$mds:$target\" $MDSSURVEY" + local cmd="$cmd1 $cmd2 $cmd3" + + echo + $cmd + eval $cmd || error "mds-survey failed" + cat ${TMP}/mds_survey* + rm -f ${TMP}/mds_survey* +} + +test_1() { + mds_survey_run "mdd" "0" +} +run_test 1 "Metadata survey with zero-stripe" + +test_2() { + if [ $ost_count -eq 0 ]; then + skip_env "Need to mount OST to test" && return + fi + mds_survey_run "mdd" "1" +} +run_test 2 "Metadata survey with stripe_count = 1" + +# remount the clients +restore_mount $MOUNT + +complete $(basename $0) $SECONDS +cleanup_echo_devs +check_and_cleanup_lustre +exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 96d5b8f..f61d6da 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8719,6 +8719,55 @@ test_224b() { # LU-1039, MRP-303 } run_test 224b "Don't panic on bulk IO failure" +MDSSURVEY=${MDSSURVEY:-$(which mds-survey 2>/dev/null || true)} +test_225a () { + if [ -z ${MDSSURVEY} ]; then + skip_env "mds-survey not found" && return + fi + + local mds=$(facet_host $SINGLEMDS) + local target=$(do_nodes $mds 'lctl dl' | \ + awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}") + + local cmd1="file_count=1000 thrhi=4" + local cmd2="dir_count=2 layer=mdd stripe_count=0" + local cmd3="rslt_loc=${TMP} targets=\"$mds:$target\" $MDSSURVEY" + local cmd="$cmd1 $cmd2 $cmd3" + + rm -f ${TMP}/mds_survey* + echo + $cmd + eval $cmd || error "mds-survey with zero-stripe failed" + cat ${TMP}/mds_survey* + rm -f ${TMP}/mds_survey* +} +run_test 225a "Metadata survey sanity with zero-stripe" + +test_225b () { + if [ -z ${MDSSURVEY} ]; then + skip_env "mds-survey not found" && return + fi + + if [ $($LCTL dl | grep -c osc) -eq 0 ]; then + skip_env "Need to mount OST to test" && return + fi + + local mds=$(facet_host $SINGLEMDS) + local target=$(do_nodes $mds 'lctl dl' | \ + awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}") + + local cmd1="file_count=1000 thrhi=4" + local cmd2="dir_count=2 layer=mdd stripe_count=1" + local cmd3="rslt_loc=${TMP} targets=\"$mds:$target\" $MDSSURVEY" + local cmd="$cmd1 $cmd2 $cmd3" + + rm -f ${TMP}/mds_survey* + echo + $cmd + eval $cmd || error "mds-survey with stripe_count failed" + cat ${TMP}/mds_survey* + rm -f ${TMP}/mds_survey* +} +run_test 225b "Metadata survey sanity with stripe_count = 1" + # # tests that do cleanup/setup should be run at the end #