From 24f601a0d459d48449d2e16b00f6ff3a889ed3b3 Mon Sep 17 00:00:00 2001 From: Minh Diep Date: Tue, 6 Dec 2011 16:38:54 -0800 Subject: [PATCH 1/1] LU-633 iokit: mds-survey script for MD echo client test Create a mds-survey script to run echo client for MDS Signed-off-by: Minh Diep Change-Id: I425438aa294d8d4360c28f686296ff7f26f847ea Reviewed-on: http://review.whamcloud.com/1969 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: wangdi --- lustre-iokit/ChangeLog | 2 + lustre-iokit/Makefile.am | 5 +- lustre-iokit/configure.ac | 3 +- lustre-iokit/lustre-iokit.spec.in | 10 + lustre-iokit/mds-survey/Makefile.am | 3 + lustre-iokit/mds-survey/README.mds-survey | 97 ++++++++ lustre-iokit/mds-survey/mds-survey | 362 ++++++++++++++++++++++++++++++ lustre-iokit/obdfilter-survey/libecho | 7 +- 8 files changed, 484 insertions(+), 5 deletions(-) create mode 100644 lustre-iokit/mds-survey/Makefile.am create mode 100644 lustre-iokit/mds-survey/README.mds-survey create mode 100755 lustre-iokit/mds-survey/mds-survey diff --git a/lustre-iokit/ChangeLog b/lustre-iokit/ChangeLog index 67e7768..ea34825 100644 --- a/lustre-iokit/ChangeLog +++ b/lustre-iokit/ChangeLog @@ -30,3 +30,5 @@ 23363 - obdfilter-survey: no echo client and ost_name not set LU-349 - set LC_ALL explicitly in obdfilter-survey LU-843 - iokit: get_ec_devno returns trash +2012-01-13 + - Added mds-survey script diff --git a/lustre-iokit/Makefile.am b/lustre-iokit/Makefile.am index a654ff6..d811f17 100644 --- a/lustre-iokit/Makefile.am +++ b/lustre-iokit/Makefile.am @@ -1,6 +1,7 @@ -SUBDIRS = obdfilter-survey sgpdd-survey ost-survey ior-survey stats-collect +SUBDIRS = obdfilter-survey sgpdd-survey ost-survey ior-survey +SUBDIRS += mds-survey stats-collect -EXTRA_DIST = lustre-iokit.spec +EXTRA_DIST = lustre-iokit.spec rpms rpm: dist rpmbuild -ta $(PACKAGE)-$(VERSION).tar.gz diff --git a/lustre-iokit/configure.ac b/lustre-iokit/configure.ac index 3a4d25f..b25db9b 100644 --- a/lustre-iokit/configure.ac 
+++ b/lustre-iokit/configure.ac @@ -1,6 +1,6 @@ AC_INIT AC_CONFIG_AUX_DIR([.]) -AM_INIT_AUTOMAKE(lustre-iokit,1.3.0) +AM_INIT_AUTOMAKE(lustre-iokit,1.4.0) AC_PATH_PROGS(BASH, bash) AC_PATH_PROGS(PERL, perl) RELEASE="1" @@ -13,5 +13,6 @@ sgpdd-survey/Makefile obdfilter-survey/Makefile ost-survey/Makefile ior-survey/Makefile +mds-survey/Makefile stats-collect/Makefile ) diff --git a/lustre-iokit/lustre-iokit.spec.in b/lustre-iokit/lustre-iokit.spec.in index b534a6a..8f4690d 100644 --- a/lustre-iokit/lustre-iokit.spec.in +++ b/lustre-iokit/lustre-iokit.spec.in @@ -38,6 +38,12 @@ This script will collect IO stats on a defined set of nodes. ior-survey: A script to run the IOR benchmark. The latest version can be downloaded from http://www.llnl.gov/asci/purple/benchmarks/limited/ior/ +mds-survey: +This survey tests the local metadata performance using the echo_client to drive +the MDD layer to perform operations. It is run with multiple threads (to +simulate MDT service threads) locally on the MDS node, and does not need Lustre +clients in order to run + %prep %setup -qn %{name}-%{version} @@ -62,14 +68,18 @@ make install DESTDIR=$RPM_BUILD_ROOT /usr/bin/lstats.sh /usr/bin/gather_stats_everywhere.sh /usr/bin/config.sh +/usr/bin/mds-survey %doc obdfilter-survey/README.obdfilter-survey %doc ior-survey/README.ior-survey %doc ost-survey/README.ost-survey +%doc mds-survey/README.mds-survey %doc sgpdd-survey/README.sgpdd-survey %doc stats-collect/README.lstats.sh %changelog +* Fri Jan 13 2012 Minh Diep +- Added mds-survey * Fri Dec 30 2011 Minh Diep - WhamCloud release for bug fixes * Tue Jul 24 2007 Cliff White diff --git a/lustre-iokit/mds-survey/Makefile.am b/lustre-iokit/mds-survey/Makefile.am new file mode 100644 index 0000000..cf7e949 --- /dev/null +++ b/lustre-iokit/mds-survey/Makefile.am @@ -0,0 +1,3 @@ +bin_SCRIPTS = mds-survey +CLEANFILE = $(bin_SCRIPTS) +EXTRA_DIST = mds-survey README.mds-survey diff --git a/lustre-iokit/mds-survey/README.mds-survey 
b/lustre-iokit/mds-survey/README.mds-survey new file mode 100644 index 0000000..e66e166 --- /dev/null +++ b/lustre-iokit/mds-survey/README.mds-survey @@ -0,0 +1,97 @@ +Overview +-------- + +This survey script generates meta-data load on the MDS with varying +numbers of threads by using the echo_client to drive different layers +of the MDS stack: mdd, mdt, and osd. Currently, only the mdd layer is supported. + +It can be used with the following classes of operations: + +1. Open-create/mkdir/create +2. Lookup/getattr/setxattr +3. Delete/destroy +4. Unlink/rmdir + + These operations will be run by a variable number of concurrent + threads and will test with the number of directories specified by the user. + The run can be executed such that all threads operate in a single + directory (dir_count=1) or in a private/unique directory + (dir_count=x thrlo=x thrhi=x). + + The mdd instance is driven directly. The script automatically + loads the obdecho module if required and creates an instance of + echo_client. + + This script can also create OST objects by providing + stripe_count greater than zero. + +Running +------- + +The script must be customised according to the components under test and +where it should keep its working files. Customization variables are +described as follows: + +thrlo number of threads to start testing with; skipped if less than dir_count +thrhi maximum number of threads to test +targets MDT instance +file_count total number of files to test +dir_count total number of directories to test +stripe_count number of stripes on OST objects +tests_str test operations. Must have at least "create" and "destroy" +start_number base number for each thread to prevent name collisions + +- Create a Lustre configuration using your normal methods + +1. Run without OST objects creation: +Set up the Lustre MDS without an OST mounted. +Then invoke the mds-survey script +e.g. : $ thrhi=64 file_count=200000 sh mds-survey + +2. 
Run with OST objects creation: +Set up the Lustre MDS with at least one OST mounted. +Then invoke the mds-survey script with the stripe_count parameter +e.g. : $ thrhi=64 file_count=200000 stripe_count=2 sh mds-survey + +Note: a specific mdt instance can be specified using the targets variable. +e.g. : $ targets=lustre-MDT0000 thrhi=64 file_count=200000 stripe_count=2 sh mds-survey + +Output files: +------------- + +When the script runs, it creates a number of working files and a pair of +result files. All files start with the prefix given by ${rslt}. + +${rslt}.summary same as stdout +${rslt}.script_* per-host test script files +${rslt}.detail_tmp* per-mdt result files +${rslt}.detail collected result files for post-mortem + +The script iterates over the given numbers of threads performing +all the specified tests and checking that all test processes +completed successfully. + +Note that the script may not clean up properly if it is aborted or if it +encounters an unrecoverable error. In this case, manual cleanup may be +required, possibly including killing any running instances of 'lctl' (local +or remote), removing echo_client instances created by the script and +unloading obdecho. + + +Script output +------------- + +The summary file and stdout contain lines like... + +mdt 1 file 100000 dir 4 thr 4 create 5652.05 [ 999.01,46940.48] destroy 5797.79 [ 0.00,52951.55] + +mdt 1 is the total number of MDTs under test. +file 100000 is the total number of files to operate on +dir 4 is the total number of directories to operate on +thr 4 is the total number of threads operating over all directories +create +destroy are the test names. More tests will be displayed on the same line. +5652.05 is the aggregate operation rate over all MDTs, measured by + dividing the total number of operations by the elapsed time. +[999.01,46940.48] are the minimum and maximum instantaneous operation rates seen on + any individual MDT. 
diff --git a/lustre-iokit/mds-survey/mds-survey b/lustre-iokit/mds-survey/mds-survey new file mode 100755 index 0000000..5888491 --- /dev/null +++ b/lustre-iokit/mds-survey/mds-survey @@ -0,0 +1,362 @@ +#!/bin/bash +# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: + +###################################################################### +# customize per survey + +# Prerequisite: For "stripe_count > 0" you need to have ost setup and mounted. +# +# How to run test: +# case 1 (stripe_count=0 default): +# $ thrhi=8 dir_count=4 sh mds-survey +# one can also run test with user defined targets as follows, +# $ thrhi=8 dir_count=4 file_count=50000 targets="lustre-MDT0000" sh mds-survey +# case 2 (stripe_count > 0, must have ost mounted): +# $ thrhi=8 dir_count=4 file_count=50000 stripe_count=2 +# targets="lustre-MDT0000" sh mds-survey +# [ NOTE: It is advised to have automated login (passwordless entry) on server ] + +# include library +source libecho + +# Customisation variables +##################################################################### +# One can change variable values in this section as per requirements +# The following variables can be set in the environment, or on the +# command line +# result file prefix (date/time + hostname makes unique) +# NB ensure path to it exists +rslt_loc=${rslt_loc:-"/tmp"} +rslt=${rslt:-"$rslt_loc/mds_survey_`date +%F@%R`_`uname -n`"} + +# min and max thread count +thrlo=${thrlo:-4} +thrhi=${thrhi:-32} + +# number of directories to test +dir_count=${dir_count:-$thrlo} +# number of files per thread +file_count=${file_count:-100000} + +targets=${targets:-""} +stripe_count=${stripe_count:-0} +# what tests to run (first must be create, and last must be destroy) +# default=(create lookup md_getattr setxattr destroy) +tests_str=${tests_str:-"create lookup md_getattr setxattr destroy"} + +# start number for each thread +start_number=${start_number:-2} + +# layer to be tested +layer=${layer:-"mdd"} +# Customisation variables ends 
# here.
#####################################################################
# leave the rest of this alone unless you know what you're doing...

# Fixed locale so that the numbers parsed/printed by awk below always use
# '.' as the decimal separator.
export LC_ALL=POSIX
# Name of the first test directory; further ones are ${basedir}1, ${basedir}2...
basedir="tests"
# NOTE(review): not referenced by the functions below; presumably read by the
# sourced libecho helpers - confirm before removing.
case=disk

# Create the test directories on one echo_client device.
#
# Arguments: $1 - host to run lctl on (handed to remote_shell)
#            $2 - echo_client device number
#            $3 - number of directories to create
#            $4 - scratch file receiving the lctl output of the last mkdir
# Globals:   basedir, lctl (read)
# Outputs:   "$basedir" on stdout on success; "ERROR" on stdout (plus the
#            captured lctl output on stderr) as soon as one test_mkdir fails.
#            Callers compare the stdout string, the exit status is not used.
create_directories () {
    local host=$1
    local devno=$2
    local ndir=$3
    local rfile=$4
    local idx
    local dirname

    for ((idx = 0; idx < ndir; idx++)); do
        if (( idx == 0 )); then
            dirname=${basedir}
        else
            dirname=${basedir}${idx}
        fi
        remote_shell $host $lctl --device $devno test_mkdir /$dirname \
            > "$rfile" 2>&1
        # One grep over the whole capture instead of one fork per line; this
        # also catches an error on a final line that lacks a newline.
        if grep -q 'error: test_mkdir' "$rfile"; then
            cat "$rfile" >&2
            echo "ERROR: fail test_mkdir" >&2
            echo "ERROR"
            return
        fi
    done
    echo "$basedir"
}

# Remove the test directories created by create_directories.
#
# Arguments: same as create_directories.
# Failures are not checked: the function is also used to clear left-overs
# before a run, when the directories may legitimately be missing.
# $4 is overwritten with the lctl output of each rmdir in turn.
destroy_directories () {
    local host=$1
    local devno=$2
    local ndir=$3
    local rfile=$4
    local idx
    local dirname

    for ((idx = 0; idx < ndir; idx++)); do
        if (( idx == 0 )); then
            dirname=${basedir}
        else
            dirname=${basedir}${idx}
        fi
        remote_shell $host $lctl --device $devno test_rmdir /$dirname \
            > "$rfile" 2>&1
    done
}

# Summarise the lctl output of one test run on one device.
#
# Arguments: $1 - file holding the captured lctl output
# Outputs:   "<n> <min> <max>" on stdout, where <n> is the number of
#            "<done>/<total> Total: <rate>/second" samples seen after the
#            last "start at" line (-1 on "error at" or on any unexpected
#            line once sampling has begun) and <min>/<max> are the smallest
#            and largest sampled rates.
# Requires gawk (strtonum).
get_stats () {
    local rfile=$1

    gawk < "$rfile" '
        /start at/ { n = 0; next }
        /error at/ { n = -1; exit }
        /end/      { exit }
        /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {
            n++
            v = strtonum($3)
            if (n == 1 || v < min) min = v
            if (n == 1 || v > max) max = v
            next
        }
        { if (n != 0) { n = -1; exit } }
        END { printf "%d %f %f\n", n, min, max }'
}

# Fold the per-device get_stats lines into one global line.
#
# Arguments: $1 - file with one "<n> <min> <max>" line per device
# Outputs:   "<smallest n> <smallest min> <largest max>" on stdout;
#            "0 0.000000 0.000000" when the file is empty.
get_global_stats () {
    local rfile=$1

    awk < "$rfile" '
        BEGIN { n = 0 }
        {
            n++
            if (n == 1) {
                err = $1; min = $2; max = $3
            } else {
                if ($1 < err) err = $1
                if ($2 < min) min = $2
                if ($3 > max) max = $3
            }
        }
        END {
            if (n == 0) err = 0
            printf "%d %f %f\n", err, min, max
        }'
}

# Print a message on stdout and append it to the summary file.
#
# Arguments: [-n] - optional, suppress the trailing newline (as echo -n)
#            rest - message words, joined with single spaces
# Globals:   rsltf (read) - path of the summary file
print_summary () {
    local minusn=""

    if [ "$1" = "-n" ]; then
        minusn=$1; shift
    fi
    echo $minusn "$*" >> "$rsltf"
    echo $minusn "$*"
}

# Split the space-separated $tests_str into the tests[] array
# (word splitting is intended here).
declare -a tests
count=0
for name in $tests_str; do
    tests[$count]=$name
    count=$((count+1))
done

#
hide a little trick to unset this from the command line +if [ "$lustre_root" == " " ]; then + unset lustre_root +fi + +if [ -z "$lustre_root" ]; then + lctl=lctl +else + lctl=${lustre_root}/utils/lctl +fi + +declare -a client_names +declare -a host_names +if [ -z "$targets" ]; then + targets=$($lctl device_list | awk "{if (\$2 == \"UP\" && \ + \$3 == \"mdt\") {print \$4} }") + if [ -z "$targets" ]; then + echo "Can't find any MDT to test. Please set targets=..." + exit 1 + fi +fi + +# check for ost +if (( $stripe_count > 0 )); then + obd=$($lctl device_list | awk "{if (\$2 == \"UP\" && \ + \$3 == \"obdfilter\") {print \$4} }") + if [ -z "$obd" ]; then + echo "Need obdfilter to test stripe_count" + exit 1 + fi +fi + +# split out hostnames from mdt names +ndevs=0 +for trgt in $targets; do + str=(`split_hostname $trgt`) + host_names[$ndevs]=${str[0]} + client_names[$ndevs]=${str[1]} + ndevs=$((ndevs+1)) +done + +# check and insert obdecho module +if ! lsmod | grep obdecho > /dev/null; then + modprobe obdecho +fi +count=${#tests[@]} +if [ $count -eq 0 -o "${tests[0]}" != "create" -o "${tests[(($count - 1))]}" != "destroy" ]; then + echo "tests: ${tests[@]}" + echo "First test must be 'create', and last test must be 'destroy'" 1>&2 + exit 1 +fi + +rsltf="${rslt}.summary" +workf="${rslt}.detail" +cmdsf="${rslt}.script" +vmstatf="${rslt}.vmstat" +echo -n > $rsltf +echo -n > $workf + +# get vmstat started +# disable portals debug and get obdecho loaded on all relevant hosts +unique_hosts=(`unique ${host_names[@]}`) +load_obdechos +pidcount=0 +for host in ${unique_hosts[@]}; do + host_vmstatf=${vmstatf}_${host} + echo -n > $host_vmstatf + remote_shell $host "vmstat 5 >> $host_vmstatf" &> /dev/null & + pid=$! 
+ vmstatpids[$pidcount]=$pid + pidcount=$((pidcount+1)) +done +# get all the echo_client device numbers and names +for ((i=0; i < $ndevs; i++)); do + host=${host_names[$i]} + devno=(`get_ec_devno $host "${client_names[$i]}" "${client_names[$i]}" "mdt" $layer`) + if ((${#devno[@]} != 3)); then + exit 1 + fi + devnos[$i]=${devno[0]} + client_names[$i]=${devno[1]} + do_teardown_ec[$i]=${devno[2]} +done +if (($ndevs <= 0 || ${#host_names[@]} <= 0)); then + echo "no devices or hosts specified" + cleanup 0 +fi +print_summary "$(date) $0 from $(hostname)" +# create directories +tmpf="${workf}_tmp" +for ((idx = 0; idx < $ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + client_name="${host}:${client_names[$idx]}" + echo "=============> Create $dir_count directories on $client_name" >> $workf + destroy_directories $host $devno $dir_count $tmpf + ret=`create_directories $host $devno $dir_count $tmpf` + cat $tmpf >> $workf + rm $tmpf + if [ $ret = "ERROR" ]; then + print_summary "created directories on $client_name failed" + cleanup 1 + fi +done + +snap=1 +for ((thr = $thrlo; thr <= $thrhi; thr*=2)); do + thr_per_dir=$((${thr}/${dir_count})) + # skip if no enough thread + if (( thr_per_dir <= 0 )); then + continue + fi + str=`printf 'mdt %1d file %7d dir %4d thr %4d ' \ + $ndevs $file_count $dir_count $thr` + echo "=======================> $str" >> $workf + print_summary -n "$str" + # run tests + for test in ${tests[@]}; do + declare -a pidarray + for host in ${unique_hosts[@]}; do + echo "starting run for config: $config test: $test file: \ + $file_count threads: $thr directories: $dir_count" >> ${vmstatf}_${host} + done + print_summary -n "$test " + # create per-host script files + for host in ${unique_hosts[@]}; do + echo -n > ${cmdsf}_${host} + done + for ((idx = 0; idx < $ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + tmpfi="${tmpf}_$idx" + [ $test = "create" ] && test="create -c $stripe_count" + echo >> ${cmdsf}_${host} \ + 
"$lctl > $tmpfi 2>&1 \ + --threads $thr -$snap $devno test_$test -d /$basedir -D $dir_count \ + -b $start_number -n $file_count" + done + pidcount=0 + for host in ${unique_hosts[@]}; do + echo "wait" >> ${cmdsf}_${host} + pidarray[$pidcount]=0 + pidcount=$((pidcount+1)) + done + # timed run of all the per-host script files + t0=`date +%s.%N` + pidcount=0 + for host in ${unique_hosts[@]}; do + remote_shell $host bash < ${cmdsf}_${host} & + pidarray[$pidcount]=$! + pidcount=$((pidcount+1)) + done + pidcount=0 + for host in ${unique_hosts[@]}; do + wait ${pidarray[$pidcount]} + pidcount=$((pidcount+1)) + done + #wait + t1=`date +%s.%N` + # clean up per-host script files + for host in ${unique_hosts[@]}; do + rm ${cmdsf}_${host} + done + + # compute bandwidth from total data / elapsed time + str=`awk "BEGIN {printf \"%7.2f \", \ + ( $file_count * $thr_per_dir ) / ( $t1 - $t0 )}"` + print_summary -n "$str" + # collect/check individual MDT stats + echo -n > $tmpf + for ((idx = 0; idx < $ndevs; idx++)); do + client_name="${host_names[$idx]}:${client_names[$idx]}" + tmpfi="${tmpf}_$idx" + echo "=============> $test $client_name" >> $workf + host="${host_names[$idx]}" + remote_shell $host cat $tmpfi > ${tmpfi}_local + cat ${tmpfi}_local >> $workf + get_stats ${tmpfi}_local >> $tmpf + rm -f $tmpfi ${tmpfi}_local + done + # compute/display global min/max stats + echo "=============> $test global" >> $workf + cat $tmpf >> $workf + stats=(`get_global_stats $tmpf`) + rm $tmpf + if ((stats[0] <= 0)); then + if ((stats[0] < 0)); then + str=`printf "%17s " ERROR` + else + str=`printf "%17s " SHORT` + fi + else + str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \", \ + ${stats[1]}, ${stats[2]}; exit}"` + fi + print_summary -n "$str" + done + print_summary "" +done +# destroy directories +tmpf="${workf}_tmp" +for ((idx = 0; idx < $ndevs; idx++)); do + host=${host_names[$idx]} + devno=${devnos[$idx]} + client_name="${host}:${client_names[$idx]}" + echo "=============> Destroy $dir_count 
directories on $client_name" >> $workf + destroy_directories $host $devno $dir_count $tmpf +done + +cleanup 0 +exit 0 diff --git a/lustre-iokit/obdfilter-survey/libecho b/lustre-iokit/obdfilter-survey/libecho index 23aa2b8..8679157 100644 --- a/lustre-iokit/obdfilter-survey/libecho +++ b/lustre-iokit/obdfilter-survey/libecho @@ -259,6 +259,9 @@ get_ec_devno () { local host=$1 local client_name="$2" local ost_name="$3" + local dev_type="${4:-obdfilter}" + local stack_type="${5:-}" + if [ -z "$client_name" ]; then if [ -z "$ost_name" ]; then echo "client and ost name both null" @@ -276,7 +279,7 @@ get_ec_devno () { "$client_name, host: $host" exit 1 fi - ost=`get_devno $host obdfilter $ost_name` + ost=`get_devno $host $dev_type $ost_name` if [ -z "$ost" ]; then echo "OST $ost_name not setup" exit 1 @@ -284,7 +287,7 @@ get_ec_devno () { client_name=${ost_name}_ecc remote_shell $host "$lctl <