From 2e56e28c5dadfed888e859ebbaff09e504351132 Mon Sep 17 00:00:00 2001 From: wangdi Date: Fri, 20 Jul 2007 17:49:42 +0000 Subject: [PATCH] Branch: HEAD 1) add client stats collection in lstat. 2) add README --- lustre-iokit/stats-collect/README | 75 ++++++++++++++++++++++++++++++++++++ lustre-iokit/stats-collect/lstats.sh | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 lustre-iokit/stats-collect/README diff --git a/lustre-iokit/stats-collect/README b/lustre-iokit/stats-collect/README new file mode 100644 index 0000000..28e4d12 --- /dev/null +++ b/lustre-iokit/stats-collect/README @@ -0,0 +1,75 @@ +Overview +-------- +These scripts are used to collect profile info from Lustre clients and servers. +They are run from a single (control) node, collect all the profile info, and +create a tarball on the control node. + +lstats.sh : The stats script for a single node, which will be run on each profiled + node. +gather_stats_everywhere.sh : the stats collection script. +config.sh : the config for gather_stats_everywhere.sh. + +Requirements +------- +1) Lustre is installed and set up on your profiling cluster. +2) ssh/scp to these node names works without requiring a password. 
+ +Configuration +------ +Configuration is very simple for these scripts; all of the profiling config VARs are +in config.sh + +XXXX_INTERVAL: the profiling interval +where value of interval means: + 0 - gather stats at start and stop only + N - gather stats every N seconds +If some XXXX_INTERVAL isn't specified, the related stats won't be collected. +XXXX can be: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD, CLIENT + +As for ior-collect-stat.sh, you can modify the various IOR and MPI +parameters inside ior-collect-stat.sh + +Running +-------- +1) The gather_stats_everywhere.sh can be run in three modes + + a)sh gather_stats_everywhere.sh config.sh start + It will start collecting stats on each node provided in config.sh + + b)sh gather_stats_everywhere.sh config.sh stop [log_tarball] + It will stop collecting stats on each node. If log_tarball is provided, + it will create a profile tarball /tmp/log_tarball.tar.gz. + + c)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv + It will analyse the log_tarball and create a csv tarball for this + profiling tarball. + +2) The ior-collect-stat.sh will be run as + sh ior-collect-stat.sh start + It will create an IOR result csv file. If a log name is provided, + the detailed profile info tarball will be created under /tmp. + +Example +------- +When you want to collect your profile info, you should + 1)sh gather_stats_everywhere.sh config.sh start + #start the collect profile daemon on each node. + + 2)run your test. + + 3)sh gather_stats_everywhere.sh config.sh stop log_tarball + #stop the collect profile daemon on each node, clean up + the tmp file and create a profiling tarball. + + 4)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv + #create a csv file according to the profile. + +TBD +------ +Add liblustre profiling support and add more options for analyse. 
+
+
+
+
+
+
diff --git a/lustre-iokit/stats-collect/lstats.sh b/lustre-iokit/stats-collect/lstats.sh
index b0a04bd..217144d 100755
--- a/lustre-iokit/stats-collect/lstats.sh
+++ b/lustre-iokit/stats-collect/lstats.sh
@@ -272,6 +272,97 @@ function service_start()
 }
 
 #
+# client_stats collector
+#
+# CLIENT_INTERVAL:
+# - 0 - collect at start and stop only
+# - N - collect each N seconds
+#
+# Arguments:
+# - $1 - stats file to snapshot (reset first by writing 0 to it)
+# - $2 - target name, printed in the banner line only
+# - $3 - stats kind, printed in the banner line only
+#
+# Every snapshot drops the lines that report "0 samples".
+#
+function client_collector()
+{
+	local file=$1
+	local target=$2
+	local srv=$3
+
+	# date output is left unquoted so the banner text is unchanged
+	echo "client stats for ${target}/${srv} " $(date)
+
+	# clear old stats
+	echo 0 >"$file"
+
+	if let "CLIENT_INTERVAL==0"; then
+		grep -v "^[^ ]*[^0-9]*0 samples" "$file"
+		# presumably idles until the collector is stopped - confirm
+		idle_collector
+		grep -v "^[^ ]*[^0-9]*0 samples" "$file"
+	elif let "CLIENT_INTERVAL>0"; then
+		while [ "$stop_collector" != "1" ]; do
+			grep -v "^[^ ]*[^0-9]*0 samples" "$file"
+			sleep $CLIENT_INTERVAL
+		done
+	else
+		echo "Invalid CLIENT_INTERVAL=$CLIENT_INTERVAL"
+		idle_collector
+	fi
+}
+
+#
+# client_start: launch a background client_collector (through
+# run_collector) for every osc stats/rpc_stats file and every llite
+# stats/vfs_ops_stats file under /proc/fs/lustre.
+# Nothing is started when CLIENT_INTERVAL is unset or empty.
+#
+function client_start()
+{
+	# loop state stays local so it cannot clobber same-named globals
+	local i j target stats
+
+	if [ "$CLIENT_INTERVAL" == "" ]; then
+		return;
+	fi
+
+	# find all osc
+	for i in /proc/fs/lustre/osc/* ; do
+		# an unmatched glob is left as a literal pattern: skip it
+		[ -e "$i" ] || continue
+		target=$(basename "$i")
+		if [ "$target" == "num_refs" ]; then
+			continue;
+		fi
+		for j in "$i"/*; do
+			stats=$(basename "$j")
+			case "$stats" in
+			stats|rpc_stats)
+				run_collector "osc-${stats}" client_collector \
+					"$j" "$target" "$stats" &
+				;;
+			esac
+		done
+	done
+	# find all llite stats
+	for i in /proc/fs/lustre/llite/* ; do
+		[ -e "$i" ] || continue
+		target=$(basename "$i")
+		for j in "$i"/*; do
+			stats=$(basename "$j")
+			case "$stats" in
+			stats|vfs_ops_stats)
+				run_collector "llite-${stats}" client_collector \
+					"$j" "$target" "$stats" &
+				;;
+			esac
+		done
+	done
+}
+
+#
 # sdio_stats collector
 #
 # SDIO_INVERVAL:
@@ -529,6 +620,7 @@ function ls_start()
 	mballoc_start
 	io_start
 	jbd_start
+	client_start
 }
 
 #
-- 
1.8.3.1