--- /dev/null
+Overview
+--------
+These scripts are used to collect profiling info from Lustre clients and
+servers. They are run from a single (control) node, gather the profiling
+info from every node, and create a tarball on the control node.
+
+lstat.sh : the per-node stats script, which is run on each profiled node.
+gather_stats_everywhere.sh : the stats collection script.
+config.sh : the configuration file for gather_stats_everywhere.sh.
+
+Requirements
+------------
+1) Lustre is installed and set up on your profiling cluster.
+2) ssh/scp to all of the profiled nodes works without requiring a password.
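+
+   For example, passwordless access is commonly set up by generating a key on
+   the control node and copying it to each profiled node (placeholder names
+   are used here; adapt them to your own ssh setup):
+
+      ssh-keygen -t rsa
+      ssh-copy-id <user>@<node>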
+
+Configuration
+-------------
+Configuration is simple: all of the profiling configuration variables are
+in config.sh.
+
+XXXX_INTERVAL: the profiling interval, where the value means:
+ 0 - gather stats at start and stop only
+ N - gather stats every N seconds
+If an XXXX_INTERVAL is not specified, the related stats are not collected.
+XXXX can be one of: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD, CLIENT
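+
+For example, a config.sh that gathers vmstat only at start and stop and
+client stats every 5 seconds would contain lines like these (the interval
+values are only illustrative):
+
+   VMSTAT_INTERVAL=0
+   CLIENT_INTERVAL=5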
+
+For ior-collect-stat.sh, the various IOR and MPI parameters can be modified
+inside the script itself.
+
+Running
+--------
+1) gather_stats_everywhere.sh can be run in three modes:
+
+ a)sh gather_stats_everywhere.sh config.sh start
+   It starts collecting stats on each node listed in config.sh.
+
+ b)sh gather_stats_everywhere.sh config.sh stop <log_name>
+   It stops collecting stats on each node. If <log_name> is provided,
+   it creates a profile tarball /tmp/<log_name>.tar.gz.
+
+ c)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv
+   It analyses the log tarball and creates a csv tarball from the
+   profiling data.
+
+2) ior-collect-stat.sh is run as
+   sh ior-collect-stat.sh start <profile>
+   It creates an IOR result csv file. If <profile> is provided, a detailed
+   profiling info tarball is also created under /tmp.
+
+Example
+-------
+When you want to collect profiling info, you should:
+ 1)sh gather_stats_everywhere.sh config.sh start
+   #start the stats collection daemon on each node.
+
+ 2)run your test.
+
+ 3)sh gather_stats_everywhere.sh config.sh stop log_tarball
+   #stop the stats collection daemon on each node, clean up the
+   temporary files and create a profiling tarball.
+
+ 4)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv
+   #create a csv file from the profiling tarball.
+
+TBD
+------
+Add liblustre profiling support and more options for the analyse mode.
+
}
#
+# client_stats collector
+#
+# CLIENT_INTERVAL:
+# - 0 - collect at start and stop only
+# - N - collect each N seconds
+#
+function client_collector()
+{
+ local file=$1
+ local target=$2
+ local srv=$3
+
+ echo "client stats for ${target}/${srv} " `date`
+
+ # clear old stats
+ echo 0 >$file
+
+ if let "CLIENT_INTERVAL==0"; then
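+		# dump the stats once at start and once at stop; the grep
+		# filters out counters that have 0 samples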
+ grep -v "^[^ ]*[^0-9]*0 samples" $file
+ idle_collector
+ grep -v "^[^ ]*[^0-9]*0 samples" $file
+ elif let "CLIENT_INTERVAL>0"; then
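+		# sample every CLIENT_INTERVAL seconds until the stop flag is set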
+ while [ "$stop_collector" != "1" ]; do
+ grep -v "^[^ ]*[^0-9]*0 samples" $file
+ sleep $CLIENT_INTERVAL
+ done
+ else
+ echo "Invalid CLIENT_INTERVAL=$CLIENT_INTERVAL"
+ idle_collector
+ fi
+}
+
+function client_start()
+{
+ if [ "$CLIENT_INTERVAL" == "" ]; then
+ return;
+ fi
+
+ # find all osc
+ for i in /proc/fs/lustre/osc/* ; do
+ target=`basename $i`
+ if [ "$target" == "num_refs" ]; then
+ continue;
+ fi
+ for j in ${i}/*; do
+ stats=`basename $j`
+ if [ "$stats" == "stats" -o "$stats" == "rpc_stats" ]; then
+ run_collector "osc-${stats}" client_collector \
+ ${j} $target $stats &
+ fi
+ done
+ done
+ # find all llite stats
+ for i in /proc/fs/lustre/llite/* ; do
+ target=`basename $i`
+ for j in ${i}/*; do
+ stats=`basename $j`
+ if [ "$stats" == "stats" -o "$stats" == "vfs_ops_stats" ]; then
+ run_collector "llite-${stats}" client_collector \
+ ${j} $target ${stats} &
+ fi
+ done
+ done
+}
+
+#
# sdio_stats collector
#
# SDIO_INVERVAL:
mballoc_start
io_start
jbd_start
+ client_start
}
#