Whamcloud - gitweb
current branches now use lnet from HEAD
[fs/lustre-release.git] / lustre / scripts / collect-stats.sh
1 #!/bin/bash
2
# Print an error message on stderr and terminate the script with status 1.
# Arguments: the message words; they are joined with single spaces.
die() {
        # "$*" is quoted so the message is printed verbatim: no collapsing of
        # whitespace and no pathname expansion of characters such as '*'.
        printf '%s\n' "$*" 1>&2
        exit 1
}
# Path of the lock directory to remove on exit.  While it is empty the
# cleanup handler does nothing.
cleanup_lock=""

# EXIT handler: remove the lock directory named by cleanup_lock, if any.
cleanup() {
        if [ -n "$cleanup_lock" ]; then
                # Quoted so a path containing whitespace is removed as one
                # directory.
                rmdir -- "$cleanup_lock"
        fi
}
trap cleanup EXIT
12
# Write the help text to stdout, one printf argument per output line, then
# leave the script.
usage() {
        printf '%s\n' \
                "  -d dir  (required)" \
                "          Specifies the top level directory that all hosts share" \
                "          and collects stats under.  Each host will use a " \
                "          subdirectory named after its hostname." \
                "" \
                "          If the host directory doesn't exist, stats collection" \
                "          begins by clearing accumulators in /proc and launching" \
                "          background tasks." \
                "" \
                "          If the host directory exists, the script stops " \
                "          background processes and collects the results.  A host" \
                "          directory can not be reused once it has collected" \
                "          stats." \
                "  -h" \
                "          Shows this help message." \
                "" \
                "Example:" \
                " [on all nodes] $0 -d /tmp/collection" \
                " (time passes while a load is run)" \
                " [on all nodes] $0 -d /tmp/collection" \
                " tree /tmp/collection" \
                ""
        exit
}
38
# With no arguments at all, just show the help text.
[ $# -eq 0 ] && usage

# Leading ':' selects getopts' silent mode: a missing option argument is
# reported as ':' and an unknown option as '?', both with the option letter
# in OPTARG.
while getopts ":d:h" opt; do
        case "$opt" in
                d) topdir=$OPTARG ;;
                h) usage ;;
                :) die "option -$OPTARG requires an argument" ;;
                \?) usage ;;
        esac
done

# -d is required.  With an empty $topdir the existence checks below would
# not test anything useful and later paths would be built from "".
[ -n "$topdir" ] || die "no collection directory given, use -d dir"

if [ ! -e "$topdir" ]; then
        mkdir -p -- "$topdir" || die "couldn't create dir $topdir"
fi

[ -d "$topdir" ] || die "$topdir isn't a directory"
53
# Per-host results directory and the lock directory that guards it.
mydir="$topdir/$(hostname)"
lock="$topdir/.$(hostname)-lock"

# mkdir fails when the directory already exists, so a failure here means
# another instance holds the lock.  Exit before cleanup_lock is set so the
# EXIT handler does not remove a lock this process never created.
mkdir -- "$lock" || die "another script is working on $mydir, exiting."
cleanup_lock="$lock"
59
# Write "0" into every regular file matched by a glob pattern.
# Arguments: $1 - glob pattern; pass it quoted so it is expanded here.
clear_files() {
        local f
        # $1 is deliberately unquoted: the pattern has to be expanded into
        # the list of matching paths.  An unmatched pattern stays literal and
        # is skipped by the -f test.
        for f in $1; do
                [ -f "$f" ] || continue
                echo 0 > "$f"
        done
}
66
# For every directory matched by a glob pattern, write one log file named
# after the directory's basename into $mydir, containing the listed files
# each preceded by a separator line.
# Globals:   mydir (read)
# Arguments: $1   - glob pattern naming directories; pass it quoted so it is
#                   expanded here
#            $2.. - file names relative to each matched directory; names that
#                   are not regular files there are skipped
dump_files() {
        local dirglob=$1
        shift
        local d f log
        # $dirglob is deliberately unquoted so the pattern is expanded.
        for d in $dirglob; do
                [ -d "$d" ] || continue
                log="$mydir/$(basename -- "$d")"
                # Create or truncate the log even if no file ends up in it.
                : > "$log"
                for f in "$@"; do
                        [ -f "$d/$f" ] || continue
                        echo "----------------- $f" >> "$log"
                        cat -- "$d/$f" >> "$log"
                done
        done
}
81
# Find the obdfilter directories.  Afterwards:
#   num_filter_dirs - number of directories found
#   filter_dirs     - empty if none were found, the directory's path if
#                     exactly one was found, otherwise a glob pattern
#                     matching all of them
num_filter_dirs=0
filter_dirs=""
for f in /proc/fs/lustre/obdfilter/*; do
        [ -d "$f" ] || continue
        num_filter_dirs=$((num_filter_dirs + 1))
        filter_dirs=$f
done
# A brace list such as {a,b} stored in a variable is never expanded, because
# brace expansion happens before parameter expansion.  Pathname expansion
# happens after it, so a '*' pattern does work when the variable is later
# expanded; the helpers that receive it skip anything that is not a
# directory or regular file.
if [ "$num_filter_dirs" -gt 1 ]; then
        filter_dirs='/proc/fs/lustre/obdfilter/*'
fi
96
# Copy files from /proc into $mydir.  The saved name is the given path with
# every '/' replaced by '_'.
# Globals:   mydir (read)
# Arguments: paths relative to /proc; entries that are not regular files are
#            skipped
save_proc_files() {
        local f
        # Absolute source paths are used instead of changing into /proc, so
        # a relative $mydir still resolves against the caller's directory and
        # nothing is printed on stdout.
        for f in "$@"; do
                [ -f "/proc/$f" ] || continue
                cat "/proc/$f" > "$mydir/${f//\//_}"
        done
}
106
# Start a command in the background with $mydir as its working directory.
# Its stdout and stderr go to $mydir/<command>.log and its pid is appended
# to $mydir/pids.  Does nothing beyond creating the pids file when the
# command is not found on PATH.
# Globals:   mydir (read), PID (written)
# Arguments: $1 - command name; remaining arguments are passed to it
launch() {
        touch "$mydir/pids"

        # type -P only reports executables found on PATH, never functions
        # or builtins, which is what the exec below needs.
        if ! type -P "$1" > /dev/null 2>&1; then
                return 0
        fi

        # The subshell leaves the caller's working directory untouched, and
        # exec makes the recorded pid belong to the command itself.
        # Starting a background job always reports success, so there is no
        # launch status to test here.
        ( cd "$mydir" && exec "$@" ) > "$mydir/$1.log" 2>&1 &
        PID=$!
        echo "$PID" >> "$mydir/pids"
        echo "launched '$*' as pid $PID"
}
126
127
# Begin a collection run: create $mydir, zero the Lustre statistics files,
# start the background samplers and record the start time in $mydir/started.
# Globals: mydir (read), filter_dirs (read)
start_collection() {
        echo "starting collection in $mydir"
        mkdir -- "$mydir" || die "couldn't create dir $mydir"

        echo clearing files in /proc/fs/lustre
        # The patterns are passed quoted; clear_files expands them itself.
        clear_files '/proc/fs/lustre/osc/*MNT*/rpc_stats'
        clear_files '/proc/fs/lustre/llite/*/read_ahead_stats'
        if [ -n "$filter_dirs" ]; then
                clear_files "$filter_dirs/brw_stats"
        fi

        launch vmstat 2
        launch iostat -x 2

        date > "$mydir/started"
}
143
144
# Finish a collection run: dump the Lustre statistics into $mydir, kill the
# background processes listed in $mydir/pids, save some /proc files and
# lspci output, and record the finish time in $mydir/finished.
# Exits via die if $mydir was already finished or was never started.
# Globals: mydir (read), filter_dirs (read)
stop_collection() {
        local pids="$mydir/pids"
        local pid

        [ -e "$mydir/finished" ] && die "$mydir already contains collected files"
        [ -e "$mydir/started" ] || die "$mydir hasn't started collection?"

        echo "collecting files for $mydir"
        dump_files '/proc/fs/lustre/osc/*MNT*' max_dirty_mb max_pages_per_rpc \
                        max_rpcs_in_flight cur_grant_bytes rpc_stats
        dump_files '/proc/fs/lustre/llite/*' read_ahead max_read_ahead_mb \
                read_ahead_stats
        # Quoted so the whole value arrives as dump_files' first argument
        # instead of being expanded and split at the call site.
        if [ -n "$filter_dirs" ]; then
                dump_files "$filter_dirs" \
                                readcache_max_filesize tot_granted \
                                brw_stats
        fi

        # The pids file holds one pid per line; tolerate it being absent.
        if [ -f "$pids" ]; then
                while read -r pid; do
                        [ -n "$pid" ] || continue
                        echo "killing pid $pid"
                        kill "$pid"
                done < "$pids"
                rm -f -- "$pids"
        fi

        save_proc_files cpuinfo meminfo slabinfo

        if command -v lspci > /dev/null 2>&1; then
                lspci > "$mydir/lspci" 2>&1
        fi

        date > "$mydir/finished"
        echo DONE
}
175
# The host directory doubles as the state flag: if it already exists this
# invocation finishes the run, otherwise it starts one.
if [ -e "$mydir" ]; then
        stop_collection
else
        start_collection
fi