-bin_SCRIPTS = ior-survey parse-ior
+bin_SCRIPTS = ior-survey iokit-parse-ior
CLEANFILE = $(bin_SCRIPTS)
-EXTRA_DIST = README.ior-survey ior-survey parse-ior
+EXTRA_DIST = README.ior-survey ior-survey iokit-parse-ior
2: type visudo
3: make an entry as
- username ALL=(ALL) NOPASSWD: ALL //(username is the name of the user)
+ username ALL=(ALL) NOPASSWD: ALL //(username is the name of the user)
Building IOR :
4: run the ior_survey script using "./ior_survey"
Note:
- The node names of the clients should be like rhea1, rhea2, rhea3, so on.
+ The node names of the clients should be like rhea1, rhea2, rhea3, so on.
The name of the cluster (1st part of the node name) should be set in the
ior_survey script in the cluster name field.
e.g. cluster=rhea //name of the cluster
- The client node numbers should be set as last part of the node name i.e.
+ The client node numbers should be set as last part of the node name i.e.
numeral part.
e.g. client=(1) //to run test on one node only node1.
client=(1-2) //to run test on two nodes node1, node2.
Instructions for graphing IOR results
- The plot-ior.pl script will plot the results from the .detail file
+ The plot-ior.pl script will plot the results from the .detail file
generated by ior-survery. It will create a data file for writes as
/tmp/ior_survey_run_date@start_time_nodename.detail.dat1 and for reads
as /tmp/ior_survey_run_date@start_time_nodename.detail.dat2 and gnuplot
file as /tmp/ior_survey_run_date@start_time_nodename.detail.scr.
- $ perl parse-ior.pl /tmp/ior_survey_run_date@start_time_nodename.detail
+ $ perl iokit-parse-ior /tmp/ior_survey_run_date@start_time_nodename.detail
#!/usr/bin/perl -w
-# arg 0 is filename
+# arg 0 is filename
sub usages_msg(){
- print "Usage: $0 <results_filename>\n";
- print " parses and plots IOR results using gnuplot, and generates a .dat file for\n";
- print " simple graphing in spreadhseets\n";
- print "e.g.> perl parse-ior.pl ior-log\n";
- exit 1;
+	print "Usage: $0 <results_filename>\n";
+	print "       parses and plots IOR results using gnuplot, and generates a .dat file for\n";
+	print "       simple graphing in spreadsheets\n";
+	print "e.g.> perl iokit-parse-ior ior-log\n";
+ exit 1;
}
-
+
if ( !$ARGV[0] ) {
- usages_msg();
+	usages_msg();
}
$file = $ARGV[0];
@line = split( /\s+/ ); # splits line into tokens
if ( $line[0] ) {
# This comparison will be changed if there will be changes log file.
- if( $line[0] eq "access" && $line[1] eq "bw(MiB/s)" ) {
+ if ( $line[0] eq "access" && $line[1] eq "bw(MiB/s)" ) {
print DATAFILE "$count $line[1] $line[4] $line[5] $line[6] br(MiB/s) ropen(s) rd(s) rclose(s)\n";
- $count = $count + 1;
- }
- # Two columns from output file are skiped since
+ $count = $count + 1;
+ }
+	# Two columns from output file are skipped since
# they are constant and may not be so useful while graphing results.
- if( $line[0] eq "write" ) {
+ if ( $line[0] eq "write" ) {
print DATAFILE "$count $line[1] $line[4] $line[5] $line[6] ";
- }
- if( $line[0] eq "read" ) {
+ }
+ if ( $line[0] eq "read" ) {
print DATAFILE "$line[1] $line[4] $line[5] $line[6]\n";
$count = $count + 1;
- }
+ }
}
}
close PFILE;
# [ NOTE: It is advised to have automated login (passwordless entry) on server ]
# include library
-source $(dirname $0)/libecho
+source $(dirname $0)/iokit-libecho
# Customisation variables
#####################################################################
-bin_SCRIPTS = obdfilter-survey libecho plot-obdfilter
+bin_SCRIPTS = obdfilter-survey iokit-libecho iokit-plot-obdfilter
CLEANFILE = $(bin_SCRIPTS)
-EXTRA_DIST = README.obdfilter-survey obdfilter-survey libecho plot-obdfilter
+EXTRA_DIST = README.obdfilter-survey obdfilter-survey iokit-libecho
+EXTRA_DIST += iokit-plot-obdfilter
You need to tell the script all the names of the obdfilter instances.
These should be up and running already . If some are on different
- nodes, you need to specify their hostnames too (e.g. node1:ost1).
+ nodes, you need to specify their hostnames too (e.g. node1:ost1).
--OR--
- You just need to pass parameter case=disk to the script. The script will
+ You just need to pass parameter case=disk to the script. The script will
automatically detect the local obdfilter instances.
All the obdfilter instances are driven directly. The script
Here the script drives one or more instances of obdecho server via instances
of echo_client running on 1 or more nodes.
- You just need to pass parameters case=network and
- targets="<hostname/ip_of_server>" to the script. The script will do the
- required setup for network case.
-
+ You just need to pass parameters case=network and
+ targets="<hostname/ip_of_server>" to the script. The script will do the
+ required setup for network case.
+
3. The Stripe F/S over the Network.
Here the script drives one or more instances of obdfilter via instances
Note that the script is _NOT_ scalable to 100s of nodes since it is only
intended to measure individual servers, not the scalability of the system
as a whole.
-
+
Running
-------
--OR--
2. Manual run:
-- You do not need to specify and MDS or LOV
+- You do not need to specify an MDS or LOV
- List all OSTs that you wish to test
- On all OSS machines:
Remember, write tests are destructive! This test should be run prior to
startup of your actual Lustre filesystem. If that is the case, you will not
need to reformat to restart Lustre - however, if the test is terminated before
-completion, you may have to remove objects from the disk.
+completion, you may have to remove objects from the disk.
- Determine the obdfilter instance names on all the clients, column 4
of 'lctl dl'. For example:
Here the obdfilter instance names are oss01-sdb, oss01-sdd, oss02-sdi.
Since you are driving obdfilter instances directly, set the shell array
-variable 'targets' to the names of the obdfilter instances.
+variable 'targets' to the names of the obdfilter instances.
Example:
is to be done.
- Install all lustre modules including obdecho.
- Start lctl and check for the device list. The device list must be empty.
-- It is suggested that there should be passwordless enrty between client
- and server machine to avoid typing password.
+- It is suggested that there should be passwordless entry between client
+ and server machine to avoid typing password.
1. Automated run:
- To run obdfilter-surevy against network you just need to pass parameter
+	To run obdfilter-survey against network you just need to pass parameter
case=netdisk and targets="<hostname/ip_of_server>" to the script.
-
+
e.g. $ nobjhi=2 thrhi=2 size=1024 targets="<hostname/ip_of_server>" \
- case=network sh obdfilter-survey
+ case=network sh obdfilter-survey
-On server side you can see the stats at :
- /proc/fs/lustre/obdecho/<echo_srv>/stats
+On server side you can see the stats at:
+ /proc/fs/lustre/obdecho/<echo_srv>/stats
where, 'echo_srv' is the obdecho server created through script.
NOTE: In network test only automated run is supported.
While running manually you need to tell the script all the names of the
echo_client instances, which should already be up and running.
e.g. $ nobjhi=2 thrhi=2 size=1024 targets="<osc_name> ..." \
- sh obdfilter-survey
+ sh obdfilter-survey
Output files:
The summary file and stdout contain lines like...
-ost 8 sz 67108864K rsz 1024 obj 8 thr 8 write 613.54 [ 64.00, 82.00]
+ost 8 sz 67108864K rsz 1024 obj 8 thr 8 write 613.54 [ 64.00, 82.00]
ost 8 is the total number of OSTs under test.
sz 67108864K is the total amount of data read or written (in KB).
obj 8 is the total number of objects over all OSTs
thr 8 is the total number of threads over all OSTs and objects
write is the test name. If more tests have been specified they
- all appear on the same line.
+ all appear on the same line.
613.54 is the aggregate bandwidth over all OSTs measured by
dividing the total number of MB by the elapsed time.
[64.00, 82.00] are the minimum and maximum instantaneous bandwidths seen on
- any individual OST.
+ any individual OST.
Note that although the numbers of threads and objects are specifed per-OST
in the customization section of the script, results are reported aggregated
-------------------
I've found it most useful to import the summary data (it's fixed width)
-into gnuplot, Excel (or any graphing package) and graph bandwidth v.
+into gnuplot, Excel (or any graphing package) and graph bandwidth v.
# threads for varying numbers of concurrent regions. This shows how
the OSS performs for a given number of concurrently accessed objects
(i.e. files) with varying numbers of I/Os in flight.
test. These numbers help find pathologies in file the file system block
allocator and the block device elevator.
-The included plot-obdfilter script is an example of processing the output
-files to a .csv format and plotting graph using gnuplot.
+The included iokit-plot-obdfilter script is an example of processing the
+output files to a .csv format and plotting graph using gnuplot.
#!/bin/bash
+# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
+# vim:shiftwidth=4:softtabstop=4:tabstop=4:
+#
# GPL HEADER START
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
case $DSH in
ssh)
- if [ -n "$user" ]; then
- user="$user@"
- fi
- $DSH $user$node "$command"
- ;;
+ if [ -n "$user" ]; then
+ user="$user@"
+ fi
+ $DSH $user$node "$command"
+ ;;
rsh)
- if [ -n "$user" ]; then
- user="-l $user"
- fi
- $DSH $user $node "$command"
- ;;
+ if [ -n "$user" ]; then
+ user="-l $user"
+ fi
+ $DSH $user $node "$command"
+ ;;
esac
}
shift
cmds="$@"
if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then
- eval "$cmds"
+ eval "$cmds"
else
- # split $host into $host and $user
- local user=""
- if [[ $host == *@* ]]; then
- user=${host%@*}
- host=${host#*@}
+ # split $host into $host and $user
+ local user=""
+ if [[ $host == *@* ]]; then
+ user=${host%@*}
+ host=${host#*@}
+ fi
+ dsh $host "$user" "$cmds"
fi
- dsh $host "$user" "$cmds"
- fi
}
# checks whether obdecho module is loded on given host.
load_obdecho () {
local index=$1
local host=${unique_hosts[$index]}
+
do_unload_echo[$index]=0
if obdecho_loaded $host; then
return 0
}
load_obdechos () {
- for ((i = 0; i < ${#unique_hosts[@]}; i++)); do
- load_obdecho $i || cleanup 1
- done
+ for ((i = 0; i < ${#unique_hosts[@]}; i++)); do
+ load_obdecho $i || cleanup 1
+ done
}
# unload obdecho module from host kernel.
local host=$1
local type=$2
local name=$3
- remote_shell $host $lctl device_list | \
- awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\
+
+ remote_shell $host $lctl device_list |
+ awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\
print \$1; exit}}"
}
dev=$(get_devno $host obdfilter $ost)
dev_list[$i]=$dev
if [ -z "$dev" ]; then
- echo Cant find device for $ost on $host
+ echo "Cannot find device for $ost on $host"
return 1
fi
done
# do cleanup for netdisk case.
cleanup_netdisk () {
- for osc in $@; do
- $lctl <<-EOF
- cfg_device $osc
- cleanup
- detach
- EOF
- done
+ for osc in $@; do
+ $lctl <<-EOF
+ cfg_device $osc
+ cleanup
+ detach
+ EOF
+ done
}
# do cleanup for network case.
cleanup_network () {
- local clean_srv_OSS=$1
+ local clean_srv_OSS=$1
+
$lctl <<-EOF
cfg_device echotmp
cleanup
detach
EOF
- remote_shell "root@$server_nid" \
+ remote_shell "root@$server_nid" \
"$lctl <<-EOF
cfg_device echo_srv
cleanup
detach
EOF"
- if [ $clean_srv_OSS ]; then
- remote_shell "root@$server_nid" \
- "$lctl <<-EOF
- cfg_device OSS
- cleanup
- detach
- EOF"
- fi
+ if [ $clean_srv_OSS ]; then
+ remote_shell "root@$server_nid" \
+ "$lctl <<-EOF
+ cfg_device OSS
+ cleanup
+ detach
+ EOF"
+ fi
}
# do cleanup and exit.
cleanup () {
- local exit_status=$1
- local host
- case=${case:-"disk"}
- shift
- for ((i = 0; i < $ndevs; i++)); do
- host=${host_names[$i]}
- if [[ -n "${do_teardown_ec[$i]}" ]]; then
- teardown_ec_devno $host ${client_names[$i]}
- fi
- done
- pidcount=0
- for ((i = 0; i < ${#unique_hosts[@]}; i++)); do
- host=${unique_hosts[$i]}
- remote_shell $host "killall -q vmstat >/dev/null 2>&1" &
- pid=$!
- kill -term ${vmstatpids[$pidcount]} 2>/dev/null
- kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
- wait $pid
- pidcount=$((pidcount+1))
- if ((${do_unload_echo[$i]})); then
- unload_obdecho $i
- fi
- done
- if [ $case == "network" ]; then
- cleanup_network $1
- fi
- if [ $case == "netdisk" ]; then
- shift
- cleanup_netdisk $@
- fi
- if [ $exit_status ]; then
- if [ $exit_status -ne 0 ]; then
- echo "program exited with error "
- else
- echo "done!"
- fi
- else
- echo "Terminated"
- fi
- exit $exit_status
+ local exit_status=$1
+ local host
+
+ case=${case:-"disk"}
+ shift
+ for ((i = 0; i < $ndevs; i++)); do
+ host=${host_names[$i]}
+ if [[ -n "${do_teardown_ec[$i]}" ]]; then
+ teardown_ec_devno $host ${client_names[$i]}
+ fi
+ done
+ pidcount=0
+ for ((i = 0; i < ${#unique_hosts[@]}; i++)); do
+ host=${unique_hosts[$i]}
+ remote_shell $host "killall -q vmstat >/dev/null 2>&1" &
+ pid=$!
+ kill -term ${vmstatpids[$pidcount]} 2>/dev/null
+ kill -kill ${vmstatpids[$pidcount]} 2>/dev/null
+ wait $pid
+ pidcount=$((pidcount + 1))
+ if ((${do_unload_echo[$i]})); then
+ unload_obdecho $i
+ fi
+ done
+ if [ $case == "network" ]; then
+ cleanup_network $1
+ fi
+ if [ $case == "netdisk" ]; then
+ shift
+ cleanup_netdisk $@
+ fi
+ if [ $exit_status ]; then
+ if [ $exit_status -ne 0 ]; then
+			echo "program exited with error"
+ else
+ echo "done!"
+ fi
+ else
+ echo "Terminated"
+ fi
+ exit $exit_status
}
trap 'cleanup 0 $clean_srv_OSS $cleanup_oscs' SIGHUP SIGINT SIGTERM
# 2. client name, ex:- ns8:ECHO_ns8
# 3. name of ost instances, ex:- lustre-OST0001
get_ec_devno () {
- exec 8>&1 1>&2
- local host=$1
- local client_name="$2"
- local ost_name="$3"
- local dev_type="${4:-obdfilter}"
- local stack_type="${5:-}"
-
- if [ -z "$client_name" ]; then
- if [ -z "$ost_name" ]; then
- echo "client and ost name both null"
- exit 1
- fi
- client_name=${ost_name}_ecc
- fi
- ec=`get_devno $host echo_client $client_name`
- if [ -n "$ec" ]; then
- echo $ec $client_name $client_name >&8
- exit 0
- fi
- if [ -z "$ost_name" ]; then
- echo "no echo client and ost_name not set, client:" \
- "$client_name, host: $host"
- exit 1
- fi
- ost=`get_devno $host $dev_type $ost_name`
- if [ -z "$ost" ]; then
- echo "OST $ost_name not setup"
- exit 1
- fi
- client_name=${ost_name}_ecc
- remote_shell $host \
- "$lctl <<-EOF
- attach echo_client $client_name ${client_name}_UUID
- setup $ost_name $stack_type
- EOF"
- ec=`get_devno $host echo_client $client_name`
- if [ -z "$ec" ]; then
- echo "Can't setup echo-client"
- exit 1
- fi
- echo $ec $client_name 1 >&8
- exit 0
+ exec 8>&1 1>&2
+ local host=$1
+ local client_name="$2"
+ local ost_name="$3"
+ local dev_type="${4:-obdfilter}"
+ local stack_type="${5:-}"
+
+ if [ -z "$client_name" ]; then
+ if [ -z "$ost_name" ]; then
+ echo "client and ost name both null"
+ exit 1
+ fi
+ client_name=${ost_name}_ecc
+ fi
+ ec=$(get_devno $host echo_client $client_name)
+ if [ -n "$ec" ]; then
+ echo $ec $client_name $client_name >&8
+ exit 0
+ fi
+ if [ -z "$ost_name" ]; then
+ echo "no echo client and ost_name not set, client:" \
+ "$client_name, host: $host"
+ exit 1
+ fi
+ ost=$(get_devno $host $dev_type $ost_name)
+ if [ -z "$ost" ]; then
+ echo "OST $ost_name not setup"
+ exit 1
+ fi
+ client_name=${ost_name}_ecc
+ remote_shell $host \
+ "$lctl <<-EOF
+ attach echo_client $client_name ${client_name}_UUID
+ setup $ost_name $stack_type
+ EOF"
+ ec=$(get_devno $host echo_client $client_name)
+ if [ -z "$ec" ]; then
+ echo "Can't setup echo-client"
+ exit 1
+ fi
+ echo $ec $client_name 1 >&8
+ exit 0
}
# Create echo-clients using osc_names and osc_uuid
# It creates echoclients for all osc listed using #lctl device_list command
ec_using_osc () {
- local osc_name=$1
+ local osc_name=$1
+
$lctl <<-EOF
attach echo_client ${osc_name}_ecc ${osc_name}_ecc_UUID
cfg_device ${osc_name}_ecc
# create echo client using server nid.
ec_using_srv_nid () {
- local server_nid=$1
- local ocsname=$2
- local oscuuid=$3
- $lctl add_uuid echo_UUID $server_nid@$NETTYPE >/dev/null 2>&1
+ local server_nid=$1
+ local ocsname=$2
+ local oscuuid=$3
+
+ $lctl add_uuid echo_UUID $server_nid@$NETTYPE >/dev/null 2>&1
$lctl <<-EOF
attach osc $ocsname $oscuuid
cfg_device $ocsname
}
setup_osc_for_remote_ost () {
- local ost_nid=$1
- local obdfilter_name=$2
- local host_name=host_$3
- $lctl add_uuid ${host_name}_UUID $ost_nid@$NETTYPE >/dev/null 2>&1
+ local ost_nid=$1
+ local obdfilter_name=$2
+ local host_name=host_$3
+
+ $lctl add_uuid ${host_name}_UUID $ost_nid@$NETTYPE >/dev/null 2>&1
$lctl <<-EOF
attach osc ${obdfilter_name}_osc ${obdfilter_name}_osc_UUID
cfg_device ${obdfilter_name}_osc
# setup obdecho on server
setup_srv_obd () {
- local server_nid=$1
- local test_ostfsnm=$2
- remote_shell "root@$server_nid" \
+ local server_nid=$1
+ local test_ostfsnm=$2
+
+ remote_shell "root@$server_nid" \
"$lctl <<-EOF
attach obdecho $test_ostfsnm ${test_ostfsnm}_UUID
cfg_device $test_ostfsnm
# setup OSS on server
setup_OSS () {
- local server_nid=$1
- remote_shell "root@$server_nid" \
+ local server_nid=$1
+
+ remote_shell "root@$server_nid" \
"$lctl <<-EOF
attach ost OSS OSS_UUID
cfg_device OSS
# parameter: 1. hostname
# 2. client name, ex:- ns8:ECHO_ns8
teardown_ec_devno () {
- local host=$1
- local client_name=$2
- remote_shell $host \
+ local host=$1
+ local client_name=$2
+
+ remote_shell $host \
"$lctl <<-EOF
cfg $client_name
cleanup
}
unique () {
- echo "$@" | xargs -n1 echo | sort -u
+ echo "$@" | xargs -n1 echo | sort -u
}
split_hostname () {
- local name=$1
- case $name in
- *:*) host=`echo $name | sed 's/:.*$//'`
- name=`echo $name | sed 's/[^:]*://'`
- ;;
- *) host=localhost
- ;;
- esac
- echo "$host $name"
+ local name=$1
+
+ case $name in
+ *:*) host=$(echo $name | sed 's/:.*$//')
+ name=$(echo $name | sed 's/[^:]*://')
+ ;;
+ *) host=localhost
+ ;;
+ esac
+ echo "$host $name"
}
check_cleanup () {
- type_obj="$1"
- osc_names_str=$($lctl dl | grep $type_obj)
- count=0;
- for name in $osc_names_str; do
- count=$((count+1))
- done
+ local type_obj="$1"
+ local osc_names_str=$($lctl dl | grep $type_obj)
+ local count=0;
- if [ $count != 0 ]; then
- echo "$type_obj could not be cleanup";
- exit 0;
- fi
+ for name in $osc_names_str; do
+ count=$((count + 1))
+ done
+
+ if [ $count != 0 ]; then
+		echo "$type_obj could not be cleaned up";
+ exit 0;
+ fi
}
check_setup () {
- type_obj="$1"
- osc_names_str=$($lctl dl | grep $type_obj)
- count=0;
- for name in $osc_names_str; do
- count=$((count+1))
- done
+ local type_obj="$1"
+ local osc_names_str=$($lctl dl | grep $type_obj)
+ local count=0;
- if [ $count == 0 ]; then
- echo "$type_obj could not be setup";
- exit 0;
- fi
+ for name in $osc_names_str; do
+ count=$((count + 1))
+ done
+
+ if [ $count == 0 ]; then
+		echo "$type_obj could not be set up";
+ exit 0;
+ fi
}
# added from bugzill req.
get_targets () {
- if [ -z "$ost_names" ]; then
- targets=$($lctl device_list | awk "{if (\$2 == \"UP\" && \
- \$3 == \"obdfilter\") {print \$4} }")
- fi
- if [ -z "$targets" ]; then
- echo "Can't find any OSTs to test. Please set targets=..."
- exit 1
- fi
- count=0
- for name in $targets; do
- ost_names[$count]=$name
- str=(`split_hostname $name`)
- host_names[$count]=${str[0]}
- count=$((count+1))
- done
+ if [ -z "$ost_names" ]; then
+ targets=$($lctl device_list | awk "{if (\$2 == \"UP\" && \
+ \$3 == \"obdfilter\") {print \$4} }")
+ fi
+ if [ -z "$targets" ]; then
+ echo "Can't find any OSTs to test. Please set targets=..."
+ exit 1
+ fi
+
+ local count=0
+ for name in $targets; do
+ ost_names[$count]=$name
+ str=($(split_hostname $name))
+ host_names[$count]=${str[0]}
+ count=$((count + 1))
+ done
}
get_hosts () {
- # split out hostnames from ost names
- for ((i = 0; i < count; i++)); do
- str=(`split_hostname ${targets[$i]}`)
- host_list[$i]=${str[0]}
- ost_names[$i]=${str[1]}
- done
+ # split out hostnames from ost names
+ for ((i = 0; i < count; i++)); do
+ local str=($(split_hostname ${targets[$i]}))
+ host_list[$i]=${str[0]}
+ ost_names[$i]=${str[1]}
+ done
}
#
# Author: Jitendra Pawar <jitendra@clusterfs.com>
-# Report generation for plot-obdfilter
+# Report generation for iokit-plot-obdfilter
# ====================================
-# The plot-obdfilter script is used to generate csv file and
+# The iokit-plot-obdfilter script is used to generate csv file and
# instructions files for gnuplot from the output of obdfilter-survey script.
#
-# The plot-obdfilter also creates .scr file that contains instructions
+# The iokit-plot-obdfilter also creates .scr file that contains instructions
# for gnuplot to plot the graph. After generating .dat and .scr files this
# script invokes gnuplot to display graph.
#
# Syntax:
# $ obdfilter-survey > log_filename
-# $ plot-obdfilter <log_filename>
+# $ iokit-plot-obdfilter <log_filename>
# [Note: 1. Please use the .summary file generated by obdfilter-survey as log_file.
# It is generally available at /tmp/obdfilter_survey_<date_time_system>.summary
# 2. This script may need modifications whenever there will be
my $rwlabel = $_[1];
print "generating plot $file-$rsz-$op.png\n";
open ( SCRFILE, "> $file-$rsz-$op.scr" ) or die "Can't open scr file for writing";
-
+
if ($subtitle) {
print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz KBytes, $subtitle\"\n";
} else {
print SCRFILE "set ylabel \"Speeds(MB/s)\"\n";
print SCRFILE "set logscale x\n";
print SCRFILE "set grid\n";
- print SCRFILE "set terminal png\n";
- print SCRFILE "set output \"/dev/null\"\n";
+ print SCRFILE "set terminal png\n";
+ print SCRFILE "set output \"/dev/null\"\n";
if ($opt_y0 != -9999) {
print SCRFILE "set yrange [ $opt_y0: ]\n";
- }
+ }
my $plot = "plot";
$i = 2;
$xrange = 1;
} else {
printf DATAFILE "%-10s", "-";
}
- }
+ }
}
}
close DATAFILE;
$graphgen = 0;
# Command line parameter parsing
use Getopt::Long;
-GetOptions ('help' => \$opt_help, 'st=s' => \$subtitle, 'y0=i' => \$opt_y0) or usage();
+GetOptions ('help' => \$opt_help, 'st=s' => \$subtitle, 'y0=i' => \$opt_y0) or usage();
if ($opt_help) {
usage();
}
$linelen = @line;
if ($linelen > 26 || $linelen < 11) {
print "invalid file format at line $count\n";
- exit 1;
- }
+ exit 1;
+ }
if (!$rsz && $line[5]) {
$cnt = 0;
$rsz = $line[5];
if ($line[$i]) {
$operations[$cnt] = $line[$i];
$cnt++;
- }
+ }
}
}
if ($rsz ne $line[5]) {
$rsz = $line[5];
$first_obj = $line[7];
$first_thread = $line[9];
- @operations = ();
+ @operations = ();
$cnt = 0;
for ($i = 10; $i <= $linelen; $i = $i + 5) {
if ($line[$i]) {
$operations[$cnt] = $line[$i];
$cnt++;
- }
+ }
}
$obj = 0;
$thread = 0;
$arwr{$line[9]}{$line[7]} = $line[$i * 5 + 11];
} elsif ($operations[$i] eq "rewrite_again") {
$arwa{$line[9]}{$line[7]} = $line[$i * 5 + 11];
- }
+ }
}
if ( $obj < $line[9] ) {
$obj = $line[9];
# client systems on which this test runs.]
# include library
-source $(dirname $0)/libecho
+source $(dirname $0)/iokit-libecho
# The following variables can be set in the environment, or on the
# command line
-bin_SCRIPTS = ost-survey plot-ost
+bin_SCRIPTS = ost-survey iokit-plot-ost
CLEANFILE = $(bin_SCRIPTS)
-EXTRA_DIST = README.ost-survey ost-survey plot-ost
+EXTRA_DIST = README.ost-survey ost-survey iokit-plot-ost
ost-survey (OST performance survey)
===================================
- This script is designed to test the client-to-disk performance
-of the individual OSTs in a Lustre filesystem. The network transfer
+This script is designed to test the client-to-disk performance
+of the individual OSTs in a Lustre filesystem. The network transfer
time from the client is included; to get a better idea of the isolated
disk perfomance, run this script on a client running on the OST.
$ ost-survey [-h] [-s <size>] <lustre_path>
where -s : size in MB
-h : help
- <lustre_path> : mount point of lustre client
+ <lustre_path> : mount point of lustre client
Assumptions
- Lustre filesystem is up and running
- Script is being run on a client
-plot-ost (OST survey graph)
+iokit-plot-ost (OST survey graph)
===========================
- The plot-ost script can be used to plot the results from the
+The iokit-plot-ost script can be used to plot the results from the
ost-survey script using gnuplot.
Syntax: $ ost-survey /mnt/lustre > ost_log
- $ plot-ost ost_log
+ $ iokit-plot-ost ost_log
#!/usr/bin/perl -w
# Report generation for ost-survey.pl
# ===================================
-# The plot-ost.pl script is used to generate csv file and
+# The iokit-plot-ost script is used to generate csv file and
# instructions files for gnuplot from the output of ost-survey.pl script.
#
-# The plot-ost.pl also creates .scr file that contains instructions
+# iokit-plot-ost also creates .scr file that contains instructions
# for gnuplot to plot the graph. After generating .dat and .scr files this
# script invokes gnuplot to display graph.
#
# Syntax:
-# $ plot-ost.pl <log_filename>
-# Note: 1. This script may need modifications whenever there will be
+# $ iokit-plot-ost <log_filename>
+# Note: 1. This script may need modifications whenever there will be
# modifications in output format of ost-survey.pl script.
# 2. Gnuplot version 4.0 or above is required.
-# arg 0 is filename
+# arg 0 is filename
sub usages_msg(){
print "Usage: $0 <log_filename> \n";
print " $0 produces graphs from the output of ost-survey.pl\n";
print " using gnuplot.\n";
print "e.g.# perl ost-survey /mnt/lustre > ost-log; perl $0 ost-log\n";
- exit 1;
+ exit 1;
}
my $count = 0; # count for number of rows in csv(.dat) file.
my $flag = 0;
my @GraphTitle;
if ( !$ARGV[0] ) {
- usages_msg();
+ usages_msg();
}
$file = $ARGV[0];
chomp;
@line = split( /\s+/ ); # splits line into tokens
# This comparison may be changed if there will be changes log file.
- if ( $line[0] eq "Ost#" ) {
+ if ( $line[0] eq "Ost#" ) {
print DATAFILE "$line[0] $line[1] $line[2]\n";
$flag = 1;
<PFILE>; # skip the "---------" line from result file.
chomp;
@line = split( /\s+/ ); # splits line into tokens
if ( $line[1] ne "Inactive" ) {
- print DATAFILE "$count $line[1] $line[2]\n";
- }
+ print DATAFILE "$count $line[1] $line[2]\n";
+ }
$count = $count + 1;
}
close PFILE;
-bin_SCRIPTS = plot-sgpdd sgpdd-survey
+bin_SCRIPTS = iokit-plot-sgpdd sgpdd-survey
CLEANFILE = $(bin_SCRIPTS)
-EXTRA_DIST = README.sgpdd-survey plot-sgpdd sgpdd-survey
+EXTRA_DIST = README.sgpdd-survey iokit-plot-sgpdd sgpdd-survey
regions) all seem to land on top of each other, it shows the device is
phased by seeks at the given record size.
-The included script "plot-sgpdd" will process output files and create
-.dat (similar to csv) files for spreadsheet import. It also plots the
-results directly using gnuplot and creates .png files.
+The included script "iokit-plot-sgpdd" will process output files and
+create .dat (similar to csv) files for spreadsheet import. It also plots
+the results directly using gnuplot and creates .png files.
#!/usr/bin/perl -w
-# Report generation for plot-sgpdd
-# ================================
-# The plot-sgpdd script is used to generate csv file and
+# Report generation for iokit-plot-sgpdd
+# ======================================
+# The iokit-plot-sgpdd script is used to generate csv file and
# instructions files for gnuplot from the output of sgpdd-survey script.
#
-# The plot-sgpdd also creates .scr file that contains instructions
+# iokit-plot-sgpdd also creates .scr file that contains instructions
# for gnuplot to plot the graph. After generating .dat and .scr files this
# script invokes gnuplot to display graph.
#
# Syntax:
# $ sgpdd-survey > log_filename
-# $ plot-sgpdd <log_filename>
+# $ iokit-plot-sgpdd <log_filename>
# [Note: 1. This script may need modifications whenever there will be
# modifications in output format of sgpdd-survey script.
# 2. Gnuplot version 4.0 or above is required.]
-sub usage()
+sub usage()
{
print STDERR "Usage: $0 [options] <log_filename>\n";
print STDERR " $0 parses and plots graphs from the output of sgpdd-survey\n";
# check whether gnuplot supports png
$pngsupport = "ldd `which gnuplot` | grep -q libpng";
-system ("$pngsupport") == 0 or die "gnuplot installed does not support png.
- Please install gnuplot to support png and try again.\n";
+system ("$pngsupport") == 0 or die "gnuplot installed does not support PNG.
+ Please install gnuplot to support PNG and try again.\n";
my @GraphTitle;
my $rwlabel = $_[1];
print "generating plot $file-$rsz-$op.png\n";
open ( SCRFILE, "> $file-$rsz-$op.scr" ) or die "Can't open scr file for writing";
-
+
if ($opt_rdtitle || $opt_wrtitle) {
if ($op eq "rd") {
print SCRFILE "set title \"@GraphTitle\\n$rwlabel, " .
print SCRFILE "set logscale x\n";
print SCRFILE "set grid\n";
print SCRFILE "set key right bottom\n";
- print SCRFILE "set terminal png\n";
- print SCRFILE "set output \"/dev/null\"\n";
+ print SCRFILE "set terminal png\n";
+ print SCRFILE "set output \"/dev/null\"\n";
if ($opt_y != 0) {
print SCRFILE "set yrange [ 0:$opt_y ]\n";
} else {
@operations = ();
# Command line parameter parsing
use Getopt::Long;
-GetOptions ('help' => \$opt_help, 'rt=s' => \$opt_rdtitle, 'wt=s' => \$opt_wrtitle, 'y=i' => \$opt_y) or usage();
+GetOptions ('help' => \$opt_help, 'rt=s' => \$opt_rdtitle, 'wt=s' => \$opt_wrtitle, 'y=i' => \$opt_y) or usage();
if ($opt_help) {
usage();
}
-bin_SCRIPTS = config.sh gather_stats_everywhere.sh lstats.sh
+bin_SCRIPTS = iokit-config iokit-gather-stats iokit-lstats
CLEANFILE = $(bin_SCRIPTS)
-EXTRA_DIST = README.lstats.sh config.sh gather_stats_everywhere.sh lstats.sh
+EXTRA_DIST = README.iokit-lstats iokit-config iokit-gather-stats iokit-lstats
+++ /dev/null
-Overview
---------
-These scripts will be used to collect application profiling info from
-lustre clients and servers. It will be run on a single (control)
-node, and collect all the profile info and create a tarball on the
-control node.
-
-lstat.sh : script for single node, will be run on each profile node.
-gather_stats_everywhere.sh : collect stats script.
-config.sh : customized configuration description
-
-Requirements
--------
-1) Lustre is installed and setup on your cluster.
-2) ssh/scp to these nodes works without requiring a password.
-
-Configuration
-------
-Configuration is very simple for this script - all of the profiling
-config VARs are in config.sh
-
-XXXX_INTERVAL: the profiling interval
-where value of interval means:
- 0 - gather stats at start and stop only
- N - gather stats every N seconds
-if XXX_INTERVAL isn't specified, XXX stats won't be collected
-XXX can be: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD, CLIENT
-
-
-Running
---------
-The gather_stats_everywhere.sh should be run in three phases:
-
- a)sh gather_stats_everywhere.sh config.sh start
- It will start stats collection on each node specified in config.sh
-
- b)sh gather_stats_everywhere.sh config.sh stop <log_name.tgz>
- It will stop collect stats on each node. If <log_name.tgz> is provided,
- it will create a profile tarball /tmp/<log_name.tgz>
-
- c)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tgz csv
- It will analyse the log_tarball and create a csv tarball for this
- profiling tarball.
-
-
-Example
--------
-When you want collect your profile info, you should
- 1) start the collect profile daemon on each node.
- sh gather_stats_everywhere.sh config.sh start
-
- 2) run your test.
-
- 3) stop the collect profile daemon on each node, cleanup the tmp
- file and create a profiling tarball.
- sh gather_stats_everywhere.sh config.sh stop log_tarball.tgz
-
- 4) create a csv file according to the profile.
- sh gather_stats_everywhere.sh config.sh analyse log_tarball.tgz csv
-
-
-TBD
-------
-Add liblustre profiling support and add more options for analyse.
-
-
-
-
-
-
--- /dev/null
+Overview
+--------
+These scripts will be used to collect application profiling info from
+lustre clients and servers. It will be run on a single (control)
+node, and collect all the profile info and create a tarball on the
+control node.
+
+iokit-lstats : script for single node, will be run on each profile node
+iokit-gather-stats : collect stats script
+iokit-config : customized configuration description
+
+Requirements
+-------
+1) Lustre is installed and setup on your cluster.
+2) ssh/scp to these nodes works without requiring a password.
+
+Configuration
+------
+Configuration is very simple for this script, all of the profiling
+config VARs are in iokit-config
+
+XXXX_INTERVAL: the profiling interval
+where value of interval means:
+ 0 - gather stats at start and stop only
+ N - gather stats every N seconds
+if some XXX_INTERVAL isn't specified, related stats won't be collected
+XXX can be: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD, CLIENT
+
+
+Running
+--------
+The iokit-gather-stats should be run in three phases:
+
+ a) sh iokit-gather-stats iokit-config start
+ It will start stats collection on each node specified in iokit-config
+
+ b) sh iokit-gather-stats iokit-config stop <log_name>
+ It will stop collect stats on each node. If <log_name> is provided,
+ it will create a profile tarball /tmp/<log_name>.tar.gz
+
+ c) sh iokit-gather-stats iokit-config analyse log_tarball.tar.gz csv
+ It will analyse the log_tarball and create a csv tarball for this
+ profiling tarball.
+
+
+Example
+-------
+When you want collect your profile info, you should
+ 1) start the collect profile daemon on each node.
+ sh iokit-gather-stats iokit-config start
+
+ 2) run your test.
+
+ 3) stop the collect profile daemon on each node, cleanup the tmp
+ file and create a profiling tarball.
+ sh iokit-gather-stats iokit-config stop log_tarball
+
+ 4) create a csv file according to the profile.
+ sh iokit-gather-stats iokit-config analyse log_tarball.tar.gz csv
+
+++ /dev/null
-Overview
---------
-These script will be used to collect profile info of lustre client and server.
-It will be run on a single(control) node, and collect all the profile info and
-create a tarball on the control node.
-
-lstat.sh : The stat script for single node, which will be run on each profile
- node.
-gather_stats_everywhere.sh : collect stats script.
-config.sh : the config for gather_stats_everywhere.sh.
-
-Requirements
--------
-1) Lustre is installed and setup on your profiling cluster.
-2) ssh/scp to these node names works without requiring a password.
-
-Configuration
-------
-Configuration is very simple for this script, all of the profiling config VARs are
-in config.sh
-
-XXXX_INTERVAL: the profiling interval
-where value of interval means:
- 0 - gather stats at start and stop only
- N - gather stats every N seconds
-if some XXX_INTERVAL isn't specified, related stats won't be collected
-XXXX can be: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD, CLIENT
-
-As for ior-collect-stat.sh, you can modify the various IOR and MPI
-parameters inside ior-collect-stat.sh
-
-Running
---------
-1) The gather_stats_everywhere.sh will be run in three mode
-
- a)sh gather_stats_everywhere.sh config.sh start
- It will start collect stats on each node provided in config.sh
-
- b)sh gather_stats_everywhere.sh config.sh stop <log_name>
- It will stop collect stats on each node. If <log_name> is provided,
- it will create a profile tarball /tmp/<log_name>.tar.gz.
-
- c)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv
- It will analyse the log_tarball and create a csv tarball for this
- profiling tarball.
-
-2) The ior-collect-stat.sh will be run as
- sh ior-collect-stat.sh start <profile>
- It will create a ior result csv file. If <profile> is provided,
- the detail profile info tarball will be created under /tmp.
-
-Example
--------
-When you want collect your profile info, you should
- 1)sh gather_stats_everywhere.sh config.sh start
- #start the collect profile daemon on each node.
-
- 2)run your test.
-
- 3)sh gather_stats_everywhere.sh config.sh stop log_tarball
- #stop the collect profile daemon on each node, cleanup
- the tmp file and create a profiling tarball.
-
- 4)sh gather_stats_everywhere.sh config.sh analyse log_tarball.tar.gz csv
- #create a csv file according to the profile.
-
-TBD
-------
-Add liblustre profiling support and add more options for analyse.
-
-
-
-
-
-
MDS_LIST=${PERCH_MDS_LIST}
OST_LIST=${PERCH_OST_LIST}
-export TARGETS="${MDS_LIST} ${OST_LIST}"
+export TARGETS="${MDS_LIST} ${OST_LIST}"
-#script var
+#script var
#case $TARGET in
-# oss*)
-# VMSTAT_INTERVAL=0
-# SERVICE_INTERVAL=2
-# SDIO_INTERVAL=0
+# oss*)
+# VMSTAT_INTERVAL=0
+# SERVICE_INTERVAL=2
+# SDIO_INTERVAL=0
# ;;
# client*) ALEX_SCRIPT_CLIENT_VAR1="hello!"
# ;;
#esac
-#FIXME: diff these parameters according to client/MDS/OSS
-VMSTAT_INTERVAL=${VMSTAT_INTERVAL:-1}
+#FIXME: diff these parameters according to client/MDS/OSS
+VMSTAT_INTERVAL=${VMSTAT_INTERVAL:-1}
SERVICE_INTERVAL=${SERVICE_INTERVAL:-0}
SDIO_INTERVAL=${SDIO_INTERVAL:-0}
BRW_INTERVAL=${BRW_INTERVAL:-0}
#some environment var
TMP=${TMP:-"/tmp"}
-SCRIPT=${SCRIPT:-"lstats.sh"}
+SCRIPT=${SCRIPT:-"iokit-lstats"}
#Remote ssh script
DSH=${DSH:-ssh}
DCP=${DCP:-scp}
-#!/bin/sh
+#!/bin/bash
-# gather_stats_everywhere:
+# iokit-gather-stats:
# script on a selection of nodes and collect all the results into a single
# tar ball
#
}
warning() {
- echo "WARNING: $@"
+ echo "WARNING: $@"
}
info () {
- if [ ${PRINT_INFO_MSGS} -gt 0 ]
- then
- echo "INFO: $@"
- fi
+ if [ ${PRINT_INFO_MSGS} -gt 0 ]; then
+ echo "INFO: $@"
+ fi
}
debug () {
- if [ ${PRINT_DEBUG_MSGS} -gt 0 ]
- then
- echo "DEBUG: $@"
- fi
+ if [ ${PRINT_DEBUG_MSGS} -gt 0 ]; then
+ echo "DEBUG: $@"
+ fi
}
usage() {
- printf $"Usage: gather_stats_everywhere [-help] config_file [start|stop|cleanup] <log_name>\n"
- if [ x$1 = x-h ]
- then
+ printf $"Usage: iokit-gather-stats [-help] config_file [start|stop|cleanup] <log_name>\n"
+ if [ x$1 = x-h ]; then
printf $"
-The distribution script will run on a single node. It is parameterised
-with a set of target node names. It may assume ssh/scp to these node
+The distribution script will run on a single node. It is parameterised
+with a set of target node names. It may assume ssh/scp to these node
names works without requiring a password. It will run in 2 modes...
-gather_stats_everywhere config_file start
+iokit-gather-stats config_file start
...will copy the script to /tmp everywhere described in
config_file running on all the target hosts. And...
-gather_stats_everywhere config_file stop log_name
+iokit-gather-stats config_file stop log_name
-...will stop script running on all the hosts it started on and collect
+...will stop script running on all the hosts it started on and collect
all the individual stats files into a single compressed tarball if the log_name is
provided.
The config file is just a list of shell variable assignments that can be
-customised.
+customised.
Serveral variables must be set in the config file
Targets: the nodes where run the script.
"
- exit 0
+ exit 0
else
- exit 1
+ exit 1
fi
}
options=`getopt -o h --long help:: -- "$@"`
-if [ $? -ne 0 ]
-then
+if [ $? -ne 0 ]; then
usage
fi
usage -h ;;
--help)
usage -h ;;
- --)
+ --)
shift
break ;;
esac
done
if [ $# != 2 -a $# != 3 ] ; then
- usage
+ usage
fi
CONFIG=$1
. $CONFIG
if [ -z "$SCRIPT" ]; then
- error "SCRIPT in ${CONFIG} is empty"
- exit 1
-fi
+ error "SCRIPT in ${CONFIG} is empty"
+ exit 1
+fi
if [ -z "$TARGETS" ]; then
- error "TARGETS in ${CONFIG} is empty"
- exit 1
+ error "TARGETS in ${CONFIG} is empty"
+ exit 1
fi
-#check nodes accessiable
+#check nodes accessible
Check_nodes_available() {
- local NODES_NOT_AVAILABLE=""
+ local NODES_NOT_AVAILABLE=""
+
+ debug "Entering Check_nodes_available()"
- debug "Entering Check_nodes_available()"
-
for TARGET in $TARGETS; do
- if ! ping -c 1 -w 3 $TARGET > /dev/null; then
- NODES_NOT_AVAILABLE=$NODES_NOT_AVAILABLE$TARGET
- fi
- done
-
+ if ! ping -c 1 -w 3 $TARGET > /dev/null; then
+ NODES_NOT_AVAILABLE=$NODES_NOT_AVAILABLE$TARGET
+ fi
+ done
+
if [ -z "$NODES_NOT_AVAILABLE" ]; then
- debug "Check_nodes_available() returning 0 (success - all nodes available)"
+		debug "Check_nodes_available() returning 0 "\
+			"(success - all nodes available)"
return 0
fi
- error "Check_nodes_available: these nodes are not available (did not respond to pings): ${NODES_NOT_AVAILABLE}"
- debug "Check_nodes_available() returning with errors"
-
+	error "Check_nodes_available: these nodes are not available "\
+		"(did not respond to pings): ${NODES_NOT_AVAILABLE}"
+ debug "Check_nodes_available() returning with errors"
+
return 1
}
-if ! Check_nodes_available; then
+if ! Check_nodes_available; then
error "not all the nodes are available"
exit 1
fi
Nodes_are_not_clean() {
local DIRTY_NODES=""
- debug "Entering Nodes_are_not_clean()"
-
+ debug "Entering Nodes_are_not_clean()"
+
# check whether there are running threads on the targets
for TARGET in $TARGETS; do
ps_str=`$DSH $TARGET "ps aux | grep -v grep | grep ${SCRIPT}-${TARGET}"`
if [ -n "$ps_str" ]; then
- DIRTY_NODES="${DIRTY_NODES} ${TARGET}"
+ DIRTY_NODES="${DIRTY_NODES} ${TARGET}"
fi
done
if [ -n "$DIRTY_NODES" ]; then
- debug "Nodes_are_not_clean() returning 1"
+ debug "Nodes_are_not_clean() returning 1"
return 1
fi
- debug "Nodes_are_not_clean() returning 0"
- return 0
+ debug "Nodes_are_not_clean() returning 0"
+ return 0
}
Clean_nodes() {
- debug "Entering Clean_nodes()"
-
- #
- # if debugging is enabled, show lists of lstats processes
- # still running on the target nodes before the clean operation
- #
- if [ ${PRINT_DEBUG_MSGS} -gt 0 ]
- then
- for TARGET in $TARGETS; do
- debug "List of processes which need to be cleaned up on ${TARGET}:"
- $DSH $TARGET "ps aux | grep -v grep | grep ${SCRIPT}-${TARGET}"
- debug "List of pids which need to be cleaned up on ${TARGET}:"
- $DSH $TARGET "ps aux | grep ${SCRIPT}-${TARGET} | grep -v grep | ${AWK} '{ print \$2 }'"
- done
- fi
-
- #
- # do the actual cleanup
+ debug "Entering Clean_nodes()"
+
+ #
+ # if debugging is enabled, show lists of lstats processes
+ # still running on the target nodes before the clean operation
+ #
+ if [ ${PRINT_DEBUG_MSGS} -gt 0 ]; then
+ for TARGET in $TARGETS; do
+ debug "List of processes which need to be cleaned up on ${TARGET}:"
+ $DSH $TARGET "ps aux | grep -v grep | grep ${SCRIPT}-${TARGET}"
+ debug "List of pids which need to be cleaned up on ${TARGET}:"
+ $DSH $TARGET "ps aux | grep ${SCRIPT}-${TARGET} | grep -v grep | ${AWK} '{ print \$2 }'"
+ done
+ fi
+
+ #
+ # do the actual cleanup
# kill any old lstats processes still running on the target nodes
#
for TARGET in $TARGETS; do
-
- ps_str=`$DSH $TARGET "ps aux | grep -v grep | grep ${SCRIPT}-${TARGET}"`
- if [ -n "$ps_str" ]; then
- debug "cleaning node ${TARGET}"
- $DSH $TARGET "ps aux | grep ${SCRIPT}-${TARGET} | grep -v grep | ${AWK} '{ print \$2 }' | ${XARGS} kill"
- fi
- done
-
- debug "Leaving Clean_nodes()"
- return 0
+ ps_str=$($DSH $TARGET "ps aux | grep -v grep | grep ${SCRIPT}-${TARGET}")
+ if [ -n "$ps_str" ]; then
+ debug "cleaning node ${TARGET}"
+ $DSH $TARGET "ps aux | grep ${SCRIPT}-${TARGET} |
+ grep -v grep | ${AWK} '{ print \$2 }' |
+ ${XARGS} kill"
+ fi
+ done
+
+ debug "Leaving Clean_nodes()"
+ return 0
}
copy_target_script() {
local target=$1
- debug "Entering copy_target_script()"
-
+ debug "Entering copy_target_script()"
+
#copy alex's run scripts to the target
copy_cmd="$DCP $SCRIPT ${USER}${target}:$TMP/${SCRIPT}-${target}"
- ${copy_cmd} 1>/dev/null 2>&1
- if [ ${PIPESTATUS[0]} != 0 ]; then
+ ${copy_cmd} 1>/dev/null 2>&1
+ if [ ${PIPESTATUS[0]} != 0 ]; then
echo "copy command failed: ${copy_cmd}" 2>&1
debug "Leaving copy_target_script() (error return)"
return 1
fi
-
- echo "$SCRIPT copied to ${USER}${target} (into $TMP)"
- debug "Leaving copy_target_script() (normal return)"
+
+ echo "$SCRIPT copied to ${USER}${target} (into $TMP)"
+ debug "Leaving copy_target_script() (normal return)"
return 0
}
start_target_script() {
local target=$1
- debug "Entering start_target_script()"
-
+ debug "Entering start_target_script()"
+
if ! copy_target_script $target; then
echo "copy_target_script $target failed." 2>&1
debug "Leaving start_target_script() (error return)"
#run the script on the target
$DSH ${USER}${target} "VMSTAT_INTERVAL=${VMSTAT_INTERVAL} \
- SDIO_INTERVAL=${SDIO_INTERVAL} \
- SERVICE_INTERVAL=${SERVICE_INTERVAL} \
- BRW_INTERVAL=${BRW_INTERVAL} \
- JBD_INTERVAL=${JBD_INTERVAL} \
- IO_INTERVAL=${IO_INTERVAL} \
- MBALLOC_INTERVAL=${MBALLOC_INTERVAL} \
- sh ${TMP}/${SCRIPT}-${target} start \
+ SDIO_INTERVAL=${SDIO_INTERVAL} \
+ SERVICE_INTERVAL=${SERVICE_INTERVAL} \
+ BRW_INTERVAL=${BRW_INTERVAL} \
+ JBD_INTERVAL=${JBD_INTERVAL} \
+ IO_INTERVAL=${IO_INTERVAL} \
+ MBALLOC_INTERVAL=${MBALLOC_INTERVAL} \
+ sh ${TMP}/${SCRIPT}-${target} start \
1> /dev/null 2>/dev/null </dev/null"
if [ ${PIPESTATUS[0]} != 0 ]; then
debug "Leaving start_target_script() (error return)"
return 1
fi
-
+
echo "Start the ${SCRIPT} on ${target} success"
debug "Leaving start_target_script() (normal return)"
return 0
stop_target_script() {
local target=$1
- debug "Entering stop_target_script()"
-
+ debug "Entering stop_target_script()"
+
#stop the target script first
$DSH ${USER}${target} "sh ${TMP}/${SCRIPT}-${target} stop" 1>/dev/null 2>&1
if [ ${PIPESTATUS[0]} != 0 ]; then
echo "stop the collecting stats script on ${target} failed"
debug "Leaving stop_target_script() (error return)"
- return 1
- else
+ return 1
+ else
echo "stop the collecting stats script on ${target} success"
fi
#remove those tmp file
$DSH ${USER}${target} "rm -rf $TMP/${SCRIPT}-${target}" 1>/dev/null 2>&1
echo "cleanup ${target} tmp file after stop "
-
+
debug "Leaving stop_target_script() (normal return)"
- return 0
+ return 0
}
#
# different from the clock on this node)
#
generate_timestamp() {
- if [ "X${GLOBAL_TIMESTAMP}" = "X" ]
- then
+ if [ "X${GLOBAL_TIMESTAMP}" = "X" ]; then
export GLOBAL_TIMESTAMP=`date +%F-%H.%M.%S`
debug "Global Timestamp Created: ${GLOBAL_TIMESTAMP}"
fi
local -a pids_array
local -a clients_array
- debug "Entering fetch_log()"
-
+ debug "Entering fetch_log()"
+
if ! mkdir -p $TMP/$log_name ; then
error "can not mkdir $log_name"
exit 1
fi
- #retrive the log_tarball from remote nodes background
- local n=0
+	#retrieve the log_tarball from remote nodes in background
+ local n=0
for TARGET in $TARGETS; do
- (fetch_target_log ${TARGET}) &
+ (fetch_target_log ${TARGET}) &
pids_array[$n]=$!
clients_array[$n]=$TARGET
-
+
debug "fetch_log: spawned fetch_target_log process for ${TARGET} pid ${pids_array[$n]}"
- let n=$n+1
+ let n=$n+1
done
-
+
local num_pids=$n
#Waiting log fetch finished
for ((n=0; $n < $num_pids; n++)); do
- debug "fetch_log(): waiting for pid ${pids_array[$n]}"
+ debug "fetch_log(): waiting for pid ${pids_array[$n]}"
wait ${pids_array[$n]}
-
+
#
# TODO: add check of exit status from wait()
#
#compress the log tarball
cmd="$TAR ${stat_tar_name} $TMP/${log_name}"
echo "Creating compressed tar file ${stat_tar_name} from log files in $TMP/${log_name}"
- ${cmd} 1>/dev/null 2>&1
- if [ ${PIPESTATUS[0]} == 0 ]; then
+ ${cmd} 1>/dev/null 2>&1
+ if [ ${PIPESTATUS[0]} == 0 ]; then
echo "removing temporary directory $TMP/${log_name}"
rm -rf $TMP/${log_name}
else
echo "Compressed logfiles are in $TMP/${stat_tar_name}"
fi
-
+
debug "Leaving fetch_log()"
}
local -a pids_array
local -a clients_array
local n=0
-
+
debug "Entering stop_targets_script()"
-
+
for TARGET in $TARGETS; do
(stop_target_script ${TARGET}) &
pids_array[$n]=$!
clients_array[$n]=$TARGET
- let n=$n+1
+ let n=$n+1
done
local num_pids=$n
-
+
#Waiting log fetch finished
for ((n=0; $n < $num_pids; n++)); do
if ! wait ${pids_array[$n]}; then
echo "${clients_array[$n]}: can not stop stats collect"
fi
done
-
+
debug "Leaving stop_targets_script()"
-
}
gather_start() {
local -a pids_array
local -a clients_array
local n=0
-
+
debug "Entering gather_start()"
-
- #check whether the collect scripts already start in some targets
-
- Nodes_are_not_clean
- ret=$?
-
- if [ $ret -gt 0 ]
- then
- warning "$SCRIPT already running in some targets, attempting cleanup..."
-
- Clean_nodes
-
- Nodes_are_not_clean
- ret=$?
-
- if [ $ret -gt 0 ]
- then
- error "$SCRIPT automatic cleanup attempt failed."
- error "$SCRIPT Please make sure lstats is no longer running on target nodes and try again."
- debug "Error return from gather_start()"
- return 1
- fi
+
+ #check whether the collect scripts already start in some targets
+
+ Nodes_are_not_clean
+ ret=$?
+
+ if [ $ret -gt 0 ]; then
+ warning "$SCRIPT already running on some targets, try cleanup"
+
+ Clean_nodes
+
+ Nodes_are_not_clean
+ ret=$?
+
+ if [ $ret -gt 0 ]; then
+ error "$SCRIPT automatic cleanup attempt failed."
+ error "$SCRIPT Please make sure lstats is not running "\
+ "on target nodes and try again."
+ debug "Error return from gather_start()"
+ return 1
+ fi
fi
-
+
for TARGET in $TARGETS; do
(start_target_script ${TARGET}) &
pids_array[$n]=$!
clients_array[$n]=$TARGET
- let n=$n+1
+ let n=$n+1
done
-
+
local num_pids=$n
- local RC=0
+ local RC=0
#Waiting log fetch finished
for ((n=0; $n < $num_pids; n++)); do
if ! wait ${pids_array[$n]}; then
if [ $RC != 0 ]; then
stop_targets_script
fi
-
+
debug "Leaving gather_start()"
}
gather_stop() {
log=$1
- debug "Entering gather_stop()"
-
+ debug "Entering gather_stop()"
+
if [ -n "$log" ]; then
fetch_log $log
fi
-
+
stop_targets_script
-
+
debug "Leaving gather_stop()"
}
{
local log_name=$1
- ln=`grep -n snapshot_time ${log_name} | awk -F":" '{ln=$1;} END{print ln;}'`
- total_ln=`wc ${log_name} | awk '{print $1}'`
+ local ln=$(grep -n snapshot_time ${log_name} |
+ awk -F":" '{ln=$1;} END{print ln;}')
+ local total_ln=$(wc ${log_name} | awk '{print $1}')
- local endlen=$((${total_ln} - ${ln}))
+ local endlen=$((total_ln - $ln))
echo $endlen
}
tail -n $end_len ${statf} | awk '{print $1 "," $2}' \
>> $logdir/analyse_${type_name}.csv
else
- tail -n $end_len ${statf} | \
- awk '/^[[:digit:]]/{print $1","$2","$6} \
+ tail -n $end_len ${statf} | \
+ awk '/^[[:digit:]]/{print $1","$2","$6} \
/^page/{print "page per rpc,read,write"} \
/^rpcs/{print "rpcs,read,write"} \
- /^offset/{print "offset, read,write"}' \
+ /^offset/{print "offset, read,write"}' \
>> $logdir/analyse_${type_name}.csv
fi
fi
local log_tarball=$1
local option=$2
- debug "Entering gather_analyze()"
-
+ debug "Entering gather_analyze()"
+
#validating option
if [ -z "$log_tarball" -o -r "$option" ]; then
usage;
shift
local date=`date +%F-%H-%M`
- local logdir="analyse-${date}"
+ local logdir="analyse-${date}"
mkdir -p ${TMP}/${logdir}
mkdir -p ${TMP}/${logdir}/tmp
$TAR ${TMP}/${logdir}.tar.gz ${TMP}/${logdir} 1>/dev/null 2>&1
echo "create analysed tarball ${TMP}/${logdir}.tar.gz"
-
- debug "Leaving gather_analyze()"
+
+ debug "Leaving gather_analyze()"
}
case $OPTION in
-#!/bin/sh
+#!/bin/bash
#
# very short example:
#
# to start collection:
-# VMSTAT_INTERVAL=0 SERVICE_INTERVAL=2 SDIO_INTERVAL=0 lstats.sh start
+# VMSTAT_INTERVAL=0 SERVICE_INTERVAL=2 SDIO_INTERVAL=0 iokit-lstats start
#
# where value of interval means:
# 0 - gather stats at start and stop only
# XXX can be: VMSTAT, SERVICE, BRW, SDIO, MBALLOC, IO, JBD
#
# to stop collection:
-# lstats.sh stop
+# iokit-lstats stop
#
# to fetch collected stats:
-# lstats.sh fetch >file
+# iokit-lstats fetch >file
# in file you'll get a tarbal containing directory with stats
# directory's name consists of hostname and date,
# like: stats-bzzz-2007-05-13-22.52.31
function ls_grab_control()
{
- OCOMM=`ps -p $$ -o comm=`
+ OCOMM=$(ps -p $$ -o comm=)
if [ "$OCOMM" == "" ]; then
echo "Can't fetch process name"
exit
fi
# check for running master first
- PID=`cat $PIDFILE 2>/dev/null`
+ PID=$(cat $PIDFILE 2>/dev/null)
#echo "check master $PID"
if [ "x$PID" != "x" ]; then
- COMM=`ps -p $PID -o comm=`
+ COMM=$(ps -p $PID -o comm=)
if [ "$COMM" == "$OCOMM" ]; then
echo "Master is already running by $PID"
return 1
# XXX: race -- two process can do this at same time, use rename instead
echo $$ >${PIDFILE}.$$
mv ${PIDFILE}.$$ ${PIDFILE}
- a=`cat ${PIDFILE}`
+ a=$(cat ${PIDFILE})
if [ "$$" != "$a" ]; then
echo "Some one $a won the race"
return 1
shift
read pid NN </proc/self/stat
- stime=`ps -p $pid -o bsdstart=`
+ stime=$(ps -p $pid -o bsdstart=)
echo -n "$pid " >>$STATPIDS
echo -n "$stime" >>${STIMEPREFIX}${pid}
# - N - collect each N seconds
function vmstat_collector()
{
- echo "vmstat " `date`
+ echo "vmstat " $(date)
if let "VMSTAT_INTERVAL==0"; then
date
{
local filter=$1
- echo "brw_* for $filter " `date`
+ echo "brw_* for $filter " $(date)
# clear old stats
for i in /proc/fs/lustre/obdfilter/${filter}/brw_*; do
# find all obdfilters
for i in /proc/fs/lustre/obdfilter/*; do
- filter=`basename $i`
+ local filter=$(basename $i)
if [ "$filter" == "num_refs" ]; then
continue;
fi
local target=$2
local srv=$3
- echo "service stats for ${target}/${srv} " `date`
+ echo "service stats for ${target}/${srv} " $(date)
# clear old stats
echo 0 >$file
# find all OSTs and MDTs
for i in /proc/fs/lustre/ost/* /proc/fs/lustre/mdt/*; do
- target=`basename $i`
+ target=$(basename $i)
if [ "$target" == "num_refs" ]; then
continue;
fi
for j in ${i}/*; do
- srv=`basename $j`
+ srv=$(basename $j)
if [ "$srv" == "uuid" ]; then
continue;
fi
# find all LDLM services
for i in /proc/fs/lustre/ldlm/services/*; do
- srv=`basename $i`
+ srv=$(basename $i)
run_collector "service" service_collector ${i}/stats "ldlm" $srv &
done
local target=$2
local srv=$3
- echo "client stats for ${target}/${srv} " `date`
+ echo "client stats for ${target}/${srv} " $(date)
# clear old stats
echo 0 >$file
return;
fi
- # find all osc
+ # find all osc
for i in /proc/fs/lustre/osc/* ; do
- target=`basename $i`
+ local target=$(basename $i)
if [ "$target" == "num_refs" ]; then
continue;
fi
for j in ${i}/*; do
- stats=`basename $j`
+ local stats=$(basename $j)
if [ "$stats" == "stats" -o "$stats" == "rpc_stats" ]; then
run_collector "osc-${stats}" client_collector \
${j} $target $stats &
done
# find all llite stats
for i in /proc/fs/lustre/llite/* ; do
- target=`basename $i`
+ target=$(basename $i)
for j in ${i}/*; do
- stats=`basename $j`
+ stats=$(basename $j)
if [ "$stats" == "stats" -o "$stats" == "vfs_ops_stats" ]; then
run_collector "llite-${stats}" client_collector \
${j} $target ${stats} &
function sdio_collector()
{
local obd=$1
- local uuid=`cat $obd/uuid`
- local tmp=`cat $obd/mntdev`
- local disk=`basename $tmp`
+ local uuid=$(cat $obd/uuid)
+ local tmp=$(cat $obd/mntdev)
+ local disk=$(basename $tmp)
local file="/proc/scsi/sd_iostats/${disk}"
- echo "sd_iostats for ${uuid}/${disk} " `date`
+ echo "sd_iostats for ${uuid}/${disk} " $(date)
# clear old stats
echo 0 >$file
# find all obdfilters and MDSs
for i in /proc/fs/lustre/obdfilter/* /proc/fs/lustre/mds/*; do
- obd=`basename $i`
+ local obd=$(basename $i)
if [ "$obd" == "num_refs" ]; then
continue;
fi
if [ ! -f ${i}/mntdev ]; then
continue;
fi
- tmp=`cat ${i}/mntdev`
- disk=`basename $tmp`
+ local tmp=$(cat ${i}/mntdev)
+ local disk=$(basename $tmp)
if [ ! -f /proc/scsi/sd_iostats/${disk} ]; then
continue;
fi
function mballoc_collector()
{
local obd=$1
- local uuid=`cat $obd/uuid`
- local tmp=`cat $obd/mntdev`
- local disk=`basename $tmp`
+ local uuid=$(cat $obd/uuid)
+ local tmp=$(cat $obd/mntdev)
+ local disk=$(basename $tmp)
local file="/proc/fs/ldiskfs*/${disk}/mb_history"
- echo "mballoc history for ${uuid}/${disk} " `date`
+ echo "mballoc history for ${uuid}/${disk} " $(date)
# log allocations only
for i in $file; do
# find all obdfilters and MDSs
for i in /proc/fs/lustre/obdfilter/* /proc/fs/lustre/mds/*; do
- obd=`basename $i`
+ obd=$(basename $i)
if [ "$obd" == "num_refs" ]; then
continue;
fi
if [ ! -f ${i}/mntdev ]; then
continue;
fi
- tmp=`cat ${i}/mntdev`
- disk=`basename $tmp`
+ tmp=$(cat ${i}/mntdev)
+ disk=$(basename $tmp)
if [ ! -f /proc/fs/ldiskfs*/${disk}/mb_history ]; then
continue;
fi
function io_collector()
{
local obd=$1
- local uuid=`cat $obd/uuid`
- local tmp=`cat $obd/mntdev`
- local disk=`basename $tmp`
+ local uuid=$(cat $obd/uuid)
+ local tmp=$(cat $obd/mntdev)
+ local disk=$(basename $tmp)
local file="/sys/block/${disk}/stat"
- echo "iostats for ${uuid}/${disk} " `date`
+ echo "iostats for ${uuid}/${disk} " $(date)
if let "IO_INTERVAL==0"; then
cat $file
# find all obdfilters and MDSs
for i in /proc/fs/lustre/obdfilter/* /proc/fs/lustre/mds/*; do
- obd=`basename $i`
+ local obd=$(basename $i)
if [ "$obd" == "num_refs" ]; then
continue;
fi
if [ ! -f ${i}/mntdev ]; then
continue;
fi
- tmp=`cat ${i}/mntdev`
- disk=`basename $tmp`
+ local tmp=$(cat ${i}/mntdev)
+ local disk=$(basename $tmp)
if [ ! -f /sys/block/${disk}/stat ]; then
continue;
fi
function jbd_collector()
{
local obd=$1
- local uuid=`cat $obd/uuid`
- local tmp=`cat $obd/mntdev`
- local disk=`basename $tmp`
+ local uuid=$(cat $obd/uuid)
+ local tmp=$(cat $obd/mntdev)
+ local disk=$(basename $tmp)
local file="/proc/fs/jbd/${disk}/history"
- echo "jbd history for ${uuid}/${disk} " `date`
+ echo "jbd history for ${uuid}/${disk} " $(date)
if let "JBD_INTERVAL==0"; then
idle_collector
# find all obdfilters and MDSs
for i in /proc/fs/lustre/obdfilter/* /proc/fs/lustre/mds/*; do
- obd=`basename $i`
+ local obd=$(basename $i)
if [ "$obd" == "num_refs" ]; then
continue;
fi
if [ ! -f ${i}/mntdev ]; then
continue;
fi
- tmp=`cat ${i}/mntdev`
- disk=`basename $tmp`
+ local tmp=$(cat ${i}/mntdev)
+ local disk=$(basename $tmp)
if [ ! -f /proc/fs/jbd/${disk}/history ]; then
continue;
fi
exit
fi
- PID=`cat $STATPIDS 2>/dev/null`
+ local PID=$(cat $STATPIDS 2>/dev/null)
if [ "x$PID" != "x" ]; then
for i in $PID; do
- i=`echo $i | sed 's/^[^:]*://'`
- TO=`cat ${STIMEPREFIX}$i`
- TN=`ps -p $i -o bsdstart=`
+ local i=$(echo $i | sed 's/^[^:]*://')
+ local TO=$(cat ${STIMEPREFIX}$i)
+ local TN=$(ps -p $i -o bsdstart=)
if [ "$TO" != "" -a "$TO" == "$TN" ]; then
echo "Some slave is already running by $i"
exit
exit
fi
- PID=`cat $STATPIDS 2>/dev/null`
+ local PID=$(cat $STATPIDS 2>/dev/null)
if [ "x$PID" != "x" ]; then
- pids2wait=""
+ local pids2wait=""
for i in $PID; do
- i=`echo $i | sed 's/^[^:]*://'`
- TO=`cat ${STIMEPREFIX}$i 2>/dev/null`
- TN=`ps -p $i -o bsdstart=`
+ local i=$(echo $i | sed 's/^[^:]*://')
+ local TO=$(cat ${STIMEPREFIX}$i 2>/dev/null)
+ local TN=$(ps -p $i -o bsdstart=)
if [ "$TO" == "" -o "$TO" != "$TN" ]; then
echo "No collector with $i found"
continue
done
#echo "XXX: wait collectors $pids2wait"
for i in $pids2wait; do
- TO=`cat ${STIMEPREFIX}$i 2>/dev/null`
- TN=`ps -p $i -o bsdstart=`
+ TO=$(cat ${STIMEPREFIX}$i 2>/dev/null)
+ TN=$(ps -p $i -o bsdstart=)
while [ "$TO" != "" -a "$TO" == "$TN" ]; do
sleep 1
- TN=`ps -p $i -o bsdstart=`
+ TN=$(ps -p $i -o bsdstart=)
done
done
fi
# current version is silly - just finds all *out* files in $TMP
ls_fetch()
{
- if [ "X${GLOBAL_TIMESTAMP}" = "X" ]
- then
- local date=`date +%F-%H.%M.%S`
+ if [ "X${GLOBAL_TIMESTAMP}" = "X" ]; then
+ local date=$(date +%F-%H.%M.%S)
else
- local date=${GLOBAL_TIMESTAMP}
+ date=${GLOBAL_TIMESTAMP}
fi
- local hostname=`hostname -s`
+ local hostname=$(hostname -s)
local name="stats-$hostname-$date"
stats=${OUTPREFIX}*
echo "No stats found"
fi
rm -rf ${TMP}/${name}*
-
}
#
%if %{with lustre_iokit}
%files -n lustre-iokit
%defattr(-, root, root)
+%{_bindir}/iokit-config
+%{_bindir}/iokit-gather-stats
+%{_bindir}/iokit-libecho
+%{_bindir}/iokit-lstats
+%{_bindir}/iokit-parse-ior
+%{_bindir}/iokit-plot-obdfilter
+%{_bindir}/iokit-plot-ost
+%{_bindir}/iokit-plot-sgpdd
%{_bindir}/ior-survey
-%{_bindir}/parse-ior
-%{_bindir}/libecho
+%{_bindir}/mds-survey
%{_bindir}/obdfilter-survey
-%{_bindir}/plot-obdfilter
-%{_bindir}/plot-ost
%{_bindir}/ost-survey
%{_bindir}/sgpdd-survey
-%{_bindir}/plot-sgpdd
-%{_bindir}/lstats.sh
-%{_bindir}/gather_stats_everywhere.sh
-%{_bindir}/config.sh
-%{_bindir}/mds-survey
-%doc lustre-iokit/obdfilter-survey/README.obdfilter-survey
%doc lustre-iokit/ior-survey/README.ior-survey
-%doc lustre-iokit/ost-survey/README.ost-survey
%doc lustre-iokit/mds-survey/README.mds-survey
+%doc lustre-iokit/obdfilter-survey/README.obdfilter-survey
+%doc lustre-iokit/ost-survey/README.ost-survey
%doc lustre-iokit/sgpdd-survey/README.sgpdd-survey
-%doc lustre-iokit/stats-collect/README.lstats.sh
+%doc lustre-iokit/stats-collect/README.iokit-lstats
%endif
%if %{defined rpm_post_base}
}
run_test 171 "test libcfs_debug_dumplog_thread stuck in do_exit() ======"
-# it would be good to share it with obdfilter-survey/libecho code
+# it would be good to share it with obdfilter-survey/iokit-libecho code
setup_obdecho_osc () {
local rc=0
local ost_nid=$1