From: cliffw Date: Thu, 24 Aug 2006 20:34:46 +0000 (+0000) Subject: b=10851 X-Git-Tag: v1_7_100~444 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3865c836940f9a4d838d76e21544fca6bcc5af83 b=10851 Includes new ost-survey script, fixes to obdfilter-survey from scjody, new echo.sh for obdfilter setup from eeb --- diff --git a/lustre-iokit/obdfilter-survey/README b/lustre-iokit/obdfilter-survey/README index 68c4726..9983999 100644 --- a/lustre-iokit/obdfilter-survey/README +++ b/lustre-iokit/obdfilter-survey/README @@ -52,9 +52,42 @@ The script must be customised according to the components under test and where it should keep its working files. Customisation variables are described clearly at the start of the script. -If you are driving obdfilter instances directly, set the shell array +To run against a local disk: +--------------------------- + +- Create a Lustre configuration shell script and XML using your normal +methods + - You do not need to specify an MDS or LOV + - List all OSTs that you wish to test + +- On all OSS machines: + # lconf --reformat (config).xml - Remember, write tests are +destructive! This test should be run prior to startup of your actual +Lustre filesystem. If that is the case, you will not need to reformat +to restart Lustre - however, if the test is terminated before +completion, you may have to remove objects from the disk. + +- Determine the obdfilter instance names on all the servers, column 4 +of 'lctl dl'. For example: + +# pdsh -w oss[01-02] lctl dl |grep obdfilter |sort +oss01: 0 UP obdfilter oss01-sdb oss01-sdb_UUID 3 +oss01: 2 UP obdfilter oss01-sdd oss01-sdd_UUID 3 +oss02: 0 UP obdfilter oss02-sdi oss02-sdi_UUID 3 +... + +Here the obdfilter instance names are oss01-sdb, oss01-sdd, oss02-sdi. + +Since you are driving obdfilter instances directly, set the shell array variable 'ost_names' to the names of the obdfilter instances and leave 'client_names' undefined. 
+Example: + +ost_names_str='oss01:oss01-sdb oss01:oss01-sdd oss02:oss02-sdi' \ + ./obdfilter-survey + +To run against a network: +------------------------ If you are driving obdfilter or obdecho instances over the network, you must instantiate the echo_clients yourself using lmc/lconf. Set the shell @@ -69,20 +102,59 @@ case, you need to ensure... (a) 'custom_remote_shell()' works on your cluster (b) all pathnames you specify in the script are mounted on the node you start the survey from and all the remote nodes. +(c) obdfilter-survey must be installed on the clients, in the same + location as on the master node. -Use 'lctl device_list' to verify the obdfilter/echo_client instance names -e.g... +- First, bring up obdecho instances on the servers and echo_client instances +on the clients: + - run the included echo.sh on a node that has Lustre installed. + -shell variables: + - SERVERS: Set this to a list of server hostnames, or `hostname` of + the current node will be used. This may be the wrong interface, so + check it. NOTE: echo.sh could probably be smarter about this... + + - NETS: set this if you are using a network type other than +tcp. + - example: SERVERS=oss01-eth2 sh echo.sh + +- On the servers start the obdecho server and verify that it is up: -[root@ns9 root]# lctl device_list - 0 UP confobd conf_ost3 OSD_ost3_ns9_UUID 1 - 1 UP obdfilter ost3 ost3_UUID 1 - 2 UP ost OSS OSS_UUID 1 - 3 AT confobd conf_ost12 OSD_ost12_ns9_UUID 1 -[root@ns9 root]# +# lconf --node (hostname) /(path)/echo.xml +# lctl dl + 0 UP obdecho ost_oss01.local ost_oss01.local_UUID 3 + 1 UP ost OSS OSS_UUID 3 -...here device 1 is an instance of obdfilter called 'ost3'. To exercise it -directly, add 'ns9:ost3' to 'ost_names'. If the script is only to be run -on node 'ns9' you could simply add 'ost3' to 'ost_names'. 
+- On the clients start the other side of the echo connection: + +# lconf --node client /(path)/echo.xml +# lctl dl + 0 UP osc OSC_xfer01.local_ost_oss01.local_ECHO_client 6bc9b_ECHO_client_2a8a2cb3dd 5 + 1 UP echo_client ECHO_client 6bc9b_ECHO_client_2a8a2cb3dd 3 + +- verify connectivity from a client: + - lctl ping SERVER_NID + +- Run the script on the master node, specifying the client names in an +environment variable + +Example: +# client_names_str='xfer01:ECHO_client xfer02:ECHO_client +xfer03:ECHO_client xfer04:ECHO_client xfer05:ECHO_client +xfer06:ECHO_client xfer07:ECHO_client xfer08:ECHO_client +xfer09:ECHO_client xfer10:ECHO_client xfer11:ECHO_client +xfer12:ECHO_client' ./obdfilter-survey + + +- When done: cleanup echo_client/obdecho instances: + - on clients: lconf --cleanup --node client /(path)/echo.xml + - on server(s): lconf --cleanup --node (hostname) /(path)/echo.xml + +- When aborting: killall vmstat on clients: + +pdsh -w (clients) killall vmstat + +Use 'lctl device_list' to verify the obdfilter/echo_client instance names +e.g... When the script runs, it creates a number of working files and a pair of result files. All files start with the prefix given by ${rslt}. 
diff --git a/lustre-iokit/obdfilter-survey/echo.sh b/lustre-iokit/obdfilter-survey/echo.sh new file mode 100755 index 0000000..822f061 --- /dev/null +++ b/lustre-iokit/obdfilter-survey/echo.sh @@ -0,0 +1,82 @@ +#!/bin/bash + +# This script will create a Lustre .xml configuration consisting +# of echo client/servers for use with the obdfilter-survey test + +####################################################################### +# Customisation variables +####################################################################### + +config=$(basename $0 .sh).xml + +SERVERS=${SERVERS:-$(uname -n)} + +NETS=${NETS:-tcp} + +LMC=lmc +VERBOSE=1 +BATCH=/tmp/lmc-batch.$$ + +####################################################################### +# some helpers: actual config below +####################################################################### + +h2elan () { + echo $1 | sed 's/[^0-9]*//g' +} + +_LMC="${LMC} -m $config" + +_lmc () { + if [ $VERBOSE ]; then echo "$@"; fi + if [ -n "$BATCH" ]; then + echo "$@" >> $BATCH + else + $_LMC "$@" + fi +} + +config_end () { + [ -n "$BATCH" ] && $_LMC --batch $BATCH + cleanup +} + +cleanup () { + [ -n "$BATCH" ] && rm -f $BATCH +} + +ABORT_ON="ERR QUIT INT HUP" + +abort () { + trap - EXIT $ABORT_ON + echo "Error/Interrupt creating $config" + cleanup + exit 1 +} + +trap config_end EXIT +trap abort $ABORT_ON + +[ -f $config ] && rm $config + +#################################################################### +# the actual config +#################################################################### + +# client net +_lmc --node client --add net --nettype lnet --nid '*' + +for srv in $SERVERS; do + for net in $NETS; do + case $net in + elan*) nid=`h2elan $srv`;; + gm*) nid=`gmnalnid -n $srv`;; + *) nid=$srv;; + esac + _lmc --node $srv --add net --nettype lnet --nid ${nid}@${net} + done + + _lmc --node $srv --add ost --ost ost_$srv --osdtype=obdecho + + _lmc --node client --add echo_client --ost ost_$srv +done diff --git 
a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey index 4f89913..588e883 100755 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ b/lustre-iokit/obdfilter-survey/obdfilter-survey @@ -41,7 +41,7 @@ fi rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} # lustre root (if running with own source tree) -lustre_root=${lustre_root:-"/home_nfs/eeb/lustre"} +# lustre_root=${lustre_root:-"/my/directory/lustre"} # what tests to run (first must be write) tests_str=${tests_str:-""} @@ -57,6 +57,9 @@ else tests=(write rewrite read) fi +# Set this true to check file contents +verify=0 + # total size (MBytes) per obd instance # large enough to avoid cache effects # and to make test startup/shutdown overhead insignificant @@ -118,8 +121,7 @@ rmmod="/sbin/rmmod" # lctl::test_brw bandwidth snapshot interval (seconds) snap=1 -# check file contents? -verify=1 + if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then echo "tests: ${tests[@]}" @@ -142,7 +144,7 @@ if [ "$lustre_root" == " " ]; then fi if [ -z "$lustre_root" ]; then - lctl=$(which lctl) + lctl=lctl else lctl=${lustre_root}/utils/lctl fi @@ -205,7 +207,7 @@ get_ec_devno () { return fi if [ -z "$ost_name" ]; then - echo "no echo client and ost_name not set" 1>&2 + echo "no echo client and ost_name not set, client: $client_name, host: $host" 1>&2 return fi ost=`get_devno $host obdfilter $ost_name` @@ -497,6 +499,8 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do t0=`date +%s.%N` pidcount=0 for host in ${unique_hosts[@]}; do + # brutal hack to deal with a non-shared /tmp + scp -q ${cmdsf}_${host} ${host}:/tmp > /dev/null remote_shell $host bash ${cmdsf}_${host} & pidarray[$pidcount]=$! 
pidcount=$((pidcount+1)) @@ -522,6 +526,8 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do client_name="${host_names[$idx]}:${client_names[$idx]}" tmpfi="${tmpf}_$idx" echo "=============> $test $client_name" >> $workf + host="${host_names[$idx]}" + scp -q ${host}:${tmpfi} $tmpfi > /dev/null cat $tmpfi >> $workf get_stats $tmpfi >> $tmpf rm $tmpfi diff --git a/lustre-iokit/ost-survey/README b/lustre-iokit/ost-survey/README new file mode 100644 index 0000000..7fa66db --- /dev/null +++ b/lustre-iokit/ost-survey/README @@ -0,0 +1,45 @@ +This script is to be used to test the performance of the +individual OSTs in a Lustre filesystem. + +Goal: + - Generate a fixed amount of IO per OST + - Identify the fastest and slowest OST + +Parameters + - Size of per-OST IO + +Plan + + Identify number of OSTs in system + For each OST + - create a directory + - use lfs setstripe to restrict IO to that directory + - run some form of IO + +Assumptions + - Lustre filesystem is up and running + - Script is being run on a client + + +Steps + + 0. Check pre-requisites + - sgp_dd + 1. Identify the number of OSTs in the system + - store value + + 2. Collect input parameters + - store value + + 3. Set up array to hold output + + 4. Loop over all OSTs + - Create a directory + - Set striping on that directory + - Run some IO and time + - Resolve MB/s + - Store in array + + 5. 
Determine max and min for the array + + diff --git a/lustre-iokit/ost-survey/ost-survey.sh b/lustre-iokit/ost-survey/ost-survey.sh new file mode 100755 index 0000000..73a3b05 --- /dev/null +++ b/lustre-iokit/ost-survey/ost-survey.sh @@ -0,0 +1,194 @@ +#!/bin/bash + +# This script is to be run on a client machine and will test all the +# OSTs to determine which is the fastest and slowest +# The current test method +# Create a directory for each OST +# Use 'lfs setstripe' to set the Lustre striping such that IO goes to +# only one OST +# Use 'dd' to write a file of a specified size +# Use 'dd' to read a file of a specified size +# Compute the average +# Find the slowest OST + + +declare -a rTime=() # Time to read some data +declare -a wTime=() # Time to write some data +declare -a rMBs=() # Read speed +declare -a wMBs=() # Write speed + +# GLOBALS +OSTS=0 # Number of OSTS we will loop over +OFILE=testdummy # File name to use +BSIZE=1024 # size of blocks to be written +MNT='' # Location of Lustre file system +DIR="tmpdir" # Name used to create a series of tmp directories +VERBOSE=1 # Set this to get verbose output ( TODO - use getopts? 
) + +# Usage +if [ $# -ne 2 ]; then + echo "Usage: $0 <size> <mount-point>" + exit 1 +fi + + +test_preq () { + # check for a mounted Lustre filesystem + MNT=`grep lustre /proc/mounts | awk '{print $2}'` + if [ -z "$MNT" ]; then + echo "Mounted Lustre filesystem not found" + exit 1 + fi + + # Check for Lustre utilities in PATH + # Check for dd +} + +ost_count () { + # We assume that all devices with 'osc' in the string are OSTs + OSTS=`lctl dl | grep -c osc` +} + +make_dummy () { +# Create a file full of zeros + echo "make dummy" + local DIR=$1 + local SIZE=$2 + mkdir -p $MNT/$DIR + dd if=/dev/zero of=$MNT/$DIR/$OFILE count=$SIZE bs=$BSIZE 2> /dev/null + +} + +output_all_data () { + echo "$OSTS OST devices found" + local CNT=0 + while [ $CNT -lt $OSTS ]; do + echo "Ost index $CNT Read speed ${rMBs[$CNT]} Write speed ${wMBs[$CNT]}" + echo "Ost index $CNT Read time ${rTime[$CNT]} Write time ${wTime[$CNT]}" + CNT=$(( $CNT + 1 )) + done +} +run_test () { + local DIR=$1 + local SIZE=$2 + local INX=$3 + local ACTION=$4 + + if [ ! 
-f $MNT/$DIR/$OFILE ] && [ $ACTION == 'read' ]; then + make_dummy $DIR $SIZE + fi + + t0=`date +%s.%N` + if [ $ACTION == 'read' ]; then + OUTS=`dd if=$MNT/$DIR/$OFILE of=/dev/null count=$SIZE bs=$BSIZE 2> /dev/null` + elif [ $ACTION == 'write' ]; then + OUTS=`dd of=$MNT/$DIR/$OFILE if=/dev/zero count=$SIZE bs=$BSIZE 2> /dev/null` + else + echo "Action not read||write" + exit 1 + fi + t1=`date +%s.%N` + + tdelta=`awk "BEGIN {printf \"%7.2f\", $t1 - $t0; exit}"` + sdelta=$(( $SIZE * $BSIZE )) + delta=`awk "BEGIN {printf \"%7.2f\", ($SIZE * $BSIZE / ( $t1 - $t0 )) / ( 1024 * 1024 ) ; exit}"` + + if [ $ACTION == 'read' ]; then + rTime[$INX]=$tdelta + rMBs[$INX]=$delta + else + wTime[$INX]=$tdelta + wMBs[$INX]=$delta + fi +} + +display_average () { + local CNT=0 + local OP=$1 + while [ $CNT -lt $OSTS ]; do + if [ $OP == "read" ]; then + echo "${rMBs[$CNT]} $OP" + elif [ $OP == "write" ]; then + echo "${wMBs[$CNT]} $OP" + else + echo "Bad param" + exit 1 + fi + CNT=$(( $CNT + 1 )) + done | awk '{ c++; t+= $1; op = $2 }; END { printf "Average %s Speed: %7.2f\n", op, t/c }' + +} + +find_min () { + local CNT=0 + local OP=$1 + while [ $CNT -lt $OSTS ]; do + if [ $OP == "read" ]; then + echo "${rMBs[$CNT]} $CNT $OP" + elif [ $OP == "write" ]; then + echo "${wMBs[$CNT]} $CNT $OP" + else + echo "Bad param" + exit 1 + fi + CNT=$(( $CNT + 1 )) + done | awk '{ + if (NR == 1) { min = $1; indx = $2; op = $3 } + else if (min > $1){ min = $1; indx = $ 2; op = $3} + } + END {printf "%s - Worst OST indx %d %7.2f MB/s\n", op, indx, min}' +} + +find_max () { + local CNT=0 + local OP=$1 + while [ $CNT -lt $OSTS ]; do + if [ $OP == "read" ]; then + echo "${rMBs[$CNT]} $CNT $OP" + elif [ $OP == "write" ]; then + echo "${wMBs[$CNT]} $CNT $OP" + else + echo "Bad param" + exit 1 + fi + CNT=$(( $CNT + 1 )) + done | awk '{ + if (NR == 1) { max = $1; indx = $2; op = $3 } + else if (max < $1){ max = $1; indx = $ 2; op = $3 } + } + END {printf "%s - Best OST indx %d %7.2f MB/s\n", op, indx, max}' +} 
+# Temp cleanup + +CNT=0 +MYSIZE=1024 + +test_preq +ost_count + +while [ $CNT -lt $OSTS ]; do + rm -rf $MNT/${DIR}${CNT} + mkdir -p $MNT/${DIR}${CNT} + lfs setstripe $MNT/${DIR}${CNT} 0 $CNT 1 + run_test ${DIR}${CNT} $MYSIZE $CNT write + run_test ${DIR}${CNT} $MYSIZE $CNT read + CNT=$(( $CNT + 1 )) +done + +MAX_MB=0 +MIN_T=999999999 + +display_average read +display_average write +find_min read +find_min write +find_max read +find_max write + +CNT=0 + + +if [ $VERBOSE ]; then + output_all_data +fi +