From 2471d35c0e0eb869640509ad70b02c891f74aefc Mon Sep 17 00:00:00 2001
From: Chris Horn <chris.horn@hpe.com>
Date: Tue, 4 Oct 2022 05:05:15 -0500
Subject: [PATCH] LU-16217 iokit: Add lst.sh wrapper and lst-survey

lst.sh is a wrapper around the LNet selftest (lst) utility. It
provides a streamlined interface for executing read, write, combined
read/write and ping lst tests.

lst-survey leverages lst.sh to test the performance of groups of LNet
peers against each other.

HPE-bug-id: LUS-10279
Test-Parameters: trivial
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I4c2593df1289b0b97760cb402de1e101ca22c319
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48799
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 config/lustre-build.m4                    |   1 +
 lustre-iokit/Makefile.am                  |   2 +-
 lustre-iokit/lst-survey/Makefile.am       |   3 +
 lustre-iokit/lst-survey/README.lst-survey | 178 +++++++++++++
 lustre-iokit/lst-survey/lst-survey        | 365 ++++++++++++++++++++++++++
 lustre-iokit/lst-survey/lst.sh            | 410 ++++++++++++++++++++++++++++++
 lustre.spec.in                            |   6 +
 7 files changed, 964 insertions(+), 1 deletion(-)
 create mode 100644 lustre-iokit/lst-survey/Makefile.am
 create mode 100644 lustre-iokit/lst-survey/README.lst-survey
 create mode 100755 lustre-iokit/lst-survey/lst-survey
 create mode 100755 lustre-iokit/lst-survey/lst.sh
diff --git a/config/lustre-build.m4 b/config/lustre-build.m4
index 412abf8..9fc78586 100644
--- a/config/lustre-build.m4
+++ b/config/lustre-build.m4
@@ -485,6 +485,7 @@ AC_DEFUN([LB_CONFIG_FILES], [
 		lustre-iokit/mds-survey/Makefile
 		lustre-iokit/ior-survey/Makefile
 		lustre-iokit/stats-collect/Makefile
+		lustre-iokit/lst-survey/Makefile
 	)
 ])
 
diff --git a/lustre-iokit/Makefile.am b/lustre-iokit/Makefile.am
index 83dcb47..1803399 100644
--- a/lustre-iokit/Makefile.am
+++ b/lustre-iokit/Makefile.am
@@ -1,2 +1,2 @@
 SUBDIRS = obdfilter-survey sgpdd-survey ost-survey ior-survey
-SUBDIRS += mds-survey stats-collect
+SUBDIRS += mds-survey stats-collect lst-survey
diff --git a/lustre-iokit/lst-survey/Makefile.am b/lustre-iokit/lst-survey/Makefile.am
new file mode 100644
index 0000000..652e29e
--- /dev/null
+++ b/lustre-iokit/lst-survey/Makefile.am
@@ -0,0 +1,3 @@
+bin_SCRIPTS = lst.sh lst-survey
+CLEANFILE = $(bin_SCRIPTS)
+EXTRA_DIST = lst.sh lst-survey README.lst-survey
diff --git a/lustre-iokit/lst-survey/README.lst-survey b/lustre-iokit/lst-survey/README.lst-survey
new file mode 100644
index 0000000..a94f94c
--- /dev/null
+++ b/lustre-iokit/lst-survey/README.lst-survey
@@ -0,0 +1,178 @@
+Overview
+--------
+
+This survey script performs a series of LNet selftest (LST) benchmarks between
+groups of LNet peers. It can be used to characterize the performance of the LNet
+interface(s) on Lustre servers, Lustre clients, or LNet routers.
+
+The LST client group is defined using the '-f' flag, and the LST server group
+is defined using the '-t' flag. Both of these flags take a space-separated or
+comma-separated list of LNet NIDs. The '-M' and '-N' options can be used to
+divide the client or server group into multiple smaller groups.
+For example, given 16 clients and 8 servers, '-M 8' and '-N 2' would create
+two client groups with eight peers in each group, and four server groups with
+two peers in each group. Every server group is tested against every client
+group, so this would result in 4*2=8 test iterations.
+
+By default, each test iteration performs 4k read and write, 1m read and write,
+and ping LST benchmarks.
+
+A directory is created in the current working directory to store results.
+The csv output is written to a results.<timestamp>.csv file and the full
+lst.sh output is stored in an lst.<timestamp>.out file. An alternative output
+directory can be specified with the '-O' argument.
+
+Various options exist to customize the benchmarks that are run. See
+'lst-survey -h' for more information.
+
+A note on interpreting the results:
+By default, lst-survey displays bandwidth and rate statistics for peers in the
+server group as reported by the LST utility.
+These statistics reported by LST can be confusing because a "read" test will
+typically report read bandwidth that is lower than write bandwidth, and a
+"write" test will typically report write bandwidth that is lower than read
+bandwidth. This is because a "read" test involves peers in the client group
+setting up a sink that is then written to by peers in the server group, and a
+"write" test involves the clients setting up a source that is then read by the
+servers. Thus, the read test is really measuring the write performance of the
+servers and the write test is really measuring the read performance of the
+servers.
+
+The '-g clients' option can be used to instead report the client bandwidth and
+rate statistics. In this case, the reported stats will align with the benchmarks
+in the expected manner.
+
+Example 1: Default options
+# pdsh -w n0[0-3] lctl list_nids | dshbak -c
+----------------
+n00
+----------------
+172.18.2.5@tcp
+----------------
+n01
+----------------
+172.18.2.6@tcp
+----------------
+n02
+----------------
+172.18.2.7@tcp
+----------------
+n03
+----------------
+172.18.2.8@tcp
+# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp
+CSV results: /tmp/lst_survey.1666207637/results.1666207637.csv
+LST output: /tmp/lst_survey.1666207637/lst.1666207637.out
+
+Commence lst-survey - Wed 19 Oct 2022 01:27:17 PM MDT
+Server Group: 172.18.2.5@tcp 172.18.2.6@tcp
+Client Group: 172.18.2.7@tcp 172.18.2.8@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              22           149981             608           299961
+       read 1m               2            14405           14405            28808
+      write 4k             489           241229              18           241229
+      write 1m           11463            22924               1            22924
+          ping              25           167928              25           167928
+
+Finished lst-survey - Wed 19 Oct 2022 01:28:08 PM MDT
+# cat /tmp/lst_survey.1666207637/results.1666207637.csv
+Servers,Clients,Mode,Read_BW,Read_Rate,Write_BW,Write_Rate,Server_Errors,Client_Errors
+172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,read_4k,22,149981,608,299961,0,0
+172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,read_1m,2,14405,14405,28808,0,0
+172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,write_4k,489,241229,18,241229,0,0
+172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,write_1m,11463,22924,1,22924,0,0
+172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,ping,25,167928,25,167928,0,0
+#
+
+Example 2: Divide the servers into groups of size 1
+
+# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp -N 1
+CSV results: /tmp/lst_survey.1666207844/results.1666207844.csv
+LST output: /tmp/lst_survey.1666207844/lst.1666207844.out
+
+Commence lst-survey - Wed 19 Oct 2022 01:30:44 PM MDT
+Server Group: 172.18.2.5@tcp
+Client Group: 172.18.2.7@tcp 172.18.2.8@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              25           167068             678           334135
+       read 1m               2            16186           16186            32366
+      write 4k             512           252613              19           252612
+      write 1m           11353            22706               1            22704
+          ping              29           192358              29           192358
+
+Finished lst-survey - Wed 19 Oct 2022 01:31:34 PM MDT
+
+Commence lst-survey - Wed 19 Oct 2022 01:31:34 PM MDT
+Server Group: 172.18.2.6@tcp
+Client Group: 172.18.2.7@tcp 172.18.2.8@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              22           144821             587           289642
+       read 1m               2            16841           16843            33681
+      write 4k             498           245552              18           245552
+      write 1m           11611            23219               1            23217
+          ping              22           145374              22           145374
+
+Finished lst-survey - Wed 19 Oct 2022 01:32:25 PM MDT
+#
+
+Example 3: Divide the servers and clients into groups of size 1
+
+# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp -N 1 -M 1
+CSV results: /tmp/lst_survey.1666208473/results.1666208473.csv
+LST output: /tmp/lst_survey.1666208473/lst.1666208473.out
+
+Commence lst-survey - Wed 19 Oct 2022 01:41:13 PM MDT
+Server Group: 172.18.2.5@tcp
+Client Group: 172.18.2.7@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              11            75112             304           150224
+       read 1m               1             8808            8809            17616
+      write 4k             240           118402               9           118402
+      write 1m            6561            13119               1            13118
+          ping              13            90402              13            90402
+
+Finished lst-survey - Wed 19 Oct 2022 01:42:03 PM MDT
+
+Commence lst-survey - Wed 19 Oct 2022 01:42:03 PM MDT
+Server Group: 172.18.2.5@tcp
+Client Group: 172.18.2.8@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              13            90017             365           180034
+       read 1m               1             7333            7328            14655
+      write 4k             280           138173              10           138173
+      write 1m            8694            17388               1            17388
+          ping              15            98316              15            98316
+
+Finished lst-survey - Wed 19 Oct 2022 01:42:53 PM MDT
+
+Commence lst-survey - Wed 19 Oct 2022 01:42:53 PM MDT
+Server Group: 172.18.2.6@tcp
+Client Group: 172.18.2.7@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k               9            64613             262           129225
+       read 1m               1             9101            9101            18201
+      write 4k             212           104575               7           104573
+      write 1m            6769            13537               1            13539
+          ping              10            71612              10            71612
+
+Finished lst-survey - Wed 19 Oct 2022 01:43:44 PM MDT
+
+Commence lst-survey - Wed 19 Oct 2022 01:43:44 PM MDT
+Server Group: 172.18.2.6@tcp
+Client Group: 172.18.2.8@tcp
+
+          Mode       Read MB/s       Read RPC/s      Write MB/S      Write RPC/s
+       read 4k              12            83144             337           166287
+       read 1m               1             7582            7584            15166
+      write 4k             293           144601              11           144602
+      write 1m            8913            17824               1            17825
+          ping              11            78409              11            78409
+
+Finished lst-survey - Wed 19 Oct 2022 01:44:35 PM MDT
+#
diff --git a/lustre-iokit/lst-survey/lst-survey b/lustre-iokit/lst-survey/lst-survey
new file mode 100755
index 0000000..7a1c1fb
--- /dev/null
+++ b/lustre-iokit/lst-survey/lst-survey
@@ -0,0 +1,365 @@
+#!/bin/bash
+
+print_help() {
+	cat <<EOF
+Usage:
+${0##*/} -f "nid1[ nid2...]" -t "nidA[ nidB...]" [options]
+or
+${0##*/} -H -f "host1[ host2...]" -t "hostA[ hostB...]" [options]
+
+Options:
+	-c concurrency
+	   The number of requests that are active at one time. Default is 64.
+	-d
+	   Debug mode. Outputs the generated lst.sh commands, but does not
+	   execute them.
+	-e
+	   Lists the number of failed RPCs on test nodes in the current session.
+	-D delay
+	   The interval of the statistics (in seconds). Default is $STAT_DELAY.
+	-h
+	   Display this help.
+	-H
+	   Run in "host mode". Host mode indicates that the arguments to '-t'
+	   and '-f' flags are hostnames rather than LNet nids.
+	-f "nid1[ nid2...]"
+	   Space-separated list of LNet NIDs to place in the "clients" group.
+	   When '-H' flag is specified, the '-f' argument is a space-separated
+	   list of hostnames.
+	-g servers|clients
+	   Report stats from the specified group. Either 'clients' or
+	   'servers'. Default is 'servers'.
+	-m read|write|rw|ping<,read|write|rw|ping<,...>>
+	   Execute the specified list of tests. Default is ${MODE_LIST// /, }.
+	-M group_size
+	   Subdivide the client group (-f) into multiple groups of the
+	   specified size. Every client group is tested against every server
+	   group (see -t and -N).
+	-n count
+	   The number of stat RPCs to issue. Default is $STAT_COUNT.
+	-N group_size
+	   Subdivide the server group (-t) into multiple groups of the
+	   specified size. Every server group is tested against every client
+	   group (see -f and -M).
+	-O output_dir
+	   Create output files in specified directory.
+	   Default is PWD/lst_survey.<timestamp>
+	-t "nid1[ nid2...]"
+	   Space-separated list of LNet NIDs to place in the "servers" group.
+	   When '-H' flag is specified, the '-t' argument is a space-separated
+	   list of hostnames.
+	-s bulksize1<,bulksize2<,...>>
+	   For each read, write, or combined read-write test, execute the test
+	   with the specified bulk sizes. Default is 4k and 1m.
+	-S separator
+	   Use the specified character to separate fields in the .csv output
+	   file. Default is ','.
+	-v
+	   Prints additional output. e.g. LST parameters, group construction,
+	   etc.
+EOF
+	exit
+}
+
+verbose() { # Echo the arguments only when VERBOSE (-v) is true
+	${VERBOSE} && echo "$@"
+}
+
+SERVERS=""
+CLIENTS=""
+CONCURRENCY=64
+HOST_MODE=false
+LST_DEBUG=false
+MODE_LIST="read write ping"
+C_GRP_SIZE=""
+S_GRP_SIZE=""
+SEP=','
+SIZE_LIST="4k 1m"
+SHOW_ERRORS=false
+STAT_COUNT=3
+STAT_DELAY=3
+STAT_GROUP="servers"
+TS=$(date +%s)
+TEST_DIR=$PWD/lst_survey.${TS}
+VERBOSE=false
+while getopts "c:dD:eHhf:g:m:M:n:N:O:t:s:S:v" flag ; do
+	case $flag in
+		c) CONCURRENCY="$OPTARG";;
+		d) LST_DEBUG=true;;
+		D) STAT_DELAY="$OPTARG";;
+		e) SHOW_ERRORS=true;; # plain flag: takes no argument
+		H) HOST_MODE=true;;
+		h) print_help;;
+		f) CLIENTS="$OPTARG";;
+		g) STAT_GROUP="$OPTARG";;
+		m) MODE_LIST="$OPTARG";;
+		M) C_GRP_SIZE="$OPTARG";;
+		n) STAT_COUNT="$OPTARG";;
+		N) S_GRP_SIZE="$OPTARG";;
+		O) TEST_DIR="$OPTARG";;
+		t) SERVERS="$OPTARG";;
+		s) SIZE_LIST="$OPTARG";;
+		S) SEP="${OPTARG}";;
+		v) VERBOSE=true;;
+		*) echo "Unrecognized option '-$flag'"
+		   exit 1;;
+	esac
+done
+
+LSTSH=${LSTSH:-$(dirname "$0")/lst.sh}
+if ! [[ -f $LSTSH ]]; then
+	LSTSH=$(which lst.sh 2>/dev/null)
+fi
+
+if ! [[ -f $LSTSH ]]; then
+	echo "Cannot find lst.sh script at $LSTSH"
+	exit 1
+fi
+
+if [[ -z $CLIENTS ]]; then
+	echo "Must specify \"clients\" group (-f)"
+	exit 1
+elif [[ -z $SERVERS ]]; then
+	echo "Must specify \"servers\" group (-t)"
+	exit 1
+fi
+
+IFS=" " read -r -a CLIENTS <<< "${CLIENTS//,/ }"
+[[ -z $C_GRP_SIZE ]] &&
+	C_GRP_SIZE=${#CLIENTS[@]}
+
+IFS=" " read -r -a SERVERS <<< "${SERVERS//,/ }"
+[[ -z $S_GRP_SIZE ]] &&
+	S_GRP_SIZE=${#SERVERS[@]}
+
+if [[ $STAT_COUNT -lt 1 ]]; then
+	echo "Stat count must be > 0 (-n count)"
+	exit 1
+elif [[ $C_GRP_SIZE -lt 1 ]]; then
+	echo "Client group size must be > 0 (-M group_size)"
+	exit 1
+elif [[ $C_GRP_SIZE -gt ${#CLIENTS[@]} ]]; then
+	echo "Specified client group size (-M $C_GRP_SIZE) cannot be larger than number of clients specified with -f (${#CLIENTS[@]})"
+	exit 1
+elif [[ $S_GRP_SIZE -lt 1 ]]; then
+	echo "Server group size must be > 0 (-N group_size)"
+	exit 1
+elif [[ $S_GRP_SIZE -gt ${#SERVERS[@]} ]]; then
+	echo "Specified server group size (-N $S_GRP_SIZE) cannot be larger than number of servers specified with -t (${#SERVERS[@]})"
+	exit 1
+elif ! [[ $STAT_GROUP =~ ^(servers|clients)$ ]]; then
+	echo "Invalid stat group $STAT_GROUP (-g servers|clients)"
+	exit 1
+elif [[ -z $MODE_LIST ]]; then
+	echo "Empty mode list (-m read|write|rw|ping)"
+	exit 1
+elif [[ -z $SIZE_LIST ]]; then
+	echo "Empty bulk size list (-s 1024|4k|1m)"
+	exit 1
+fi
+
+for m in ${MODE_LIST//,/ }; do # split on commas, as run_test does
+	if ! [[ $m =~ ^(read|write|rw|ping)$ ]]; then # whole word must match
+		echo "Invalid mode \"$m\" specified (-m read|write|rw|ping)"
+		exit 1
+	fi
+done
+
+mkdir -p "${TEST_DIR}" || { # '||' keeps mkdir's real status in $?
+	echo "Failed to create results directory at \"${TEST_DIR}\" rc=$?"
+	exit 1
+}
+OUTFILE=${TEST_DIR}/results.${TS}.csv
+
+LST_OPTIONS="-c $CONCURRENCY -n $STAT_COUNT -D $STAT_DELAY -e -S \"bw rate\""
+LST_OPTIONS+=" -g ${STAT_GROUP} -e"
+if ${HOST_MODE}; then
+	LST_OPTIONS+=" -H"
+fi
+
+print_results() { # Append mode/stat/error fields to OUTFILE; print the stats
+	local mode="$1" # test mode: read, write, rw or ping
+	local size="$2" # bulk size label; only used for non-ping modes
+
+	if ${LST_DEBUG}; then # debug mode: do_lst only echoes the command
+		return
+	fi
+
+	[[ $mode != ping ]] &&
+		mode="${mode}_${size}" # e.g. read_4k
+
+	{
+		echo -n "${SEP}${mode}"
+		echo -n "${SEP}${RD_BW_AVG}${SEP}${RD_RATE_AVG}"
+		echo -n "${SEP}${W_BW_AVG}${SEP}${W_RATE_AVG}"
+		echo "${SEP}${SERVER_ERRORS}${SEP}${CLIENT_ERRORS}"
+	}>>"${OUTFILE}"
+
+	printf "%14s  %14s  %15s  %14s  %15s\n" \
+		"${mode}" "${RD_BW_AVG}" "${RD_RATE_AVG}" "${W_BW_AVG}" \
+		"${W_RATE_AVG}"
+}
+
+SERVER_ERRORS=0
+CLIENT_ERRORS=0
+RD_RATE_AVG=0
+W_RATE_AVG=0
+RD_BW_AVG=0
+W_BW_AVG=0
+do_lst() { # Run one lst.sh test; average its stats into the *_AVG globals
+	local mode="$1"
+	shift
+	local lst_args="$*" # remaining arguments form the lst.sh command line
+
+	RD_RATE_AVG=0
+	W_RATE_AVG=0
+	RD_BW_AVG=0
+	W_BW_AVG=0
+
+	declare -a vals
+
+	if ${LST_DEBUG}; then # debug mode: show the command, do not run it
+		echo "$LSTSH ${lst_args}"
+		return
+	fi
+	IFS=" " read -r -a vals <<< "$(eval "$LSTSH" "${lst_args}" 2>&1 |
+				       tee -a "${TEST_DIR}"/lst."${TS}".out |
+				       awk '/^\[(R|W)\]/{print $3};
+				            /error nodes in/{print $2}' |
+				       xargs echo)"
+
+	# Each stat RPC generates 4 lines of output, and we have two lines for
+	# the error counts
+	local expect=$((2 + STAT_COUNT * 4))
+
+	if [[ ${#vals[@]} -ne $expect ]]; then
+		echo
+		echo "Error: Failed to get all samples. Expect $expect, found ${#vals[@]}"
+		exit 1 # bare 'exit' would return echo's status (0)
+	fi
+
+	local i rd_rate w_rate rd_bw w_bw
+	for ((i = 0; i < $((expect - 4)); i+=4)); do
+		rd_rate=${vals[i]}
+		w_rate=${vals[i+1]}
+		rd_bw=${vals[i+2]}
+		w_bw=${vals[i+3]}
+
+		RD_RATE_AVG="${RD_RATE_AVG:+$RD_RATE_AVG +} $rd_rate"
+		W_RATE_AVG="${W_RATE_AVG:+$W_RATE_AVG +} $w_rate"
+		RD_BW_AVG="${RD_BW_AVG:+$RD_BW_AVG +} $rd_bw"
+		W_BW_AVG="${W_BW_AVG:+$W_BW_AVG +} $w_bw"
+	done
+
+	RD_RATE_AVG=$(echo "($RD_RATE_AVG)/$STAT_COUNT" | bc)
+	W_RATE_AVG=$(echo "($W_RATE_AVG)/$STAT_COUNT" | bc)
+	RD_BW_AVG=$(echo "($RD_BW_AVG)/$STAT_COUNT" | bc)
+	W_BW_AVG=$(echo "($W_BW_AVG)/$STAT_COUNT" | bc)
+
+	SERVER_ERRORS=$((SERVER_ERRORS + ${vals[$expect - 2]}))
+	CLIENT_ERRORS=$((CLIENT_ERRORS + ${vals[$expect - 1]}))
+}
+
+run_test() { # Run every mode/size combination for one server/client group pair
+	local server_group="$1" # space-separated members of the server group
+	local client_group="$2" # space-separated members of the client group
+
+	if ! ${LST_DEBUG}; then
+		echo
+		echo "Commence lst-survey - $(date)"
+		echo "Server Group: ${server_group}"
+		echo "Client Group: ${client_group}"
+		echo
+		printf "%14s  %14s  %15s  %14s  %15s\n" \
+			"Mode" "Read MB/s" "Read RPC/s" "Write MB/S" "Write RPC/s"
+	fi
+
+	SERVER_ERRORS=0 # See do_lst()
+	CLIENT_ERRORS=0 # See do_lst()
+
+	local lst_args
+	lst_args="-t \"${server_group}\""
+	lst_args+=" -f \"${client_group}\""
+	lst_args+=" -d ${C_GRP_SIZE}:${S_GRP_SIZE} $LST_OPTIONS"
+
+	local bulksize mode
+	for mode in ${MODE_LIST//,/ }; do
+		for bulksize in ${SIZE_LIST//,/ } ping; do # 'ping' is a pseudo-size
+			[[ $bulksize == ping ]] && [[ $mode != ping ]] &&
+				continue
+			[[ $bulksize != ping ]] && [[ $mode == ping ]] &&
+				continue
+
+			{ # start the CSV row; print_results appends the rest
+				echo -n "${server_group}"
+				echo -n "${SEP}${client_group}"
+			}>>"${OUTFILE}"
+			do_lst "$mode" "${lst_args} -m $mode -s $bulksize"
+			print_results "$mode" "$bulksize"
+		done
+	done
+
+	if ${SHOW_ERRORS} && ! ${LST_DEBUG}; then
+		echo "Server Errors: ${SERVER_ERRORS}"
+		echo "Client Errors: ${CLIENT_ERRORS}"
+	fi
+
+	if ! ${LST_DEBUG}; then
+		echo
+		echo "Finished lst-survey - $(date)"
+	fi
+}
+
+{
+	echo -n "Servers${SEP}Clients${SEP}"
+	echo -n "Mode${SEP}Read_BW${SEP}Read_Rate${SEP}"
+	echo -n "Write_BW${SEP}Write_Rate${SEP}"
+	echo "Server_Errors${SEP}Client_Errors"
+}>>"${OUTFILE}"
+
+declare -a s_groups
+n_s_groups=$((${#SERVERS[@]} / S_GRP_SIZE))
+verbose "Creating $n_s_groups server group(s) of size $S_GRP_SIZE"
+s_count=0
+s_grp_idx=0
+s_grp_str=""
+for s in "${SERVERS[@]}"; do
+	((s_count++))
+	s_grp_str="${s_grp_str:+$s_grp_str }${s}"
+	if [[ $s_count -eq $S_GRP_SIZE ]]; then
+		s_groups[s_grp_idx]="$s_grp_str"
+		((s_grp_idx++))
+		verbose "Server group $s_grp_idx: $s_grp_str"
+		s_count=0
+		s_grp_str=""
+	fi
+done
+
+declare -a c_groups
+n_c_groups=$((${#CLIENTS[@]} / C_GRP_SIZE))
+verbose "Creating $n_c_groups client group(s) of size $C_GRP_SIZE"
+c_count=0
+c_grp_idx=0
+c_grp_str=""
+for c in "${CLIENTS[@]}"; do
+	((c_count++))
+	c_grp_str="${c_grp_str:+$c_grp_str }${c}"
+	if [[ $c_count -eq $C_GRP_SIZE ]]; then
+		c_groups[c_grp_idx]="$c_grp_str"
+		((c_grp_idx++))
+		verbose "Client group $c_grp_idx: $c_grp_str"
+		c_count=0
+		c_grp_str=""
+	fi
+done
+
+verbose "Arguments to $LSTSH: $LST_OPTIONS"
+
+echo "CSV results: ${OUTFILE}"
+echo "LST output: ${TEST_DIR}/lst.${TS}.out"
+
+for ((s_grp_idx = 0; s_grp_idx < n_s_groups; s_grp_idx++)); do
+	for ((c_grp_idx = 0; c_grp_idx < n_c_groups; c_grp_idx++)); do
+		run_test "${s_groups[s_grp_idx]}" "${c_groups[c_grp_idx]}"
+	done
+done
diff --git a/lustre-iokit/lst-survey/lst.sh b/lustre-iokit/lst-survey/lst.sh
new file mode 100755
index 0000000..4e0a5cd
--- /dev/null
+++ b/lustre-iokit/lst-survey/lst.sh
@@ -0,0 +1,410 @@
+#!/bin/bash
+
+print_help() {
+	cat <<EOF
+Usage:
+${0##*/} -f "nid1[ nid2...]" -t "nidA[ nidB...]" -m read|write|rw|ping [options]
+or
+${0##*/} -H -f "host1[ host2...]" -t "hostA[ hostB...]" -m read|write|rw|ping [options]
+
+Options:
+	-b batch_name
+	   Creates a batch test called <batch_name> rather than using the
+	   default.
+	-c concurrency
+	   The number of requests that are active at one time.
+	-C simple|full
+	   A data validation check (checksum of data). The default is that no
+	   check is done.
+	-d <source_count:sink_count>
+	   Determines the ratio of client nodes to server nodes for the
+	   specified test. This allows you to specify a wide range of
+	   topologies, including one-to-one and all-to-all. Distribution divides
+	   the source group into subsets, which are paired with equivalent
+	   subsets from the target group so only nodes in matching subsets
+	   communicate.
+	-D delay
+	   The interval of the statistics (in seconds). Default is 15.
+	-e
+	   Lists the number of failed RPCs on test nodes in the current session.
+	-h
+	   Display this help.
+	-H
+	   Run in "host mode". Host mode indicates that the arguments to '-t'
+	   and '-f' flags are hostnames rather than LNet nids. This script will
+	   attempt to ssh to each node to ensure the lnet-selftest module is
+	   loaded, and to determine the appropriate LNet NIDs to give to LST.
+	-f "nid1[ nid2...]"
+	   Space-separated list of LNet NIDs to place in the "clients" group.
+	   When '-H' flag is specified, the '-f' argument is a space-separated
+	   list of hostnames.
+	   PDSH-style expressions are supported for NID arguments, but not for
+	   host mode ('-H').
+	-g servers|clients
+	   Report stats only from the specified group. Either 'clients' or
+	   'servers'.
+	-l loops
+	   The number of test loops. Default is -1 (infinite).
+	-L
+	   Load lnet-selftest module on local and remote hosts. The module will
+	   be unloaded at the end of the test execution. Requires running in
+	   host mode ('-H').
+	-m read|write|rw|ping
+	   Type of test to run. 'rw' specifies to run simultaneous read and
+	   write test.
+	-M
+	   Report bandwidth stats in MiB/s (default is MB/s).
+	-n count
+	   The number of stat RPCs to issue. Default is 1.
+	-o <offset>
+	   Add off=<offset> to brw tests.
+	-s iosize
+	   I/O size in bytes, kilobytes, or Megabytes (i.e., -s 1024, -s 4K,
+	   -s 1M). The default is 1 Megabyte.
+	-S <rate|bw|"rate  bw">
+	   By default, only bandwidth stats are displayed for read and write
+	   and only RPC rate stats are shown for ping tests. The '-S' flag can
+	   be used to override the stat output.
+	   Examples:
+	     Show only RPC rate stats:
+		# lst.sh -S rate ...
+	     Show only bandwidth stats:
+		# lst.sh -S bw ...
+	     Show both bandwidth and RPC rate stats:
+		# lst.sh -S "rate bw" ...
+		or
+		# lst.sh -S "bw rate" ...
+	-t "nid1[ nid2...]"
+	   Space-separated list of LNet NIDs to place in the "servers" group.
+	   When '-H' flag is specified, the '-t' argument is a space-separated
+	   list of hostnames.
+	   PDSH-style expressions are supported for NID arguments, but not for
+	   host mode ('-H').
+EOF
+	exit
+}
+
+stop_lst() { # Stop the started batch and end the LST session, if any
+	local rc=0 # accumulates 'lst stop' and 'lst end_session' failures
+
+	if ${LST_BATCH_STARTED}; then
+		lctl mark "lst stop ${BATCH_NAME}"
+
+		[[ -n ${ALL_HOSTS} ]] && # ALL_HOSTS is only set in host mode
+			$PDSH "${ALL_HOSTS}" "lctl mark \"lst stop ${BATCH_NAME}\""
+
+		lst stop "${BATCH_NAME}" || rc=$?
+		LST_BATCH_STARTED=false
+	fi
+
+	if ${LST_SESSION_CREATED}; then
+		lctl mark "Stop LST $MODE"
+		echo "Stop LST $MODE - $(date)"
+
+		[[ -n ${ALL_HOSTS} ]] &&
+			$PDSH "${ALL_HOSTS}" "lctl mark \"Stop LST $MODE\""
+
+		lst end_session || rc=$((rc + $?))
+		LST_SESSION_CREATED=false
+	fi
+
+	return $rc
+}
+
+exit_handler() { # EXIT trap: stop LST, then unload lnet-selftest if -L was used
+	local rc=${1:-0} # optional starting status; defaults to 0
+
+	trap "" EXIT # ignore EXIT while cleaning up
+
+	stop_lst || rc=$((rc + $?))
+
+	if ${LOAD_MODULES}; then
+		echo "Attempting to 'modprobe -r lnet-selftest' on all hosts (30 second timeout)..."
+		$PDSH "${ALL_HOSTS}" -u 30 \
+			"if lsmod | grep -q lnet_selftest; then
+				 modprobe -r lnet-selftest
+			 else
+				 :
+			 fi" | dshbak -c
+		rc=$((rc + PIPESTATUS[0])) # status of pdsh, not dshbak
+		if lsmod | grep -q lnet_selftest; then # local host
+			timeout 30 modprobe -r lnet-selftest
+			rc=$((rc + $?))
+		fi
+	fi
+
+	return $rc
+}
+
+LST_SESSION_CREATED=false # Whether 'lst new_session' was executed
+LST_BATCH_STARTED=false # Whether 'lst run <batch>' was executed
+
+PDSH="pdsh -S -Rssh -w"
+BATCH_NAME=""
+CONCURRENCY=16
+CHECK=
+DISTRIBUTION="1:1"
+CLIENTS=""
+LOOPS=""
+MODE=""
+IOSIZE="1m"
+SERVERS=""
+COUNT="1"
+DELAY="15"
+STAT_GROUP=""
+SHOW_ERRORS=false
+STAT_OPTS=""
+STAT_OPT_RATE=false
+STAT_OPT_BW=false
+BW_UNITS="--mbs"
+HOST_MODE=false
+LOAD_MODULES=false
+BRW_OFFSET=""
+while getopts "b:C:c:d:D:ef:g:hHl:Lm:Mn:o:s:S:t:" flag ; do
+	case $flag in
+		b) BATCH_NAME="$OPTARG";;
+		c) CONCURRENCY="$OPTARG";;
+		C) CHECK="$OPTARG";;
+		d) DISTRIBUTION="$OPTARG";;
+		D) DELAY="$OPTARG";;
+		e) SHOW_ERRORS=true;;
+		h) print_help;;
+		H) HOST_MODE=true;;
+		f) CLIENTS="$OPTARG";;
+		g) STAT_GROUP="$OPTARG";;
+		l) LOOPS="$OPTARG";;
+		L) LOAD_MODULES=true;;
+		m) MODE="$OPTARG";;
+		M) BW_UNITS="";;
+		n) COUNT="$OPTARG";;
+		o) BRW_OFFSET="$OPTARG";;
+		s) IOSIZE="$OPTARG";;
+		S) STAT_OPTS="$OPTARG";;
+		t) SERVERS="$OPTARG";;
+		*) echo "Unrecognized option '-$flag'"
+		   exit 1;;
+	esac
+done
+
+if [[ -z $CLIENTS ]]; then
+	echo "Must specify \"clients\" group (-f)"
+	exit 1
+elif [[ -z $SERVERS ]]; then
+	echo "Must specify \"servers\" group (-t)"
+	exit 1
+elif [[ -z $MODE ]]; then
+	echo "Must specify a mode (-m <read|write|rw|ping>)"
+	exit 1
+elif ! [[ $MODE =~ ^(read|write|rw|ping)$ ]]; then # whole word must match
+	echo "Invalid mode - \"$MODE\". (-m <read|write|rw|ping>)"
+	exit 1
+elif [[ -z $(which lst 2>/dev/null) ]]; then
+	echo "Cannot find lst executable in PATH."
+	exit 1
+elif ${LOAD_MODULES} && ! ${HOST_MODE}; then
+	echo "Module loading ('-L') is only available in host mode ('-H')"
+	exit 1
+fi
+
+for stat_opt in ${STAT_OPTS}; do
+	if [[ $stat_opt == rate ]]; then
+		STAT_OPT_RATE=true
+	elif [[ $stat_opt == bw ]]; then
+		STAT_OPT_BW=true
+	else
+		echo "Invalid stat option \"-S $stat_opt\""
+		print_help
+	fi
+done
+
+if [[ -z $STAT_GROUP ]]; then
+	STAT_GROUP="clients servers" # default: report both groups
+elif ! [[ $STAT_GROUP =~ ^(clients|servers)$ ]]; then # exact match only
+	echo "Stat group must be either \"clients\" or \"servers\". Found \"$STAT_GROUP\""
+	exit 1
+fi
+
+if [[ -n ${LOOPS} && ${LOOPS} -eq 0 ]]; then
+	echo "Loops must be -1 or > 0. Found \"${LOOPS}\""
+	exit 1
+fi
+
+if ! ${LOAD_MODULES} && ! lsmod | grep -q lnet_selftest; then
+	echo "lnet-selftest module is not loaded on local host."
+	echo "Please ensure lnet-selftest module is loaded on the local host and all test nodes."
+	exit 1
+fi
+
+ALL_HOSTS=""
+if ${HOST_MODE}; then
+	which pdsh &>/dev/null || { echo "Need pdsh for host mode"; exit 1; }
+	which ssh &>/dev/null || { echo "Need ssh for host mode"; exit 1; }
+
+	ALL_HOSTS="${SERVERS} ${CLIENTS}"
+	ALL_HOSTS=${ALL_HOSTS## }
+	ALL_HOSTS=${ALL_HOSTS%% }
+	ALL_HOSTS="${ALL_HOSTS// /,}" # comma-separated list for pdsh -w
+
+	if ${LOAD_MODULES}; then
+		echo "Loading lnet-selftest on test nodes"
+		$PDSH "${ALL_HOSTS}" \
+			"if ! lsmod | grep -q lnet_selftest; then
+				 modprobe lnet-selftest 2>&1
+			 else
+				 true
+			 fi" | dshbak -c
+		rc=${PIPESTATUS[0]} # status of pdsh, not dshbak
+		if [[ $rc -ne 0 ]]; then
+			echo "Failed to load lnet-selftest module on test nodes"
+			exit "$rc"
+		fi
+
+		if ! lsmod | grep -q lnet_selftest; then
+			modprobe lnet-selftest
+			rc=$?
+			if [[ $rc -ne 0 ]]; then
+				echo "Failed to load lnet-selftest on local host"
+				exit $rc
+			fi
+		fi
+	fi
+
+	idx=0
+	opts=( -o NumberOfPasswordPrompts=0 -o ConnectTimeout=5 )
+	for host in ${SERVERS//,/ }; do
+		s_nids[idx]=$(ssh "${opts[@]}" "$host" 'lctl list_nids | head -n 1')
+		if [[ -z ${s_nids[idx]} ]]; then
+			echo "Failed to determine primary NID of $host"
+			exit 1 # bare 'exit' would return echo's status (0)
+		fi
+		idx=$((idx + 1))
+	done
+
+	idx=0
+	for host in ${CLIENTS//,/ }; do
+		c_nids[idx]=$(ssh "${opts[@]}" "${host}" 'lctl list_nids | head -n 1')
+		if [[ -z ${c_nids[idx]} ]]; then
+			echo "Failed to determine primary NID of $host"
+			exit 1 # bare 'exit' would return echo's status (0)
+		fi
+		idx=$((idx + 1))
+	done
+
+	SERVER_NIDS=( "${s_nids[@]}" )
+	CLIENT_NIDS=( "${c_nids[@]}" )
+else
+	IFS=" " read -r -a SERVER_NIDS <<< "${SERVERS}"
+	IFS=" " read -r -a CLIENT_NIDS <<< "${CLIENTS}"
+fi
+
+if ! grep -q '\[' <<<"${SERVER_NIDS[@]}" && which lnetctl &>/dev/null; then
+	echo "Discover server NIDs"
+	lnetctl discover "${SERVER_NIDS[@]}" 1>/dev/null
+	rc=$?
+	if [[ $rc -ne 0 ]]; then
+		echo "Failed to discover all server NIDs"
+		exit $rc
+	fi
+fi
+
+if ! grep -q '\[' <<<"${CLIENT_NIDS[@]}" && which lnetctl &>/dev/null; then
+	echo "Discover client NIDs"
+	lnetctl discover "${CLIENT_NIDS[@]}" 1>/dev/null
+	rc=$?
+	if [[ $rc -ne 0 ]]; then
+		echo "Failed to discover all client NIDs"
+		exit $rc
+	fi
+fi
+
+[[ -n $ALL_HOSTS ]] &&
+	$PDSH "$ALL_HOSTS" "lctl mark \"Start LST $MODE\""
+
+lctl mark "Start LST $MODE"
+echo "Start LST $MODE - $(date)"
+
+trap 'exit_handler' EXIT
+
+export LST_SESSION=$$ # the session id is this script's PID
+echo "LST_SESSION=$LST_SESSION"
+lst new_session lnet_session || { echo "new_session failed $?"; exit 1; }
+LST_SESSION_CREATED=true # tells stop_lst to run 'lst end_session'
+
+echo "Adding clients: ${CLIENT_NIDS[*]}"
+lst add_group clients "${CLIENT_NIDS[@]}" || exit
+echo "Adding servers: ${SERVER_NIDS[*]}"
+lst add_group servers "${SERVER_NIDS[@]}" || exit
+
+if [[ -z ${BATCH_NAME} ]]; then
+	BATCH_NAME="brw_${MODE}"
+fi
+lst add_batch "${BATCH_NAME}" || exit
+
+test_opts+=( --batch "${BATCH_NAME}" --concurrency "${CONCURRENCY}" )
+test_opts+=( --from clients --to servers --distribute "${DISTRIBUTION}" )
+[[ -n ${LOOPS} ]] &&
+	test_opts+=( --loop "${LOOPS}" )
+
+if [[ $MODE == ping ]]; then
+	test_opts+=( ping )
+elif [[ $MODE == rw ]]; then
+	read_opts=( "${test_opts[@]}" brw read size="$IOSIZE" )
+	write_opts=( "${test_opts[@]}" brw write size="$IOSIZE" )
+	if [[ -n $CHECK ]];  then
+		read_opts+=( check="$CHECK" )
+		write_opts+=( check="$CHECK" )
+	fi
+	if [[ -n $BRW_OFFSET ]]; then
+		read_opts+=( off="$BRW_OFFSET" )
+		write_opts+=( off="$BRW_OFFSET" )
+	fi
+else
+	test_opts+=( brw "${MODE}" )
+	[[ -n $BRW_OFFSET ]] &&
+		test_opts+=( off="$BRW_OFFSET" )
+	[[ -n $CHECK ]] &&
+		test_opts+=( check="$CHECK" )
+	test_opts+=( size="$IOSIZE" )
+fi
+
+stat_opts=( --count "${COUNT}" --delay "${DELAY}" )
+if [[ -n $STAT_OPTS ]]; then
+	if ${STAT_OPT_RATE}; then
+		stat_opts+=( --rate )
+	fi
+	if ${STAT_OPT_BW}; then
+		stat_opts+=( --bw ) # NOTE(review): -S path ignores BW_UNITS
+	fi
+elif [[ $MODE == ping ]]; then
+	stat_opts+=( --rate )
+else
+	stat_opts+=( --bw ${BW_UNITS:+"${BW_UNITS}"} ) # no empty arg with -M
+fi
+
+for g in ${STAT_GROUP}; do
+	stat_opts+=( "${g}" )
+done
+
+if [[ $MODE == rw ]]; then
+	echo "Test: ${read_opts[*]}"
+	echo "Test: ${write_opts[*]}"
+	echo "Stat: ${stat_opts[*]}"
+	lst add_test "${read_opts[@]}" || exit
+	lst add_test "${write_opts[@]}" || exit
+else
+	echo "Test: ${test_opts[*]}"
+	echo "Stat: ${stat_opts[*]}"
+	lst add_test "${test_opts[@]}" || exit
+fi
+
+lst run "${BATCH_NAME}" || exit
+
+LST_BATCH_STARTED=true
+
+lst stat "${stat_opts[@]}"
+
+if ${SHOW_ERRORS}; then
+	lst show_error --session servers clients
+fi
+
+exit
diff --git a/lustre.spec.in b/lustre.spec.in
index 570ae22..463ce1b 100644
--- a/lustre.spec.in
+++ b/lustre.spec.in
@@ -440,6 +440,9 @@ This survey tests the local metadata performance using the echo_client to drive
 the MDD layer to perform operations. It is run with multiple threads (to
 simulate MDT service threads) locally on the MDS node, and does not need Lustre
 clients in order to run
+
+lst-survey:
+This survey tests LNet performance between a group of clients and servers.
 %endif
 
 %if 0%{?suse_version}
@@ -848,12 +851,15 @@ echo '%{_libdir}/lustre/tests/lutf/*' >>lustre-tests.files
 %{_bindir}/obdfilter-survey
 %{_bindir}/ost-survey
 %{_bindir}/sgpdd-survey
+%{_bindir}/lst-survey
+%{_bindir}/lst.sh
 %doc lustre-iokit/ior-survey/README.ior-survey
 %doc lustre-iokit/mds-survey/README.mds-survey
 %doc lustre-iokit/obdfilter-survey/README.obdfilter-survey
 %doc lustre-iokit/ost-survey/README.ost-survey
 %doc lustre-iokit/sgpdd-survey/README.sgpdd-survey
 %doc lustre-iokit/stats-collect/README.iokit-lstats
+%doc lustre-iokit/lst-survey/README.lst-survey
 %endif
 
 %post
-- 
1.8.3.1