From 2471d35c0e0eb869640509ad70b02c891f74aefc Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Tue, 4 Oct 2022 05:05:15 -0500 Subject: [PATCH] LU-16217 iokit: Add lst.sh wrapper and lst-survey lst.sh is a wrapper around the LNet selftest (lst) utility. It provides a streamlined interface for executing read, write, combined read/write and ping lst tests. lst-survey leverages lst.sh to test the performance of groups of LNet peers against each other. HPE-bug-id: LUS-10279 Test-Parameters: trivial Signed-off-by: Chris Horn Change-Id: I4c2593df1289b0b97760cb402de1e101ca22c319 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48799 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Alexander Zarochentsev Reviewed-by: Oleg Drokin --- config/lustre-build.m4 | 1 + lustre-iokit/Makefile.am | 2 +- lustre-iokit/lst-survey/Makefile.am | 3 + lustre-iokit/lst-survey/README.lst-survey | 178 +++++++++++++ lustre-iokit/lst-survey/lst-survey | 365 ++++++++++++++++++++++++++ lustre-iokit/lst-survey/lst.sh | 410 ++++++++++++++++++++++++++++++ lustre.spec.in | 6 + 7 files changed, 964 insertions(+), 1 deletion(-) create mode 100644 lustre-iokit/lst-survey/Makefile.am create mode 100644 lustre-iokit/lst-survey/README.lst-survey create mode 100755 lustre-iokit/lst-survey/lst-survey create mode 100755 lustre-iokit/lst-survey/lst.sh diff --git a/config/lustre-build.m4 b/config/lustre-build.m4 index 412abf8..9fc78586 100644 --- a/config/lustre-build.m4 +++ b/config/lustre-build.m4 @@ -485,6 +485,7 @@ AC_DEFUN([LB_CONFIG_FILES], [ lustre-iokit/mds-survey/Makefile lustre-iokit/ior-survey/Makefile lustre-iokit/stats-collect/Makefile + lustre-iokit/lst-survey/Makefile ) ]) diff --git a/lustre-iokit/Makefile.am b/lustre-iokit/Makefile.am index 83dcb47..1803399 100644 --- a/lustre-iokit/Makefile.am +++ b/lustre-iokit/Makefile.am @@ -1,2 +1,2 @@ SUBDIRS = obdfilter-survey sgpdd-survey ost-survey ior-survey -SUBDIRS += mds-survey stats-collect +SUBDIRS += 
mds-survey stats-collect lst-survey diff --git a/lustre-iokit/lst-survey/Makefile.am b/lustre-iokit/lst-survey/Makefile.am new file mode 100644 index 0000000..652e29e --- /dev/null +++ b/lustre-iokit/lst-survey/Makefile.am @@ -0,0 +1,3 @@ +bin_SCRIPTS = lst.sh lst-survey +CLEANFILE = $(bin_SCRIPTS) +EXTRA_DIST = lst.sh lst-survey README.lst-survey diff --git a/lustre-iokit/lst-survey/README.lst-survey b/lustre-iokit/lst-survey/README.lst-survey new file mode 100644 index 0000000..a94f94c --- /dev/null +++ b/lustre-iokit/lst-survey/README.lst-survey @@ -0,0 +1,178 @@ +Overview +-------- + +This survey script performs a series of LNet selftest (LST) benchmarks between +groups of LNet peers. It can be used to characterize the performance of the LNet +interface(s) on Lustre servers, Lustre clients, or LNet routers. + +The LST client group is defined using the '-f' flag, and the LST server group +is defined using the '-t' flag. Both of these flags take a space-separated or +comma-separated list of LNet NIDs. The '-M' and '-N' options can be used to +divide the client or server group into multiple smaller groups. +For example, given 16 clients and 8 servers, '-M 8' and '-N 2' would create +two client groups with eight peers in each group, and four server groups with +two peers in each group. Every server group is tested against every client +group, so this would result in 4*2=8 test iterations. + +By default, each test iteration performs 4k read and write, 1m read and write, +and ping LST benchmarks. + +A directory is created in the current working directory to store results. +The csv output is written to a results.<timestamp>.csv file and the full +lst.sh output is stored in an lst.<timestamp>.out file. An alternative output +directory can be specified with the '-O' argument. + +Various options exist to customize the benchmarks that are run. See +'lst-survey -h' for more information.
+ +A note on interpreting the results: +By default, lst-survey displays bandwidth and rate statistics for peers in the +server group as reported by the LST utility. +These statistics reported by LST can be confusing because a "read" test will +typically report read bandwidth that is lower than write bandwidth, and a +"write" test will typically report write bandwidth that is lower than read +bandwidth. This is because a "read" test involves peers in the client group +setting up a sink that is then written to by peers in the server group, and a +"write" test involves the clients setting up a source that is then read by the +servers. Thus, the read test is really measuring the write performance of the +servers and the write test is really measuring the read performance of the +servers. + +The '-g clients' option can be used to instead report the client bandwidth and +rate statistics. In this case, the reported stats will align with the benchmarks +in the expected manner. + +Example 1: Default options +# pdsh -w n0[0-3] lctl list_nids | dshbak -c +---------------- +n00 +---------------- +172.18.2.5@tcp +---------------- +n01 +---------------- +172.18.2.6@tcp +---------------- +n02 +---------------- +172.18.2.7@tcp +---------------- +n03 +---------------- +172.18.2.8@tcp +# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp +CSV results: /tmp/lst_survey.1666207637/results.1666207637.csv +LST output: /tmp/lst-survey/lst_survey.1666207637/lst.1666207637.out + +Commence lst-survey - Wed 19 Oct 2022 01:27:17 PM MDT +Server Group: 172.18.2.5@tcp 172.18.2.6@tcp +Client Group: 172.18.2.7@tcp 172.18.2.8@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 22 149981 608 299961 + read 1m 2 14405 14405 28808 + write 4k 489 241229 18 241229 + write 1m 11463 22924 1 22924 + ping 25 167928 25 167928 + +Finished lst-survey - Wed 19 Oct 2022 01:28:08 PM MDT +# cat /tmp/lst_survey.1666207637/results.1666207637.csv
+Servers,Clients,Mode,Read_BW,Read_Rate,Write_BW,Write_Rate,Server_Errors,Client_Errors +172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,read_4k,22,149981,608,299961,0,0 +172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,read_1m,2,14405,14405,28808,0,0 +172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,write_4k,489,241229,18,241229,0,0 +172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,write_1m,11463,22924,1,22924,0,0 +172.18.2.5@tcp 172.18.2.6@tcp,172.18.2.7@tcp 172.18.2.8@tcp,ping,25,167928,25,167928,0,0 +# + +Example 2: Divide the servers into groups of size 1 + +# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp -N 1 +CSV results: /tmp/lst_survey.1666207844/results.1666207844.csv +LST output: /tmp/lst_survey.1666207844/lst.1666207844.out + +Commence lst-survey - Wed 19 Oct 2022 01:30:44 PM MDT +Server Group: 172.18.2.5@tcp +Client Group: 172.18.2.7@tcp 172.18.2.8@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 25 167068 678 334135 + read 1m 2 16186 16186 32366 + write 4k 512 252613 19 252612 + write 1m 11353 22706 1 22704 + ping 29 192358 29 192358 + +Finished lst-survey - Wed 19 Oct 2022 01:31:34 PM MDT + +Commence lst-survey - Wed 19 Oct 2022 01:31:34 PM MDT +Server Group: 172.18.2.6@tcp +Client Group: 172.18.2.7@tcp 172.18.2.8@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 22 144821 587 289642 + read 1m 2 16841 16843 33681 + write 4k 498 245552 18 245552 + write 1m 11611 23219 1 23217 + ping 22 145374 22 145374 + +Finished lst-survey - Wed 19 Oct 2022 01:32:25 PM MDT +# + +Example 3: Divide the servers and clients into groups of size 1 + +# ./lst-survey -t 172.18.2.5@tcp,172.18.2.6@tcp -f 172.18.2.7@tcp,172.18.2.8@tcp -N 1 -M 1 +CSV results: /tmp/lst_survey.1666208473/results.1666208473.csv +LST output: /tmp/lst_survey.1666208473/lst.1666208473.out + +Commence lst-survey - Wed 19 Oct 2022 01:41:13 PM MDT +Server Group: 172.18.2.5@tcp +Client Group: 
172.18.2.7@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 11 75112 304 150224 + read 1m 1 8808 8809 17616 + write 4k 240 118402 9 118402 + write 1m 6561 13119 1 13118 + ping 13 90402 13 90402 + +Finished lst-survey - Wed 19 Oct 2022 01:42:03 PM MDT + +Commence lst-survey - Wed 19 Oct 2022 01:42:03 PM MDT +Server Group: 172.18.2.5@tcp +Client Group: 172.18.2.8@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 13 90017 365 180034 + read 1m 1 7333 7328 14655 + write 4k 280 138173 10 138173 + write 1m 8694 17388 1 17388 + ping 15 98316 15 98316 + +Finished lst-survey - Wed 19 Oct 2022 01:42:53 PM MDT + +Commence lst-survey - Wed 19 Oct 2022 01:42:53 PM MDT +Server Group: 172.18.2.6@tcp +Client Group: 172.18.2.7@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 9 64613 262 129225 + read 1m 1 9101 9101 18201 + write 4k 212 104575 7 104573 + write 1m 6769 13537 1 13539 + ping 10 71612 10 71612 + +Finished lst-survey - Wed 19 Oct 2022 01:43:44 PM MDT + +Commence lst-survey - Wed 19 Oct 2022 01:43:44 PM MDT +Server Group: 172.18.2.6@tcp +Client Group: 172.18.2.8@tcp + + Mode Read MB/s Read RPC/s Write MB/S Write RPC/s + read 4k 12 83144 337 166287 + read 1m 1 7582 7584 15166 + write 4k 293 144601 11 144602 + write 1m 8913 17824 1 17825 + ping 11 78409 11 78409 + +Finished lst-survey - Wed 19 Oct 2022 01:44:35 PM MDT +# diff --git a/lustre-iokit/lst-survey/lst-survey b/lustre-iokit/lst-survey/lst-survey new file mode 100755 index 0000000..7a1c1fb --- /dev/null +++ b/lustre-iokit/lst-survey/lst-survey @@ -0,0 +1,365 @@ +#!/bin/bash + +print_help() { + cat <> + Execute the specified list of tests. Default is ${MODE_LIST// /, }. + -M group_size + Subdivide the client group (-f) into multiple groups of the + specified size. Every client group is tested against every server + group (see -t and -N). + -n count + The number of stat RPCs to issue. Default is $STAT_COUNT. 
+ -N group_size + Subdivide the server group (-t) into multiple groups of the + specified size. Every server group is tested against every client + group (see -f and -M). + -O output_dir + Create output files in specified directory. + Default is PWD/lst_survey. + -t "nid1[ nid2...]" + Space-separated list of LNet NIDs to place in the "servers" group. + When '-H' flag is specified, the '-t' argument is a space-separated + list of hostnames. + -s bulksize1<,bulksize2<,...>> + For each read, write, or combined read-write test, execute the test + with the specified bulk sizes. Default is 4k and 1m. + -S separator + Use the specified character to separate fields in the .csv output + file. Default is ','. + -v + Prints additional output. e.g. LST parameters, group construction, + etc. +EOF + exit +} + +verbose() { + ${VERBOSE} && echo "$@" +} + +SERVERS="" +CLIENTS="" +CONCURRENCY=64 +HOST_MODE=false +LST_DEBUG=false +MODE_LIST="read write ping" +C_GRP_SIZE="" +S_GRP_SIZE="" +SEP=',' +SIZE_LIST="4k 1m" +SHOW_ERRORS=false +STAT_COUNT=3 +STAT_DELAY=3 +STAT_GROUP="servers" +TS=$(date +%s) +TEST_DIR=$PWD/lst_survey.${TS} +VERBOSE=false +while getopts "c:dD:eHhf:g:m:M:n:N:O:t:s:S:v" flag ; do + case $flag in + c) CONCURRENCY="$OPTARG";; + d) LST_DEBUG=true;; + D) STAT_DELAY="$OPTARG";; + e) SHOW_ERRORS=true;; # flag only, takes no argument + H) HOST_MODE=true;; + h) print_help;; + f) CLIENTS="$OPTARG";; + g) STAT_GROUP="$OPTARG";; + m) MODE_LIST="$OPTARG";; + M) C_GRP_SIZE="$OPTARG";; + n) STAT_COUNT="$OPTARG";; + N) S_GRP_SIZE="$OPTARG";; + O) TEST_DIR="$OPTARG";; + t) SERVERS="$OPTARG";; + s) SIZE_LIST="$OPTARG";; + S) SEP="${OPTARG}";; + v) VERBOSE=true;; + *) echo "Unrecognized option '-$flag'" + exit 1;; + esac +done + +LSTSH=${LSTSH:-$(dirname "$0")/lst.sh} +if ! [[ -f $LSTSH ]]; then + LSTSH=$(which lst.sh 2>/dev/null) +fi + +if !
[[ -f $LSTSH ]]; then + echo "Cannot find lst.sh script at $LSTSH" + exit 1 +fi + +if [[ -z $CLIENTS ]]; then + echo "Must specify \"clients\" group (-f)" + exit 1 +elif [[ -z $SERVERS ]]; then + echo "Must specify \"servers\" group (-t)" + exit 1 +fi + +IFS=" " read -r -a CLIENTS <<< "${CLIENTS//,/ }" +[[ -z $C_GRP_SIZE ]] && + C_GRP_SIZE=${#CLIENTS[@]} + +IFS=" " read -r -a SERVERS <<< "${SERVERS//,/ }" +[[ -z $S_GRP_SIZE ]] && + S_GRP_SIZE=${#SERVERS[@]} + +if [[ $STAT_COUNT -lt 1 ]]; then + echo "Stat count must be > 0 (-n count)" + exit 1 +elif [[ $C_GRP_SIZE -lt 1 ]]; then + echo "Client group size must be > 0 (-M group_size)" + exit 1 +elif [[ $C_GRP_SIZE -gt ${#CLIENTS[@]} ]]; then + echo "Specified client group size (-M $C_GRP_SIZE) cannot be larger than number of clients specified with -f (${#CLIENTS[@]})" + exit 1 +elif [[ $S_GRP_SIZE -lt 1 ]]; then + echo "Server group size must be > 0 (-N group_size)" + exit 1 +elif [[ $S_GRP_SIZE -gt ${#SERVERS[@]} ]]; then + echo "Specified server group size (-N $S_GRP_SIZE) cannot be larger than number of servers specified with -t (${#SERVERS[@]})" + exit 1 +elif ! [[ $STAT_GROUP =~ ^(servers|clients)$ ]]; then + echo "Invalid stat group $STAT_GROUP (-g servers|clients)" + exit 1 +elif [[ -z $MODE_LIST ]]; then + echo "Empty mode list (-m read|write|rw|ping)" + exit 1 +elif [[ -z $SIZE_LIST ]]; then + echo "Empty bulk size list (-s 1024|4k|1m)" + exit 1 +fi + +for m in ${MODE_LIST//,/ }; do # split on commas, as run_test does + if ! [[ $m =~ ^(read|write|rw|ping)$ ]]; then + echo "Invalid mode \"$m\" specified (-m read|write|rw|ping)" + exit 1 + fi +done + +if ! mkdir -p "${TEST_DIR}" ; then + echo "Failed to create results directory at \"${TEST_DIR}\" rc=$?"
+ exit 1 +fi +OUTFILE=${TEST_DIR}/results.${TS}.csv + +LST_OPTIONS="-c $CONCURRENCY -n $STAT_COUNT -D $STAT_DELAY -e -S \"bw rate\"" +LST_OPTIONS+=" -g ${STAT_GROUP} -e" +if ${HOST_MODE}; then + LST_OPTIONS+=" -H" +fi + +print_results() { + local mode="$1" + local size="$2" + + if ${LST_DEBUG}; then + return + fi + + [[ $mode != ping ]] && + mode="${mode}_${size}" + + { + echo -n "${SEP}${mode}" + echo -n "${SEP}${RD_BW_AVG}${SEP}${RD_RATE_AVG}" + echo -n "${SEP}${W_BW_AVG}${SEP}${W_RATE_AVG}" + echo "${SEP}${SERVER_ERRORS}${SEP}${CLIENT_ERRORS}" + }>>"${OUTFILE}" + + printf "%14s %14s %15s %14s %15s\n" \ + "${mode}" "${RD_BW_AVG}" "${RD_RATE_AVG}" "${W_BW_AVG}" \ + "${W_RATE_AVG}" +} + +SERVER_ERRORS=0 +CLIENT_ERRORS=0 +RD_RATE_AVG=0 +W_RATE_AVG=0 +RD_BW_AVG=0 +W_BW_AVG=0 +do_lst() { + local mode="$1" + shift + local lst_args="$*" + + RD_RATE_AVG=0 + W_RATE_AVG=0 + RD_BW_AVG=0 + W_BW_AVG=0 + + declare -a vals + + if ${LST_DEBUG}; then + echo "$LSTSH ${lst_args}" + return + fi + IFS=" " read -r -a vals <<< "$(eval "$LSTSH" "${lst_args}" 2>&1 | + tee -a "${TEST_DIR}"/lst."${TS}".out | + awk '/^\[(R|W)\]/{print $3}; + /error nodes in/{print $2}' | + xargs echo)" + + # Each stat RPC generates 4 lines of output, and we have two lines for + # the error counts + local expect=$((2 + STAT_COUNT * 4)) + + if [[ ${#vals[@]} -ne $expect ]]; then + echo + echo "Error: Failed to get all samples. 
Expect $expect, found ${#vals[@]}" + exit 1 # incomplete samples must fail the survey + fi + + local i rd_rate w_rate rd_bw w_bw + for ((i = 0; i < $((expect - 4)); i+=4)); do + rd_rate=${vals[i]} + w_rate=${vals[i+1]} + rd_bw=${vals[i+2]} + w_bw=${vals[i+3]} + + RD_RATE_AVG="${RD_RATE_AVG:+$RD_RATE_AVG +} $rd_rate" + W_RATE_AVG="${W_RATE_AVG:+$W_RATE_AVG +} $w_rate" + RD_BW_AVG="${RD_BW_AVG:+$RD_BW_AVG +} $rd_bw" + W_BW_AVG="${W_BW_AVG:+$W_BW_AVG +} $w_bw" + done + + RD_RATE_AVG=$(echo "($RD_RATE_AVG)/$STAT_COUNT" | bc) + W_RATE_AVG=$(echo "($W_RATE_AVG)/$STAT_COUNT" | bc) + RD_BW_AVG=$(echo "($RD_BW_AVG)/$STAT_COUNT" | bc) + W_BW_AVG=$(echo "($W_BW_AVG)/$STAT_COUNT" | bc) + + SERVER_ERRORS=$((SERVER_ERRORS + ${vals[$expect - 2]})) + CLIENT_ERRORS=$((CLIENT_ERRORS + ${vals[$expect - 1]})) +} + +run_test() { + local server_group="$1" + local client_group="$2" + + if ! ${LST_DEBUG}; then + echo + echo "Commence lst-survey - $(date)" + echo "Server Group: ${server_group}" + echo "Client Group: ${client_group}" + echo + printf "%14s %14s %15s %14s %15s\n" \ + "Mode" "Read MB/s" "Read RPC/s" "Write MB/S" "Write RPC/s" + fi + + SERVER_ERRORS=0 # See do_lst() + CLIENT_ERRORS=0 # See do_lst() + + local lst_args + lst_args="-t \"${server_group}\"" + lst_args+=" -f \"${client_group}\"" + lst_args+=" -d ${C_GRP_SIZE}:${S_GRP_SIZE} $LST_OPTIONS" + + local bulksize mode + for mode in ${MODE_LIST//,/ }; do + for bulksize in ${SIZE_LIST//,/ } ping; do + [[ $bulksize == ping ]] && [[ $mode != ping ]] && + continue + [[ $bulksize != ping ]] && [[ $mode == ping ]] && + continue + + { + echo -n "${server_group}" + echo -n "${SEP}${client_group}" + }>>"${OUTFILE}" + do_lst "$mode" "${lst_args} -m $mode -s $bulksize" + print_results "$mode" "$bulksize" + done + done + + if ${SHOW_ERRORS} && ! ${LST_DEBUG}; then + echo "Server Errors: ${SERVER_ERRORS}" + echo "Client Errors: ${CLIENT_ERRORS}" + fi + + if !
${LST_DEBUG}; then + echo + echo "Finished lst-survey - $(date)" + fi +} + +{ + echo -n "Servers${SEP}Clients${SEP}" + echo -n "Mode${SEP}Read_BW${SEP}Read_Rate${SEP}" + echo -n "Write_BW${SEP}Write_Rate${SEP}" + echo "Server_Errors${SEP}Client_Errors" +}>>"${OUTFILE}" + +declare -a s_groups +n_s_groups=$((${#SERVERS[@]} / S_GRP_SIZE)) +verbose "Creating $n_s_groups server group(s) of size $S_GRP_SIZE" +s_count=0 +s_grp_idx=0 +s_grp_str="" +for s in "${SERVERS[@]}"; do + ((s_count++)) + s_grp_str="${s_grp_str:+$s_grp_str }${s}" + if [[ $s_count -eq $S_GRP_SIZE ]]; then + s_groups[s_grp_idx]="$s_grp_str" + ((s_grp_idx++)) + verbose "Server group $s_grp_idx: $s_grp_str" + s_count=0 + s_grp_str="" + fi +done + +declare -a c_groups +n_c_groups=$((${#CLIENTS[@]} / C_GRP_SIZE)) +verbose "Creating $n_c_groups client group(s) of size $C_GRP_SIZE" +c_count=0 +c_grp_idx=0 +c_grp_str="" +for c in "${CLIENTS[@]}"; do + ((c_count++)) + c_grp_str="${c_grp_str:+$c_grp_str }${c}" + if [[ $c_count -eq $C_GRP_SIZE ]]; then + c_groups[c_grp_idx]="$c_grp_str" + ((c_grp_idx++)) + verbose "Client group $c_grp_idx: $c_grp_str" + c_count=0 + c_grp_str="" + fi +done + +verbose "Arguments to $LSTSH: $LST_OPTIONS" + +echo "CSV results: ${OUTFILE}" +echo "LST output: ${TEST_DIR}/lst.${TS}.out" + +for ((s_grp_idx = 0; s_grp_idx < n_s_groups; s_grp_idx++)); do + for ((c_grp_idx = 0; c_grp_idx < n_c_groups; c_grp_idx++)); do + run_test "${s_groups[s_grp_idx]}" "${c_groups[c_grp_idx]}" + done +done diff --git a/lustre-iokit/lst-survey/lst.sh b/lustre-iokit/lst-survey/lst.sh new file mode 100755 index 0000000..4e0a5cd --- /dev/null +++ b/lustre-iokit/lst-survey/lst.sh @@ -0,0 +1,410 @@ +#!/bin/bash + +print_help() { + cat < rather than using the + default. + -c concurrency + The number of requests that are active at one time. + -C simple|full + A data validation check (checksum of data). The default is that no + check is done. 
+ -d + Determines the ratio of client nodes to server nodes for the + specified test. This allows you to specify a wide range of + topologies, including one-to-one and all-to-all. Distribution divides + the source group into subsets, which are paired with equivalent + subsets from the target group so only nodes in matching subsets + communicate. + -D delay + The interval of the statistics (in seconds). Default is 15. + -e + Lists the number of failed RPCs on test nodes in the current session. + -h + Display this help. + -H + Run in "host mode". Host mode indicates that the arguments to '-t' + and '-f' flags are hostnames rather than LNet nids. This script will + attempt to ssh to each node to ensure the lnet-selftest module is + loaded, and to determine the appropriate LNet NIDs to give to LST. + -f "nid1[ nid2...]" + Space-separated list of LNet NIDs to place in the "clients" group. + When '-H' flag is specified, the '-f' argument is a space-separated + list of hostnames. + PDSH-style expressions are supported for NID arguments, but not for + host mode ('-H'). + -g servers|clients + Report stats only from the specified group. Either 'clients' or + 'servers'. + -l loops + The number of test loops. Default is -1 (infinite). + -L + Load lnet-selftest module on local and remote hosts. The module will + be unloaded at the end of the test execution. Requires running in + host mode ('-H'). + -m read|write|rw|ping + Type of test to run. 'rw' specifies to run simultaneous read and + write test. + -M + Report bandwidth stats in MiB/s (default is MB/s). + -n count + The number of stat RPCs to issue. Default is 1. + -o + Add off= to brw tests. + -s iosize + I/O size in bytes, kilobytes, or Megabytes (i.e., -s 1024, -s 4K, + -s 1M). The default is 1 Megabyte. + -S + By default, only bandwidth stats are displayed for read and write + and only RPC rate stats are shown for ping tests. The '-S' flag can + be used to override the stat output. 
+ Examples: + Show only RPC rate stats: + # lst.sh -S rate ... + Show only bandwidth stats: + # lst.sh -S bw ... + Show both bandwidth and RPC rate stats: + # lst.sh -S "rate bw" ... + or + # lst.sh -S "bw rate" ... + -t "nid1[ nid2...]" + Space-separated list of LNet NIDs to place in the "servers" group. + When '-H' flag is specified, the '-t' argument is a space-separated + list of hostnames. + PDSH-style expressions are supported for NID arguments, but not for + host mode ('-H'). +EOF + exit +} + +stop_lst() { + local rc=0 + + if ${LST_BATCH_STARTED}; then + lctl mark "lst stop ${BATCH_NAME}" + + [[ -n ${ALL_HOSTS} ]] && + $PDSH "${ALL_HOSTS}" "lctl mark \"lst stop ${BATCH_NAME}\"" + + lst stop "${BATCH_NAME}" || rc=$? + LST_BATCH_STARTED=false + fi + + if ${LST_SESSION_CREATED}; then + lctl mark "Stop LST $MODE" + echo "Stop LST $MODE - $(date)" + + [[ -n ${ALL_HOSTS} ]] && + $PDSH "${ALL_HOSTS}" "lctl mark \"Stop LST $MODE\"" + + lst end_session || rc=$((rc + $?)) + LST_SESSION_CREATED=false + fi + + return $rc +} + +exit_handler() { + local rc=${1:-0} + + trap "" EXIT + + stop_lst || rc=$((rc + $?)) + + if ${LOAD_MODULES}; then + echo "Attempting to 'modprobe -r lnet-selftest' on all hosts (30 second timeout)..." 
+ $PDSH "${ALL_HOSTS}" -u 30 \ + "if lsmod | grep -q lnet_selftest; then + modprobe -r lnet-selftest + else + : + fi" | dshbak -c + rc=$((rc + PIPESTATUS[0])) + if lsmod | grep -q lnet_selftest; then + timeout 30 modprobe -r lnet-selftest + rc=$((rc + $?)) + fi + fi + + return $rc +} + +LST_SESSION_CREATED=false # Whether 'lst new_session' was executed +LST_BATCH_STARTED=false # Whether 'lst run ' was executed + +PDSH="pdsh -S -Rssh -w" +BATCH_NAME="" +CONCURRENCY=16 +CHECK= +DISTRIBUTION="1:1" +CLIENTS="" +LOOPS="" +MODE="" +IOSIZE="1m" +SERVERS="" +COUNT="1" +DELAY="15" +STAT_GROUP="" +SHOW_ERRORS=false +STAT_OPTS="" +STAT_OPT_RATE=false +STAT_OPT_BW=false +BW_UNITS="--mbs" +HOST_MODE=false +LOAD_MODULES=false +BRW_OFFSET="" +while getopts "b:C:c:d:D:ef:g:hHl:Lm:Mn:o:s:S:t:" flag ; do + case $flag in + b) BATCH_NAME="$OPTARG";; + c) CONCURRENCY="$OPTARG";; + C) CHECK="$OPTARG";; + d) DISTRIBUTION="$OPTARG";; + D) DELAY="$OPTARG";; + e) SHOW_ERRORS=true;; + h) print_help;; + H) HOST_MODE=true;; + f) CLIENTS="$OPTARG";; + g) STAT_GROUP="$OPTARG";; + l) LOOPS="$OPTARG";; + L) LOAD_MODULES=true;; + m) MODE="$OPTARG";; + M) BW_UNITS="";; + n) COUNT="$OPTARG";; + o) BRW_OFFSET="$OPTARG";; + s) IOSIZE="$OPTARG";; + S) STAT_OPTS="$OPTARG";; + t) SERVERS="$OPTARG";; + *) echo "Unrecognized option '-$flag'" + exit 1;; + esac +done + +if [[ -z $CLIENTS ]]; then + echo "Must specify \"clients\" group (-f)" + exit 1 +elif [[ -z $SERVERS ]]; then + echo "Must specify \"servers\" group (-t)" + exit 1 +elif [[ -z $MODE ]]; then + echo "Must specify a mode (-m )" + exit 1 +elif ! [[ $MODE =~ ^(read|write|rw|ping)$ ]]; then # exact match only + echo "Invalid mode - \"$MODE\". (-m )" + exit 1 +elif [[ -z $(which lst 2>/dev/null) ]]; then + echo "Cannot find lst executable in PATH." + exit 1 +elif ${LOAD_MODULES} && !
${HOST_MODE}; then + echo "Module loading ('-L') is only available in host mode ('-H')" + exit 1 +fi + +for stat_opt in ${STAT_OPTS}; do + if [[ $stat_opt == rate ]]; then + STAT_OPT_RATE=true + elif [[ $stat_opt == bw ]]; then + STAT_OPT_BW=true + else + echo "Invalid stat option \"-S $stat_opt\"" + print_help + fi +done + +if [[ -z $STAT_GROUP ]]; then + STAT_GROUP="clients servers" +elif ! [[ $STAT_GROUP =~ clients|servers ]]; then + echo "Stat group must be either \"clients\" or \"servers\". Found \"$STAT_GROUP\"" + exit 1 +fi + +if [[ -n ${LOOPS} && ${LOOPS} -eq 0 ]]; then + echo "Loops must be -1 or > 0. Found \"${LOOPS}\"" + exit 1 +fi + +if ! ${LOAD_MODULES} && ! lsmod | grep -q lnet_selftest; then + echo "lnet-selftest module is not loaded on local host." + echo "Please ensure lnet-selftest module is loaded on the local host and all test nodes." + exit 1 +fi + +ALL_HOSTS="" +if ${HOST_MODE}; then + which pdsh &>/dev/null || { echo "Need pdsh for host mode"; exit; } + which ssh &>/dev/null || { echo "Need ssh for host mode"; exit; } + + ALL_HOSTS="${SERVERS} ${CLIENTS}" + ALL_HOSTS=${ALL_HOSTS## } + ALL_HOSTS=${ALL_HOSTS%% } + ALL_HOSTS="${ALL_HOSTS// /,}" + + if ${LOAD_MODULES}; then + echo "Loading lnet-selftest on test nodes" + $PDSH "${ALL_HOSTS}" \ + "if ! lsmod | grep -q lnet_selftest; then + modprobe lnet-selftest 2>&1 + else + true + fi" | dshbak -c + rc=${PIPESTATUS[0]} + if [[ $rc -ne 0 ]]; then + echo "Failed to load lnet-selftest module on test nodes" + exit "$rc" + fi + + if ! lsmod | grep -q lnet_selftest; then + modprobe lnet-selftest + rc=$? 
+ if [[ $rc -ne 0 ]]; then + echo "Failed to load lnet-selftest on local host" + exit $rc + fi + fi + fi + + idx=0 + opts=( -o NumberOfPasswordPrompts=0 -o ConnectTimeout=5 ) + for host in ${SERVERS//,/ }; do + s_nids[idx]=$(ssh "${opts[@]}" "$host" 'lctl list_nids | head -n 1') + if [[ -z ${s_nids[idx]} ]]; then + echo "Failed to determine primary NID of $host" + exit + fi + idx=$((idx + 1)) + done + + idx=0 + for host in ${CLIENTS//,/ }; do + c_nids[idx]=$(ssh "${opts[@]}" "${host}" 'lctl list_nids | head -n 1') + if [[ -z ${c_nids[idx]} ]]; then + echo "Failed to determine primary NID of $host" + exit + fi + idx=$((idx + 1)) + done + + SERVER_NIDS=( "${s_nids[@]}" ) + CLIENT_NIDS=( "${c_nids[@]}" ) +else + IFS=" " read -r -a SERVER_NIDS <<< "${SERVERS}" + IFS=" " read -r -a CLIENT_NIDS <<< "${CLIENTS}" +fi + +if ! grep -q '\[' <<<"${SERVER_NIDS[@]}" && which lnetctl &>/dev/null; then + echo "Discover server NIDs" + lnetctl discover "${SERVER_NIDS[@]}" 1>/dev/null + rc=$? + if [[ $rc -ne 0 ]]; then + echo "Failed to discover all server NIDs" + exit $rc + fi +fi + +if ! grep -q '\[' <<<"${CLIENT_NIDS[@]}" && which lnetctl &>/dev/null; then + echo "Discover client NIDs" + lnetctl discover "${CLIENT_NIDS[@]}" 1>/dev/null + rc=$? 
+ if [[ $rc -ne 0 ]]; then + echo "Failed to discover all client NIDs" + exit $rc + fi +fi + +[[ -n $ALL_HOSTS ]] && + $PDSH "$ALL_HOSTS" "lctl mark \"Start LST $MODE\"" + +lctl mark "Start LST $MODE" +echo "Start LST $MODE - $(date)" + +trap 'exit_handler' EXIT + +export LST_SESSION=$$ +echo "LST_SESSION=$LST_SESSION" +lst new_session lnet_session || { echo "new_session failed $?"; exit 1; } +LST_SESSION_CREATED=true + +echo "Adding clients: ${CLIENT_NIDS[*]}" +lst add_group clients "${CLIENT_NIDS[@]}" || exit +echo "Adding servers: ${SERVER_NIDS[*]}" +lst add_group servers "${SERVER_NIDS[@]}" || exit + +if [[ -z ${BATCH_NAME} ]]; then + BATCH_NAME="brw_${MODE}" +fi +lst add_batch "${BATCH_NAME}" || exit + +test_opts+=( --batch "${BATCH_NAME}" --concurrency "${CONCURRENCY}" ) +test_opts+=( --from clients --to servers --distribute "${DISTRIBUTION}" ) +[[ -n ${LOOPS} ]] && + test_opts+=( --loop "${LOOPS}" ) + +if [[ $MODE == ping ]]; then + test_opts+=( ping ) +elif [[ $MODE == rw ]]; then + read_opts=( "${test_opts[@]}" brw read size="$IOSIZE" ) + write_opts=( "${test_opts[@]}" brw write size="$IOSIZE" ) + if [[ -n $CHECK ]]; then + read_opts+=( check="$CHECK" ) + write_opts+=( check="$CHECK" ) + fi + if [[ -n $BRW_OFFSET ]]; then + read_opts+=( off="$BRW_OFFSET" ) + write_opts+=( off="$BRW_OFFSET" ) + fi +else + test_opts+=( brw "${MODE}" ) + [[ -n $BRW_OFFSET ]] && + test_opts+=( off="$BRW_OFFSET" ) + [[ -n $CHECK ]] && + test_opts+=( check="$CHECK" ) + test_opts+=( size="$IOSIZE" ) +fi + +stat_opts=( --count "${COUNT}" --delay "${DELAY}" ) +if [[ -n $STAT_OPTS ]]; then + if ${STAT_OPT_RATE}; then + stat_opts+=( --rate ) + fi + if ${STAT_OPT_BW}; then + stat_opts+=( --bw ) + fi +elif [[ $MODE == ping ]]; then + stat_opts+=( --rate ) +else + stat_opts+=( --bw ${BW_UNITS:+"${BW_UNITS}"} ) # no empty arg after -M +fi + +for g in ${STAT_GROUP}; do + stat_opts+=( "${g}" ) +done + +if [[ $MODE == rw ]]; then + echo "Test: ${read_opts[*]}" + echo "Test: ${write_opts[*]}" + echo "Stat: ${stat_opts[*]}" + lst
add_test "${read_opts[@]}" || exit + lst add_test "${write_opts[@]}" || exit +else + echo "Test: ${test_opts[*]}" + echo "Stat: ${stat_opts[*]}" + lst add_test "${test_opts[@]}" || exit +fi + +lst run "${BATCH_NAME}" || exit + +LST_BATCH_STARTED=true + +lst stat "${stat_opts[@]}" + +if ${SHOW_ERRORS}; then + lst show_error --session servers clients +fi + +exit diff --git a/lustre.spec.in b/lustre.spec.in index 570ae22..463ce1b 100644 --- a/lustre.spec.in +++ b/lustre.spec.in @@ -440,6 +440,9 @@ This survey tests the local metadata performance using the echo_client to drive the MDD layer to perform operations. It is run with multiple threads (to simulate MDT service threads) locally on the MDS node, and does not need Lustre clients in order to run + +lst-survey: +This survey tests LNet performance between a group of clients and servers. %endif %if 0%{?suse_version} @@ -848,12 +851,15 @@ echo '%{_libdir}/lustre/tests/lutf/*' >>lustre-tests.files %{_bindir}/obdfilter-survey %{_bindir}/ost-survey %{_bindir}/sgpdd-survey +%{_bindir}/lst-survey +%{_bindir}/lst.sh %doc lustre-iokit/ior-survey/README.ior-survey %doc lustre-iokit/mds-survey/README.mds-survey %doc lustre-iokit/obdfilter-survey/README.obdfilter-survey %doc lustre-iokit/ost-survey/README.ost-survey %doc lustre-iokit/sgpdd-survey/README.sgpdd-survey %doc lustre-iokit/stats-collect/README.iokit-lstats +%doc lustre-iokit/lst-survey/README.lst-survey %endif %post -- 1.8.3.1