6 ${0##*/} -f "nid1[ nid2...]" -t "nidA[ nidB...]" -m read|write|rw|ping [options]
8 ${0##*/} -H -f "host1[ host2...]" -t "hostA[ hostB...]" -m read|write|rw|ping [options]
12 Creates a batch test called <batch_name> rather than using the
15 The number of requests that are active at one time.
17 A data validation check (checksum of data). The default is that no
19 -d <source_count:sink_count>
20 Determines the ratio of client nodes to server nodes for the
21 specified test. This allows you to specify a wide range of
22 topologies, including one-to-one and all-to-all. Distribution divides
23 the source group into subsets, which are paired with equivalent
24 subsets from the target group so only nodes in matching subsets
27 The interval of the statistics (in seconds). Default is 15.
29 Lists the number of failed RPCs on test nodes in the current session.
33 Run in "host mode". Host mode indicates that the arguments to '-t'
34 and '-f' flags are hostnames rather than LNet nids. This script will
35 attempt to ssh to each node to ensure the lnet-selftest module is
36 loaded, and to determine the appropriate LNet NIDs to give to LST.
38 Space-separated list of LNet NIDs to place in the "clients" group.
39 When '-H' flag is specified, the '-f' argument is a space-separated
41 PDSH-style expressions are supported for NID arguments, but not for
44 Report stats only from the specified group. Either 'clients' or
47 The number of test loops. Default is -1 (infinite).
49 Load lnet-selftest module on local and remote hosts. The module will
50 be unloaded at the end of the test execution. Requires running in
53 Type of test to run. 'rw' specifies to run simultaneous read and
56 Report bandwidth stats in MiB/s (default is MB/s).
58 The number of stat RPCs to issue. Default is 1.
60 Add off=<offset> to brw tests.
62 I/O size in bytes, kilobytes, or Megabytes (e.g., -s 1024, -s 4K,
63 -s 1M). The default is 1 Megabyte.
64 -S <rate|bw|"rate bw">
65 By default, only bandwidth stats are displayed for read and write
66 and only RPC rate stats are shown for ping tests. The '-S' flag can
67 be used to override the stat output.
69 Show only RPC rate stats:
71 Show only bandwidth stats:
73 Show both bandwidth and RPC rate stats:
74 # lst.sh -S "rate bw" ...
76 # lst.sh -S "bw rate" ...
78 Space-separated list of LNet NIDs to place in the "servers" group.
79 When '-H' flag is specified, the '-t' argument is a space-separated
81 PDSH-style expressions are supported for NID arguments, but not for
# Fragment of the LST teardown logic (presumably the body of stop_lst; the
# function header and the closing lines are not visible in this excerpt).
# Failures of the lst commands are collected in rc instead of aborting.
# Stop the batch only when this script started it.
90 if ${LST_BATCH_STARTED}; then
# Emit a local lctl mark for the stop.
91 $LCTL mark "lst stop ${BATCH_NAME}"
# Repeat the mark on the remote hosts when a host list exists.
93 [[ -n ${ALL_HOSTS} ]] &&
94 $PDSH "${ALL_HOSTS}" "$LCTL mark 'lst stop ${BATCH_NAME}'"
# Keep the failure status of lst stop in rc.
96 lst stop "${BATCH_NAME}" || rc=$?
# Clear the flag so a repeated call does not stop the batch again.
97 LST_BATCH_STARTED=false
# End the session only when this script created it.
100 if ${LST_SESSION_CREATED}; then
101 $LCTL mark "Stop LST $MODE"
102 echo "Stop LST $MODE - $(date)"
104 [[ -n ${ALL_HOSTS} ]] &&
105 $PDSH "${ALL_HOSTS}" "$LCTL mark 'Stop LST $MODE'"
# Add the end_session status to whatever rc already holds.
107 lst end_session || rc=$((rc + $?))
108 LST_SESSION_CREATED=false
# Fragment of the cleanup handler (presumably exit_handler, the function the
# EXIT trap names; its header and closing lines are not visible here).
# Order of work: stop the batch and session, then, when module loading was
# requested, remove lnet-selftest from the remote hosts through pdsh and from
# the local host through timeout 30 modprobe -r. The first pipeline status
# after the remote step is added to rc.
# The end of the multi-line remote command string is not visible in this
# excerpt, so no notes are placed inside it.
119 stop_lst || rc=$((rc + $?))
# Modules are removed only when LOAD_MODULES is true.
121 if ${LOAD_MODULES}; then
122 echo "Attempting to 'modprobe -r lnet-selftest' on all hosts (30 second timeout)..."
# The value 30 passed with -u matches the timeout announced above.
123 $PDSH "${ALL_HOSTS}" -u 30 \
124 "if lsmod | grep -q lnet_selftest; then
125 modprobe -r lnet-selftest
129 rc=$((rc + PIPESTATUS[0]))
130 if lsmod | grep -q lnet_selftest; then
131 timeout 30 modprobe -r lnet-selftest
# State flags checked during teardown; both start out false.
139 LST_SESSION_CREATED=false # Whether 'lst new_session' was executed
140 LST_BATCH_STARTED=false # Whether 'lst run <batch>' was executed
# Command prefix for remote execution. It ends in -w, so every use must put
# the host list directly after it.
142 PDSH="pdsh -S -Rssh -w"
# Parse the command line flags into the configuration variables.
# NOTE(review): the option string also accepts C, D, h, H, l, m, M and n; the
# case arms for those flags are not visible in this excerpt.
163 while getopts "b:C:c:d:D:ef:g:hHl:Lm:Mn:o:s:S:t:" flag ; do
165 b) BATCH_NAME="$OPTARG";;
166 c) CONCURRENCY="$OPTARG";;
168 d) DISTRIBUTION="$OPTARG";;
170 e) SHOW_ERRORS=true;;
173 f) CLIENTS="$OPTARG";;
174 g) STAT_GROUP="$OPTARG";;
176 L) LOAD_MODULES=true;;
180 o) BRW_OFFSET="$OPTARG";;
181 s) IOSIZE="$OPTARG";;
182 S) STAT_OPTS="$OPTARG";;
183 t) SERVERS="$OPTARG";;
# NOTE(review): the option string has no leading colon, so for an unknown
# flag getopts stores ? in flag and this message prints -? instead of the
# offending letter - confirm whether that is intended.
184 *) echo "Unrecognized option '-$flag'"
# Choose the lctl binary: an executable path already held in LCTL is kept,
# otherwise $LUSTRE/utils/lctl is used when it is executable, otherwise the
# PATH is relied on (the assignment in that last branch is not visible in
# this excerpt).
189 # find where 'lctl' binary is installed on this system
190 if [[ -x "$LCTL" ]]; then # full pathname specified
191 : # echo "LCTL=$LCTL"
192 elif [[ -n "$LUSTRE" && -x "$LUSTRE/utils/lctl" ]]; then
193 LCTL=$LUSTRE/utils/lctl
194 else # hope that it is in the PATH
197 #echo "using LCTL='$LCTL' lustre_root='$lustre_root' LUSTRE='$LUSTRE'"
# Exit with status 99 when which prints nothing for the chosen value.
# NOTE(review): $LCTL is unquoted inside the substitution, so a path that
# contains spaces would be split into several arguments.
198 [[ -n "$(which $LCTL)" ]] || { echo "error: lctl not found"; exit 99; }
# Validate the required arguments. Only the first failing check reports,
# because the checks form one if/elif chain. The statements that follow each
# message are not visible in this excerpt.
200 if [[ -z $CLIENTS ]]; then
201 echo "Must specify \"clients\" group (-f)"
203 elif [[ -z $SERVERS ]]; then
204 echo "Must specify \"servers\" group (-t)"
206 elif [[ -z $MODE ]]; then
207 echo "Must specify a mode (-m <read|write|rw|ping>)"
# NOTE(review): the regex is unanchored, so any value that merely contains
# read, write, rw or ping (for example rewrite) passes this check. Anchoring
# it as ^(read|write|rw|ping)$ would make the test exact.
209 elif ! [[ $MODE =~ read|write|rw|ping ]]; then
210 echo "Invalid mode - \"$MODE\". (-m <read|write|rw|ping>)"
# The lst executable must be found by which.
212 elif [[ -z $(which lst 2>/dev/null) ]]; then
213 echo "Cannot find lst executable in PATH."
# Reject -L when -H was not given.
215 elif ${LOAD_MODULES} && ! ${HOST_MODE}; then
216 echo "Module loading ('-L') is only available in host mode ('-H')"
# Check every word of the -S argument; the unquoted expansion splits it on
# whitespace. Only rate and bw are accepted, anything else is reported.
# The statements executed for the accepted words are not visible in this
# excerpt (presumably they set STAT_OPT_RATE and STAT_OPT_BW - confirm).
220 for stat_opt in ${STAT_OPTS}; do
221 if [[ $stat_opt == rate ]]; then
223 elif [[ $stat_opt == bw ]]; then
226 echo "Invalid stat option \"-S $stat_opt\""
# Report on both groups when -g was not given.
231 if [[ -z $STAT_GROUP ]]; then
232 STAT_GROUP="clients servers"
# NOTE(review): the regex is unanchored, so any value that contains clients
# or servers as a substring is accepted, not only the two exact names.
233 elif ! [[ $STAT_GROUP =~ clients|servers ]]; then
234 echo "Stat group must be either \"clients\" or \"servers\". Found \"$STAT_GROUP\""
# Reject a loop count of zero when one was supplied.
# NOTE(review): only the value 0 is caught here; other values that the
# message rules out, such as -5, pass this check.
238 if [[ -n ${LOOPS} && ${LOOPS} -eq 0 ]]; then
239 echo "Loops must be -1 or > 0. Found \"${LOOPS}\""
# When module loading was not requested, lnet_selftest must already appear in
# the local lsmod output. The second ! negates the status of the whole
# lsmod | grep pipeline.
243 if ! ${LOAD_MODULES} && ! lsmod | grep -q lnet_selftest; then
244 echo "lnet-selftest module is not loaded on local host."
245 echo "Please ensure lnet-selftest module is loaded on the local host and all test nodes."
# Host mode prerequisites and host list construction.
250 if ${HOST_MODE}; then
# NOTE(review): the bare exit after echo returns the status of echo, so a
# missing tool ends the script with status 0. The EXIT trap is installed
# further down, so it does not run for these exits.
251 which pdsh &>/dev/null || { echo "Need pdsh for host mode"; exit; }
252 which ssh &>/dev/null || { echo "Need ssh for host mode"; exit; }
# Build the comma separated list handed to pdsh: join servers and clients,
# strip one leading and one trailing space, then turn every remaining space
# into a comma.
254 ALL_HOSTS="${SERVERS} ${CLIENTS}"
255 ALL_HOSTS=${ALL_HOSTS## }
256 ALL_HOSTS=${ALL_HOSTS%% }
257 ALL_HOSTS="${ALL_HOSTS// /,}"
# When LOAD_MODULES is true, run modprobe lnet-selftest on the test nodes
# through pdsh (only where lsmod does not list the module yet) and then on
# the local host. Each attempt is followed by an rc check that prints a
# failure message.
# The lines that capture rc, close the remote command string and end the
# branches are not visible in this excerpt, so no notes are placed inside.
259 if ${LOAD_MODULES}; then
260 echo "Loading lnet-selftest on test nodes"
261 $PDSH "${ALL_HOSTS}" \
262 "if ! lsmod | grep -q lnet_selftest; then
263 modprobe lnet-selftest 2>&1
268 if [[ $rc -ne 0 ]]; then
269 echo "Failed to load lnet-selftest module on test nodes"
273 if ! lsmod | grep -q lnet_selftest; then
274 modprobe lnet-selftest
276 if [[ $rc -ne 0 ]]; then
277 echo "Failed to load lnet-selftest on local host"
# Host mode: ask every host for the first line of its list_nids output over
# ssh and collect the answers into SERVER_NIDS and CLIENT_NIDS.
# The ssh options disable password prompts and cap the connect time at 5.
284 opts=( -o NumberOfPasswordPrompts=0 -o ConnectTimeout=5 )
# Commas are replaced by spaces, so the host list may use either separator.
285 for host in ${SERVERS//,/ }; do
# NOTE(review): the remote command is single quoted, so $LCTL is expanded by
# the remote shell rather than locally. Unless LCTL is set in the remote
# environment it expands to nothing and the remote side runs list_nids alone.
# The pdsh calls in this script use double quotes and expand $LCTL locally -
# confirm which behavior is intended here.
286 s_nids[idx]=$(ssh "${opts[@]}" "$host" '$LCTL list_nids | head -n 1')
# An empty answer means the lookup failed for this host.
287 if [[ -z ${s_nids[idx]} ]]; then
288 echo "Failed to determine primary NID of $host"
# Same lookup for the client hosts; the NOTE above applies here as well.
295 for host in ${CLIENTS//,/ }; do
296 c_nids[idx]=$(ssh "${opts[@]}" "${host}" '$LCTL list_nids | head -n 1')
297 if [[ -z ${c_nids[idx]} ]]; then
298 echo "Failed to determine primary NID of $host"
# Copy the collected NIDs into the arrays used by the rest of the script.
304 SERVER_NIDS=( "${s_nids[@]}" )
305 CLIENT_NIDS=( "${c_nids[@]}" )
# Split the -t and -f arguments on spaces into the NID arrays (presumably the
# branch taken when host mode is off; the enclosing else is not visible).
307 IFS=" " read -r -a SERVER_NIDS <<< "${SERVERS}"
308 IFS=" " read -r -a CLIENT_NIDS <<< "${CLIENTS}"
# Run lnetctl discover on the server and client NIDs when lnetctl is found
# and none of the NIDs contains an opening square bracket (presumably such
# NIDs are range expressions that cannot be discovered directly - confirm).
# The lines that capture rc after each discover call are not visible here.
311 if ! grep -q '\[' <<<"${SERVER_NIDS[@]}" && which lnetctl &>/dev/null; then
312 echo "Discover server NIDs"
313 lnetctl discover "${SERVER_NIDS[@]}" 1>/dev/null
315 if [[ $rc -ne 0 ]]; then
316 echo "Failed to discover all server NIDs"
# Same steps for the client NIDs.
321 if ! grep -q '\[' <<<"${CLIENT_NIDS[@]}" && which lnetctl &>/dev/null; then
322 echo "Discover client NIDs"
323 lnetctl discover "${CLIENT_NIDS[@]}" 1>/dev/null
325 if [[ $rc -ne 0 ]]; then
326 echo "Failed to discover all client NIDs"
# Announce the start of the run: an lctl mark on the remote hosts when a host
# list exists, the same mark locally, and a timestamped line on stdout.
331 [[ -n $ALL_HOSTS ]] &&
332 $PDSH "$ALL_HOSTS" "$LCTL mark 'Start LST $MODE'"
334 $LCTL mark "Start LST $MODE"
335 echo "Start LST $MODE - $(date)"
# From this point on every exit runs exit_handler.
337 trap 'exit_handler' EXIT
# The session key exported for lst is the PID of this shell.
339 export LST_SESSION=$$
340 echo "LST_SESSION=$LST_SESSION"
# NOTE(review): $? in the message is still the status of lst new_session,
# but the bare exit that follows uses the status of echo. Whether the final
# exit code stays 0 depends on the end of exit_handler, which is not visible
# in this excerpt - confirm.
341 lst new_session lnet_session || { echo "new_session failed $?"; exit; }
# Flag checked by the teardown code before it calls lst end_session.
342 LST_SESSION_CREATED=true
# Register the two node groups and the batch with lst. Any failure exits,
# which triggers the EXIT trap set earlier.
344 echo "Adding clients: ${CLIENT_NIDS[*]}"
345 lst add_group clients "${CLIENT_NIDS[@]}" || exit
346 echo "Adding servers: ${SERVER_NIDS[*]}"
347 lst add_group servers "${SERVER_NIDS[@]}" || exit
# Without -b the batch name is derived from the mode.
349 if [[ -z ${BATCH_NAME} ]]; then
350 BATCH_NAME="brw_${MODE}"
352 lst add_batch "${BATCH_NAME}" || exit
# Assemble the argument arrays for lst add_test.
# Options shared by every test: batch, concurrency, direction, distribution.
354 test_opts+=( --batch "${BATCH_NAME}" --concurrency "${CONCURRENCY}" )
355 test_opts+=( --from clients --to servers --distribute "${DISTRIBUTION}" )
# NOTE(review): the line before this one is not visible; the loop option may
# be guarded by a condition - confirm.
357 test_opts+=( --loop "${LOOPS}" )
# The ping branch body is not visible in this excerpt.
359 if [[ $MODE == ping ]]; then
# rw mode builds two separate tests, one brw read and one brw write, both
# starting from the shared options.
361 elif [[ $MODE == rw ]]; then
362 read_opts=( "${test_opts[@]}" brw read size="$IOSIZE" )
363 write_opts=( "${test_opts[@]}" brw write size="$IOSIZE" )
# Optional check and off arguments are appended to both tests.
364 if [[ -n $CHECK ]]; then
365 read_opts+=( check="$CHECK" )
366 write_opts+=( check="$CHECK" )
368 if [[ -n $BRW_OFFSET ]]; then
369 read_opts+=( off="$BRW_OFFSET" )
370 write_opts+=( off="$BRW_OFFSET" )
# Remaining modes: a single brw test named after the mode.
373 test_opts+=( brw "${MODE}" )
374 [[ -n $BRW_OFFSET ]] &&
375 test_opts+=( off="$BRW_OFFSET" )
# NOTE(review): the line before this one is not visible; check is presumably
# appended only when CHECK is set - confirm.
377 test_opts+=( check="$CHECK" )
378 test_opts+=( size="$IOSIZE" )
# Assemble the arguments for lst stat.
381 stat_opts=( --count "${COUNT}" --delay "${DELAY}" )
# An explicit -S argument decides which stats are requested.
382 if [[ -n $STAT_OPTS ]]; then
383 if ${STAT_OPT_RATE}; then
384 stat_opts+=( --rate )
# The body of this branch is not visible in this excerpt.
386 if ${STAT_OPT_BW}; then
# Without -S, ping requests rate stats.
389 elif [[ $MODE == ping ]]; then
390 stat_opts+=( --rate )
# Presumably the default for the other modes (the else is not visible).
392 stat_opts+=( --bw "${BW_UNITS}" )
# Append each group name as a separate argument.
395 for g in ${STAT_GROUP}; do
396 stat_opts+=( "${g}" )
# Print the planned test and stat arguments, add the tests, start the batch
# and collect statistics. A failing add_test or run exits the script.
# rw mode adds the read test and the write test to the same batch.
399 if [[ $MODE == rw ]]; then
400 echo "Test: ${read_opts[*]}"
401 echo "Test: ${write_opts[*]}"
402 echo "Stat: ${stat_opts[*]}"
403 lst add_test "${read_opts[@]}" || exit
404 lst add_test "${write_opts[@]}" || exit
# Every other mode adds the single test built in test_opts.
406 echo "Test: ${test_opts[*]}"
407 echo "Stat: ${stat_opts[*]}"
408 lst add_test "${test_opts[@]}" || exit
411 lst run "${BATCH_NAME}" || exit
# Set only after lst run succeeded, so teardown stops the batch only when it
# was really started.
413 LST_BATCH_STARTED=true
415 lst stat "${stat_opts[@]}"
# With -e, list the errors recorded for both groups.
417 if ${SHOW_ERRORS}; then
418 lst show_error --session servers clients