summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
024f930)
Don't call check_network() (which can take several seconds per node)
if the get_param command ran successfully on all of the nodes. A
successful get_param already implies that the connection to the remote
nodes works properly, so skipping check_network() in that case lets the
health check complete more quickly.
For consistency with previous behavior, still call check_network() if
get_param didn't return output for every node, since the modules may be
unloaded.
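Remove some extra visual clutter from every subtest: stop printing the
"Resetting fail_loc on all nodes..." progress message and shorten the
banner separator line.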
Test-Parameters: trivial
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I6a11cf8a1a6b43bebc3ff8f5506e1faac13ebbe5
Reviewed-on: https://review.whamcloud.com/44033
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: James Nunez <jnunez@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Elena Gryaznova <elena.gryaznova@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
- echo -n "Resetting fail_loc on all nodes..."
- do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \
- fail_val=0 2>/dev/null" || true
- echo done.
+ #echo -n "Resetting fail_loc on all nodes..."
+ do_nodes $(comma_list $(nodes_list)) \
+ "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true
+ #echo done.
# Also appends a timestamp and prepends the testsuite name.
#
# Also appends a timestamp and prepends the testsuite name.
#
-EQUALS="===================================================================================================="
+# ======================================================== 15:06:12 (1624050372)
+EQUALS="========================================================"
banner() {
msg="== ${TESTSUITE} $*"
last=${msg: -1:1}
banner() {
msg="== ${TESTSUITE} $*"
last=${msg: -1:1}
check_node_health() {
local nodes=${1:-$(comma_list $(nodes_list))}
check_node_health() {
local nodes=${1:-$(comma_list $(nodes_list))}
-
- for node in ${nodes//,/ }; do
- check_network "$node" 5
- if [ $? -eq 0 ]; then
- do_node $node "$LCTL get_param catastrophe 2>&1" |
- grep -q "catastrophe=1" &&
- error "$node:LBUG/LASSERT detected" || true
- fi
- done
+ local health=$TMP/node_health.$$
+
+ do_nodes $nodes "$LCTL get_param catastrophe 2>&1" | tee $health |
+ grep "catastrophe=1" && error "LBUG/LASSERT detected"
+ # Only check/report network health if get_param isn't reported, since
+ # *clearly* the network is working if get_param returned something.
+ if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then
+ for node in ${nodes//,/}; do
+ check_network $node 5
+ done
+ fi
+ rm -f $health