From: Andreas Dilger Date: Fri, 18 Jun 2021 20:55:51 +0000 (-0600) Subject: LU-14773 tests: skip check_network() on working node X-Git-Tag: 2.14.54~10 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=67752f6db2;hp=024f9303bc6f32a3113357c864765c4f9c93ed03;p=fs%2Flustre-release.git LU-14773 tests: skip check_network() on working node Don't call check_network() (which can take several seconds per node) if the get_param command ran successfully on all of the nodes. The get_param success implies the connection to the remote nodes works properly, and completes more quickly. For consistency with previous behavior, still call check_network() if get_param didn't return any output, since the modules may be unloaded. Remove some extra visual clutter from every subtest. Test-Parameters: trivial Signed-off-by: Andreas Dilger Change-Id: I6a11cf8a1a6b43bebc3ff8f5506e1faac13ebbe5 Reviewed-on: https://review.whamcloud.com/44033 Tested-by: jenkins Reviewed-by: James Nunez Tested-by: Maloo Reviewed-by: Elena Gryaznova Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 1dea3f5..1cbb4b5 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6470,10 +6470,10 @@ check_mds() { } reset_fail_loc () { - echo -n "Resetting fail_loc on all nodes..." - do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \ - fail_val=0 2>/dev/null" || true - echo done. + #echo -n "Resetting fail_loc on all nodes..." + do_nodes $(comma_list $(nodes_list)) \ + "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true + #echo done. } @@ -6482,7 +6482,8 @@ reset_fail_loc () { # Also appends a timestamp and prepends the testsuite name. # -EQUALS="====================================================================================================" +# ======================================================== 15:06:12 (1624050372) +EQUALS="========================================================" banner() { msg="== ${TESTSUITE} $*" last=${msg: -1:1} @@ -7264,15 +7265,18 @@ restore_lustre_params() { check_node_health() { local nodes=${1:-$(comma_list $(nodes_list))} - - for node in ${nodes//,/ }; do - check_network "$node" 5 - if [ $? -eq 0 ]; then - do_node $node "$LCTL get_param catastrophe 2>&1" | - grep -q "catastrophe=1" && - error "$node:LBUG/LASSERT detected" || true - fi - done + local health=$TMP/node_health.$$ + + do_nodes $nodes "$LCTL get_param catastrophe 2>&1" | tee $health | + grep "catastrophe=1" && error "LBUG/LASSERT detected" + # Only check/report network health if get_param isn't reported, since + # *clearly* the network is working if get_param returned something. + if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then + for node in ${nodes//,/}; do + check_network $node 5 + done + fi + rm -f $health } mdsrate_cleanup () {