Whamcloud - gitweb
LU-16563 tests: Check peer NI health after link down 88/50188/7
author: Chris Horn <chris.horn@hpe.com>
Thu, 2 Mar 2023 17:44:44 +0000 (11:44 -0600)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 11 Apr 2023 20:09:23 +0000 (20:09 +0000)
Validate whether peer NI health is decremented when a link is downed.

Test-Parameters: trivial testlist=sanity-lnet env=ONLY=303
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I5aa4a2f6efa2c0be2d7eb166b21ffab268e39941
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50188
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/tests/sanity-lnet.sh

index 2666f7e..565668f 100755 (executable)
@@ -3444,6 +3444,48 @@ test_302() {
 }
 run_test 302 "Check that peer debug info can be dumped"
 
+test_303() {
+       [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE"
+
+       setup_health_test true || return $?
+
+       cleanup_netns || error "Failed to cleanup netns before test execution"
+       setup_fakeif || error "Failed to add fake IF"
+       have_interface "$FAKE_IF" ||
+               error "Expect $FAKE_IF configured but not found"
+
+       add_net "${NETTYPE}99" "$FAKE_IF" || return $?
+
+       local nid=$($LCTL list_nids | tail --lines 1)
+
+       # Our updated config should be pushed to RNODE
+       local found=$(do_node $RNODE "$LNETCTL peer show --nid $nid")
+
+       [[ -z $found ]] && error "Peer not updated on $RNODE"
+
+       local prim=$($LCTL list_nids | head -n 1)
+
+       if ! grep -q -- "- primary nid: $prim"<<<"${found}"; then
+               echo "$found"
+               error "Wrong primary nid"
+       fi
+
+       echo "Set $FAKE_IF down"
+       echo "ip link set dev $FAKE_IF down"
+       ip link set dev $FAKE_IF down
+       check_ni_status "$nid" down
+
+       local hval=$(do_node $RNODE "$LNETCTL peer show --nid $nid -v 2 | \
+                                    grep -e '- nid:' -e 'health value:'")
+
+       hval=$(grep -A 1 $nid<<<"$hval" | tail -n 1 | awk '{print $NF}')
+       (( hval < 1000 )) ||
+               error "Expect $hval < 1000"
+
+       return 0
+}
+run_test 303 "Check peer NI health after link down"
+
 check_udsp_prio() {
        local target_net="${1}"
        local target_nid="${2}"