From b89563c08f5b4adbd470d7e86bae219e5b3c9989 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Mon, 26 Sep 2022 09:19:19 -0600 Subject: [PATCH] LU-15791 tests: Drop local traffic during health test Existing drop rules for health tests omit local nids for the destination so it is possible for local NI health values to recover while the tests execute. Add drop rules for local NIDs to prevent their health from recovering. Lustre-change: https://review.whamcloud.com/48661 Lustre-commit: 43344697dccdcdb0f4b8dba4899be9571d640131 Test-Parameters: trivial Test-Parameters: testlist=sanity-lnet env=ONLY=205,ONLY_REPEAT=100 Signed-off-by: Chris Horn Change-Id: I6a4a06b3fa76effd21e21449abf47cd0e14bbf18 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51916 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/tests/sanity-lnet.sh | 99 ++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 69 deletions(-) diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index f6d6583..9ad1ca5 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -1546,16 +1546,35 @@ cleanup_health_test() { } add_health_test_drop_rules() { - local hstatus=$1 - local lnid rnid + local args="-m GET -r 1 -e ${1}" + local src dst - for lnid in ${LNIDS[@]}; do - for rnid in ${RNIDS[@]}; do - $LCTL net_drop_add -s $lnid -d $rnid -m GET -r 1 -e ${hstatus} + for src in "${LNIDS[@]}"; do + for dst in "${RNIDS[@]}" "${LNIDS[@]}"; do + $LCTL net_drop_add -s $src -d $dst ${args} || + error "Failed to add drop rule $src $dst $args" done done } +do_lnet_health_ping_test() { + local hstatus="$1" + + echo "Simulate $hstatus" + + lnet_health_pre || return $? + + add_health_test_drop_rules ${hstatus} + do_lnetctl ping ${RNIDS[0]} && + error "Should have failed" + + lnet_health_post + + $LCTL net_drop_del -a + + return 0 +} + # See lnet/lnet/lib-msg.c:lnet_health_check() LNET_LOCAL_RESEND_STATUSES="local_interrupt local_dropped local_aborted" LNET_LOCAL_RESEND_STATUSES+=" local_no_route local_timeout" @@ -1566,16 +1585,7 @@ test_204() { local hstatus for hstatus in ${LNET_LOCAL_RESEND_STATUSES} \ ${LNET_LOCAL_NO_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - $LCTL net_drop_del -a - - lnet_health_post - + do_lnet_health_ping_test "${hstatus}" || return $? check_no_resends || return $? check_no_local_health || return $? done @@ -1591,31 +1601,13 @@ test_205() { local hstatus for hstatus in ${LNET_LOCAL_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - $LCTL net_drop_del * - - lnet_health_post - + do_lnet_health_ping_test "${hstatus}" || return $? check_resends || return $? check_local_health || return $? done for hstatus in ${LNET_LOCAL_NO_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - $LCTL net_drop_del * - - lnet_health_post - + do_lnet_health_ping_test "${hstatus}" || return $? check_no_resends || return $? check_local_health || return $? done @@ -1635,16 +1627,7 @@ test_206() { local hstatus for hstatus in ${LNET_REMOTE_RESEND_STATUSES} \ ${LNET_REMOTE_NO_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - $LCTL net_drop_del -a - - lnet_health_post - + do_lnet_health_ping_test "${hstatus}" || return $? check_no_resends || return $? check_no_local_health || return $? check_no_remote_health || return $? @@ -1661,18 +1644,7 @@ test_207() { local hstatus for hstatus in ${LNET_REMOTE_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - - lnet_health_post - - $LCTL net_drop_del -a - + do_lnet_health_ping_test "${hstatus}" || return $? check_resends || return $? check_no_local_health || return $? check_remote_health || return $? @@ -1680,18 +1652,7 @@ test_207() { error "Unable to reset health rc=$?" done for hstatus in ${LNET_REMOTE_NO_RESEND_STATUSES}; do - echo "Simulate $hstatus" - lnet_health_pre || return $? - - add_health_test_drop_rules ${hstatus} - - do_lnetctl discover ${RNIDS[0]} && - error "Should have failed" - - lnet_health_post - - $LCTL net_drop_del -a - + do_lnet_health_ping_test "${hstatus}" || return $? check_no_resends || return $? check_no_local_health || return $? check_remote_health || return $? -- 1.8.3.1