X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-lnet.sh;h=7c47dc89254f7af1e6bb079e437571380a052bf3;hb=b90c0100dd93b56f3bfaee037b3bdd077523f43e;hp=64d18166593772c306b8e7c56ac04d98d7e373a6;hpb=8ee85e15412d32fbe60f70c474c0a28ff15b8351;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 64d1816..7c47dc8 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -1346,6 +1346,8 @@ function lnet_health_post() { restore_lnet_params + $LCTL net_drop_del -a + do_lnetctl peer set --health 1000 --all do_lnetctl net set --health 1000 --all @@ -1549,7 +1551,6 @@ test_204() { add_health_test_drop_rules ${hstatus} do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1574,7 +1575,6 @@ test_205() { add_health_test_drop_rules ${hstatus} do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1589,7 +1589,6 @@ test_205() { add_health_test_drop_rules ${hstatus} do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1618,7 +1617,6 @@ test_206() { add_health_test_drop_rules ${hstatus} do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1648,8 +1646,6 @@ test_207() { lnet_health_post - $LCTL net_drop_del -a - check_resends || return $? check_no_local_health || return $? check_remote_health || return $? @@ -1667,8 +1663,6 @@ test_207() { lnet_health_post - $LCTL net_drop_del -a - check_no_resends || return $? check_no_local_health || return $? check_remote_health || return $? @@ -1775,7 +1769,6 @@ test_209() { do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1795,7 +1788,6 @@ test_209() { do_lnetctl discover ${RNIDS[0]} && error "Should have failed" - $LCTL net_drop_del -a lnet_health_post @@ -1847,18 +1839,18 @@ check_nid_in_recovq() { # If the recovery limit is 10 seconds, then when the 5th enqueue happens # we expect the peer NI to have aged out, so it will not actually be # queued. -# If max_recovery_ping_interval is set to 2 then: +# If max_recovery_ping_interval is set to 4 then: # First enqueue happens at time 0. -# 2nd at 0 + 2^0 = 1 -# 3rd at 1 + 2^1 = 3 -# 4th at 3 + 2^1 = 5 -# 5th at 5 + 2^1 = 7 -# 6th at 7 + 2^1 = 9 -# 7th at 9 + 2^1 = 11 -# e.g. after 4 seconds we would expect to have seen the 3th enqueue, +# 2nd at 0 + min(2^0, 4) = 1 +# 3rd at 1 + min(2^1, 4) = 3 +# 4th at 3 + min(2^2, 4) = 7 +# 5th at 7 + min(2^3, 4) = 11 +# 6th at 11 + min(2^4, 4) = 15 +# 7th at 15 + min(2^5, 4) = 19 +# e.g. after 4 seconds we would expect to have seen the 3rd enqueue, # (2 pings sent, 3rd about to happen), and the 4th enqueue is yet to happen -# e.g. after 10 seconds we would expect to have seen the 6th enqueue, -# (5 pings sent, 6th about to happen), and the 8th enqueue is yet to happen +# e.g. after 13 seconds we would expect to have seen the 5th enqueue, +# (4 pings sent, 5th about to happen), and the 6th enqueue is yet to happen check_ping_count() { local queue="$1" local expect="$2" @@ -1947,7 +1939,7 @@ test_210() { default=$($LNETCTL global show | awk '/max_recovery_ping_interval/{print $NF}') - do_lnetctl set max_recovery_ping_interval 2 || + do_lnetctl set max_recovery_ping_interval 4 || error "failed to set max_recovery_ping_interval" $LCTL set_param debug=+net @@ -1962,9 +1954,9 @@ test_210() { check_nid_in_recovq "-l" "1" check_ping_count "ni" "2" - sleep 6 + sleep 9 check_nid_in_recovq "-l" "1" - check_ping_count "ni" "5" + check_ping_count "ni" "4" $LCTL net_drop_del -a @@ -2045,7 +2037,7 @@ test_211() { default=$($LNETCTL global show | awk '/max_recovery_ping_interval/{print $NF}') - do_lnetctl set max_recovery_ping_interval 2 || + do_lnetctl set max_recovery_ping_interval 4 || error "failed to set max_recovery_ping_interval" $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error @@ -2060,9 +2052,9 @@ test_211() { check_nid_in_recovq "-p" "1" check_ping_count "peer_ni" "2" - sleep 6 + sleep 9 check_nid_in_recovq "-p" "1" - check_ping_count "peer_ni" "5" + check_ping_count "peer_ni" "4" $LCTL net_drop_del -a @@ -2501,8 +2493,9 @@ do_net_add() { local node=$1 local net=$2 local if=$3 + local opts=$4 - do_rpc_nodes $node "$LNETCTL net add --net $net --if $if" || + do_rpc_nodes $node "$LNETCTL net add --net $net --if $if $opts" || error "add $net on interface $if on node $node failed rc=$?" } @@ -2559,7 +2552,8 @@ LNIDS=() LOCAL_NET=${NETTYPE}1 REMOTE_NET=${NETTYPE}2 setup_router_test() { - local mod_opts="$@" + local mod_opts="$1" + local rtr_net_opts="$2" if [[ ${#RPEER_INTERFACES[@]} -eq 0 ]]; then init_router_test_vars || @@ -2579,7 +2573,7 @@ setup_router_test() { do_nodes $all_nodes "$LNETCTL lnet configure" || error "Failed to initialize DLC" - do_net_add $ROUTER $LOCAL_NET ${ROUTER_INTERFACES[0]} || + do_net_add $ROUTER $LOCAL_NET ${ROUTER_INTERFACES[0]} $rtr_net_opts || return $? do_net_add $ROUTER $REMOTE_NET ${ROUTER_INTERFACES[0]} || @@ -2830,7 +2824,7 @@ run_test 222 "Check avoid_asym_router_failure=1" test_223() { local opts="avoid_asym_router_failure=1 lnet_peer_discovery_disabled=1" - setup_router_test $opts || return $? + setup_router_test "$opts" || return $? do_aarf_enabled_test || return $? @@ -2899,7 +2893,7 @@ run_test 224 "Check avoid_asym_router_failure=0" test_225() { local opts="avoid_asym_router_failure=0 lnet_peer_discovery_disabled=1" - setup_router_test $opts || return $? + setup_router_test "$opts" || return $? do_aarf_disabled_test || return $? @@ -2908,6 +2902,86 @@ test_225() { } run_test 225 "Check avoid_asym_router_failure=0 w/DD disabled" +do_rtr_peer_health_test() { + local expected="$1" + + do_node $ROUTER "$LNETCTL set routing 1" || + error "Unable to enable routing on $ROUTER" + + do_route_add $HOSTNAME $REMOTE_NET ${ROUTER_NIDS[0]} || + return $? + + do_route_add $RPEER $LOCAL_NET ${ROUTER_NIDS[1]} || + return $? + + check_router_ni_status "up" "up" || + return $? + + check_route_aliveness "$HOSTNAME" "up" || + return $? + + check_route_aliveness "$RPEER" "up" || + return $? + + do_lnetctl ping ${RPEER_NIDS[0]} || + error "Failed to ping ${RPEER_NIDS[0]}" + + do_node $RPEER "$LNETCTL ping ${LNIDS[0]}" || + error "$RPEER failed to ping ${LNIDS[0]}" + + # Stop LNet on local host + do_lnetctl lnet unconfigure || + error "Failed to stop LNet rc=$?" + + check_router_ni_status "down" "up" || + return $? + + check_route_aliveness "$RPEER" "up" || + return $? + + # The NI used to send the message to the destination will be the + # router's NI on LOCAL_NET, so that's the drop count that will be + # incremented + local d1=$(do_node $ROUTER $LNETCTL net show -v --net $LOCAL_NET | \ + awk '/drop_count:/{print $NF}') + + # Ping from RPEER to local host should be dropped by the router + do_node $RPEER "$LCTL ping ${LNIDS[0]}" && + error "$RPEER expected ping to fail" + + local d2=$(do_node $ROUTER $LNETCTL net show -v --net $LOCAL_NET | \ + awk '/drop_count:/{print $NF}') + + [[ $((d2 - d1)) -ne $expected ]] && + error "Expected drop count change by $expected: $d1 -> $d2" + + return 0 +} + +test_226() { + setup_router_test avoid_asym_router_failure=0 --peer-timeout=10 || + return $? + + do_rtr_peer_health_test 1 || + return $? + + cleanup_router_test || + return $? +} +run_test 226 "Check router peer health enabled" + +test_227() { + setup_router_test avoid_asym_router_failure=0 --peer-timeout=0 || + return $? + + do_rtr_peer_health_test 0 || + return $? + + cleanup_router_test || + return $? +} +run_test 227 "Check router peer health disabled" + test_230() { # LU-12815 echo "Check valid values; Should succeed" @@ -2958,6 +3032,52 @@ test_230() { } run_test 230 "Test setting conns-per-peer" +test_231() { + reinit_dlc || return $? + + do_lnetctl net add --net tcp --if ${INTERFACES[0]} || + error "Failed to add net" + + $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-expected.yaml + sed -i 's/peer_timeout: .*$/peer_timeout: 0/' \ + $TMP/sanity-lnet-$testnum-expected.yaml + + reinit_dlc || return $? + + do_lnetctl import $TMP/sanity-lnet-$testnum-expected.yaml || + error "Failed to import configuration" + + $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml + + compare_yaml_files || error "Wrong config after import" + + do_lnetctl net del --net tcp --if ${INTERFACES[0]} || + error "Failed to delete net tcp" + + do_lnetctl net add --net tcp --if ${INTERFACES[0]} --peer-timeout=0 || + error "Failed to add net with peer-timeout=0" + + $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml + + compare_yaml_files || error "Wrong config after lnetctl net add" + + reinit_dlc || return $? + + # lnet/include/lnet/lib-lnet.h defines DEFAULT_PEER_TIMEOUT 180 + sed -i 's/peer_timeout: .*$/peer_timeout: 180/' \ + $TMP/sanity-lnet-$testnum-expected.yaml + + sed -i '/^.*peer_timeout:.*$/d' $TMP/sanity-lnet-$testnum-actual.yaml + + do_lnetctl import $TMP/sanity-lnet-$testnum-actual.yaml || + error "Failed to import config without peer_timeout" + + $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml + + compare_yaml_files +} +run_test 231 "Check DLC handling of peer_timeout parameter" + ### Test that linux route is added for each ni test_250() { reinit_dlc || return $? @@ -3002,6 +3122,20 @@ test_300() { } run_test 300 "packaged LNet UAPI headers can be compiled" +# LU-16081 lnet: Memory leak on adding existing interface + +test_301() { + reinit_dlc || return $? + do_lnetctl net add --net tcp --if ${INTERFACES[0]} || + error "Failed to add net" + do_lnetctl net add --net tcp --if ${INTERFACES[0]} && + error "add net should have failed" + do_lnetctl net del --net tcp --if ${INTERFACES[0]} || + error "Failed to del net" + unload_modules +} +run_test 301 "Check for dynamic adds of same/wrong interface (memory leak)" + complete $SECONDS cleanup_testsuite