From 18e14c7099065891516b189467c0ff11ec7f470a Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Mon, 3 Oct 2022 12:20:20 -0600 Subject: [PATCH] LU-16216 tests: Update sanity-lnet for other LNDs Modify various sanity-lnet test cases to allow them to execute on other LNDs. Some tests only work or make sense with socklnd, so we add explicit checks for NETTYPE == tcp to these tests. kfilnd doesn't currently support LNet drop rules, so any test cases that utilize those are skipped for NETTYPE == kfi. Two other fixes are included here: - test_230 doesn't check the correct default value of conns_per_peer in cases where the conns_per_peer parameter is set or when the link speed causes a value other than 1 to be used. - test_250 should be skipped in cases where the skip_mr_route_setup module parameter is > 0. HPE-bug-id: LUS-10852 Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Chris Horn Change-Id: I60f4c49d44d81b00bea01ff1f65adb6f20674bbf Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48788 Tested-by: Maloo Tested-by: jenkins Reviewed-by: Serguei Smirnov Reviewed-by: Frank Sehr Reviewed-by: Oleg Drokin --- lustre/tests/sanity-lnet.sh | 207 +++++++++++++++++++++++++++-------------- lustre/tests/test-framework.sh | 2 + 2 files changed, 141 insertions(+), 68 deletions(-) diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index f42050a..c351531 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -106,7 +106,7 @@ cleanup_netns() { configure_dlc() { echo "Loading LNet and configuring DLC" - load_lnet + load_lnet || return $? 
do_lnetctl lnet configure } @@ -158,10 +158,10 @@ validate_nid() { local net="${nid//*@/}" local addr="${nid//@*/}" - local num_re='[0-9]\+' + local num_re='[0-9]+' local ip_re="[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}" - if [[ $net =~ gni[0-9]* ]] || [[ $net =~ kfi[0-9]* ]]; then + if [[ $net =~ (gni|kfi)[0-9]* ]]; then [[ $addr =~ ${num_re} ]] && return 0 else [[ $addr =~ ${ip_re} ]] && return 0 @@ -227,8 +227,7 @@ cleanup_lnet || error "Failed to cleanup LNet" stack_trap 'cleanup_testsuite' EXIT test_0() { - load_module ../lnet/lnet/lnet || error "Failed to load module rc = $?" - do_lnetctl lnet configure || error "lnet configure failed rc = $?" + configure_dlc || error "Failed to configure DLC rc = $?" define_global_yaml reinit_dlc || return $? do_lnetctl import < ${GLOBAL_YAML_FILE} || error "Import failed $?" @@ -1023,11 +1022,6 @@ add_net() { local net="$1" local if="$2" - if ! lsmod | grep -q ksocklnd ; then - load_module ../lnet/klnds/socklnd/ksocklnd || - error "Can't load ksocklnd.ko" - fi - do_lnetctl net add --net ${net} --if ${if} || error "Failed to add net ${net} on if ${if}" } @@ -1040,16 +1034,22 @@ compare_route_add() { do_lnetctl route add --net ${rnet} --gateway ${gw} || error "route add failed $?" - # CPT configuration is pruned from the exported yaml, since the default - # can vary across test systems (unlike default values for things like - # peer_credits, peer_timeout, etc.) - $LNETCTL export --backup | grep -v CPT > $actual || + $LNETCTL export --backup > $actual || error "export failed $?" validate_gateway_nids return $? } +append_net_tunables() { + local net=${1:-tcp} + + $LNETCTL net show -v --net ${net} | grep -v 'dev cpt' | + awk '/^\s+tunables:$/,/^\s+CPT:/' >> $TMP/sanity-lnet-$testnum-expected.yaml +} + test_100() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" reinit_dlc || return $? 
add_net "tcp" "${INTERFACES[0]}" cat < $TMP/sanity-lnet-$testnum-expected.yaml @@ -1058,13 +1058,9 @@ net: local NI(s): - interfaces: 0: ${INTERFACES[0]} - tunables: - peer_timeout: 180 - peer_credits: 8 - peer_buffer_credits: 0 - credits: 256 - lnd tunables: - conns_per_peer: 1 +EOF + append_net_tunables tcp + cat <> $TMP/sanity-lnet-$testnum-expected.yaml route: - net: tcp7 gateway: 7.7.7.7@tcp @@ -1084,6 +1080,8 @@ EOF run_test 100 "Add route with single gw (tcp)" test_101() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" reinit_dlc || return $? add_net "tcp" "${INTERFACES[0]}" cat < $TMP/sanity-lnet-$testnum-expected.yaml @@ -1147,26 +1145,51 @@ compare_route_del() { validate_gateway_nids } +generate_nid() { + local net=${1} + local nid=$((${testnum} % 255)) + + if [[ ${net} =~ (tcp|o2ib)[0-9]* ]]; then + echo "${nid}.${nid}.${nid}.${nid}@${net}" + else + echo "${nid}@${net}" + fi +} + test_102() { reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" + add_net "${NETTYPE}" "${INTERFACES[0]}" $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-expected.yaml - do_lnetctl route add --net tcp102 --gateway 102.102.102.102@tcp || + + local gwnid=$(generate_nid ${NETTYPE}) + + do_lnetctl route add --net ${NETTYPE}2 --gateway ${gwnid} || error "route add failed $?" - compare_route_del "tcp102" "102.102.102.102@tcp" + compare_route_del "${NETTYPE}2" "${gwnid}" } -run_test 102 "Delete route with single gw (tcp)" +run_test 102 "Delete route with single gw" +IP_NID_EXPR='103.103.103.[103-120/4]' +NUM_NID_EXPR='[103-120/4]' test_103() { reinit_dlc || return $? 
- add_net "tcp" "${INTERFACES[0]}" + add_net "${NETTYPE}" "${INTERFACES[0]}" $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-expected.yaml - do_lnetctl route add --net tcp103 \ - --gateway 103.103.103.[103-120/4]@tcp || + + local nid_expr + + if [[ $NETTYPE =~ (tcp|o2ib)[0-9]* ]]; then + nid_expr="${IP_NID_EXPR}" + else + nid_expr="${NUM_NID_EXPR}" + fi + + do_lnetctl route add --net ${NETTYPE}103 \ + --gateway ${nid_expr}@${NETTYPE} || error "route add failed $?" - compare_route_del "tcp103" "103.103.103.[103-120/4]@tcp" + compare_route_del "${NETTYPE}103" "${nid_expr}@${NETTYPE}" } -run_test 103 "Delete route with multiple gw (tcp)" +run_test 103 "Delete route with multiple gw" test_104() { local tyaml="$TMP/sanity-lnet-$testnum-expected.yaml" @@ -1227,10 +1250,13 @@ run_test 104 "Set/check response_tracking param" test_105() { reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" - do_lnetctl route add --net tcp105 --gateway 105.105.105.105@tcp || + add_net "${NETTYPE}" "${INTERFACES[0]}" + + local gwnid=$(generate_nid ${NETTYPE}) + + do_lnetctl route add --net ${NETTYPE}105 --gateway ${gwnid} || error "route add failed $?" - do_lnetctl peer add --prim 105.105.105.105@tcp && + do_lnetctl peer add --prim ${gwnid} && error "peer add should fail" return 0 @@ -1239,10 +1265,13 @@ run_test 105 "Adding duplicate GW peer should fail" test_106() { reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" - do_lnetctl route add --net tcp106 --gateway 106.106.106.106@tcp || + add_net "${NETTYPE}" "${INTERFACES[0]}" + + local gwnid=$(generate_nid ${NETTYPE}) + + do_lnetctl route add --net ${NETTYPE}106 --gateway ${gwnid} || error "route add failed $?" 
- do_lnetctl peer del --prim 106.106.106.106@tcp && + do_lnetctl peer del --prim ${gwnid} && error "peer del should fail" return 0 @@ -1250,6 +1279,8 @@ test_106() { run_test 106 "Deleting GW peer should fail" test_200() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" cleanup_lnet || exit 1 load_lnet "networks=\"\"" do_ns $LNETCTL lnet configure --all || exit 1 @@ -1258,6 +1289,8 @@ test_200() { run_test 200 "load lnet w/o module option, configure in a non-default namespace" test_201() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" cleanup_lnet || exit 1 load_lnet "networks=tcp($FAKE_IF)" do_ns $LNETCTL lnet configure --all || exit 1 @@ -1266,6 +1299,8 @@ test_201() { run_test 201 "load lnet using networks module options in a non-default namespace" test_202() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" cleanup_lnet || exit 1 load_lnet "networks=\"\" ip2nets=\"tcp0($FAKE_IF) ${FAKE_IP}\"" do_ns $LNETCTL lnet configure --all || exit 1 @@ -1277,6 +1312,8 @@ run_test 202 "load lnet using ip2nets in a non-default namespace" ### Add the interfaces in the target namespace test_203() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" cleanup_lnet || exit 1 load_lnet do_lnetctl lnet configure || exit 1 @@ -1411,6 +1448,8 @@ setup_health_test() { local need_mr=$1 local rc=0 + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + local rnodes=$(remote_nodes_list) [[ -z $rnodes ]] && skip "Need at least 1 remote node" @@ -1674,6 +1713,9 @@ test_208_load_and_check_lnet() { } test_208() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" + cleanup_netns || error "Failed to cleanup netns before test execution" cleanup_lnet || error "Failed to unload modules before test execution" setup_fakeif || error "Failed to add fake IF" @@ -1858,9 +1900,11 @@ check_ping_count() { } test_210() { + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + reinit_dlc || return $? 
- add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? local prim_nid=$($LCTL list_nids | head -n 1) @@ -1875,8 +1919,8 @@ test_210() { $LCTL set_param debug=+net # Use local_error so LNet doesn't attempt to resend the discovery ping - $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error - $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error + $LCTL net_drop_add -s *@${NETTYPE} -d *@${NETTYPE} -m GET -r 1 -e local_error + $LCTL net_drop_add -s *@${NETTYPE}1 -d *@${NETTYPE}1 -m GET -r 1 -e local_error do_lnetctl discover $prim_nid && error "Expected discovery to fail" @@ -1893,8 +1937,8 @@ test_210() { $LCTL net_drop_del -a reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? local prim_nid=$($LCTL list_nids | head -n 1) @@ -1911,8 +1955,8 @@ test_210() { $LCTL set_param debug=+net # Use local_error so LNet doesn't attempt to resend the discovery ping - $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error - $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error + $LCTL net_drop_add -s *@${NETTYPE} -d *@${NETTYPE} -m GET -r 1 -e local_error + $LCTL net_drop_add -s *@${NETTYPE}1 -d *@${NETTYPE}1 -m GET -r 1 -e local_error do_lnetctl discover $prim_nid && error "Expected discovery to fail" @@ -1935,9 +1979,11 @@ test_210() { run_test 210 "Local NI recovery checks" test_211() { + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? 
local prim_nid=$($LCTL list_nids | head -n 1) @@ -1950,8 +1996,8 @@ test_211() { do_lnetctl set recovery_limit 10 || error "failed to set recovery_limit" - $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error - $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error + $LCTL net_drop_add -s *@${NETTYPE} -d *@${NETTYPE} -m GET -r 1 -e remote_error + $LCTL net_drop_add -s *@${NETTYPE}1 -d *@${NETTYPE}1 -m GET -r 1 -e remote_error # Set health to 0 on one interface. This forces it onto the recovery # queue. @@ -1991,8 +2037,8 @@ test_211() { check_ping_count "peer_ni" "0" reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? local prim_nid=$($LCTL list_nids | head -n 1) @@ -2007,8 +2053,8 @@ test_211() { do_lnetctl set max_recovery_ping_interval 4 || error "failed to set max_recovery_ping_interval" - $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error - $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error + $LCTL net_drop_add -s *@${NETTYPE} -d *@${NETTYPE} -m GET -r 1 -e remote_error + $LCTL net_drop_add -s *@${NETTYPE}1 -d *@${NETTYPE}1 -m GET -r 1 -e remote_error # Set health to 0 on one interface. This forces it onto the recovery # queue. 
@@ -2033,6 +2079,8 @@ test_211() { run_test 211 "Remote NI recovery checks" test_212() { + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + local rnodes=$(remote_nodes_list) [[ -z $rnodes ]] && skip "Need at least 1 remote node" @@ -2133,6 +2181,8 @@ test_212() { run_test 212 "Check discovery refcount loss bug (LU-14627)" test_213() { + [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE" + cleanup_netns || error "Failed to cleanup netns before test execution" cleanup_lnet || error "Failed to unload modules before test execution" @@ -2175,6 +2225,8 @@ function check_ni_status() { } test_214() { + [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE" + cleanup_netns || error "Failed to cleanup netns before test execution" cleanup_lnet || error "Failed to unload modules before test execution" @@ -2252,8 +2304,8 @@ test_215() { reinit_dlc || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? - add_net "tcp2" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}2" "${INTERFACES[0]}" || return $? local nid1=$($LCTL list_nids | head -n 1) local nid2=$($LCTL list_nids | tail --lines 1) @@ -2330,12 +2382,14 @@ test_215() { run_test 215 "Test lnetctl ping --source option" test_216() { + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + local rc=0 reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? local nids=( $($LCTL list_nids | xargs echo) ) @@ -2383,19 +2437,21 @@ test_217() { run_test 217 "Don't leak memory when discovering peer with nnis <= 1" test_218() { + [[ ${NETTYPE} == kfi* ]] && skip "kfi doesn't support drop rules" + reinit_dlc || return $? [[ ${#INTERFACES[@]} -lt 2 ]] && skip "Need two LNet interfaces" - add_net "tcp" "${INTERFACES[0]}" || return $? 
+ add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? local nid1=$($LCTL list_nids | head -n 1) do_lnetctl ping $nid1 || error "ping failed" - add_net "tcp" "${INTERFACES[1]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[1]}" || return $? local nid2=$($LCTL list_nids | tail --lines 1) @@ -2437,8 +2493,8 @@ run_test 218 "Local recovery pings should exercise all available paths" test_219() { reinit_dlc || return $? - add_net "tcp" "${INTERFACES[0]}" || return $? - add_net "tcp1" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + add_net "${NETTYPE}1" "${INTERFACES[0]}" || return $? local nid1=$(lctl list_nids | head -n 1) local nid2=$(lctl list_nids | tail --lines 1) @@ -2956,6 +3012,8 @@ test_227() { run_test 227 "Check router peer health disabled" test_230() { + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" # LU-12815 echo "Check valid values; Should succeed" local i @@ -2997,10 +3055,14 @@ test_230() { reinit_dlc || return $? add_net "tcp" "${INTERFACES[0]}" || return $? + + local default=$($LNETCTL net show -v 1 | + awk '/conns_per_peer/{print $NF}') + echo "Set < 0; Should be ignored" do_lnetctl net set --all --conns-per-peer -1 || error "should have succeeded $?" - $LNETCTL net show -v 1 | grep -q "conns_per_peer: 1" || + $LNETCTL net show -v 1 | grep -q "conns_per_peer: ${default}" || error "Did not stay at default" } run_test 230 "Test setting conns-per-peer" @@ -3008,7 +3070,7 @@ run_test 230 "Test setting conns-per-peer" test_231() { reinit_dlc || return $? 
- do_lnetctl net add --net tcp --if ${INTERFACES[0]} || + do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} || error "Failed to add net" $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-expected.yaml @@ -3024,10 +3086,10 @@ test_231() { compare_yaml_files || error "Wrong config after import" - do_lnetctl net del --net tcp --if ${INTERFACES[0]} || - error "Failed to delete net tcp" + do_lnetctl net del --net ${NETTYPE} --if ${INTERFACES[0]} || + error "Failed to delete net ${NETTYPE}" - do_lnetctl net add --net tcp --if ${INTERFACES[0]} --peer-timeout=0 || + do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} --peer-timeout=0 || error "Failed to add net with peer-timeout=0" $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml @@ -3053,8 +3115,17 @@ run_test 231 "Check DLC handling of peer_timeout parameter" ### Test that linux route is added for each ni test_250() { + local skip_param + + [[ ${NETTYPE} == tcp* ]] || + skip "Need tcp NETTYPE" reinit_dlc || return $? add_net "tcp" "${INTERFACES[0]}" || return $? + + skip_param=$(cat /sys/module/ksocklnd/parameters/skip_mr_route_setup) + [[ ${skip_param:-0} -ne 0 ]] && + skip "Need skip_mr_route_setup=0 found $skip_param" + ip route show table ${INTERFACES[0]} | grep -q "${INTERFACES[0]}" } run_test 250 "test that linux routes are added" @@ -3142,11 +3213,11 @@ run_test 300 "packaged LNet UAPI headers can be compiled" test_301() { reinit_dlc || return $? 
- do_lnetctl net add --net tcp --if ${INTERFACES[0]} || + do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} || error "Failed to add net" - do_lnetctl net add --net tcp --if ${INTERFACES[0]} && + do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} && error "add net should have failed" - do_lnetctl net del --net tcp --if ${INTERFACES[0]} || + do_lnetctl net del --net ${NETTYPE} --if ${INTERFACES[0]} || error "Failed to del net" unload_modules } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index eb9ee3f..5144b23 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -742,6 +742,8 @@ load_lnet() { case $NETTYPE in o2ib*) LNETLND="o2iblnd/ko2iblnd" ;; tcp*) LNETLND="socklnd/ksocklnd" ;; + kfi*) LNETLND="kfilnd/kkfilnd" ;; + gni*) LNETLND="gnilnd/kgnilnd" ;; *) local lnd="${NETTYPE%%[0-9]}lnd" [ -f "$LNDPATH/$lnd/k$lnd.ko" ] && LNETLND="$lnd/k$lnd" || -- 1.8.3.1