From 46b641fa1485664140cc91ca8bb3ee4f4601c5cb Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Tue, 11 Jan 2022 16:19:16 -0600 Subject: [PATCH] LU-15440 lnet: lnet_peer_data_present() memory leak If the ping buffer has nnis <= 1 then the ref on the ping buffer does not get dropped. This causes a memory leak. Lustre-change: https://review.whamcloud.com/46052 Lustre-commit: 56384a4fc39ff99c8abb3538f93d303f2be6ab45 Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Chris Horn Change-Id: I5e3c651ffecbe4f8860afb86770cecef23ebe862 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51684 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Cyril Bordage Reviewed-by: Andreas Dilger --- lnet/lnet/peer.c | 4 ++- lustre/tests/sanity-lnet.sh | 77 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 13 deletions(-) diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 45c74f6..fd899b0 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3334,8 +3334,10 @@ __must_hold(&lp->lp_lock) * primary NID to the correct value here. Moreover, this peer * can show up with only the loopback NID in the ping buffer. */ - if (pbuf->pb_info.pi_nnis <= 1) + if (pbuf->pb_info.pi_nnis <= 1) { + lnet_ping_buffer_decref(pbuf); goto out; + } nid = pbuf->pb_info.pi_ni[1].ns_nid; if (lp->lp_primary_nid == LNET_NID_LO_0) { rc = lnet_peer_set_primary_nid(lp, nid, flags); diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 17f8f80..5dae873 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -2120,26 +2120,58 @@ test_215() { } run_test 215 "Test lnetctl ping --source option" -test_219() { +test_216() { + local rc=0 + reinit_dlc || return $? + add_net "tcp" "${INTERFACES[0]}" || return $? add_net "tcp1" "${INTERFACES[0]}" || return $? - local nid1=$(lctl list_nids | head -n 1) - local nid2=$(lctl list_nids | tail --lines 1) + local nids=( $($LCTL list_nids | xargs echo) ) - do_lnetctl ping $nid1 || - error "Ping failed $?" - do_lnetctl ping $nid2 || - error "Ping failed $?" + do_lnetctl discover ${nids[0]} || + error "Initial discovery failed" - do_lnetctl discover $nid2 || - error "Discovery failed" + do_lnetctl ping --source ${nids[0]} ${nids[0]} || + error "Initial ping failed $?" - $LNETCTL peer show --nid $nid1 | grep -q $nid2 || - error "$nid2 is not listed under $nid1" + do_lnetctl ping --source ${nids[1]} ${nids[1]} || + error "Initial ping failed $?" + + local src dst + for src in ${nids[@]}; do + for dst in ${nids[@]}; do + $LCTL net_drop_add -r 1 -s $src -d $dst -e network_timeout + done + done + + do_lnetctl ping ${nids[0]} || rc=$? + + $LCTL net_drop_del -a + + [[ $rc -eq 0 ]] && + error "expected ping to fail" + + check_nid_in_recovq "-p" 0 + check_nid_in_recovq "-l" 1 + + return 0 } -run_test 219 "Consolidate peer entries" +run_test 216 "Failed send to peer NI owned by local host should not trigger peer NI recovery" + +test_217() { + reinit_dlc || return $? + + [[ $($LNETCTL net show | grep -c nid) -ne 1 ]] && + error "Unexpected number of NIs after initalizing DLC" + + do_lnetctl discover 0@lo || + error "Failed to discover 0@lo" + + unload_modules +} +run_test 217 "Don't leak memory when discovering peer with nnis <= 1" test_218() { reinit_dlc || return $? @@ -2194,6 +2226,27 @@ test_218() { } run_test 218 "Local recovery pings should exercise all available paths" +test_219() { + reinit_dlc || return $? + add_net "tcp" "${INTERFACES[0]}" || return $? + add_net "tcp1" "${INTERFACES[0]}" || return $? + + local nid1=$(lctl list_nids | head -n 1) + local nid2=$(lctl list_nids | tail --lines 1) + + do_lnetctl ping $nid1 || + error "Ping failed $?" + do_lnetctl ping $nid2 || + error "Ping failed $?" + + do_lnetctl discover $nid2 || + error "Discovery failed" + + $LNETCTL peer show --nid $nid1 | grep -q $nid2 || + error "$nid2 is not listed under $nid1" +} +run_test 219 "Consolidate peer entries" + test_230() { # LU-12815 echo "Check valid values; Should succeed" -- 1.8.3.1