From 2718f8a4526382e6369ef63acf6df57c942f834e Mon Sep 17 00:00:00 2001 From: Frank Sehr Date: Fri, 19 Apr 2024 18:33:12 -0400 Subject: [PATCH] LU-17760 lnet: Crash caused by uninitialized interface name When adding an interface with ip2net, a duplicate configuration of an already existing interface can cause a crash or misconfiguration of lnet. Incoming interface names have to be checked for NULL, and furthermore duplicate interface configurations have to be removed. When a duplicate is detected, it has to be added to a list so that it can be shut down; otherwise shutdown would assert. The problem can be reproduced on tcp and o2ib networks. The steps below were used to reproduce the problem in the original configuration, but it is reproducible in other variations and on tcp networks: modprobe lnet lnetctl lnet configure lnetctl net add --net o2ib --if mlxib1 lnetctl net add --net o2ib --if mlxib1 --ip2net "o2ib 172.30.12.*" Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Frank Sehr Change-Id: Ie76d97cc52855ab897a9e07a3697483189d4b19e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54859 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Serguei Smirnov Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/lnet/api-ni.c | 15 ++++++++++++++- lustre/tests/sanity-lnet.sh | 22 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index cf96e8e..1ce3aad 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2537,6 +2537,14 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun) goto failed0; } + lnet_net_lock(0); + if (lnet_nid_to_ni_locked(&ni->ni_nid, 0)) { + lnet_ni_addref_locked(ni, 0); + lnet_net_unlock(0); + return -EEXIST; + } + lnet_net_unlock(0); + /* We keep a reference on the loopback net through the loopback NI */ if (net->net_lnd->lnd_type == LOLND) { lnet_ni_addref(ni); @@ -2687,7 +2695,9 @@ lnet_startup_lndnet(struct lnet_net *net, 
struct lnet_lnd_tunables *tun) /* make sure that the the NI we're about to start * up is actually unique. if it's not fail. */ - if (!lnet_ni_unique_net(&net_l->net_ni_list, + + if (ni->ni_interface && + !lnet_ni_unique_net(&net_l->net_ni_list, ni->ni_interface)) { rc = -EEXIST; goto failed1; @@ -2699,6 +2709,9 @@ lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun) rc = lnet_startup_lndni(ni, tun); + if (rc == -EEXIST) + list_add_tail(&ni->ni_netlist, &local_ni_list); + if (rc != 0) goto failed1; diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index bbbaebe..14e198b 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -326,6 +326,7 @@ if [[ $NETTYPE =~ (tcp|o2ib)[0-9]* ]]; then if $FORCE_LARGE_NID; then always_except LU-14288 101 always_except LU-14288 103 + always_except LU-17457 199 always_except LU-17457 208 always_except LU-9680 213 always_except LU-17458 220 @@ -1824,6 +1825,27 @@ test_111() { } run_test 111 "Test many routes" +test_199() { + [[ ${NETTYPE} == tcp* || ${NETTYPE} == o2ib* ]] || + skip "Need tcp or o2ib NETTYPE" + + reinit_dlc || return $? + + echo "Add interface ${INTERFACES[0]}" + do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} || + error "Failed to add net ${NETTYPE} with ${INTERFACES[0]}" + + local ipaddress=$(ip --oneline addr show dev ${INTERFACES[0]} | + awk '/inet /{print $4}' | + sed 's:/.*::') + + echo "Add IP address ${ipaddress} for interface ${INTERFACES[0]}" + do_lnetctl net add --ip2net "${NETTYPE} ${ipaddress}" || return 0 + + error "Failed to add net ${NETTYPE} with ${INTERFACES[0]}" +} +run_test 199 "load lnet w/o module option, configure interface twice" + test_200() { [[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE" -- 1.8.3.1