From: Chris Horn Date: Wed, 28 Apr 2021 16:33:40 +0000 (-0500) Subject: LU-14649 lnet: Correct distance calculation of local NIDs X-Git-Tag: 2.14.53~101 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3b263dd80ee56efae922e2cfcab375dbe2cb273a LU-14649 lnet: Correct distance calculation of local NIDs Multi-rail peers can have multiple local NIDs on the same net, but LNetDist() may only identify a NID as local if it is the first one returned by lnet_get_next_ni_locked(). We need to check all local NIs to find a match for the target NID in LNetDist(). Add test to check LNetDist() calculation of local NIDs for a peer with multiple NIDs on the same net. HPE-bug-id: LUS-9964 Signed-off-by: Chris Horn Change-Id: Ic8855f7798a90972c69d89d039d0bba882d8aed1 Reviewed-on: https://review.whamcloud.com/43498 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 66cb804..e458bbf 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -5242,6 +5242,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) int cpt; __u32 order = 2; struct list_head *rn_list; + bool matched_dstnet = false; /* if !local_nid_dist_zero, I don't return a distance of 0 ever * (when lustre sees a distance of 0, it substitutes 0@lo), so I @@ -5267,25 +5268,40 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) return local_nid_dist_zero ? 0 : 1; } - if (LNET_NIDNET(ni->ni_nid) == dstnet) { - /* Check if ni was originally created in - * current net namespace. - * If not, assign order above 0xffff0000, - * to make this ni not a priority. */ - if (current->nsproxy && - !net_eq(ni->ni_net_ns, current->nsproxy->net_ns)) - order += 0xffff0000; - if (srcnidp != NULL) + if (!matched_dstnet && LNET_NIDNET(ni->ni_nid) == dstnet) { + matched_dstnet = true; + /* We matched the destination net, but we may have + * additional local NIs to inspect. + * + * We record the nid and order as appropriate, but + * they may be overwritten if we match local NI above. + */ + if (srcnidp) *srcnidp = ni->ni_nid; - if (orderp != NULL) - *orderp = order; - lnet_net_unlock(cpt); - return 1; + + if (orderp) { + /* Check if ni was originally created in + * current net namespace. + * If not, assign order above 0xffff0000, + * to make this ni not a priority. + */ + if (current->nsproxy && + !net_eq(ni->ni_net_ns, + current->nsproxy->net_ns)) + *orderp = order + 0xffff0000; + else + *orderp = order; + } } order++; } + if (matched_dstnet) { + lnet_net_unlock(cpt); + return 1; + } + rn_list = lnet_net2rnethash(dstnet); list_for_each(e, rn_list) { rnet = list_entry(e, struct lnet_remotenet, lrn_list); diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 74eb19a..485e105 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -1960,6 +1960,34 @@ test_212() { } run_test 212 "Check discovery refcount loss bug (LU-14627)" +test_213() { + have_interface "eth0" || skip "Need eth0 interface with ipv4 configured" + + cleanup_netns || error "Failed to cleanup netns before test execution" + cleanup_lnet || error "Failed to unload modules before test execution" + + setup_fakeif || error "Failed to add fake IF" + have_interface "$FAKE_IF" || + error "Expect $FAKE_IF configured but not found" + + reinit_dlc || return $? + + add_net "tcp" "eth0" || return $? + add_net "tcp" "$FAKE_IF" || return $? + + local nid1=$(lctl list_nids | head -n 1) + local nid2=$(lctl list_nids | tail --lines 1) + + [[ $(lctl which_nid $nid1 $nid2) == $nid1 ]] || + error "Expect nid1 \"$nid1\" to be preferred" + + [[ $(lctl which_nid $nid2 $nid1) == $nid2 ]] || + error "Expect nid2 \"$nid2\" to be preferred" + + return 0 +} +run_test 213 "Check LNetDist calculation for multiple local NIDs" + test_300() { # LU-13274 local header