From 1a667fec1ee1397be1b66ddcc3d5069d7883e7e7 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Sun, 12 Jul 2020 10:47:55 -0500 Subject: [PATCH] LU-13781 lnet: Local NI must be on same net as next-hop When sending to a remote peer we need to restrict our selection of a local NI to those on the same peer net as the next-hop. The code currently selects a local NI on the peer net specified by the lr_lnet field of the lnet_route returned by lnet_find_route_locked(). However, lnet_find_route_locked() may select a next-hop peer NI on any local peer net - not just lr_lnet. A redundant assignment to sd->sd_msg->msg_src_nid_param is also removed. That variable is always set appropriately in lnet_select_pathway(). Lustre-change: https://review.whamcloud.com/39352 Lustre-commit: 031c087f3847777c0099cbfae13f0b6fee54452b Test-Parameters: trivial HPE-bug-id: LUS-9095 Signed-off-by: Chris Horn Change-Id: If1bec26d6646b9e66b99656d7db2dc538d631a34 Reviewed-on: https://review.whamcloud.com/48381 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Alexandre Ioffe Reviewed-by: Andreas Dilger --- lnet/lnet/lib-move.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 1d38dcc..971fb13 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2069,7 +2069,6 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer **gw_peer) { int rc; - __u32 local_lnet; struct lnet_peer *gw; struct lnet_peer *lp; struct lnet_peer_net *lpn; @@ -2096,10 +2095,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (gwni) { gw = gwni->lpni_peer_net->lpn_peer; lnet_peer_ni_decref_locked(gwni); - if (gw->lp_rtr_refcount) { - local_lnet = LNET_NIDNET(sd->sd_rtr_nid); + if (gw->lp_rtr_refcount) route_found = true; - } } else { CWARN("No peer NI for gateway %s. Attempting to find an alternative route.\n", libcfs_nid2str(sd->sd_rtr_nid)); @@ -2201,7 +2198,6 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, gw = best_route->lr_gateway; LASSERT(gw == gwni->lpni_peer_net->lpn_peer); - local_lnet = best_route->lr_lnet; } /* @@ -2209,24 +2205,22 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, * This means we might delay the message until discovery has * completed */ - sd->sd_msg->msg_src_nid_param = sd->sd_src_nid; rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt); if (rc) return rc; - if (!sd->sd_best_ni) - sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, - lnet_peer_get_net_locked(gw, - local_lnet), - sd->sd_msg, - sd->sd_md_cpt, - true); - if (!sd->sd_best_ni) { - CERROR("Internal Error. Expected local ni on %s but non found :%s\n", - libcfs_net2str(local_lnet), - libcfs_nid2str(sd->sd_src_nid)); - return -EFAULT; + lpn = gwni->lpni_peer_net; + sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpn, + sd->sd_msg, + sd->sd_md_cpt, + true); + if (!sd->sd_best_ni) { + CERROR("Internal Error. Expected local ni on %s but non found: %s\n", + libcfs_net2str(lpn->lpn_net_id), + libcfs_nid2str(sd->sd_src_nid)); + return -EFAULT; + } } *gw_lpni = gwni; -- 1.8.3.1