From 031c087f3847777c0099cbfae13f0b6fee54452b Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Sun, 12 Jul 2020 10:47:55 -0500 Subject: [PATCH] LU-13781 lnet: Local NI must be on same net as next-hop When sending to a remote peer we need to restrict our selection of a local NI to those on the same peer net as the next-hop. The code currently selects a local NI on the peer net specified by the lr_lnet field of the lnet_route returned by lnet_find_route_locked(). However, lnet_find_route_locked() may select a next-hop peer NI on any local peer net - not just lr_lnet. A redundant assignment to sd->sd_msg->msg_src_nid_param is also removed. That variable is always set appropriately in lnet_select_pathway(). Test-Parameters: trivial HPE-bug-id: LUS-9095 Signed-off-by: Chris Horn Change-Id: If1bec26d6646b9e66b99656d7db2dc538d631a34 Reviewed-on: https://review.whamcloud.com/39352 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lnet/lnet/lib-move.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index ed87e97..66cb804 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2104,7 +2104,6 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer **gw_peer) { int rc; - __u32 local_lnet; struct lnet_peer *gw; struct lnet_peer *lp; struct lnet_peer_net *lpn; @@ -2133,10 +2132,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (gwni) { gw = gwni->lpni_peer_net->lpn_peer; lnet_peer_ni_decref_locked(gwni); - if (gw->lp_rtr_refcount) { - local_lnet = LNET_NIDNET(sd->sd_rtr_nid); + if (gw->lp_rtr_refcount) route_found = true; - } } else { CWARN("No peer NI for gateway %s. Attempting to find an alternative route.\n", libcfs_nid2str(sd->sd_rtr_nid)); @@ -2252,7 +2249,6 @@ use_lpn: gw = best_route->lr_gateway; LASSERT(gw == gwni->lpni_peer_net->lpn_peer); - local_lnet = best_route->lr_lnet; } /* @@ -2260,22 +2256,20 @@ use_lpn: * This means we might delay the message until discovery has * completed */ - sd->sd_msg->msg_src_nid_param = sd->sd_src_nid; rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt); if (rc) return rc; - if (!sd->sd_best_ni) - sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, - lnet_peer_get_net_locked(gw, - local_lnet), - sd->sd_md_cpt); - if (!sd->sd_best_ni) { - CERROR("Internal Error. Expected local ni on %s but non found :%s\n", - libcfs_net2str(local_lnet), - libcfs_nid2str(sd->sd_src_nid)); - return -EFAULT; + lpn = gwni->lpni_peer_net; + sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpn, + sd->sd_md_cpt); + if (!sd->sd_best_ni) { + CERROR("Internal Error. Expected local ni on %s but non found: %s\n", + libcfs_net2str(lpn->lpn_net_id), + libcfs_nid2str(sd->sd_src_nid)); + return -EFAULT; + } } *gw_lpni = gwni; -- 1.8.3.1