From: Chris Horn Date: Wed, 27 May 2020 17:29:10 +0000 (-0500) Subject: LU-13605 lnet: Do not overwrite destination when routing X-Git-Tag: 2.13.56~47 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=ec94d6f77b61fe501c1ac8bb6256bd3caff23ad3;p=fs%2Flustre-release.git LU-13605 lnet: Do not overwrite destination when routing MR path selection in a routed environment is supposed to allow the originator of a message to set the final destination NID. On a multi-hop route, intermediate routers execute the same code path as the message originator (i.e. the remote send cases). This causes them to overwrite the destination NID when forwarding the message. Check the msg_routing flag to determine whether we should set the final destination NID (i.e. LNet peer NI). A somewhat related issue is that because intermediate routers are not selecting a destination lpni, they need to pick the next-hop lpni based on the destination NID's remote net. Test-Parameters: trivial Fixes: 9dfdc2238be ("LU-13035 lnet: fix remote peer ni selection") HPE-bug-id: LUS-8919 Signed-off-by: Chris Horn Change-Id: Id2fbbc5d8da347e971bbb8ad2779e80f75e29dd7 Reviewed-on: https://review.whamcloud.com/38731 Tested-by: jenkins Reviewed-by: Serguei Smirnov Tested-by: Maloo Reviewed-by: Shaun Tancheff Reviewed-by: Oleg Drokin --- diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index a8c5f5b..9565e29 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2034,60 +2034,79 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, } if (!route_found) { - /* we've already looked up the initial lpni using dst_nid */ - lpni = sd->sd_best_lpni; - /* the peer tree must be in existence */ - LASSERT(lpni && lpni->lpni_peer_net && - lpni->lpni_peer_net->lpn_peer); - lp = lpni->lpni_peer_net->lpn_peer; - - list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) { - /* is this remote network reachable? */ - rnet = lnet_find_rnet_locked(lpn->lpn_net_id); - if (!rnet) - continue; + if (sd->sd_msg->msg_routing) { + /* If I'm routing this message then I need to find the + * next hop based on the destination NID + */ + best_rnet = lnet_find_rnet_locked(LNET_NIDNET(sd->sd_dst_nid)); + if (!best_rnet) { + CERROR("Unable to route message to %s - Route table may be misconfigured\n", + libcfs_nid2str(sd->sd_dst_nid)); + return -EHOSTUNREACH; + } + } else { + /* we've already looked up the initial lpni using + * dst_nid + */ + lpni = sd->sd_best_lpni; + /* the peer tree must be in existence */ + LASSERT(lpni && lpni->lpni_peer_net && + lpni->lpni_peer_net->lpn_peer); + lp = lpni->lpni_peer_net->lpn_peer; + + list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) { + /* is this remote network reachable? */ + rnet = lnet_find_rnet_locked(lpn->lpn_net_id); + if (!rnet) + continue; + + if (!best_lpn) { + best_lpn = lpn; + best_rnet = rnet; + } + + if (best_lpn->lpn_seq <= lpn->lpn_seq) + continue; - if (!best_lpn) { best_lpn = lpn; best_rnet = rnet; } - if (best_lpn->lpn_seq <= lpn->lpn_seq) - continue; + if (!best_lpn) { + CERROR("peer %s has no available nets\n", + libcfs_nid2str(sd->sd_dst_nid)); + return -EHOSTUNREACH; + } - best_lpn = lpn; - best_rnet = rnet; - } + sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni, + sd->sd_dst_nid, + lp, + best_lpn->lpn_net_id); + if (!sd->sd_best_lpni) { + CERROR("peer %s is unreachable\n", + libcfs_nid2str(sd->sd_dst_nid)); + return -EHOSTUNREACH; + } - if (!best_lpn) { - CERROR("peer %s has no available nets\n", - libcfs_nid2str(sd->sd_dst_nid)); - return -EHOSTUNREACH; - } + /* We're attempting to round robin over the remote peer + * NI's so update the final destination we selected + */ + sd->sd_final_dst_lpni = sd->sd_best_lpni; - sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni, - sd->sd_dst_nid, - lp, - best_lpn->lpn_net_id); - if (!sd->sd_best_lpni) { - CERROR("peer %s is unreachable\n", - libcfs_nid2str(sd->sd_dst_nid)); - return -EHOSTUNREACH; + /* Increment the sequence number of the remote lpni so + * we can round robin over the different interfaces of + * the remote lpni + */ + sd->sd_best_lpni->lpni_seq++; } /* - * We're attempting to round robin over the remote peer - * NI's so update the final destination we selected - */ - sd->sd_final_dst_lpni = sd->sd_best_lpni; - - /* * find the best route. Restrict the selection on the net of the * local NI if we've already picked the local NI to send from. * Otherwise, let's pick any route we can find and then find - * a local NI we can reach the route's gateway on. Any route we select - * will be reachable by virtue of the restriction we have when - * adding a route. + * a local NI we can reach the route's gateway on. Any route we + * select will be reachable by virtue of the restriction we have + * when adding a route. */ best_route = lnet_find_route_locked(best_rnet, LNET_NIDNET(src_nid), @@ -2110,13 +2129,6 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, gw = best_route->lr_gateway; LASSERT(gw == gwni->lpni_peer_net->lpn_peer); local_lnet = best_route->lr_lnet; - - /* - * Increment the sequence number of the remote lpni so we - * can round robin over the different interfaces of the - * remote lpni - */ - sd->sd_best_lpni->lpni_seq++; } /* @@ -2153,7 +2165,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (sd->sd_rtr_nid == LNET_NID_ANY) { LASSERT(best_route && last_route); best_route->lr_seq = last_route->lr_seq + 1; - best_lpn->lpn_seq++; + if (best_lpn) + best_lpn->lpn_seq++; } return 0;