From ef6c35877b96c11a83a6cb823bf66e44bf355ed3 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 24 Jun 2020 11:17:45 -0500 Subject: [PATCH] LU-13712 lnet: Preferred NI logic breaks MR routing Edge (final-hop) routers typically use the non-multi-rail destination (NMR_DST) send case. i.e. they treat the destination as non-multi-rail. The reason for this is that we do not want routers to modify the destination peer interface selected by the message originator. As a result of using the NMR_DST send case, edge routers set a preferred NI, and then continue to use that NI, because it's preferred, even if the NI goes down and the router has other healthy interfaces available to it. Routers do not need to use the preferred NI selection logic when they are forwarding a message, so modify the NMR_DST algorithm to allow routers to select any suitable local NI. Test-Parameters: trivial HPE-bug-id: LUS-9045 Signed-off-by: Chris Horn Change-Id: Iae0fb47d58a70f640d316a8c85cf3058ca2f82eb Reviewed-on: https://review.whamcloud.com/39168 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: Oleg Drokin --- lnet/lnet/lib-move.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 8c297f9..cb472df 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2326,7 +2326,7 @@ lnet_select_preferred_best_ni(struct lnet_send_data *sd) static int lnet_handle_any_local_nmr_dst(struct lnet_send_data *sd) { - int rc; + int rc = 0; /* sd->sd_best_lpni is already set to the final destination */ @@ -2343,7 +2343,23 @@ lnet_handle_any_local_nmr_dst(struct lnet_send_data *sd) return -EFAULT; } - rc = lnet_select_preferred_best_ni(sd); + if (sd->sd_msg->msg_routing) { + /* If I'm forwarding this message then I can choose any NI + * on the destination peer net + */ + sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, + sd->sd_peer, + sd->sd_best_lpni->lpni_peer_net, + sd->sd_md_cpt, + true); + if (!sd->sd_best_ni) { + CERROR("Unable to forward message to %s. No local NI available\n", + libcfs_nid2str(sd->sd_dst_nid)); + rc = -EHOSTUNREACH; + } + } else + rc = lnet_select_preferred_best_ni(sd); + if (!rc) rc = lnet_handle_send(sd); -- 1.8.3.1