From a7c9aba5eb96dd1e53899108a65af381b49e657b Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Wed, 17 Jun 2020 15:25:36 -0700 Subject: [PATCH] LU-13566 socklnd: fix local interface binding When a node is configured with multiple interfaces in a Multi-Rail config, socklnd was not utilizing the local interface requested by LNet. In essence, LNet was using all the NIDs in round robin; however, the socklnd module was not binding to the correct interface. Traffic was thus sent on a subset of the interfaces. The reason is that the route interface number was not being set. In most cases lnet_connect() is called to create a socket. The socket is bound to the interface provided and then ksocknal_create_conn() is called to create the socklnd connection. ksocknal_create_conn() calls ksocknal_associate_route_conn_locked() at which point the route's local interface is assigned. However, this is already too late as the socket has already been created and bound to a local interface. Therefore, it's important to assign the route's interface before calling lnet_connect() to ensure the socket is bound to the correct local interface. To address this issue, the route's interface index is initialized to the NI's interface index when it's added to the peer_ni. Another bug fixed: The interface index was not being initialized in the startup routine. Note: We're strictly assuming that there is one interface for each NI. This is because TCP bonding will be removed from the socklnd as it has been deprecated by LNet Multi-Rail. 
Signed-off-by: Amir Shehata Change-Id: Ibfa202ba009e07dbd69b19f1180790f1ea978ab1 Reviewed-on: https://review.whamcloud.com/38743 Tested-by: jenkins Reviewed-by: Neil Brown Reviewed-by: Serguei Smirnov Reviewed-by: James Simmons Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/klnds/socklnd/socklnd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index d750eaa..5249227 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -411,12 +411,14 @@ ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *rou struct list_head *tmp; struct ksock_conn *conn; struct ksock_route *route2; + struct ksock_net *net = peer_ni->ksnp_ni->ni_data; LASSERT(!peer_ni->ksnp_closing); LASSERT(route->ksnr_peer == NULL); LASSERT(!route->ksnr_scheduled); LASSERT(!route->ksnr_connecting); LASSERT(route->ksnr_connected == 0); + LASSERT(net->ksnn_ninterfaces > 0); /* LASSERT(unique) */ list_for_each(tmp, &peer_ni->ksnp_routes) { @@ -432,6 +434,11 @@ ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *rou route->ksnr_peer = peer_ni; ksocknal_peer_addref(peer_ni); + + /* set the route's interface to the current net's interface */ + route->ksnr_myiface = net->ksnn_interfaces[0].ksni_index; + net->ksnn_interfaces[0].ksni_nroutes++; + /* peer_ni's routelist takes over my ref on 'route' */ list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes); @@ -2668,6 +2675,7 @@ ksocknal_startup(struct lnet_ni *ni) net->ksnn_ninterfaces = 1; ni->ni_dev_cpt = ifaces[0].li_cpt; ksi->ksni_ipaddr = ifaces[0].li_ipaddr; + ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni); ksi->ksni_netmask = ifaces[0].li_netmask; strlcpy(ksi->ksni_name, ifaces[0].li_name, sizeof(ksi->ksni_name)); @@ -2707,6 +2715,8 @@ ksocknal_startup(struct lnet_ni *ni) ksi = &net->ksnn_interfaces[j]; ni->ni_dev_cpt = ifaces[j].li_cpt; ksi->ksni_ipaddr = ifaces[j].li_ipaddr; + ksi->ksni_index = + 
ksocknal_ip2index(ksi->ksni_ipaddr, ni); ksi->ksni_netmask = ifaces[j].li_netmask; strlcpy(ksi->ksni_name, ifaces[j].li_name, sizeof(ksi->ksni_name)); -- 1.8.3.1