If a peer has discovery disabled then it will not consolidate peer
NI information. This means we need to use a consistent source NI
when sending to it just like we do for non-MR peers.
A comment in lnet_discovery_event_reply() indicates that this was a
known issue, but the situation is not handled properly.
Do not assume peers are multi-rail capable when peer objects are
allocated and initialized.
Do not mark a peer as multi-rail capable unless all of the following
conditions are satisified:
1. The peer has the MR feature flag set
2. The peer has discovery enabled.
3. We have discovery enabled locally
Note: 1, 2, and 3 above are implemented in the code for
lnet_discovery_event_reply(), but code earlier in the function breaks
this behavior. Remove the offending code.
Update sanity-lnet tests 100 and 101 to reflect the fact that peers
added via the traffic path no longer have multi-rail by default.
Cray-bug-id: LUS-7918
Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Ia02bd446f4b2143fb490f56c1ff6103198316da3
Reviewed-on: https://review.whamcloud.com/36512
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
struct lnet_peer *lp;
struct lnet_peer_net *lpn;
struct lnet_peer_ni *lpni;
struct lnet_peer *lp;
struct lnet_peer_net *lpn;
struct lnet_peer_ni *lpni;
- /*
- * Assume peer is Multi-Rail capable and let discovery find out
- * otherwise.
- */
- unsigned flags = LNET_PEER_MULTI_RAIL;
int rc = 0;
if (nid == LNET_NID_ANY) {
int rc = 0;
if (nid == LNET_NID_ANY) {
- * Only enable the multi-rail feature on the peer if both sides of
- * the connection have discovery on
- */
- if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
- CDEBUG(D_NET, "Peer %s has Multi-Rail feature enabled\n",
- libcfs_nid2str(lp->lp_primary_nid));
- lp->lp_state |= LNET_PEER_MULTI_RAIL;
- } else {
- CDEBUG(D_NET, "Peer %s has Multi-Rail feature disabled\n",
- libcfs_nid2str(lp->lp_primary_nid));
- lp->lp_state &= ~LNET_PEER_MULTI_RAIL;
- }
-
- /*
* The peer may have discovery disabled at its end. Set
* NO_DISCOVERY as appropriate.
*/
* The peer may have discovery disabled at its end. Set
* NO_DISCOVERY as appropriate.
*/
*/
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
*/
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
- /* Everything's fine */
+ CDEBUG(D_NET, "peer %s(%p) is MR\n",
+ libcfs_nid2str(lp->lp_primary_nid), lp);
} else if (lp->lp_state & LNET_PEER_CONFIGURED) {
CWARN("Reply says %s is Multi-Rail, DLC says not\n",
libcfs_nid2str(lp->lp_primary_nid));
} else if (lp->lp_state & LNET_PEER_CONFIGURED) {
CWARN("Reply says %s is Multi-Rail, DLC says not\n",
libcfs_nid2str(lp->lp_primary_nid));
+ } else if (lnet_peer_discovery_disabled) {
+ CDEBUG(D_NET,
+ "peer %s(%p) not MR: DD disabled locally\n",
+ libcfs_nid2str(lp->lp_primary_nid), lp);
+ } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) {
+ CDEBUG(D_NET,
+ "peer %s(%p) not MR: DD disabled remotely\n",
+ libcfs_nid2str(lp->lp_primary_nid), lp);
- /*
- * if discovery is disabled then we don't want to
- * update the state of the peer. All we'll do is
- * update the peer_nis which were reported back in
- * the initial ping
- */
-
- if (!lnet_is_discovery_disabled_locked(lp)) {
- lp->lp_state |= LNET_PEER_MULTI_RAIL;
- lnet_peer_clr_non_mr_pref_nids(lp);
- }
+ CDEBUG(D_NET, "peer %s(%p) is MR capable\n",
+ libcfs_nid2str(lp->lp_primary_nid), lp);
+ lp->lp_state |= LNET_PEER_MULTI_RAIL;
+ lnet_peer_clr_non_mr_pref_nids(lp);
}
} else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
if (lp->lp_state & LNET_PEER_CONFIGURED) {
}
} else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
if (lp->lp_state & LNET_PEER_CONFIGURED) {
health_sensitivity: 1
peer:
- primary nid: 7.7.7.7@tcp
health_sensitivity: 1
peer:
- primary nid: 7.7.7.7@tcp
peer ni:
- nid: 7.7.7.7@tcp
EOF
peer ni:
- nid: 7.7.7.7@tcp
EOF
health_sensitivity: 1
peer:
- primary nid: 8.8.8.9@tcp
health_sensitivity: 1
peer:
- primary nid: 8.8.8.9@tcp
peer ni:
- nid: 8.8.8.9@tcp
- primary nid: 8.8.8.10@tcp
peer ni:
- nid: 8.8.8.9@tcp
- primary nid: 8.8.8.10@tcp
peer ni:
- nid: 8.8.8.10@tcp
- primary nid: 8.8.8.8@tcp
peer ni:
- nid: 8.8.8.10@tcp
- primary nid: 8.8.8.8@tcp
peer ni:
- nid: 8.8.8.8@tcp
EOF
peer ni:
- nid: 8.8.8.8@tcp
EOF