From adae4295b62b1074f5c3c45543c586282394b1be Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Wed, 22 Apr 2020 17:26:48 -0700 Subject: [PATCH] LU-13478 lnet: handle discovery off properly Peers need to only be updated when discovery is toggled from on to off. This way the peers don't attempt to send to a non-primary NID of the node. However, when discovery is toggled from off to on, the peer will attempt rediscovery and the peer information will eventually consolidate. In order to properly delete the peer only when it makes sense we have to differentiate between the case when we get the initial message and when we get a push for an already discovered peer. We only want to delete our local representation if the peer is one we have already had in our records. Signed-off-by: Amir Shehata Change-Id: Id6a7353276fec82fddf90e0fa9d85d165b459c8d Reviewed-on: https://review.whamcloud.com/38321 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- lnet/lnet/api-ni.c | 27 +++++++-------------------- lnet/lnet/peer.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 0d5e45b..f1c94a5 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -326,7 +326,7 @@ static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp) { int rc; - unsigned *discovery = (unsigned *)kp->arg; + unsigned *discovery_off = (unsigned *)kp->arg; unsigned long value; struct lnet_ping_buffer *pbuf; @@ -344,7 +344,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp) */ mutex_lock(&the_lnet.ln_api_mutex); - if (value == *discovery) { + if (value == *discovery_off) { mutex_unlock(&the_lnet.ln_api_mutex); return 0; } @@ -357,7 +357,7 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp) * updating the peers */ if (the_lnet.ln_state != LNET_STATE_RUNNING) { - *discovery = value; + *discovery_off = value; mutex_unlock(&the_lnet.ln_api_mutex); return 0; } @@ -371,23 +371,10 @@ discovery_set(const char *val, cfs_kernel_param_arg_t *kp) pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY; lnet_net_unlock(LNET_LOCK_EX); - /* - * Always update the peers. This will result in a push to the - * peers with the updated capabilities feature mask. The peer can - * then take appropriate action to update its representation of - * the node. - * - * If discovery is already off, turn it on first before pushing - * the update. The discovery flag must be on before pushing. - * otherwise if the flag is on and we're turning it off then push - * first before turning the flag off. In the former case the flag - * is being set twice, but I find it's better to do that rather - * than have duplicate code in an if/else statement. - */ - if (*discovery > 0 && value == 0) - *discovery = value; - lnet_push_update_to_peers(1); - *discovery = value; + /* only send a push when we're turning off discovery */ + if (*discovery_off <= 0 && value > 0) + lnet_push_update_to_peers(1); + *discovery_off = value; mutex_unlock(&the_lnet.ln_api_mutex); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index b745eb3..f444911 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -2032,13 +2032,17 @@ void lnet_peer_push_event(struct lnet_event *ev) CDEBUG(D_NET, "Peer %s has discovery disabled\n", libcfs_nid2str(lp->lp_primary_nid)); /* - * If the peer is going from discovery enabled to - * discovery disabled, we need to reflect that in our - * representation of the peer. + * Mark the peer for deletion if we already know about it + * and it's going from discovery set to no discovery set */ if (!(lp->lp_state & (LNET_PEER_NO_DISCOVERY | - LNET_PEER_DISCOVERING))) + LNET_PEER_DISCOVERING)) && + lp->lp_state & LNET_PEER_DISCOVERED) { + CDEBUG(D_NET, "Marking %s:0x%x for deletion\n", + libcfs_nid2str(lp->lp_primary_nid), + lp->lp_state); lp->lp_state |= LNET_PEER_MARK_DELETION; + } lp->lp_state |= LNET_PEER_NO_DISCOVERY; } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) { CDEBUG(D_NET, "Peer %s has discovery enabled\n", -- 1.8.3.1