When discovery is toggled, send a push message to all peers.
When a node receives a push notification that discovery has been
turned off on a peer where it was previously on, it deletes the
peer information. If the peer is a router, its routes are recreated.
Test-parameters: trivial
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I58f9f42542e4c05763128d7c9d23108c3e7f13a3
Reviewed-on: https://review.whamcloud.com/36919
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid,
__u32 priority, __u32 sensitivity);
int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
+void lnet_move_route(struct lnet_route *route, struct lnet_peer *lp,
+ struct list_head *rt_list);
void lnet_destroy_routes(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive, __u32 *priority,
/* gw has undergone discovery (does not indicate success or failure) */
#define LNET_PEER_RTR_DISCOVERED (1 << 17)
+/* peer is marked for deletion */
+#define LNET_PEER_MARK_DELETION (1 << 18)
+
struct lnet_peer_net {
/* chain on lp_peer_nets */
struct list_head lpn_peer_nets;
return 0;
}
- *discovery = value;
-
+ /*
+ * We still want to set the discovery value even when LNet is not
+ * running. This is the case when LNet is being loaded and we want
+ * the module parameters to take effect. Otherwise, if we're
+ * changing the value dynamically, we set it as part of updating
+ * the peers (before or after the push, depending on the direction).
+ */
if (the_lnet.ln_state != LNET_STATE_RUNNING) {
+ *discovery = value;
mutex_unlock(&the_lnet.ln_api_mutex);
return 0;
}
pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
lnet_net_unlock(LNET_LOCK_EX);
+ /*
+ * Always update the peers. This will result in a push to the
+ * peers with the updated capabilities feature mask. The peer can
+ * then take appropriate action to update its representation of
+ * the node.
+ *
+ * If discovery is currently off (*discovery > 0) and is being
+ * turned on, set the flag first, before pushing the update: the
+ * discovery flag must be on before pushing. Otherwise, if discovery
+ * is on and is being turned off, push first and only then turn it
+ * off. In the former case the flag is set twice, which is
+ * preferable to duplicating code in an if/else statement.
+ */
+ if (*discovery > 0 && value == 0)
+ *discovery = value;
lnet_push_update_to_peers(1);
+ *discovery = value;
mutex_unlock(&the_lnet.ln_api_mutex);
if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY)) {
CDEBUG(D_NET, "Peer %s has discovery disabled\n",
libcfs_nid2str(lp->lp_primary_nid));
+ /*
+ * If the peer is going from discovery enabled to
+ * discovery disabled, we need to reflect that in our
+ * representation of the peer.
+ */
+ if (!(lp->lp_state & LNET_PEER_NO_DISCOVERY))
+ lp->lp_state |= LNET_PEER_MARK_DELETION;
lp->lp_state |= LNET_PEER_NO_DISCOVERY;
} else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) {
CDEBUG(D_NET, "Peer %s has discovery enabled\n",
} else {
CDEBUG(D_NET, "Peer %s has discovery disabled\n",
libcfs_nid2str(lp->lp_primary_nid));
+ /*
+ * If the peer is going from discovery enabled to
+ * discovery disabled, we need to reflect that in our
+ * representation of the peer.
+ */
+ if (!(lp->lp_state & LNET_PEER_NO_DISCOVERY))
+ lp->lp_state |= LNET_PEER_MARK_DELETION;
lp->lp_state |= LNET_PEER_NO_DISCOVERY;
}
lnet_peer_discovery_complete(lp);
if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
break;
+
+ if (lp->lp_state & LNET_PEER_MARK_DELETION) {
+ struct list_head rlist;
+ struct lnet_route *route, *tmp;
+ int sensitivity = lp->lp_health_sensitivity;
+
+ INIT_LIST_HEAD(&rlist);
+
+ /*
+ * Remove the peer from the discovery work
+ * queue, if it is on it, in preparation
+ * for deleting it.
+ */
+ if (!list_empty(&lp->lp_dc_list))
+ list_del(&lp->lp_dc_list);
+
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ list_for_each_entry_safe(route, tmp,
+ &lp->lp_routes,
+ lr_gwlist)
+ lnet_move_route(route, NULL, &rlist);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* delete the peer */
+ lnet_peer_del(lp);
+
+ list_for_each_entry_safe(route, tmp,
+ &rlist, lr_list) {
+ /* re-add these routes */
+ lnet_add_route(route->lr_net,
+ route->lr_hops,
+ route->lr_nid,
+ route->lr_priority,
+ sensitivity);
+ LIBCFS_FREE(route, sizeof(*route));
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ lnet_net_lock(LNET_LOCK_EX);
+ }
}
lnet_net_unlock(LNET_LOCK_EX);
static void lnet_add_route_to_rnet(struct lnet_remotenet *rnet,
struct lnet_route *route);
-static void lnet_del_route_from_rnet(lnet_nid_t gw_nid,
- struct list_head *route_list,
+static void lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
struct list_head *zombies);
static int
return 0;
}
-static inline void
-lnet_move_route(struct lnet_route *route, struct lnet_peer *lp)
+void
+lnet_move_route(struct lnet_route *route, struct lnet_peer *lp,
+ struct list_head *rt_list)
{
struct lnet_remotenet *rnet;
struct list_head zombies;
+ struct list_head *l;
INIT_LIST_HEAD(&zombies);
+ if (rt_list)
+ l = rt_list;
+ else
+ l = &zombies;
+
rnet = lnet_find_rnet_locked(route->lr_net);
LASSERT(rnet);
- lnet_del_route_from_rnet(route->lr_nid, &rnet->lrn_routes,
- &zombies);
+ CDEBUG(D_NET, "deleting route %s->%s\n",
+ libcfs_net2str(route->lr_net),
+ libcfs_nid2str(route->lr_nid));
+
+ /*
+ * use the gateway's lp_primary_nid to delete the route as the
+ * lr_nid can be a constituent NID of the peer
+ */
+ lnet_del_route_from_rnet(route->lr_gateway->lp_primary_nid,
+ &rnet->lrn_routes, l);
if (lp) {
- route = list_first_entry(&zombies, struct lnet_route,
+ route = list_first_entry(l, struct lnet_route,
lr_list);
route->lr_gateway = lp;
lnet_add_route_to_rnet(rnet, route);
} else {
- while (!list_empty(&zombies)) {
- route = list_first_entry(&zombies, struct lnet_route,
+ while (!list_empty(l) && !rt_list) {
+ route = list_first_entry(l, struct lnet_route,
lr_list);
list_del(&route->lr_list);
LIBCFS_FREE(route, sizeof(*route));
}
}
-
}
void
else if (route->lr_hops >= r2->lr_hops)
present = true;
else
- lnet_move_route(r2, NULL);
+ lnet_move_route(r2, NULL, NULL);
}
}
if (present)
- lnet_move_route(route, NULL);
+ lnet_move_route(route, NULL, NULL);
else
- lnet_move_route(route, target);
+ lnet_move_route(route, target, NULL);
}
if (list_empty(&target->lp_rtr_list)) {
return rc;
}
-static void
+void
lnet_del_route_from_rnet(lnet_nid_t gw_nid, struct list_head *route_list,
struct list_head *zombies)
{