init_waitqueue_head(&lp->lp_dc_waitq);
spin_lock_init(&lp->lp_lock);
lp->lp_primary_nid = nid;
+
+ /*
+ * All peers created on a router should have health sensitivity
+ * enabled if it is not already enabled globally.
+ */
+ if (the_lnet.ln_routing && !lnet_health_sensitivity)
+ lp->lp_health_sensitivity = 1;
+
/*
* Turn off discovery for loopback peer. If you're creating a peer
* for the loopback interface then that was initiated when we
return lp;
}
+struct lnet_peer_net *
+lnet_get_next_peer_net_locked(struct lnet_peer *lp, __u32 prev_lpn_id)
+{
+ struct lnet_peer_net *net;
+
+ if (!prev_lpn_id) {
+ /* no net id provided; return the first net */
+ net = list_first_entry_or_null(&lp->lp_peer_nets,
+ struct lnet_peer_net,
+ lpn_peer_nets);
+
+ return net;
+ }
+
+ /* find the net after the one provided */
+ list_for_each_entry(net, &lp->lp_peer_nets, lpn_peer_nets) {
+ if (net->lpn_net_id == prev_lpn_id) {
+ /*
+ * if we reached the end of the list, loop back
+ * to the beginning.
+ */
+ if (net->lpn_peer_nets.next == &lp->lp_peer_nets)
+ return list_first_entry_or_null(&lp->lp_peer_nets,
+ struct lnet_peer_net,
+ lpn_peer_nets);
+ else
+ return list_next_entry(net, lpn_peer_nets);
+ }
+ }
+
+ return NULL;
+}
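A brief usage sketch (illustration only, not part of the patch): lnet_get_next_peer_net_locked() is meant to be called repeatedly with the previously returned net id, so a caller can walk a peer's nets round-robin. The caller below is hypothetical and assumes the lock protecting lp->lp_peer_nets is already held.

	/* Hypothetical caller, for illustration: visit each net of a peer
	 * once, starting from the first one.
	 */
	static void example_walk_peer_nets(struct lnet_peer *lp)
	{
		struct lnet_peer_net *lpn;
		__u32 prev = 0;

		/* a prev_lpn_id of 0 returns the first net */
		while ((lpn = lnet_get_next_peer_net_locked(lp, prev)) != NULL) {
			CDEBUG(D_NET, "peer net %u\n", lpn->lpn_net_id);
			/* the helper wraps around after the last net, so stop
			 * once the net just visited was the last one
			 */
			if (lpn->lpn_peer_nets.next == &lp->lp_peer_nets)
				break;
			prev = lpn->lpn_net_id;
		}
	}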
+
struct lnet_peer_ni *
lnet_get_next_peer_ni_locked(struct lnet_peer *peer,
struct lnet_peer_net *peer_net,
int cpt;
lnet_net_lock(LNET_LOCK_EX);
+ if (lnet_peer_discovery_disabled)
+ force = 0;
lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
for (cpt = 0; cpt < lncpt; cpt++) {
ptable = the_lnet.ln_peer_tables[cpt];
return primary_nid;
}
+bool
+lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
+{
+ if (lnet_peer_discovery_disabled)
+ return true;
+
+ if (!(lp->lp_state & LNET_PEER_MULTI_RAIL) ||
+ (lp->lp_state & LNET_PEER_NO_DISCOVERY)) {
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Peer Discovery
+ */
+bool
+lnet_is_discovery_disabled(struct lnet_peer *lp)
+{
+ bool rc = false;
+
+ spin_lock(&lp->lp_lock);
+ rc = lnet_is_discovery_disabled_locked(lp);
+ spin_unlock(&lp->lp_lock);
+
+ return rc;
+}
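As an aside (illustrative, not part of the patch): the pair above follows the usual _locked/unlocked convention, where the wrapper takes lp->lp_lock itself. A hypothetical caller:

	/* Hypothetical illustration of the locking contract. */
	static bool example_discovery_disabled(struct lnet_peer *lp, bool have_lp_lock)
	{
		bool disabled;

		if (have_lp_lock)
			/* caller already holds lp->lp_lock */
			disabled = lnet_is_discovery_disabled_locked(lp);
		else
			/* wrapper takes and releases lp->lp_lock itself */
			disabled = lnet_is_discovery_disabled(lp);

		return disabled;
	}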
+
lnet_nid_t
LNetPrimaryNID(lnet_nid_t nid)
{
goto out_unlock;
}
lp = lpni->lpni_peer_net->lpn_peer;
+
while (!lnet_peer_is_uptodate(lp)) {
rc = lnet_discover_peer_locked(lpni, cpt, true);
if (rc)
goto out_decref;
lp = lpni->lpni_peer_net->lpn_peer;
+
+ /* Only try once if discovery is disabled */
+ if (lnet_is_discovery_disabled(lp))
+ break;
}
primary_nid = lp->lp_primary_nid;
out_decref:
}
bool
-lnet_is_discovery_disabled_locked(struct lnet_peer *lp)
-{
- if (lnet_peer_discovery_disabled)
- return true;
-
- if (!(lp->lp_state & LNET_PEER_MULTI_RAIL) ||
- (lp->lp_state & LNET_PEER_NO_DISCOVERY)) {
- return true;
- }
-
- return false;
-}
-
-/*
- * Peer Discovery
- */
-bool
-lnet_is_discovery_disabled(struct lnet_peer *lp)
-{
- bool rc = false;
-
- spin_lock(&lp->lp_lock);
- rc = lnet_is_discovery_disabled_locked(lp);
- spin_unlock(&lp->lp_lock);
-
- return rc;
-}
-
-bool
lnet_peer_gw_discovery(struct lnet_peer *lp)
{
bool rc = false;
DEFINE_WAIT(wait);
struct lnet_peer *lp;
int rc = 0;
+ int count = 0;
again:
lnet_net_unlock(cpt);
* zombie if we race with DLC, so we must check for that.
*/
for (;;) {
+ /* Keep lp alive when the lnet_net_lock is unlocked */
+ lnet_peer_addref_locked(lp);
prepare_to_wait(&lp->lp_dc_waitq, &wait, TASK_INTERRUPTIBLE);
if (signal_pending(current))
break;
if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
break;
+ /*
+ * Don't repeat discovery if discovery is disabled. This ensures
+ * discovery can still be used as a standard ping, for backwards
+ * compatibility with routers which do not support discovery or
+ * have it disabled.
+ */
+ if (lnet_is_discovery_disabled(lp) && count > 0)
+ break;
if (lp->lp_dc_error)
break;
if (lnet_peer_is_uptodate(lp))
break;
lnet_peer_queue_for_discovery(lp);
+ count++;
+ CDEBUG(D_NET, "Discovery attempt # %d\n", count);
- if (lnet_is_discovery_disabled(lp))
- break;
/*
- * if caller requested a non-blocking operation then
- * return immediately. Once discovery is complete then the
- * peer ref will be decremented and any pending messages
- * that were stopped due to discovery will be transmitted.
+ * If the caller requested a non-blocking operation then return
+ * immediately. Once discovery is complete, any pending messages
+ * that were stopped due to discovery will be transmitted.
*/
if (!block)
break;
- lnet_peer_addref_locked(lp);
lnet_net_unlock(LNET_LOCK_EX);
schedule();
finish_wait(&lp->lp_dc_waitq, &wait);
lnet_net_unlock(LNET_LOCK_EX);
lnet_net_lock(cpt);
-
+ lnet_peer_decref_locked(lp);
/*
- * If the peer has changed after we've discovered the older peer,
- * then we need to discovery the new peer to make sure the
- * interface information is up to date
+ * The peer may have changed while the lock was dropped, so re-check
+ * and rediscover if that turns out to be the case. The reference
+ * held on lp ensures that even if it was unlinked from lpni its
+ * memory could not be recycled, so the check below is sufficient to
+ * detect the change. If the peer changed, lp must not be
+ * dereferenced any further.
*/
if (lp != lpni->lpni_peer_net->lpn_peer)
goto again;
struct lnet_peer *lp;
int rc;
+ wait_for_completion(&the_lnet.ln_started);
+
CDEBUG(D_NET, "started\n");
cfs_block_allsigs();
LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
- wake_up(&the_lnet.ln_dc_waitq);
+
+ /* In the LNetNIInit() path we may be stopping discovery before it
+ * has entered its work loop.
+ */
+ if (!completion_done(&the_lnet.ln_started))
+ complete(&the_lnet.ln_started);
+ else
+ wake_up(&the_lnet.ln_dc_waitq);
wait_event(the_lnet.ln_dc_waitq,
the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);
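A sketch of the start/stop handshake these two hunks implement (illustrative; the startup-side call site is not shown in this patch and its placement below is an assumption): the discovery thread parks on ln_started before entering its work loop, so the stop path must complete that completion itself if startup never did, otherwise the parked thread could never observe the state change and exit.

	/* discovery thread (as in lnet_peer_discovery() above): park until
	 * startup finishes
	 */
	wait_for_completion(&the_lnet.ln_started);
	/* ... work loop runs while ln_dc_state == LNET_DC_STATE_RUNNING ... */

	/* startup path (hypothetical placement, e.g. late in LNetNIInit()):
	 * release the parked discovery thread
	 */
	complete(&the_lnet.ln_started);

	/* stop path (as in this patch): if startup never signalled ln_started,
	 * signal it here so the parked thread wakes, sees
	 * LNET_DC_STATE_STOPPING and exits; otherwise wake the wait queue.
	 */
	if (!completion_done(&the_lnet.ln_started))
		complete(&the_lnet.ln_started);
	else
		wake_up(&the_lnet.ln_dc_waitq);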