*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/peer.c
*/
INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
INIT_LIST_HEAD(&lpni->lpni_rtr_pref_nids);
LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh);
- atomic_set(&lpni->lpni_refcount, 1);
+ kref_init(&lpni->lpni_kref);
lpni->lpni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
spin_lock_init(&lpni->lpni_lock);
spin_lock_init(&lp->lp_lock);
lp->lp_primary_nid = nid;
lp->lp_disc_src_nid = LNET_NID_ANY;
+ lp->lp_disc_dst_nid = LNET_NID_ANY;
if (lnet_peers_start_down())
lp->lp_alive = false;
else
}
lp = lpni->lpni_peer_net->lpn_peer;
- while (!lnet_peer_is_uptodate(lp)) {
+ /* If discovery is disabled locally then we needn't bother running
+ * discovery here because discovery will not modify whatever
+ * primary NID is currently set for this peer. If the specified peer is
+ * down then this discovery can introduce long delays into the mount
+ * process, so skip it if it isn't necessary.
+ */
+ while (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) {
spin_lock(&lp->lp_lock);
/* force a full discovery cycle */
lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
}
lp = lpni->lpni_peer_net->lpn_peer;
- /* Only try once if discovery is disabled */
+ /* If we find that the peer has discovery disabled then we will
+ * not modify whatever primary NID is currently set for this
+ * peer. Thus, we can break out of this loop even if the peer
+ * is not fully up to date.
+ */
if (lnet_is_discovery_disabled(lp))
break;
}
/* Add peer_ni to peer_net */
lpni->lpni_peer_net = lpn;
- list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
+ if (lp->lp_primary_nid == lpni->lpni_nid)
+ list_add(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
+ else
+ list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
lnet_update_peer_net_healthv(lpni);
lnet_peer_net_addref_locked(lpn);
if (!lpn->lpn_peer) {
new_lpn = true;
lpn->lpn_peer = lp;
- list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+ if (lp->lp_primary_nid == lpni->lpni_nid)
+ list_add(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+ else
+ list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
lnet_peer_addref_locked(lp);
}
if (lp->lp_primary_nid == nid)
goto out;
+
+ lp->lp_primary_nid = nid;
+
rc = lnet_peer_add_nid(lp, nid, flags);
- if (rc)
+ if (rc) {
+ lp->lp_primary_nid = old;
goto out;
- lp->lp_primary_nid = nid;
+ }
out:
CDEBUG(D_NET, "peer %s NID %s: %d\n",
libcfs_nid2str(old), libcfs_nid2str(nid), rc);
}
void
-lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
+lnet_destroy_peer_ni_locked(struct kref *ref)
{
+ struct lnet_peer_ni *lpni = container_of(ref, struct lnet_peer_ni,
+ lpni_kref);
struct lnet_peer_table *ptable;
struct lnet_peer_net *lpn;
CDEBUG(D_NET, "%p nid %s\n", lpni, libcfs_nid2str(lpni->lpni_nid));
- LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
+ LASSERT(kref_read(&lpni->lpni_kref) == 0);
LASSERT(list_empty(&lpni->lpni_txq));
LASSERT(lpni->lpni_txqnob == 0);
LASSERT(list_empty(&lpni->lpni_peer_nis));
spin_lock(&lp->lp_lock);
list_splice_init(&lp->lp_dc_pendq, &pending_msgs);
spin_unlock(&lp->lp_lock);
- wake_up_all(&lp->lp_dc_waitq);
+ wake_up(&lp->lp_dc_waitq);
if (lp->lp_rtr_refcount > 0)
lnet_router_discovery_complete(lp);
spin_lock(&lp->lp_lock);
lp->lp_disc_src_nid = ev->target.nid;
+ lp->lp_disc_dst_nid = ev->source.nid;
/*
* If some kind of error happened the contents of message
static int lnet_peer_merge_data(struct lnet_peer *lp,
struct lnet_ping_buffer *pbuf)
{
+ struct lnet_peer_net *lpn;
struct lnet_peer_ni *lpni;
lnet_nid_t *curnis = NULL;
struct lnet_ni_status *addnis = NULL;
goto out;
}
}
+
+ /* The peer net for the primary NID should be the first entry in the
+ * peer's lp_peer_nets list, and the peer NI for the primary NID should
+ * be the first entry in its peer net's lpn_peer_nis list.
+ */
+ lpni = lnet_find_peer_ni_locked(pbuf->pb_info.pi_ni[1].ns_nid);
+ if (!lpni) {
+ CERROR("Internal error: Failed to lookup peer NI for primary NID: %s\n",
+ libcfs_nid2str(pbuf->pb_info.pi_ni[1].ns_nid));
+ goto out;
+ }
+
+ lnet_peer_ni_decref_locked(lpni);
+
+ lpn = lpni->lpni_peer_net;
+ if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets)
+ list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+
+ if (lpni->lpni_peer_nis.prev != &lpni->lpni_peer_net->lpn_peer_nis)
+ list_move(&lpni->lpni_peer_nis,
+ &lpni->lpni_peer_net->lpn_peer_nis);
+
/*
* Errors other than -ENOMEM are due to peers having been
* configured with DLC. Ignore these because DLC overrides
* received by lp, we need to set the discovery source
* NID for new_lp to the NID stored in lp.
*/
- if (lp->lp_disc_src_nid != LNET_NID_ANY)
+ if (lp->lp_disc_src_nid != LNET_NID_ANY) {
new_lp->lp_disc_src_nid = lp->lp_disc_src_nid;
+ new_lp->lp_disc_dst_nid = lp->lp_disc_dst_nid;
+ }
spin_unlock(&new_lp->lp_lock);
spin_unlock(&lp->lp_lock);
return rc ? rc : LNET_REDISCOVER_PEER;
}
-/*
- * Select NID to send a Ping or Push to.
- */
-static lnet_nid_t lnet_peer_select_nid(struct lnet_peer *lp)
-{
- struct lnet_peer_ni *lpni;
-
- /* Look for a direct-connected NID for this peer. */
- lpni = NULL;
- while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
- if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
- continue;
- break;
- }
- if (lpni)
- return lpni->lpni_nid;
-
- /* Look for a routed-connected NID for this peer. */
- lpni = NULL;
- while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
- if (!lnet_find_rnet_locked(lpni->lpni_peer_net->lpn_net_id))
- continue;
- break;
- }
- if (lpni)
- return lpni->lpni_nid;
-
- return LNET_NID_ANY;
-}
-
/* Active side of ping. */
static int lnet_peer_send_ping(struct lnet_peer *lp)
__must_hold(&lp->lp_lock)
{
- lnet_nid_t pnid;
int nnis;
int rc;
int cpt;
cpt = lnet_net_lock_current();
/* Refcount for MD. */
lnet_peer_addref_locked(lp);
- pnid = lnet_peer_select_nid(lp);
lnet_net_unlock(cpt);
nnis = max(lp->lp_data_nnis, LNET_INTERFACES_MIN);
- rc = lnet_send_ping(pnid, &lp->lp_ping_mdh, nnis, lp,
+ rc = lnet_send_ping(lp->lp_primary_nid, &lp->lp_ping_mdh, nnis, lp,
the_lnet.ln_dc_handler, false);
/*
CERROR("Can't bind push source MD: %d\n", rc);
goto fail_error;
}
+
cpt = lnet_net_lock_current();
/* Refcount for MD. */
lnet_peer_addref_locked(lp);
id.pid = LNET_PID_LUSTRE;
- id.nid = lnet_peer_select_nid(lp);
+ if (lp->lp_disc_dst_nid != LNET_NID_ANY)
+ id.nid = lp->lp_disc_dst_nid;
+ else
+ id.nid = lp->lp_primary_nid;
lnet_net_unlock(cpt);
- if (id.nid == LNET_NID_ANY) {
- rc = -EHOSTUNREACH;
- goto fail_unlink;
- }
-
rc = LNetPut(lp->lp_disc_src_nid, lp->lp_push_mdh,
LNET_ACK_REQ, id, LNET_RESERVED_PORTAL,
LNET_PROTO_PING_MATCHBITS, 0, 0);
* scratch
*/
lp->lp_disc_src_nid = LNET_NID_ANY;
+ lp->lp_disc_dst_nid = LNET_NID_ANY;
if (rc)
goto fail_unlink;
aliveness = (lnet_is_peer_ni_alive(lp)) ? "up" : "down";
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
+ libcfs_nid2str(lp->lpni_nid), kref_read(&lp->lpni_kref),
aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
lnet_is_peer_ni_alive(lp) ? "up" : "down");
*nid = lp->lpni_nid;
- *refcount = atomic_read(&lp->lpni_refcount);
+ *refcount = kref_read(&lp->lpni_kref);
*ni_peer_tx_credits =
lp->lpni_net->net_tunables.lct_peer_tx_credits;
*peer_tx_credits = lp->lpni_txcredits;
snprintf(lpni_info->cr_aliveness, LNET_MAX_STR_LEN,
lnet_is_peer_ni_alive(lpni) ? "up" : "down");
- lpni_info->cr_refcount = atomic_read(&lpni->lpni_refcount);
+ lpni_info->cr_refcount = kref_read(&lpni->lpni_kref);
lpni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?
lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
lpni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
if (atomic_read(&lpni->lpni_healthv) == LNET_MAX_HEALTH_VALUE)
return;
+ if (!lpni->lpni_last_alive) {
+ CDEBUG(D_NET,
+ "lpni %s(%p) not eligible for recovery last alive %lld\n",
+ libcfs_nid2str(lpni->lpni_nid), lpni,
+ lpni->lpni_last_alive);
+ return;
+ }
+
if (now > lpni->lpni_last_alive + lnet_recovery_limit) {
CDEBUG(D_NET, "lpni %s aged out last alive %lld\n",
libcfs_nid2str(lpni->lpni_nid),
lpni->lpni_last_alive);
+ /* Reset the ping count so that if this peer NI is added back to
+ * the recovery queue we will send the first ping right away.
+ */
+ lpni->lpni_ping_count = 0;
return;
}
/* This peer NI is going on the recovery queue, so take a ref on it */
lnet_peer_ni_addref_locked(lpni);
- CDEBUG(D_NET, "%s added to recovery queue. last alive: %lld health: %d\n",
+ lnet_peer_ni_set_next_ping(lpni, now);
+
+ CDEBUG(D_NET, "%s added to recovery queue. ping count: %u next ping: %lld last alive: %lld health: %d\n",
libcfs_nid2str(lpni->lpni_nid),
+ lpni->lpni_ping_count,
+ lpni->lpni_next_ping,
lpni->lpni_last_alive,
atomic_read(&lpni->lpni_healthv));