*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/peer.c
*/
INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
INIT_LIST_HEAD(&lpni->lpni_rtr_pref_nids);
LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh);
- atomic_set(&lpni->lpni_refcount, 1);
+ kref_init(&lpni->lpni_kref);
lpni->lpni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
spin_lock_init(&lpni->lpni_lock);
spin_lock_init(&lp->lp_lock);
lp->lp_primary_nid = nid;
lp->lp_disc_src_nid = LNET_NID_ANY;
+ lp->lp_disc_dst_nid = LNET_NID_ANY;
if (lnet_peers_start_down())
lp->lp_alive = false;
else
return rc;
}
+/*
+ * Add a peer, described by an array of NIDs, on behalf of a kernel caller.
+ *
+ * The first entry of nids that is not LNET_NID_LO_0 is used as the primary
+ * NID. How the remaining entries are added depends on the local
+ * lnet_peer_discovery_disabled setting:
+ *  - discovery enabled:  they are added as further peer NIs of that
+ *    primary NID and mr (Multi-Rail) is passed as true;
+ *  - discovery disabled: each one is added as the primary NID of its own
+ *    peer and mr is passed as false.
+ *
+ * Every lnet_add_peer_ni() call is made with temp == true, so
+ * LNET_PEER_CONFIGURED is not set for these peers.
+ *
+ * nids      array of NIDs belonging to the peer
+ * num_nids  number of entries in nids
+ *
+ * Returns -EINVAL if nids is NULL or num_nids is 0, a negative value if
+ * LNetNIInit() fails, the first result of lnet_add_peer_ni() that is
+ * neither 0 nor -EEXIST, and 0 otherwise (-EEXIST is reported as success).
+ */
+int
+LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
+{
+ lnet_nid_t pnid = 0;
+ bool mr;
+ int i, rc;
+
+ if (!nids || num_nids < 1)
+ return -EINVAL;
+
+ /* Paired with the LNetNIFini() call on the way out. */
+ rc = LNetNIInit(LNET_PID_ANY);
+ if (rc < 0)
+ return rc;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+
+ /* Peers are only flagged Multi-Rail when discovery is enabled locally. */
+ mr = lnet_peer_discovery_disabled == 0;
+
+ rc = 0;
+ for (i = 0; i < num_nids; i++) {
+ /* LNET_NID_LO_0 entries are never added as peer NIs. */
+ if (nids[i] == LNET_NID_LO_0)
+ continue;
+
+ /* pnid == 0 means that no primary NID has been chosen yet. */
+ if (!pnid) {
+ pnid = nids[i];
+ rc = lnet_add_peer_ni(pnid, LNET_NID_ANY, mr, true);
+ } else if (lnet_peer_discovery_disabled) {
+ rc = lnet_add_peer_ni(nids[i], LNET_NID_ANY, mr, true);
+ } else {
+ rc = lnet_add_peer_ni(pnid, nids[i], mr, true);
+ }
+
+ /* An already existing peer/peer NI is not a reason to stop. */
+ if (rc && rc != -EEXIST)
+ goto unlock;
+ }
+
+unlock:
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ LNetNIFini();
+
+ /* rc may still hold -EEXIST from the last NID that was processed. */
+ return rc == -EEXIST ? 0 : rc;
+}
+EXPORT_SYMBOL(LNetAddPeer);
+
lnet_nid_t
LNetPrimaryNID(lnet_nid_t nid)
{
}
lp = lpni->lpni_peer_net->lpn_peer;
- while (!lnet_peer_is_uptodate(lp)) {
+ /* If discovery is disabled locally then we needn't bother running
+ * discovery here because discovery will not modify whatever
+ * primary NID is currently set for this peer. If the specified peer is
+ * down then this discovery can introduce long delays into the mount
+ * process, so skip it if it isn't necessary.
+ */
+ while (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) {
spin_lock(&lp->lp_lock);
/* force a full discovery cycle */
lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
}
lp = lpni->lpni_peer_net->lpn_peer;
- /* Only try once if discovery is disabled */
+ /* If we find that the peer has discovery disabled then we will
+ * not modify whatever primary NID is currently set for this
+ * peer. Thus, we can break out of this loop even if the peer
+ * is not fully up to date.
+ */
if (lnet_is_discovery_disabled(lp))
break;
}
/* Add peer_ni to peer_net */
lpni->lpni_peer_net = lpn;
- list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
+ if (lp->lp_primary_nid == lpni->lpni_nid)
+ list_add(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
+ else
+ list_add_tail(&lpni->lpni_peer_nis, &lpn->lpn_peer_nis);
lnet_update_peer_net_healthv(lpni);
lnet_peer_net_addref_locked(lpn);
if (!lpn->lpn_peer) {
new_lpn = true;
lpn->lpn_peer = lp;
- list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+ if (lp->lp_primary_nid == lpni->lpni_nid)
+ list_add(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+ else
+ list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
lnet_peer_addref_locked(lp);
}
else if ((lp->lp_state ^ flags) & LNET_PEER_MULTI_RAIL)
rc = -EPERM;
goto out;
+ } else if (!(flags & LNET_PEER_CONFIGURED)) {
+ if (lp->lp_primary_nid == nid) {
+ rc = -EEXIST;
+ goto out;
+ }
}
/* Delete and recreate as a configured peer. */
lnet_peer_del(lp);
if (lp->lp_primary_nid == nid)
goto out;
+
+ lp->lp_primary_nid = nid;
+
rc = lnet_peer_add_nid(lp, nid, flags);
- if (rc)
+ if (rc) {
+ lp->lp_primary_nid = old;
goto out;
- lp->lp_primary_nid = nid;
+ }
out:
CDEBUG(D_NET, "peer %s NID %s: %d\n",
libcfs_nid2str(old), libcfs_nid2str(nid), rc);
* being created/modified/deleted by a different thread.
*/
int
-lnet_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr)
+lnet_add_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, bool mr, bool temp)
{
struct lnet_peer *lp = NULL;
struct lnet_peer_ni *lpni;
- unsigned flags;
+ unsigned int flags = 0;
/* The prim_nid must always be specified */
if (prim_nid == LNET_NID_ANY)
return -EINVAL;
- flags = LNET_PEER_CONFIGURED;
+ if (!temp)
+ flags = LNET_PEER_CONFIGURED;
+
if (mr)
flags |= LNET_PEER_MULTI_RAIL;
lp = lpni->lpni_peer_net->lpn_peer;
/* Peer must have been configured. */
- if (!(lp->lp_state & LNET_PEER_CONFIGURED)) {
+ if (!temp && !(lp->lp_state & LNET_PEER_CONFIGURED)) {
CDEBUG(D_NET, "peer %s was not configured\n",
libcfs_nid2str(prim_nid));
return -ENOENT;
}
void
-lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
+lnet_destroy_peer_ni_locked(struct kref *ref)
{
+ struct lnet_peer_ni *lpni = container_of(ref, struct lnet_peer_ni,
+ lpni_kref);
struct lnet_peer_table *ptable;
struct lnet_peer_net *lpn;
CDEBUG(D_NET, "%p nid %s\n", lpni, libcfs_nid2str(lpni->lpni_nid));
- LASSERT(atomic_read(&lpni->lpni_refcount) == 0);
+ LASSERT(kref_read(&lpni->lpni_kref) == 0);
LASSERT(list_empty(&lpni->lpni_txq));
LASSERT(lpni->lpni_txqnob == 0);
LASSERT(list_empty(&lpni->lpni_peer_nis));
spin_lock(&lp->lp_lock);
list_splice_init(&lp->lp_dc_pendq, &pending_msgs);
spin_unlock(&lp->lp_lock);
- wake_up_all(&lp->lp_dc_waitq);
+ wake_up(&lp->lp_dc_waitq);
if (lp->lp_rtr_refcount > 0)
lnet_router_discovery_complete(lp);
spin_lock(&lp->lp_lock);
lp->lp_disc_src_nid = ev->target.nid;
+ lp->lp_disc_dst_nid = ev->source.nid;
/*
* If some kind of error happened the contents of message
goto out;
}
-
/*
* The peer may have discovery disabled at its end. Set
* NO_DISCOVERY as appropriate.
*/
- if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY) &&
- !lnet_peer_discovery_disabled) {
- CDEBUG(D_NET, "Peer %s has discovery enabled\n",
- libcfs_nid2str(lp->lp_primary_nid));
- lp->lp_state &= ~LNET_PEER_NO_DISCOVERY;
- } else {
+ if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY) ||
+ lnet_peer_discovery_disabled) {
CDEBUG(D_NET, "Peer %s has discovery disabled\n",
libcfs_nid2str(lp->lp_primary_nid));
+
+ /* Detect whether this peer has toggled discovery from on to
+ * off and whether we can delete and re-create the peer. Peers
+ * that were manually configured cannot be deleted by discovery.
+ * We need to delete this peer and re-create it if the peer was
+ * not configured manually, is currently considered DD capable,
+ * and either:
+ * 1. We've already discovered the peer (the peer has toggled
+ * the discovery feature from on to off), or
+ * 2. The peer is considered MR, but it was not user configured
+ * (this was a "temporary" peer created via the kernel APIs
+ * that we're discovering for the first time)
+ */
+ if (!(lp->lp_state & (LNET_PEER_CONFIGURED |
+ LNET_PEER_NO_DISCOVERY)) &&
+ (lp->lp_state & (LNET_PEER_DISCOVERED |
+ LNET_PEER_MULTI_RAIL))) {
+ CDEBUG(D_NET, "Marking %s:0x%x for deletion\n",
+ libcfs_nid2str(lp->lp_primary_nid),
+ lp->lp_state);
+ lp->lp_state |= LNET_PEER_MARK_DELETION;
+ }
lp->lp_state |= LNET_PEER_NO_DISCOVERY;
+ } else {
+ CDEBUG(D_NET, "Peer %s has discovery enabled\n",
+ libcfs_nid2str(lp->lp_primary_nid));
+ lp->lp_state &= ~LNET_PEER_NO_DISCOVERY;
}
/*
/* put peer back at end of request queue, if discovery not already
* done */
- if (rc == LNET_REDISCOVER_PEER && !lnet_peer_is_uptodate(lp)) {
+ if (rc == LNET_REDISCOVER_PEER && !lnet_peer_is_uptodate(lp) &&
+ lnet_peer_queue_for_discovery(lp)) {
list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
wake_up(&the_lnet.ln_dc_waitq);
}
static int lnet_peer_merge_data(struct lnet_peer *lp,
struct lnet_ping_buffer *pbuf)
{
+ struct lnet_peer_net *lpn;
struct lnet_peer_ni *lpni;
lnet_nid_t *curnis = NULL;
struct lnet_ni_status *addnis = NULL;
goto out;
}
}
+
+ /* The peer net for the primary NID should be the first entry in the
+ * peer's lp_peer_nets list, and the peer NI for the primary NID should
+ * be the first entry in its peer net's lpn_peer_nis list.
+ */
+ lpni = lnet_find_peer_ni_locked(pbuf->pb_info.pi_ni[1].ns_nid);
+ if (!lpni) {
+ CERROR("Internal error: Failed to lookup peer NI for primary NID: %s\n",
+ libcfs_nid2str(pbuf->pb_info.pi_ni[1].ns_nid));
+ goto out;
+ }
+
+ lnet_peer_ni_decref_locked(lpni);
+
+ lpn = lpni->lpni_peer_net;
+ if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets)
+ list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
+
+ if (lpni->lpni_peer_nis.prev != &lpni->lpni_peer_net->lpn_peer_nis)
+ list_move(&lpni->lpni_peer_nis,
+ &lpni->lpni_peer_net->lpn_peer_nis);
+
/*
* Errors other than -ENOMEM are due to peers having been
* configured with DLC. Ignore these because DLC overrides
* of deleting it.
*/
if (!list_empty(&lp->lp_dc_list))
- list_del(&lp->lp_dc_list);
+ list_del_init(&lp->lp_dc_list);
list_for_each_entry_safe(route, tmp,
&lp->lp_routes,
lr_gwlist)
* received by lp, we need to set the discovery source
* NID for new_lp to the NID stored in lp.
*/
- if (lp->lp_disc_src_nid != LNET_NID_ANY)
+ if (lp->lp_disc_src_nid != LNET_NID_ANY) {
new_lp->lp_disc_src_nid = lp->lp_disc_src_nid;
+ new_lp->lp_disc_dst_nid = lp->lp_disc_dst_nid;
+ }
spin_unlock(&new_lp->lp_lock);
spin_unlock(&lp->lp_lock);
return rc ? rc : LNET_REDISCOVER_PEER;
}
-/*
- * Select NID to send a Ping or Push to.
- */
-static lnet_nid_t lnet_peer_select_nid(struct lnet_peer *lp)
-{
- struct lnet_peer_ni *lpni;
-
- /* Look for a direct-connected NID for this peer. */
- lpni = NULL;
- while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
- if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
- continue;
- break;
- }
- if (lpni)
- return lpni->lpni_nid;
-
- /* Look for a routed-connected NID for this peer. */
- lpni = NULL;
- while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
- if (!lnet_find_rnet_locked(lpni->lpni_peer_net->lpn_net_id))
- continue;
- break;
- }
- if (lpni)
- return lpni->lpni_nid;
-
- return LNET_NID_ANY;
-}
-
/* Active side of ping. */
static int lnet_peer_send_ping(struct lnet_peer *lp)
__must_hold(&lp->lp_lock)
{
- lnet_nid_t pnid;
int nnis;
int rc;
int cpt;
cpt = lnet_net_lock_current();
/* Refcount for MD. */
lnet_peer_addref_locked(lp);
- pnid = lnet_peer_select_nid(lp);
lnet_net_unlock(cpt);
nnis = max(lp->lp_data_nnis, LNET_INTERFACES_MIN);
- rc = lnet_send_ping(pnid, &lp->lp_ping_mdh, nnis, lp,
+ rc = lnet_send_ping(lp->lp_primary_nid, &lp->lp_ping_mdh, nnis, lp,
the_lnet.ln_dc_handler, false);
/*
CERROR("Can't bind push source MD: %d\n", rc);
goto fail_error;
}
+
cpt = lnet_net_lock_current();
/* Refcount for MD. */
lnet_peer_addref_locked(lp);
id.pid = LNET_PID_LUSTRE;
- id.nid = lnet_peer_select_nid(lp);
+ if (lp->lp_disc_dst_nid != LNET_NID_ANY)
+ id.nid = lp->lp_disc_dst_nid;
+ else
+ id.nid = lp->lp_primary_nid;
lnet_net_unlock(cpt);
- if (id.nid == LNET_NID_ANY) {
- rc = -EHOSTUNREACH;
- goto fail_unlink;
- }
-
rc = LNetPut(lp->lp_disc_src_nid, lp->lp_push_mdh,
LNET_ACK_REQ, id, LNET_RESERVED_PORTAL,
LNET_PROTO_PING_MATCHBITS, 0, 0);
* scratch
*/
lp->lp_disc_src_nid = LNET_NID_ANY;
+ lp->lp_disc_dst_nid = LNET_NID_ANY;
if (rc)
goto fail_unlink;
aliveness = (lnet_is_peer_ni_alive(lp)) ? "up" : "down";
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount),
+ libcfs_nid2str(lp->lpni_nid), kref_read(&lp->lpni_kref),
aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits,
lp->lpni_rtrcredits, lp->lpni_minrtrcredits,
lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob);
lnet_is_peer_ni_alive(lp) ? "up" : "down");
*nid = lp->lpni_nid;
- *refcount = atomic_read(&lp->lpni_refcount);
+ *refcount = kref_read(&lp->lpni_kref);
*ni_peer_tx_credits =
lp->lpni_net->net_tunables.lct_peer_tx_credits;
*peer_tx_credits = lp->lpni_txcredits;
snprintf(lpni_info->cr_aliveness, LNET_MAX_STR_LEN,
lnet_is_peer_ni_alive(lpni) ? "up" : "down");
- lpni_info->cr_refcount = atomic_read(&lpni->lpni_refcount);
+ lpni_info->cr_refcount = kref_read(&lpni->lpni_kref);
lpni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?
lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0;
lpni_info->cr_peer_tx_credits = lpni->lpni_txcredits;
atomic_read(&lpni->lpni_hstats.hlt_remote_error);
lpni_hstats->hlpni_health_value =
atomic_read(&lpni->lpni_healthv);
+ lpni_hstats->hlpni_ping_count = lpni->lpni_ping_count;
+ lpni_hstats->hlpni_next_ping = lpni->lpni_next_ping;
if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats)))
goto out_free_hstats;
bulk += sizeof(*lpni_hstats);
return rc;
}
+/* must hold net_lock/0
+ *
+ * Append lpni to recovery_queue if it is eligible for recovery.
+ *
+ * lpni            peer NI to queue
+ * recovery_queue  list that lpni->lpni_recovery is added to the tail of
+ * now             current time used for the age check and passed on to
+ *                 lnet_peer_ni_set_next_ping(); the callers in this file
+ *                 obtain it from ktime_get_seconds()
+ *
+ * Nothing is queued, and no reference is taken, when any of these hold:
+ *  - the monitor thread state is not LNET_MT_STATE_RUNNING
+ *  - lpni->lpni_recovery is already on a list
+ *  - the health value equals LNET_MAX_HEALTH_VALUE
+ *  - lpni_last_alive is 0
+ *  - lnet_recovery_limit is non-zero and now is more than
+ *    lnet_recovery_limit past lpni_last_alive; in this case
+ *    lpni_ping_count is also reset to 0
+ *
+ * Otherwise a reference on lpni is taken for the queue and the next ping
+ * time is set before lpni is added to recovery_queue.
+ */
void
-lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni)
+lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni,
+ struct list_head *recovery_queue,
+ time64_t now)
{
/* the mt could've shutdown and cleaned up the queues */
if (the_lnet.ln_mt_state != LNET_MT_STATE_RUNNING)
return;
- if (list_empty(&lpni->lpni_recovery) &&
- atomic_read(&lpni->lpni_healthv) < LNET_MAX_HEALTH_VALUE) {
- CDEBUG(D_NET, "lpni %s added to recovery queue. Health = %d\n",
- libcfs_nid2str(lpni->lpni_nid),
- atomic_read(&lpni->lpni_healthv));
- list_add_tail(&lpni->lpni_recovery, &the_lnet.ln_mt_peerNIRecovq);
- lnet_peer_ni_addref_locked(lpni);
+ if (!list_empty(&lpni->lpni_recovery))
+ return;
+
+ if (atomic_read(&lpni->lpni_healthv) == LNET_MAX_HEALTH_VALUE)
+ return;
+
+ if (!lpni->lpni_last_alive) {
+ CDEBUG(D_NET,
+ "lpni %s(%p) not eligible for recovery last alive %lld\n",
+ libcfs_nid2str(lpni->lpni_nid), lpni,
+ lpni->lpni_last_alive);
+ return;
+ }
+
+ if (lnet_recovery_limit &&
+ now > lpni->lpni_last_alive + lnet_recovery_limit) {
+ CDEBUG(D_NET, "lpni %s aged out last alive %lld\n",
+ libcfs_nid2str(lpni->lpni_nid),
+ lpni->lpni_last_alive);
+ /* Reset the ping count so that if this peer NI is added back to
+ * the recovery queue we will send the first ping right away.
+ */
+ lpni->lpni_ping_count = 0;
+ return;
}
+
+ /* This peer NI is going on the recovery queue, so take a ref on it */
+ lnet_peer_ni_addref_locked(lpni);
+
+ lnet_peer_ni_set_next_ping(lpni, now);
+
+ CDEBUG(D_NET, "%s added to recovery queue. ping count: %u next ping: %lld last alive: %lld health: %d\n",
+ libcfs_nid2str(lpni->lpni_nid),
+ lpni->lpni_ping_count,
+ lpni->lpni_next_ping,
+ lpni->lpni_last_alive,
+ atomic_read(&lpni->lpni_healthv));
+
+ list_add_tail(&lpni->lpni_recovery, recovery_queue);
}
/* Call with the ln_api_mutex held */
struct lnet_peer_ni *lpni;
int lncpt;
int cpt;
+ time64_t now;
if (the_lnet.ln_state != LNET_STATE_RUNNING)
return;
+ now = ktime_get_seconds();
+
if (!all) {
lnet_net_lock(LNET_LOCK_EX);
lpni = lnet_find_peer_ni_locked(nid);
lnet_net_unlock(LNET_LOCK_EX);
return;
}
- atomic_set(&lpni->lpni_healthv, value);
- lnet_peer_ni_add_to_recoveryq_locked(lpni);
+ lnet_set_lpni_healthv_locked(lpni, value);
+ lnet_peer_ni_add_to_recoveryq_locked(lpni,
+ &the_lnet.ln_mt_peerNIRecovq, now);
lnet_peer_ni_decref_locked(lpni);
lnet_net_unlock(LNET_LOCK_EX);
return;
lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
/*
- * Walk all the peers and reset the healhv for each one to the
- * maximum value.
+ * Walk all the peers and reset the health value for each one to the
+ * specified value.
*/
lnet_net_lock(LNET_LOCK_EX);
for (cpt = 0; cpt < lncpt; cpt++) {
list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) {
list_for_each_entry(lpni, &lpn->lpn_peer_nis,
lpni_peer_nis) {
- atomic_set(&lpni->lpni_healthv, value);
- lnet_peer_ni_add_to_recoveryq_locked(lpni);
+ lnet_set_lpni_healthv_locked(lpni,
+ value);
+ lnet_peer_ni_add_to_recoveryq_locked(lpni,
+ &the_lnet.ln_mt_peerNIRecovq, now);
}
}
}