X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Fpeer.c;h=bc6e25082ace2a6057b108c80cb14c51284676ff;hp=33bd35144c6e6ad7d0b0eaa70216e06f8e938b6d;hb=024f9303bc6f32a3113357c864765c4f9c93ed03;hpb=684943e2d0c2ad095e3521586d61d007b4f49abd diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 33bd351..bc6e250 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -540,6 +540,16 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) goto out; } } + + /* If we're asked to lock down the primary NID we shouldn't be + * deleting it + */ + if (lp->lp_state & LNET_PEER_LOCK_PRIMARY && + primary_nid == nid) { + rc = -EPERM; + goto out; + } + lpni = lnet_find_peer_ni_locked(nid); if (!lpni) { rc = -ENOENT; @@ -1399,13 +1409,18 @@ LNetPrimaryNID(lnet_nid_t nid) * down then this discovery can introduce long delays into the mount * process, so skip it if it isn't necessary. */ - while (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) { + if (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) { spin_lock(&lp->lp_lock); /* force a full discovery cycle */ - lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH; + lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH | + LNET_PEER_LOCK_PRIMARY; spin_unlock(&lp->lp_lock); - rc = lnet_discover_peer_locked(lpni, cpt, true); + /* start discovery in the background. Messages to that + * peer will not go through until the discovery is + * complete + */ + rc = lnet_discover_peer_locked(lpni, cpt, false); if (rc) goto out_decref; /* The lpni (or lp) for this NID may have changed and our ref is @@ -1419,14 +1434,6 @@ LNetPrimaryNID(lnet_nid_t nid) goto out_unlock; } lp = lpni->lpni_peer_net->lpn_peer; - - /* If we find that the peer has discovery disabled then we will - * not modify whatever primary NID is currently set for this - * peer. Thus, we can break out of this loop even if the peer - * is not fully up to date. - */ - if (lnet_is_discovery_disabled(lp)) - break; } primary_nid = lp->lp_primary_nid; out_decref: @@ -1463,9 +1470,9 @@ lnet_peer_get_net_locked(struct lnet_peer *peer, __u32 net_id) */ static int lnet_peer_attach_peer_ni(struct lnet_peer *lp, - struct lnet_peer_net *lpn, - struct lnet_peer_ni *lpni, - unsigned flags) + struct lnet_peer_net *lpn, + struct lnet_peer_ni *lpni, + unsigned flags) { struct lnet_peer_table *ptable; bool new_lpn = false; @@ -1532,6 +1539,8 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, lnet_peer_clr_non_mr_pref_nids(lp); } } + if (flags & LNET_PEER_LOCK_PRIMARY) + lp->lp_state |= LNET_PEER_LOCK_PRIMARY; spin_unlock(&lp->lp_lock); lp->lp_nnis++; @@ -1686,9 +1695,27 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) } /* If this is the primary NID, destroy the peer. */ if (lnet_peer_ni_is_primary(lpni)) { - struct lnet_peer *rtr_lp = + struct lnet_peer *lp2 = lpni->lpni_peer_net->lpn_peer; - int rtr_refcount = rtr_lp->lp_rtr_refcount; + int rtr_refcount = lp2->lp_rtr_refcount; + + /* If the new peer that this NID belongs to is + * a primary NID for another peer which we're + * suppose to preserve the Primary for then we + * don't want to mess with it. But the + * configuration is wrong at this point, so we + * should flag both of these peers as in a bad + * state + */ + if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY) { + spin_lock(&lp->lp_lock); + lp->lp_state |= LNET_PEER_BAD_CONFIG; + spin_unlock(&lp->lp_lock); + spin_lock(&lp2->lp_lock); + lp2->lp_state |= LNET_PEER_BAD_CONFIG; + spin_unlock(&lp2->lp_lock); + goto out_free_lpni; + } /* * if we're trying to delete a router it means * we're moving this peer NI to a new peer so must @@ -1696,9 +1723,9 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) */ if (rtr_refcount > 0) { flags |= LNET_PEER_RTR_NI_FORCE_DEL; - lnet_rtr_transfer_to_peer(rtr_lp, lp); + lnet_rtr_transfer_to_peer(lp2, lp); } - lnet_peer_del(lpni->lpni_peer_net->lpn_peer); + lnet_peer_del(lp2); lnet_peer_ni_decref_locked(lpni); lpni = lnet_peer_ni_alloc(nid); if (!lpni) { @@ -1755,7 +1782,8 @@ lnet_peer_set_primary_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) if (lp->lp_primary_nid == nid) goto out; - lp->lp_primary_nid = nid; + if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY)) + lp->lp_primary_nid = nid; rc = lnet_peer_add_nid(lp, nid, flags); if (rc) { @@ -1763,8 +1791,17 @@ lnet_peer_set_primary_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) goto out; } out: + /* if this is a configured peer or the primary for that peer has + * been locked, then we don't want to flag this scenario as + * a failure + */ + if (lp->lp_state & LNET_PEER_CONFIGURED || + lp->lp_state & LNET_PEER_LOCK_PRIMARY) + return 0; + CDEBUG(D_NET, "peer %s NID %s: %d\n", libcfs_nid2str(old), libcfs_nid2str(nid), rc); + return rc; }