X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Fpeer.c;h=6336410a51386180fc283d47b7283c708dfe9cb2;hp=43c8d35e60f000d2b87be2b66dc9a098edb967c1;hb=135b5c0009e5201ac70394ee1fe98e523fe86072;hpb=9283e2ed6655e89fe693d35313c9dcf1d5a6703a diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 43c8d35..6336410 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -258,6 +258,10 @@ lnet_peer_alloc(lnet_nid_t nid) init_waitqueue_head(&lp->lp_dc_waitq); spin_lock_init(&lp->lp_lock); lp->lp_primary_nid = nid; + if (lnet_peers_start_down()) + lp->lp_alive = false; + else + lp->lp_alive = true; /* * all peers created on a router should have health on @@ -400,8 +404,6 @@ lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni, bool force) /* decrement the ref count on the peer table */ ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; - LASSERT(ptable->pt_number > 0); - ptable->pt_number--; /* * The peer_ni can no longer be found with a lookup. But there @@ -651,7 +653,8 @@ lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) struct list_head *peers; struct lnet_peer_ni *lp; - LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING); + if (the_lnet.ln_state != LNET_STATE_RUNNING) + return NULL; peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; list_for_each_entry(lp, peers, lpni_hashlist) { @@ -1152,6 +1155,7 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid) bool lnet_is_discovery_disabled_locked(struct lnet_peer *lp) +__must_hold(&lp->lp_lock) { if (lnet_peer_discovery_disabled) return true; @@ -1256,7 +1260,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]); ptable->pt_version++; - ptable->pt_number++; /* This is the 1st refcount on lpni. */ atomic_inc(&lpni->lpni_refcount); } @@ -1527,11 +1530,7 @@ lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref) struct lnet_peer *lp; struct lnet_peer_net *lpn; struct lnet_peer_ni *lpni; - /* - * Assume peer is Multi-Rail capable and let discovery find out - * otherwise. - */ - unsigned flags = LNET_PEER_MULTI_RAIL; + unsigned flags = 0; int rc = 0; if (nid == LNET_NID_ANY) { @@ -1844,6 +1843,17 @@ lnet_peer_gw_discovery(struct lnet_peer *lp) return rc; } +bool +lnet_peer_is_uptodate(struct lnet_peer *lp) +{ + bool rc; + + spin_lock(&lp->lp_lock); + rc = lnet_peer_is_uptodate_locked(lp); + spin_unlock(&lp->lp_lock); + return rc; +} + /* * Is a peer uptodate from the point of view of discovery? * @@ -1853,11 +1863,11 @@ lnet_peer_gw_discovery(struct lnet_peer *lp) * Otherwise look at whether the peer needs rediscovering. */ bool -lnet_peer_is_uptodate(struct lnet_peer *lp) +lnet_peer_is_uptodate_locked(struct lnet_peer *lp) +__must_hold(&lp->lp_lock) { bool rc; - spin_lock(&lp->lp_lock); if (lp->lp_state & (LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH)) { @@ -1874,7 +1884,6 @@ lnet_peer_is_uptodate(struct lnet_peer *lp) } else { rc = false; } - spin_unlock(&lp->lp_lock); return rc; } @@ -1915,9 +1924,7 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp) { struct lnet_msg *msg, *tmp; int rc = 0; - struct list_head pending_msgs; - - INIT_LIST_HEAD(&pending_msgs); + LIST_HEAD(pending_msgs); CDEBUG(D_NET, "Discovery complete. Dequeue peer %s\n", libcfs_nid2str(lp->lp_primary_nid)); @@ -2301,20 +2308,6 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) /* - * Only enable the multi-rail feature on the peer if both sides of - * the connection have discovery on - */ - if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) { - CDEBUG(D_NET, "Peer %s has Multi-Rail feature enabled\n", - libcfs_nid2str(lp->lp_primary_nid)); - lp->lp_state |= LNET_PEER_MULTI_RAIL; - } else { - CDEBUG(D_NET, "Peer %s has Multi-Rail feature disabled\n", - libcfs_nid2str(lp->lp_primary_nid)); - lp->lp_state &= ~LNET_PEER_MULTI_RAIL; - } - - /* * The peer may have discovery disabled at its end. Set * NO_DISCOVERY as appropriate. */ @@ -2336,22 +2329,24 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) */ if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) { if (lp->lp_state & LNET_PEER_MULTI_RAIL) { - /* Everything's fine */ + CDEBUG(D_NET, "peer %s(%p) is MR\n", + libcfs_nid2str(lp->lp_primary_nid), lp); } else if (lp->lp_state & LNET_PEER_CONFIGURED) { CWARN("Reply says %s is Multi-Rail, DLC says not\n", libcfs_nid2str(lp->lp_primary_nid)); + } else if (lnet_peer_discovery_disabled) { + CDEBUG(D_NET, + "peer %s(%p) not MR: DD disabled locally\n", + libcfs_nid2str(lp->lp_primary_nid), lp); + } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) { + CDEBUG(D_NET, + "peer %s(%p) not MR: DD disabled remotely\n", + libcfs_nid2str(lp->lp_primary_nid), lp); } else { - /* - * if discovery is disabled then we don't want to - * update the state of the peer. All we'll do is - * update the peer_nis which were reported back in - * the initial ping - */ - - if (!lnet_is_discovery_disabled_locked(lp)) { - lp->lp_state |= LNET_PEER_MULTI_RAIL; - lnet_peer_clr_non_mr_pref_nids(lp); - } + CDEBUG(D_NET, "peer %s(%p) is MR capable\n", + libcfs_nid2str(lp->lp_primary_nid), lp); + lp->lp_state |= LNET_PEER_MULTI_RAIL; + lnet_peer_clr_non_mr_pref_nids(lp); } } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) { if (lp->lp_state & LNET_PEER_CONFIGURED) { @@ -2414,6 +2409,15 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) out: lp->lp_state &= ~LNET_PEER_PING_SENT; spin_unlock(&lp->lp_lock); + + lnet_net_lock(LNET_LOCK_EX); + /* + * If this peer is a gateway, call the routing callback to + * handle the ping reply + */ + if (lp->lp_rtr_refcount > 0) + lnet_router_discovery_ping_reply(lp); + lnet_net_unlock(LNET_LOCK_EX); } /* @@ -3248,11 +3252,9 @@ static int lnet_peer_discovery_wait_for_work(void) static void lnet_resend_msgs(void) { struct lnet_msg *msg, *tmp; - struct list_head resend; + LIST_HEAD(resend); int rc; - INIT_LIST_HEAD(&resend); - spin_lock(&the_lnet.ln_msg_resend_lock); list_splice(&the_lnet.ln_msg_resend, &resend); spin_unlock(&the_lnet.ln_msg_resend_lock);