X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Flnet%2Fpeer.c;h=e9505fd50670c323bb30453a489531fa80968db7;hp=28bad27013ba04181d97d76588086a4688b3cc71;hb=4ef62976448d6821df9aab3e720fd8d9d0bdefce;hpb=398f4071dc17c83e6ac1600174b46e2675579ce7 diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 28bad27..e9505fd 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -258,6 +258,14 @@ lnet_peer_alloc(lnet_nid_t nid) init_waitqueue_head(&lp->lp_dc_waitq); spin_lock_init(&lp->lp_lock); lp->lp_primary_nid = nid; + + /* + * all peers created on a router should have health on + * if it's not already on. + */ + if (the_lnet.ln_routing && !lnet_health_sensitivity) + lp->lp_health_sensitivity = 1; + /* * Turn off discovery for loopback peer. If you're creating a peer * for the loopback interface then that was initiated when we @@ -366,12 +374,12 @@ lnet_peer_detach_peer_ni_locked(struct lnet_peer_ni *lpni) /* called with lnet_net_lock LNET_LOCK_EX held */ static int -lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni) +lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni, bool force) { struct lnet_peer_table *ptable = NULL; /* don't remove a peer_ni if it's also a gateway */ - if (lnet_isrouter(lpni)) { + if (lnet_isrouter(lpni) && !force) { CERROR("Peer NI %s is a gateway. Can not delete it\n", libcfs_nid2str(lpni->lpni_nid)); return -EBUSY; @@ -428,7 +436,7 @@ void lnet_peer_uninit(void) /* remove all peer_nis from the remote peer and the hash list */ list_for_each_entry_safe(lpni, tmp, &the_lnet.ln_remote_peer_ni_list, lpni_on_remote_peer_ni_list) - lnet_peer_ni_del_locked(lpni); + lnet_peer_ni_del_locked(lpni, false); lnet_peer_tables_destroy(); @@ -446,7 +454,7 @@ lnet_peer_del_locked(struct lnet_peer *peer) lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni); while (lpni != NULL) { lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni); - rc = lnet_peer_ni_del_locked(lpni); + rc = lnet_peer_ni_del_locked(lpni, false); if (rc != 0) rc2 = rc; lpni = lpni2; @@ -480,6 +488,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) struct lnet_peer_ni *lpni; lnet_nid_t primary_nid = lp->lp_primary_nid; int rc = 0; + bool force = (flags & LNET_PEER_RTR_NI_FORCE_DEL) ? true : false; if (!(flags & LNET_PEER_CONFIGURED)) { if (lp->lp_state & LNET_PEER_CONFIGURED) { @@ -502,14 +511,21 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) * This function only allows deletion of the primary NID if it * is the only NID. */ - if (nid == lp->lp_primary_nid && lp->lp_nnis != 1) { + if (nid == lp->lp_primary_nid && lp->lp_nnis != 1 && !force) { rc = -EBUSY; goto out; } lnet_net_lock(LNET_LOCK_EX); - rc = lnet_peer_ni_del_locked(lpni); + if (nid == lp->lp_primary_nid && lp->lp_nnis != 1 && force) { + struct lnet_peer_ni *lpni2; + /* assign the next peer_ni to be the primary */ + lpni2 = lnet_get_next_peer_ni_locked(lp, NULL, lpni); + LASSERT(lpni2); + lp->lp_primary_nid = lpni->lpni_nid; + } + rc = lnet_peer_ni_del_locked(lpni, force); lnet_net_unlock(LNET_LOCK_EX); @@ -537,7 +553,7 @@ lnet_peer_table_cleanup_locked(struct lnet_net *net, peer = lpni->lpni_peer_net->lpn_peer; if (peer->lp_primary_nid != lpni->lpni_nid) { - lnet_peer_ni_del_locked(lpni); + lnet_peer_ni_del_locked(lpni, false); continue; } /* @@ -663,6 +679,24 @@ lnet_find_peer_ni_locked(lnet_nid_t nid) return lpni; } +struct lnet_peer_ni * +lnet_peer_get_ni_locked(struct lnet_peer *lp, lnet_nid_t nid) +{ + struct lnet_peer_net *lpn; + struct lnet_peer_ni *lpni; + + lpn = lnet_peer_get_net_locked(lp, LNET_NIDNET(nid)); + if (!lpn) + return NULL; + + list_for_each_entry(lpni, &lpn->lpn_peer_nis, lpni_peer_nis) { + if (lpni->lpni_nid == nid) + return lpni; + } + + return NULL; +} + struct lnet_peer * lnet_find_peer(lnet_nid_t nid) { @@ -682,6 +716,39 @@ lnet_find_peer(lnet_nid_t nid) return lp; } +struct lnet_peer_net * +lnet_get_next_peer_net_locked(struct lnet_peer *lp, __u32 prev_lpn_id) +{ + struct lnet_peer_net *net; + + if (!prev_lpn_id) { + /* no net id provided return the first net */ + net = list_first_entry_or_null(&lp->lp_peer_nets, + struct lnet_peer_net, + lpn_peer_nets); + + return net; + } + + /* find the net after the one provided */ + list_for_each_entry(net, &lp->lp_peer_nets, lpn_peer_nets) { + if (net->lpn_net_id == prev_lpn_id) { + /* + * if we reached the end of the list loop to the + * beginning. + */ + if (net->lpn_peer_nets.next == &lp->lp_peer_nets) + return list_first_entry_or_null(&lp->lp_peer_nets, + struct lnet_peer_net, + lpn_peer_nets); + else + return list_next_entry(net, lpn_peer_nets); + } + } + + return NULL; +} + struct lnet_peer_ni * lnet_get_next_peer_ni_locked(struct lnet_peer *peer, struct lnet_peer_net *peer_net, @@ -815,6 +882,8 @@ lnet_push_update_to_peers(int force) int cpt; lnet_net_lock(LNET_LOCK_EX); + if (lnet_peer_discovery_disabled) + force = 0; lncpt = cfs_percpt_number(the_lnet.ln_peer_tables); for (cpt = 0; cpt < lncpt; cpt++) { ptable = the_lnet.ln_peer_tables[cpt]; @@ -1081,6 +1150,35 @@ lnet_peer_primary_nid_locked(lnet_nid_t nid) return primary_nid; } +bool +lnet_is_discovery_disabled_locked(struct lnet_peer *lp) +{ + if (lnet_peer_discovery_disabled) + return true; + + if (!(lp->lp_state & LNET_PEER_MULTI_RAIL) || + (lp->lp_state & LNET_PEER_NO_DISCOVERY)) { + return true; + } + + return false; +} + +/* + * Peer Discovery + */ +bool +lnet_is_discovery_disabled(struct lnet_peer *lp) +{ + bool rc = false; + + spin_lock(&lp->lp_lock); + rc = lnet_is_discovery_disabled_locked(lp); + spin_unlock(&lp->lp_lock); + + return rc; +} + lnet_nid_t LNetPrimaryNID(lnet_nid_t nid) { @@ -1097,11 +1195,16 @@ LNetPrimaryNID(lnet_nid_t nid) goto out_unlock; } lp = lpni->lpni_peer_net->lpn_peer; + while (!lnet_peer_is_uptodate(lp)) { rc = lnet_discover_peer_locked(lpni, cpt, true); if (rc) goto out_decref; lp = lpni->lpni_peer_net->lpn_peer; + + /* Only try once if discovery is disabled */ + if (lnet_is_discovery_disabled(lp)) + break; } primary_nid = lp->lp_primary_nid; out_decref: @@ -1336,6 +1439,18 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) } /* If this is the primary NID, destroy the peer. */ if (lnet_peer_ni_is_primary(lpni)) { + struct lnet_peer *rtr_lp = + lpni->lpni_peer_net->lpn_peer; + int rtr_refcount = rtr_lp->lp_rtr_refcount; + /* + * if we're trying to delete a router it means + * we're moving this peer NI to a new peer so must + * transfer router properties to the new peer + */ + if (rtr_refcount > 0) { + flags |= LNET_PEER_RTR_NI_FORCE_DEL; + lnet_rtr_transfer_to_peer(rtr_lp, lp); + } lnet_peer_del(lpni->lpni_peer_net->lpn_peer); lpni = lnet_peer_ni_alloc(nid); if (!lpni) { @@ -1412,7 +1527,11 @@ lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref) struct lnet_peer *lp; struct lnet_peer_net *lpn; struct lnet_peer_ni *lpni; - unsigned flags = 0; + /* + * Assume peer is Multi-Rail capable and let discovery find out + * otherwise. + */ + unsigned flags = LNET_PEER_MULTI_RAIL; int rc = 0; if (nid == LNET_NID_ANY) { @@ -1712,9 +1831,29 @@ out_mutex_unlock: return lpni; } -/* - * Peer Discovery - */ +bool +lnet_peer_gw_discovery(struct lnet_peer *lp) +{ + bool rc = false; + + spin_lock(&lp->lp_lock); + if (lp->lp_state & LNET_PEER_RTR_DISCOVERY) + rc = true; + spin_unlock(&lp->lp_lock); + + return rc; +} + +bool +lnet_peer_is_uptodate(struct lnet_peer *lp) +{ + bool rc; + + spin_lock(&lp->lp_lock); + rc = lnet_peer_is_uptodate_locked(lp); + spin_unlock(&lp->lp_lock); + return rc; +} /* * Is a peer uptodate from the point of view of discovery? @@ -1725,22 +1864,17 @@ out_mutex_unlock: * Otherwise look at whether the peer needs rediscovering. */ bool -lnet_peer_is_uptodate(struct lnet_peer *lp) +lnet_peer_is_uptodate_locked(struct lnet_peer *lp) +__must_hold(&lp->lp_lock) { bool rc; - spin_lock(&lp->lp_lock); if (lp->lp_state & (LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH)) { rc = false; - } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) { - rc = true; } else if (lp->lp_state & LNET_PEER_REDISCOVER) { - if (lnet_peer_discovery_disabled) - rc = true; - else - rc = false; + rc = false; } else if (lnet_peer_needs_push(lp)) { rc = false; } else if (lp->lp_state & LNET_PEER_DISCOVERED) { @@ -1751,7 +1885,6 @@ lnet_peer_is_uptodate(struct lnet_peer *lp) } else { rc = false; } - spin_unlock(&lp->lp_lock); return rc; } @@ -1805,6 +1938,9 @@ static void lnet_peer_discovery_complete(struct lnet_peer *lp) spin_unlock(&lp->lp_lock); wake_up_all(&lp->lp_dc_waitq); + if (lp->lp_rtr_refcount > 0) + lnet_router_discovery_complete(lp); + lnet_net_unlock(LNET_LOCK_EX); /* iterate through all pending messages and send them again */ @@ -1940,38 +2076,9 @@ void lnet_peer_push_event(struct lnet_event *ev) goto out; } - /* - * Check whether the Put data is stale. Stale data can just be - * dropped. - */ - if (pbuf->pb_info.pi_nnis > 1 && - lp->lp_primary_nid == pbuf->pb_info.pi_ni[1].ns_nid && - LNET_PING_BUFFER_SEQNO(pbuf) < lp->lp_peer_seqno) { - CDEBUG(D_NET, "Stale Push from %s: got %u have %u\n", - libcfs_nid2str(lp->lp_primary_nid), - LNET_PING_BUFFER_SEQNO(pbuf), - lp->lp_peer_seqno); - goto out; - } - - /* - * Check whether the Put data is new, in which case we clear - * the UPTODATE flag and prepare to process it. - * - * If the Put data is current, and the peer is UPTODATE then - * we assome everything is all right and drop the data as - * stale. - */ - if (LNET_PING_BUFFER_SEQNO(pbuf) > lp->lp_peer_seqno) { - lp->lp_peer_seqno = LNET_PING_BUFFER_SEQNO(pbuf); - lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE; - } else if (lp->lp_state & LNET_PEER_NIDS_UPTODATE) { - CDEBUG(D_NET, "Stale Push from %s: got %u have %u\n", - libcfs_nid2str(lp->lp_primary_nid), - LNET_PING_BUFFER_SEQNO(pbuf), - lp->lp_peer_seqno); - goto out; - } + /* always assume new data */ + lp->lp_peer_seqno = LNET_PING_BUFFER_SEQNO(pbuf); + lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE; /* * If there is data present that hasn't been processed yet, @@ -2055,6 +2162,7 @@ lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block) DEFINE_WAIT(wait); struct lnet_peer *lp; int rc = 0; + int count = 0; again: lnet_net_unlock(cpt); @@ -2067,26 +2175,38 @@ again: * zombie if we race with DLC, so we must check for that. */ for (;;) { + /* Keep lp alive when the lnet_net_lock is unlocked */ + lnet_peer_addref_locked(lp); prepare_to_wait(&lp->lp_dc_waitq, &wait, TASK_INTERRUPTIBLE); if (signal_pending(current)) break; if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING) break; + /* + * Don't repeat discovery if discovery is disabled. This is + * done to ensure we can use discovery as a standard ping as + * well for backwards compatibility with routers which do not + * have discovery or have discovery disabled + */ + if (lnet_is_discovery_disabled(lp) && count > 0) + break; if (lp->lp_dc_error) break; if (lnet_peer_is_uptodate(lp)) break; lnet_peer_queue_for_discovery(lp); + count++; + CDEBUG(D_NET, "Discovery attempt # %d\n", count); + /* - * if caller requested a non-blocking operation then - * return immediately. Once discovery is complete then the - * peer ref will be decremented and any pending messages - * that were stopped due to discovery will be transmitted. + * If caller requested a non-blocking operation then + * return immediately. Once discovery is complete any + * pending messages that were stopped due to discovery + * will be transmitted. */ if (!block) break; - lnet_peer_addref_locked(lp); lnet_net_unlock(LNET_LOCK_EX); schedule(); finish_wait(&lp->lp_dc_waitq, &wait); @@ -2099,11 +2219,13 @@ again: lnet_net_unlock(LNET_LOCK_EX); lnet_net_lock(cpt); - + lnet_peer_decref_locked(lp); /* - * If the peer has changed after we've discovered the older peer, - * then we need to discovery the new peer to make sure the - * interface information is up to date + * The peer may have changed, so re-check and rediscover if that turns + * out to have been the case. The reference count on lp ensured that + * even if it was unlinked from lpni the memory could not be recycled. + * Thus the check below is sufficient to determine whether the peer + * changed. If the peer changed, then lp must not be dereferenced. */ if (lp != lpni->lpni_peer_net->lpn_peer) goto again; @@ -2116,7 +2238,7 @@ again: rc = lp->lp_dc_error; else if (!block) CDEBUG(D_NET, "non-blocking discovery\n"); - else if (!lnet_peer_is_uptodate(lp)) + else if (!lnet_peer_is_uptodate(lp) && !lnet_is_discovery_disabled(lp)) goto again; CDEBUG(D_NET, "peer %s NID %s: %d. %s\n", @@ -2187,6 +2309,36 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) goto out; } + + /* + * Only enable the multi-rail feature on the peer if both sides of + * the connection have discovery on + */ + if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) { + CDEBUG(D_NET, "Peer %s has Multi-Rail feature enabled\n", + libcfs_nid2str(lp->lp_primary_nid)); + lp->lp_state |= LNET_PEER_MULTI_RAIL; + } else { + CDEBUG(D_NET, "Peer %s has Multi-Rail feature disabled\n", + libcfs_nid2str(lp->lp_primary_nid)); + lp->lp_state &= ~LNET_PEER_MULTI_RAIL; + } + + /* + * The peer may have discovery disabled at its end. Set + * NO_DISCOVERY as appropriate. + */ + if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY) && + !lnet_peer_discovery_disabled) { + CDEBUG(D_NET, "Peer %s has discovery enabled\n", + libcfs_nid2str(lp->lp_primary_nid)); + lp->lp_state &= ~LNET_PEER_NO_DISCOVERY; + } else { + CDEBUG(D_NET, "Peer %s has discovery disabled\n", + libcfs_nid2str(lp->lp_primary_nid)); + lp->lp_state |= LNET_PEER_NO_DISCOVERY; + } + /* * Update the MULTI_RAIL flag based on the reply. If the peer * was configured with DLC then the setting should match what @@ -2199,8 +2351,17 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) CWARN("Reply says %s is Multi-Rail, DLC says not\n", libcfs_nid2str(lp->lp_primary_nid)); } else { - lp->lp_state |= LNET_PEER_MULTI_RAIL; - lnet_peer_clr_non_mr_pref_nids(lp); + /* + * if discovery is disabled then we don't want to + * update the state of the peer. All we'll do is + * update the peer_nis which were reported back in + * the initial ping + */ + + if (!lnet_is_discovery_disabled_locked(lp)) { + lp->lp_state |= LNET_PEER_MULTI_RAIL; + lnet_peer_clr_non_mr_pref_nids(lp); + } } } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) { if (lp->lp_state & LNET_PEER_CONFIGURED) { @@ -2221,20 +2382,6 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) lp->lp_data_nnis = pbuf->pb_info.pi_nnis; /* - * The peer may have discovery disabled at its end. Set - * NO_DISCOVERY as appropriate. - */ - if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_DISCOVERY)) { - CDEBUG(D_NET, "Peer %s has discovery disabled\n", - libcfs_nid2str(lp->lp_primary_nid)); - lp->lp_state |= LNET_PEER_NO_DISCOVERY; - } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) { - CDEBUG(D_NET, "Peer %s has discovery enabled\n", - libcfs_nid2str(lp->lp_primary_nid)); - lp->lp_state &= ~LNET_PEER_NO_DISCOVERY; - } - - /* * Check for truncation of the Reply. Clear PING_SENT and set * PING_FAILED to trigger a retry. */ @@ -2256,21 +2403,18 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL && pbuf->pb_info.pi_nnis > 1 && lp->lp_primary_nid == pbuf->pb_info.pi_ni[1].ns_nid) { - if (LNET_PING_BUFFER_SEQNO(pbuf) < lp->lp_peer_seqno) { - CDEBUG(D_NET, "Stale Reply from %s: got %u have %u\n", + if (LNET_PING_BUFFER_SEQNO(pbuf) < lp->lp_peer_seqno) + CDEBUG(D_NET, "peer %s: seq# got %u have %u. peer rebooted?\n", libcfs_nid2str(lp->lp_primary_nid), LNET_PING_BUFFER_SEQNO(pbuf), lp->lp_peer_seqno); - goto out; - } - if (LNET_PING_BUFFER_SEQNO(pbuf) > lp->lp_peer_seqno) - lp->lp_peer_seqno = LNET_PING_BUFFER_SEQNO(pbuf); + lp->lp_peer_seqno = LNET_PING_BUFFER_SEQNO(pbuf); } /* We're happy with the state of the data in the buffer. */ - CDEBUG(D_NET, "peer %s data present %u\n", - libcfs_nid2str(lp->lp_primary_nid), lp->lp_peer_seqno); + CDEBUG(D_NET, "peer %s data present %u. state = 0x%x\n", + libcfs_nid2str(lp->lp_primary_nid), lp->lp_peer_seqno, lp->lp_state); if (lp->lp_state & LNET_PEER_DATA_PRESENT) lnet_ping_buffer_decref(lp->lp_data); else @@ -2280,6 +2424,15 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev) out: lp->lp_state &= ~LNET_PEER_PING_SENT; spin_unlock(&lp->lp_lock); + + lnet_net_lock(LNET_LOCK_EX); + /* + * If this peer is a gateway, call the routing callback to + * handle the ping reply + */ + if (lp->lp_rtr_refcount > 0) + lnet_router_discovery_ping_reply(lp); + lnet_net_unlock(LNET_LOCK_EX); } /* @@ -2434,6 +2587,17 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) flags |= LNET_PEER_MULTI_RAIL; + /* + * Cache the routing feature for the peer; whether it is enabled + * for disabled as reported by the remote peer. + */ + spin_lock(&lp->lp_lock); + if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_RTE_DISABLED)) + lp->lp_state |= LNET_PEER_ROUTER_ENABLED; + else + lp->lp_state &= ~LNET_PEER_ROUTER_ENABLED; + spin_unlock(&lp->lp_lock); + nnis = MAX(lp->lp_nnis, pbuf->pb_info.pi_nnis); LIBCFS_ALLOC(curnis, nnis * sizeof(*curnis)); LIBCFS_ALLOC(addnis, nnis * sizeof(*addnis)); @@ -2491,6 +2655,15 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, delnis[ndelnis++] = curnis[i]; } + /* + * If we get here and the discovery is disabled then we don't want + * to add or delete any NIs. We just updated the ones we have some + * information on, and call it a day + */ + rc = 0; + if (lnet_is_discovery_disabled(lp)) + goto out; + for (i = 0; i < naddnis; i++) { rc = lnet_peer_add_nid(lp, addnis[i].ns_nid, flags); if (rc) { @@ -2508,6 +2681,14 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, } for (i = 0; i < ndelnis; i++) { + /* + * for routers it's okay to delete the primary_nid because + * the upper layers don't really rely on it. So if we're + * being told that the router changed its primary_nid + * then it's okay to delete it. + */ + if (lp->lp_rtr_refcount > 0) + flags |= LNET_PEER_RTR_NI_FORCE_DEL; rc = lnet_peer_del_nid(lp, delnis[i], flags); if (rc) { CERROR("Error deleting NID %s from peer %s: %d\n", @@ -2528,7 +2709,7 @@ out: LIBCFS_FREE(addnis, nnis * sizeof(*addnis)); LIBCFS_FREE(delnis, nnis * sizeof(*delnis)); lnet_ping_buffer_decref(pbuf); - CDEBUG(D_NET, "peer %s: %d\n", libcfs_nid2str(lp->lp_primary_nid), rc); + CDEBUG(D_NET, "peer %s (%p): %d\n", libcfs_nid2str(lp->lp_primary_nid), lp, rc); if (rc) { spin_lock(&lp->lp_lock); @@ -2601,6 +2782,18 @@ lnet_peer_set_primary_data(struct lnet_peer *lp, struct lnet_ping_buffer *pbuf) return 0; } +static bool lnet_is_nid_in_ping_info(lnet_nid_t nid, struct lnet_ping_info *pinfo) +{ + int i; + + for (i = 0; i < pinfo->pi_nnis; i++) { + if (pinfo->pi_ni[i].ns_nid == nid) + return true; + } + + return false; +} + /* * Update a peer using the data received. */ @@ -2668,7 +2861,17 @@ __must_hold(&lp->lp_lock) rc = lnet_peer_set_primary_nid(lp, nid, flags); if (!rc) rc = lnet_peer_merge_data(lp, pbuf); - } else if (lp->lp_primary_nid == nid) { + /* + * if the primary nid of the peer is present in the ping info returned + * from the peer, but it's not the local primary peer we have + * cached and discovery is disabled, then we don't want to update + * our local peer info, by adding or removing NIDs, we just want + * to update the status of the nids that we currently have + * recorded in that peer. + */ + } else if (lp->lp_primary_nid == nid || + (lnet_is_nid_in_ping_info(lp->lp_primary_nid, &pbuf->pb_info) && + lnet_is_discovery_disabled(lp))) { rc = lnet_peer_merge_data(lp, pbuf); } else { lpni = lnet_find_peer_ni_locked(nid); @@ -2682,13 +2885,26 @@ __must_hold(&lp->lp_lock) rc = lnet_peer_merge_data(lp, pbuf); } } else { - rc = lnet_peer_set_primary_data( - lpni->lpni_peer_net->lpn_peer, pbuf); + struct lnet_peer *new_lp; + new_lp = lpni->lpni_peer_net->lpn_peer; + /* + * if lp has discovery/MR enabled that means new_lp + * should have discovery/MR enabled as well, since + * it's the same peer, which we're about to merge + */ + if (!(lp->lp_state & LNET_PEER_NO_DISCOVERY)) + new_lp->lp_state &= ~LNET_PEER_NO_DISCOVERY; + if (lp->lp_state & LNET_PEER_MULTI_RAIL) + new_lp->lp_state |= LNET_PEER_MULTI_RAIL; + + rc = lnet_peer_set_primary_data(new_lp, pbuf); + lnet_consolidate_routes_locked(lp, new_lp); lnet_peer_ni_decref_locked(lpni); } } out: - CDEBUG(D_NET, "peer %s: %d\n", libcfs_nid2str(lp->lp_primary_nid), rc); + CDEBUG(D_NET, "peer %s(%p): %d. state = 0x%x\n", libcfs_nid2str(lp->lp_primary_nid), lp, rc, + lp->lp_state); mutex_unlock(&the_lnet.ln_api_mutex); spin_lock(&lp->lp_lock); @@ -2908,7 +3124,7 @@ fail_unlink: LNetMDUnlink(lp->lp_push_mdh); LNetInvalidateMDHandle(&lp->lp_push_mdh); fail_error: - CDEBUG(D_NET, "peer %s: %d\n", libcfs_nid2str(lp->lp_primary_nid), rc); + CDEBUG(D_NET, "peer %s(%p): %d\n", libcfs_nid2str(lp->lp_primary_nid), lp, rc); /* * The errors that get us here are considered hard errors and * cause Discovery to terminate. So we clear PUSH_SENT, but do @@ -2952,19 +3168,6 @@ __must_hold(&lp->lp_lock) return 0; } -/* - * Mark the peer as to be rediscovered. - */ -static int lnet_peer_rediscover(struct lnet_peer *lp) -__must_hold(&lp->lp_lock) -{ - lp->lp_state |= LNET_PEER_REDISCOVER; - lp->lp_state &= ~LNET_PEER_DISCOVERING; - - CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(lp->lp_primary_nid)); - - return 0; -} /* * Discovering this peer is taking too long. Cancel any Ping or Push @@ -3092,6 +3295,8 @@ static int lnet_peer_discovery(void *arg) struct lnet_peer *lp; int rc; + wait_for_completion(&the_lnet.ln_started); + CDEBUG(D_NET, "started\n"); cfs_block_allsigs(); @@ -3139,8 +3344,8 @@ static int lnet_peer_discovery(void *arg) * forcing a Ping or Push. */ spin_lock(&lp->lp_lock); - CDEBUG(D_NET, "peer %s state %#x\n", - libcfs_nid2str(lp->lp_primary_nid), + CDEBUG(D_NET, "peer %s(%p) state %#x\n", + libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state); if (lp->lp_state & LNET_PEER_DATA_PRESENT) rc = lnet_peer_data_present(lp); @@ -3152,16 +3357,14 @@ static int lnet_peer_discovery(void *arg) rc = lnet_peer_send_ping(lp); else if (lp->lp_state & LNET_PEER_FORCE_PUSH) rc = lnet_peer_send_push(lp); - else if (lnet_peer_discovery_disabled) - rc = lnet_peer_rediscover(lp); else if (!(lp->lp_state & LNET_PEER_NIDS_UPTODATE)) rc = lnet_peer_send_ping(lp); else if (lnet_peer_needs_push(lp)) rc = lnet_peer_send_push(lp); else rc = lnet_peer_discovered(lp); - CDEBUG(D_NET, "peer %s state %#x rc %d\n", - libcfs_nid2str(lp->lp_primary_nid), + CDEBUG(D_NET, "peer %s(%p) state %#x rc %d\n", + libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state, rc); spin_unlock(&lp->lp_lock); @@ -3266,7 +3469,14 @@ void lnet_peer_discovery_stop(void) LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING); the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING; - wake_up(&the_lnet.ln_dc_waitq); + + /* In the LNetNIInit() path we may be stopping discovery before it + * entered its work loop + */ + if (!completion_done(&the_lnet.ln_started)) + complete(&the_lnet.ln_started); + else + wake_up(&the_lnet.ln_dc_waitq); wait_event(the_lnet.ln_dc_waitq, the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN); @@ -3298,7 +3508,7 @@ lnet_debug_peer(lnet_nid_t nid) } if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) - aliveness = lp->lpni_alive ? "up" : "down"; + aliveness = (lnet_is_peer_ni_alive(lp)) ? "up" : "down"; CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount), @@ -3354,7 +3564,7 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) snprintf(aliveness, LNET_MAX_STR_LEN, - lp->lpni_alive ? "up" : "down"); + lnet_is_peer_ni_alive(lp) ? "up" : "down"); *nid = lp->lpni_nid; *refcount = atomic_read(&lp->lpni_refcount); @@ -3441,7 +3651,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) if (lnet_isrouter(lpni) || lnet_peer_aliveness_enabled(lpni)) snprintf(lpni_info->cr_aliveness, LNET_MAX_STR_LEN, - lpni->lpni_alive ? "up" : "down"); + lnet_is_peer_ni_alive(lpni) ? "up" : "down"); lpni_info->cr_refcount = atomic_read(&lpni->lpni_refcount); lpni_info->cr_ni_peer_tx_credits = (lpni->lpni_net != NULL) ?