Whamcloud - gitweb
LU-12889 lnet: Do not assume peers are MR capable
[fs/lustre-release.git] / lnet / lnet / peer.c
index 1425bd8..b373f49 100644 (file)
@@ -258,6 +258,10 @@ lnet_peer_alloc(lnet_nid_t nid)
        init_waitqueue_head(&lp->lp_dc_waitq);
        spin_lock_init(&lp->lp_lock);
        lp->lp_primary_nid = nid;
+       if (lnet_peers_start_down())
+               lp->lp_alive = false;
+       else
+               lp->lp_alive = true;
 
        /*
         * all peers created on a router should have health on
@@ -400,8 +404,6 @@ lnet_peer_ni_del_locked(struct lnet_peer_ni *lpni, bool force)
 
        /* decrement the ref count on the peer table */
        ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
-       LASSERT(ptable->pt_number > 0);
-       ptable->pt_number--;
 
        /*
         * The peer_ni can no longer be found with a lookup. But there
@@ -651,7 +653,8 @@ lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid)
        struct list_head        *peers;
        struct lnet_peer_ni     *lp;
 
-       LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
+       if (the_lnet.ln_state != LNET_STATE_RUNNING)
+               return NULL;
 
        peers = &ptable->pt_hash[lnet_nid2peerhash(nid)];
        list_for_each_entry(lp, peers, lpni_hashlist) {
@@ -1257,7 +1260,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
                ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt];
                list_add_tail(&lpni->lpni_hashlist, &ptable->pt_hash[hash]);
                ptable->pt_version++;
-               ptable->pt_number++;
                /* This is the 1st refcount on lpni. */
                atomic_inc(&lpni->lpni_refcount);
        }
@@ -1528,11 +1530,7 @@ lnet_peer_ni_traffic_add(lnet_nid_t nid, lnet_nid_t pref)
        struct lnet_peer *lp;
        struct lnet_peer_net *lpn;
        struct lnet_peer_ni *lpni;
-       /*
-        * Assume peer is Multi-Rail capable and let discovery find out
-        * otherwise.
-        */
-       unsigned flags = LNET_PEER_MULTI_RAIL;
+       unsigned flags = 0;
        int rc = 0;
 
        if (nid == LNET_NID_ANY) {
@@ -2312,20 +2310,6 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
 
 
        /*
-        * Only enable the multi-rail feature on the peer if both sides of
-        * the connection have discovery on
-        */
-       if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
-               CDEBUG(D_NET, "Peer %s has Multi-Rail feature enabled\n",
-                      libcfs_nid2str(lp->lp_primary_nid));
-               lp->lp_state |= LNET_PEER_MULTI_RAIL;
-       } else {
-               CDEBUG(D_NET, "Peer %s has Multi-Rail feature disabled\n",
-                      libcfs_nid2str(lp->lp_primary_nid));
-               lp->lp_state &= ~LNET_PEER_MULTI_RAIL;
-       }
-
-       /*
         * The peer may have discovery disabled at its end. Set
         * NO_DISCOVERY as appropriate.
         */
@@ -2347,22 +2331,24 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
         */
        if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL) {
                if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
-                       /* Everything's fine */
+                       CDEBUG(D_NET, "peer %s(%p) is MR\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
                } else if (lp->lp_state & LNET_PEER_CONFIGURED) {
                        CWARN("Reply says %s is Multi-Rail, DLC says not\n",
                              libcfs_nid2str(lp->lp_primary_nid));
+               } else if (lnet_peer_discovery_disabled) {
+                       CDEBUG(D_NET,
+                              "peer %s(%p) not MR: DD disabled locally\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
+               } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) {
+                       CDEBUG(D_NET,
+                              "peer %s(%p) not MR: DD disabled remotely\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
                } else {
-                       /*
-                        * if discovery is disabled then we don't want to
-                        * update the state of the peer. All we'll do is
-                        * update the peer_nis which were reported back in
-                        * the initial ping
-                        */
-
-                       if (!lnet_is_discovery_disabled_locked(lp)) {
-                               lp->lp_state |= LNET_PEER_MULTI_RAIL;
-                               lnet_peer_clr_non_mr_pref_nids(lp);
-                       }
+                       CDEBUG(D_NET, "peer %s(%p) is MR capable\n",
+                              libcfs_nid2str(lp->lp_primary_nid), lp);
+                       lp->lp_state |= LNET_PEER_MULTI_RAIL;
+                       lnet_peer_clr_non_mr_pref_nids(lp);
                }
        } else if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
                if (lp->lp_state & LNET_PEER_CONFIGURED) {