Whamcloud - gitweb
LU-13278 lnet: Reconcile discovery push and reply handling 74/37674/3
author Chris Horn <hornc@cray.com>
Mon, 10 Feb 2020 20:11:49 +0000 (14:11 -0600)
committer Oleg Drokin <green@whamcloud.com>
Tue, 24 Mar 2020 05:19:26 +0000 (05:19 +0000)
Reconcile the logic for updating the multi-rail flag of a peer when
processing a discovery PUSH with the logic used when processing a
discovery REPLY.

Cray-bug-id: LUS-8516
Signed-off-by: Chris Horn <hornc@cray.com>
Change-Id: Idfb4c3729822d03b71f9440ac66176ae6b886022
Reviewed-on: https://review.whamcloud.com/37674
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Stephen Champion <stephen.champion@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/peer.c

index c6ac769..3102e1d 100644 (file)
@@ -2021,20 +2021,6 @@ void lnet_peer_push_event(struct lnet_event *ev)
        }
 
        /*
-        * Check the MULTIRAIL flag. Complain if the peer was DLC
-        * configured without it.
-        */
-       if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
-               if (lp->lp_state & LNET_PEER_CONFIGURED) {
-                       CERROR("Push says %s is Multi-Rail, DLC says not\n",
-                              libcfs_nid2str(lp->lp_primary_nid));
-               } else {
-                       lp->lp_state |= LNET_PEER_MULTI_RAIL;
-                       lnet_peer_clr_non_mr_pref_nids(lp);
-               }
-       }
-
-       /*
         * The peer may have discovery disabled at its end. Set
         * NO_DISCOVERY as appropriate.
         */
@@ -2049,6 +2035,31 @@ void lnet_peer_push_event(struct lnet_event *ev)
        }
 
        /*
+        * Update the MULTI_RAIL flag based on the push. If the peer
+        * was configured with DLC then the setting should match what
+        * DLC put in.
+        * NB: We verified above that the MR feature bit is set in pi_features
+        */
+       if (lp->lp_state & LNET_PEER_MULTI_RAIL) {
+               CDEBUG(D_NET, "peer %s(%p) is MR\n",
+                      libcfs_nid2str(lp->lp_primary_nid), lp);
+       } else if (lp->lp_state & LNET_PEER_CONFIGURED) {
+               CWARN("Push says %s is Multi-Rail, DLC says not\n",
+                     libcfs_nid2str(lp->lp_primary_nid));
+       } else if (lnet_peer_discovery_disabled) {
+               CDEBUG(D_NET, "peer %s(%p) not MR: DD disabled locally\n",
+                      libcfs_nid2str(lp->lp_primary_nid), lp);
+       } else if (lp->lp_state & LNET_PEER_NO_DISCOVERY) {
+               CDEBUG(D_NET, "peer %s(%p) not MR: DD disabled remotely\n",
+                      libcfs_nid2str(lp->lp_primary_nid), lp);
+       } else {
+               CDEBUG(D_NET, "peer %s(%p) is MR capable\n",
+                      libcfs_nid2str(lp->lp_primary_nid), lp);
+               lp->lp_state |= LNET_PEER_MULTI_RAIL;
+               lnet_peer_clr_non_mr_pref_nids(lp);
+       }
+
+       /*
         * Check for truncation of the Put message. Clear the
         * NIDS_UPTODATE flag and set FORCE_PING to trigger a ping,
         * and tell discovery to allocate a bigger buffer.