Whamcloud - gitweb
LU-12739 lnet: Don't queue msg when discovery has completed
[fs/lustre-release.git] / lnet / lnet / peer.c
index a60a2e6..e9505fd 100644 (file)
@@ -882,6 +882,8 @@ lnet_push_update_to_peers(int force)
        int cpt;
 
        lnet_net_lock(LNET_LOCK_EX);
+       if (lnet_peer_discovery_disabled)
+               force = 0;
        lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
        for (cpt = 0; cpt < lncpt; cpt++) {
                ptable = the_lnet.ln_peer_tables[cpt];
@@ -1842,6 +1844,17 @@ lnet_peer_gw_discovery(struct lnet_peer *lp)
        return rc;
 }
 
+bool
+lnet_peer_is_uptodate(struct lnet_peer *lp)
+{
+       bool rc;        /* result of the check made under lp_lock */
+
+       spin_lock(&lp->lp_lock); /* _locked variant requires lp_lock held */
+       rc = lnet_peer_is_uptodate_locked(lp);
+       spin_unlock(&lp->lp_lock);
+       return rc;
+}
+
 /*
  * Is a peer uptodate from the point of view of discovery?
  *
@@ -1851,11 +1864,11 @@ lnet_peer_gw_discovery(struct lnet_peer *lp)
  * Otherwise look at whether the peer needs rediscovering.
  */
 bool
-lnet_peer_is_uptodate(struct lnet_peer *lp)
+lnet_peer_is_uptodate_locked(struct lnet_peer *lp)
+__must_hold(&lp->lp_lock)
 {
        bool rc;
 
-       spin_lock(&lp->lp_lock);
        if (lp->lp_state & (LNET_PEER_DISCOVERING |
                            LNET_PEER_FORCE_PING |
                            LNET_PEER_FORCE_PUSH)) {
@@ -1872,7 +1885,6 @@ lnet_peer_is_uptodate(struct lnet_peer *lp)
        } else {
                rc = false;
        }
-       spin_unlock(&lp->lp_lock);
 
        return rc;
 }
@@ -2412,6 +2424,15 @@ lnet_discovery_event_reply(struct lnet_peer *lp, struct lnet_event *ev)
 out:
        lp->lp_state &= ~LNET_PEER_PING_SENT;
        spin_unlock(&lp->lp_lock);
+
+       lnet_net_lock(LNET_LOCK_EX);
+       /*
+        * If this peer is a gateway, call the routing callback to
+        * handle the ping reply
+        */
+       if (lp->lp_rtr_refcount > 0)
+               lnet_router_discovery_ping_reply(lp);
+       lnet_net_unlock(LNET_LOCK_EX);
 }
 
 /*
@@ -3274,6 +3295,8 @@ static int lnet_peer_discovery(void *arg)
        struct lnet_peer *lp;
        int rc;
 
+       wait_for_completion(&the_lnet.ln_started);
+
        CDEBUG(D_NET, "started\n");
        cfs_block_allsigs();
 
@@ -3446,7 +3469,14 @@ void lnet_peer_discovery_stop(void)
 
        LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
        the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
-       wake_up(&the_lnet.ln_dc_waitq);
+
+       /* In the LNetNIInit() path we may be stopping discovery before it
+        * entered its work loop
+        */
+       if (!completion_done(&the_lnet.ln_started))
+               complete(&the_lnet.ln_started);
+       else
+               wake_up(&the_lnet.ln_dc_waitq);
 
        wait_event(the_lnet.ln_dc_waitq,
                   the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);