Whamcloud - gitweb
LU-12537 lnet: Sync the start of discovery and monitor threads
[fs/lustre-release.git] / lnet / lnet / peer.c
index 01fcad3..43c8d35 100644 (file)
@@ -882,6 +882,8 @@ lnet_push_update_to_peers(int force)
        int cpt;
 
        lnet_net_lock(LNET_LOCK_EX);
+       if (lnet_peer_discovery_disabled)
+               force = 0;
        lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
        for (cpt = 0; cpt < lncpt; cpt++) {
                ptable = the_lnet.ln_peer_tables[cpt];
@@ -2150,6 +2152,7 @@ lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
        DEFINE_WAIT(wait);
        struct lnet_peer *lp;
        int rc = 0;
+       int count = 0;
 
 again:
        lnet_net_unlock(cpt);
@@ -2169,11 +2172,21 @@ again:
                        break;
                if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
                        break;
+               /*
+                * Don't repeat discovery if discovery is disabled. This
+                * ensures that discovery can also be used as a standard ping,
+                * for backwards compatibility with routers which do not have
+                * discovery or have discovery disabled.
+                */
+               if (lnet_is_discovery_disabled(lp) && count > 0)
+                       break;
                if (lp->lp_dc_error)
                        break;
                if (lnet_peer_is_uptodate(lp))
                        break;
                lnet_peer_queue_for_discovery(lp);
+               count++;
+               CDEBUG(D_NET, "Discovery attempt # %d\n", count);
 
                /*
                 * If caller requested a non-blocking operation then
@@ -2191,16 +2204,6 @@ again:
                lnet_peer_decref_locked(lp);
                /* Peer may have changed */
                lp = lpni->lpni_peer_net->lpn_peer;
-
-               /*
-                * Wait for discovery to complete, but don't repeat if
-                * discovery is disabled. This is done to ensure we can
-                * use discovery as a standard ping as well for backwards
-                * compatibility with routers which do not have discovery
-                * or have discovery disabled
-                */
-               if (lnet_is_discovery_disabled(lp))
-                       break;
        }
        finish_wait(&lp->lp_dc_waitq, &wait);
 
@@ -3273,6 +3276,8 @@ static int lnet_peer_discovery(void *arg)
        struct lnet_peer *lp;
        int rc;
 
+       wait_for_completion(&the_lnet.ln_started);
+
        CDEBUG(D_NET, "started\n");
        cfs_block_allsigs();
 
@@ -3445,7 +3450,14 @@ void lnet_peer_discovery_stop(void)
 
        LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
        the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
-       wake_up(&the_lnet.ln_dc_waitq);
+
+       /* In the LNetNIInit() path we may be stopping discovery before it
+        * has entered its work loop.
+        */
+       if (!completion_done(&the_lnet.ln_started))
+               complete(&the_lnet.ln_started);
+       else
+               wake_up(&the_lnet.ln_dc_waitq);
 
        wait_event(the_lnet.ln_dc_waitq,
                   the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);