Whamcloud - gitweb
LU-9971 lnet: fix peer ref counting 46/35446/2
authorAmir Shehata <ashehata@whamcloud.com>
Mon, 8 Jul 2019 19:51:05 +0000 (12:51 -0700)
committerOleg Drokin <green@whamcloud.com>
Wed, 10 Jul 2019 15:20:34 +0000 (15:20 +0000)
Exit from the loop after the peer ref count has been incremented
to avoid an incorrect ref count.

The code makes sure that a peer is queued for discovery at most
once if discovery is disabled. This allows discovery to be used
as a standard ping for gateways which do not have the discovery
feature or have discovery disabled.

Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I2cc4c8f9d780f5c438d9b51bb2d1106fec553f39
Reviewed-on: https://review.whamcloud.com/35446
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/peer.c

index 01fcad3..a60a2e6 100644 (file)
@@ -2150,6 +2150,7 @@ lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
        DEFINE_WAIT(wait);
        struct lnet_peer *lp;
        int rc = 0;
+       int count = 0;
 
 again:
        lnet_net_unlock(cpt);
@@ -2169,11 +2170,21 @@ again:
                        break;
                if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
                        break;
+               /*
+                * Don't repeat discovery if discovery is disabled. This is
+                * done to ensure we can use discovery as a standard ping as
+                * well for backwards compatibility with routers which do not
+                * have discovery or have discovery disabled
+                */
+               if (lnet_is_discovery_disabled(lp) && count > 0)
+                       break;
                if (lp->lp_dc_error)
                        break;
                if (lnet_peer_is_uptodate(lp))
                        break;
                lnet_peer_queue_for_discovery(lp);
+               count++;
+               CDEBUG(D_NET, "Discovery attempt # %d\n", count);
 
                /*
                 * If caller requested a non-blocking operation then
@@ -2191,16 +2202,6 @@ again:
                lnet_peer_decref_locked(lp);
                /* Peer may have changed */
                lp = lpni->lpni_peer_net->lpn_peer;
-
-               /*
-                * Wait for discovery to complete, but don't repeat if
-                * discovery is disabled. This is done to ensure we can
-                * use discovery as a standard ping as well for backwards
-                * compatibility with routers which do not have discovery
-                * or have discovery disabled
-                */
-               if (lnet_is_discovery_disabled(lp))
-                       break;
        }
        finish_wait(&lp->lp_dc_waitq, &wait);