Whamcloud - gitweb
LU-10123 lnet: ensure peer put back on dc request queue 47/30147/10
author: Bruno Faccini <bruno.faccini@intel.com>
Fri, 17 Nov 2017 11:57:42 +0000 (12:57 +0100)
committer: Oleg Drokin <oleg.drokin@intel.com>
Wed, 31 Jan 2018 05:51:59 +0000 (05:51 +0000)
Upon receiving an async PUT request from a peer that is already in the
discovery process, lnet_peer_push_event() was not handling the case
where the peer could be on the working (ln_dc_working) queue. This could
lead to the peer not being re-discovered as expected, but instead being
left on the working queue and finally timing out.

Also ensure that the peer will not be put back on the request queue by
the event handler if discovery is already completed.

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: Ic74a313c00edc1b8fdd14794d2c88411d12e0979
Reviewed-on: https://review.whamcloud.com/30147
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/lnet/peer.c

index f24294b..4b5702d 100644 (file)
@@ -1989,13 +1989,16 @@ void lnet_peer_push_event(struct lnet_event *ev)
 
 out:
        /*
-        * Queue the peer for discovery, and wake the discovery thread
-        * if the peer was already queued, because its status changed.
+        * Queue the peer for discovery if not done, force it on the request
+        * queue and wake the discovery thread if the peer was already queued,
+        * because its status changed.
         */
        spin_unlock(&lp->lp_lock);
        lnet_net_lock(LNET_LOCK_EX);
-       if (lnet_peer_queue_for_discovery(lp))
+       if (!lnet_peer_is_uptodate(lp) && lnet_peer_queue_for_discovery(lp)) {
+               list_move(&lp->lp_dc_list, &the_lnet.ln_dc_request);
                wake_up(&the_lnet.ln_dc_waitq);
+       }
        /* Drop refcount from lookup */
        lnet_peer_decref_locked(lp);
        lnet_net_unlock(LNET_LOCK_EX);
@@ -2355,7 +2358,10 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
                lnet_ping_buffer_decref(pbuf);
                lnet_peer_decref_locked(lp);
        }
-       if (rc == LNET_REDISCOVER_PEER) {
+
+       /* put peer back at end of request queue, if discovery not already
+        * done */
+       if (rc == LNET_REDISCOVER_PEER && !lnet_peer_is_uptodate(lp)) {
                list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
                wake_up(&the_lnet.ln_dc_waitq);
        }