Whamcloud - gitweb
LU-14540 o2iblnd: Use REMOTE_DROPPED for ECONNREFUSED 14/42114/3
author Chris Horn <chris.horn@hpe.com>
Fri, 19 Mar 2021 18:22:26 +0000 (13:22 -0500)
committer Oleg Drokin <green@whamcloud.com>
Tue, 6 Apr 2021 03:03:00 +0000 (03:03 +0000)
ECONNREFUSED means that we received a response from the remote end,
so setting the LNet health status to REMOTE_DROPPED is more
appropriate than setting LOCAL_DROPPED. Using REMOTE_DROPPED will
decrement the peer NI health and allow us to try other peer NIs for
future sends.

Decrementing the peer NI health will also result in routes being
marked down, as appropriate, for cases where a router has refused the
connection request.

Test-Parameters: trivial
HPE-bug-id: LUS-9853
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I8190f5d78a76ec25553908c4f215362c0c2051fc
Reviewed-on: https://review.whamcloud.com/42114
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 90ba35b..e22f946 100644 (file)
@@ -2208,10 +2208,11 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
                           int error)
 {
        LIST_HEAD(zombies);
-       unsigned long   flags;
+       unsigned long flags;
+       enum lnet_msg_hstatus hstatus;
 
-       LASSERT (error != 0);
-       LASSERT (!in_interrupt());
+       LASSERT(error != 0);
+       LASSERT(!in_interrupt());
 
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
@@ -2254,12 +2255,20 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
        CNETERR("Deleting messages for %s: connection failed\n",
                libcfs_nid2str(peer_ni->ibp_nid));
 
-       if (error == -EHOSTUNREACH || error == -ETIMEDOUT)
-               kiblnd_txlist_done(&zombies, error,
-                                  LNET_MSG_STATUS_NETWORK_TIMEOUT);
-       else
-               kiblnd_txlist_done(&zombies, error,
-                                  LNET_MSG_STATUS_LOCAL_DROPPED);
+       switch (error) {
+       case -EHOSTUNREACH:
+       case -ETIMEDOUT:
+               hstatus = LNET_MSG_STATUS_NETWORK_TIMEOUT;
+               break;
+       case -ECONNREFUSED:
+               hstatus = LNET_MSG_STATUS_REMOTE_DROPPED;
+               break;
+       default:
+               hstatus = LNET_MSG_STATUS_LOCAL_DROPPED;
+               break;
+       }
+
+       kiblnd_txlist_done(&zombies, error, hstatus);
 }
 
 static void