Whamcloud - gitweb
LU-7646 lnet: Stop Infinite CON RACE Condition
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index d5d6aec..7c13dd8 100644 (file)
@@ -40,6 +40,8 @@
 
 #include "o2iblnd.h"
 
+#define MAX_CONN_RACES_BEFORE_ABORT 20
+
 static void kiblnd_peer_alive(kib_peer_t *peer);
 static void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error);
 static void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx,
@@ -2401,29 +2403,43 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                              libcfs_nid2str(nid), peer2->ibp_version, version,
                              peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
 
-                        kiblnd_peer_decref(peer);
-                        rej.ibr_why = IBLND_REJECT_CONN_STALE;
-                        goto failed;
-                }
+                       kiblnd_peer_decref(peer);
+                       rej.ibr_why = IBLND_REJECT_CONN_STALE;
+                       goto failed;
+               }
 
-                /* tie-break connection race in favour of the higher NID */
-                if (peer2->ibp_connecting != 0 &&
-                    nid < ni->ni_nid) {
+               /* Tie-break connection race in favour of the higher NID.
+                * If we keep running into a race condition multiple times,
+                * we have to assume that the connection attempt with the
+                * higher NID is stuck in a connecting state and will never
+                * recover.  As such, we pass through this if-block and let
+                * the lower NID connection win so we can move forward.
+                */
+               if (peer2->ibp_connecting != 0 &&
+                   nid < ni->ni_nid && peer2->ibp_races <
+                   MAX_CONN_RACES_BEFORE_ABORT) {
+                       peer2->ibp_races++;
                        write_unlock_irqrestore(g_lock, flags);
 
-                        CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
+                       CDEBUG(D_NET, "Conn race %s\n",
+                              libcfs_nid2str(peer2->ibp_nid));
 
-                        kiblnd_peer_decref(peer);
-                        rej.ibr_why = IBLND_REJECT_CONN_RACE;
-                        goto failed;
-                }
+                       kiblnd_peer_decref(peer);
+                       rej.ibr_why = IBLND_REJECT_CONN_RACE;
+                       goto failed;
+               }
+               if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT)
+                       CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n",
+                               libcfs_nid2str(peer2->ibp_nid),
+                               MAX_CONN_RACES_BEFORE_ABORT);
                /*
                 * passive connection is allowed even this peer is waiting for
                 * reconnection.
                 */
                peer2->ibp_reconnecting = 0;
-                peer2->ibp_accepting++;
-                kiblnd_peer_addref(peer2);
+               peer2->ibp_races = 0;
+               peer2->ibp_accepting++;
+               kiblnd_peer_addref(peer2);
 
                /* Race with kiblnd_launch_tx (active connect) to create peer
                 * so copy validated parameters since we now know what the