From 94f757bf67d58694201b2434f7879974c7abd622 Mon Sep 17 00:00:00 2001 From: Doug Oucharek Date: Mon, 18 Jan 2016 17:26:08 -0800 Subject: [PATCH] LU-7646 lnet: Stop Infinite CON RACE Condition In current code, when a CON RACE occurs, the passive side will let the node with the higher NID value win the race. We have a field case where a node can have a "stuck" connection which never goes away and is the trigger of a never-ending loop of re-connections. This patch introduces a counter to how many times a connection in a connecting state has been the cause of a CON RACE rejection. After 20 times (constant MAX_CONN_RACES_BEFORE_ABORT), we assume the connection is stuck and let the other side (with lower NID) win. Signed-off-by: Doug Oucharek Change-Id: I32e035806e95868b13c28c42e241b969940a35c9 Reviewed-on: http://review.whamcloud.com/19430 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Amir Shehata Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.h | 2 ++ lnet/klnds/o2iblnd/o2iblnd_cb.c | 44 ++++++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 5fd1ffe..7092c7f 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -748,6 +748,8 @@ typedef struct kib_peer unsigned short ibp_connecting; /* reconnect this peer later */ unsigned short ibp_reconnecting:1; + /* counter of how many times we triggered a conn race */ + unsigned char ibp_races; /* # consecutive reconnection attempts to this peer */ unsigned int ibp_reconnected; /* errno on closing this peer */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index d5d6aec..7c13dd8 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -40,6 +40,8 @@ #include "o2iblnd.h" +#define MAX_CONN_RACES_BEFORE_ABORT 20 + static void kiblnd_peer_alive(kib_peer_t *peer); static void kiblnd_peer_connect_failed(kib_peer_t *peer, int active, int error); static void kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, @@ -2401,29 +2403,43 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) libcfs_nid2str(nid), peer2->ibp_version, version, peer2->ibp_incarnation, reqmsg->ibm_srcstamp); - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } + kiblnd_peer_decref(peer); + rej.ibr_why = IBLND_REJECT_CONN_STALE; + goto failed; + } - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - nid < ni->ni_nid) { + /* Tie-break connection race in favour of the higher NID. + * If we keep running into a race condition multiple times, + * we have to assume that the connection attempt with the + * higher NID is stuck in a connecting state and will never + * recover. As such, we pass through this if-block and let + * the lower NID connection win so we can move forward. + */ + if (peer2->ibp_connecting != 0 && + nid < ni->ni_nid && peer2->ibp_races < + MAX_CONN_RACES_BEFORE_ABORT) { + peer2->ibp_races++; write_unlock_irqrestore(g_lock, flags); - CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid)); + CDEBUG(D_NET, "Conn race %s\n", + libcfs_nid2str(peer2->ibp_nid)); - kiblnd_peer_decref(peer); - rej.ibr_why = IBLND_REJECT_CONN_RACE; - goto failed; - } + kiblnd_peer_decref(peer); + rej.ibr_why = IBLND_REJECT_CONN_RACE; + goto failed; + } + if (peer2->ibp_races >= MAX_CONN_RACES_BEFORE_ABORT) + CNETERR("Conn race %s: unresolved after %d attempts, letting lower NID win\n", + libcfs_nid2str(peer2->ibp_nid), + MAX_CONN_RACES_BEFORE_ABORT); /* * passive connection is allowed even this peer is waiting for * reconnection. */ peer2->ibp_reconnecting = 0; - peer2->ibp_accepting++; - kiblnd_peer_addref(peer2); + peer2->ibp_races = 0; + peer2->ibp_accepting++; + kiblnd_peer_addref(peer2); /* Race with kiblnd_launch_tx (active connect) to create peer * so copy validated parameters since we now know what the -- 1.8.3.1