From: Liang Zhen Date: Tue, 25 Aug 2015 16:25:34 +0000 (-0400) Subject: LU-5718 o2iblnd: avoid intensive reconnecting X-Git-Tag: 2.7.60~43 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F00%2F14600%2F6;p=fs%2Flustre-release.git LU-5718 o2iblnd: avoid intensive reconnecting When there is a connection race between two nodes and one side of the connection is rejected by the remote side, o2iblnd reconnects immediately. This can generate a lot of memory pressure and even cause OOM if the remote side is slow and cannot complete the connection request in a short time. This patch resolves the issue by reconnecting only after the rejected connection has been destroyed by connd, so there is no more than one zombie connection for each peer. Signed-off-by: Liang Zhen Change-Id: I78d3b00be70231d576572832b9b0fba2df3d3c12 Reviewed-on: http://review.whamcloud.com/14600 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 3adff1f..2a21e65 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -976,9 +976,22 @@ kiblnd_destroy_conn (kib_conn_t *conn) if (conn->ibc_state != IBLND_CONN_INIT) { kib_net_t *net = peer->ibp_ni->ni_data; - kiblnd_peer_decref(peer); rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); + if (conn->ibc_conn_race) { + if (peer->ibp_accepting == 0 && + !list_empty(&peer->ibp_tx_queue)) { + kiblnd_connect_peer(peer); + } else { + rwlock_t *glock = &kiblnd_data.kib_global_lock; + unsigned long flags; + + write_lock_irqsave(glock, flags); + peer->ibp_connecting--; + write_unlock_irqrestore(glock, flags); + } + } + kiblnd_peer_decref(peer); } LIBCFS_FREE(conn, sizeof(*conn)); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index c30aa21..4e8b15b 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -686,11 +686,13 @@ typedef struct 
kib_conn /* set on comms error */ int ibc_comms_error; /* receive buffers owned */ - unsigned int ibc_nrx:16; + unsigned short ibc_nrx; + /** rejected by connection race */ + unsigned short ibc_conn_race:1; /* scheduled for attention */ - unsigned int ibc_scheduled:1; + unsigned short ibc_scheduled:1; /* CQ callback fired */ - unsigned int ibc_ready:1; + unsigned short ibc_ready:1; /* time of last send */ unsigned long ibc_last_send; /** link chain for kiblnd_check_conns only */ @@ -1113,6 +1115,7 @@ int kiblnd_translate_mtu(int value); int kiblnd_dev_failover(kib_dev_t *dev); int kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid); void kiblnd_destroy_peer (kib_peer_t *peer); +void kiblnd_connect_peer(kib_peer_t *peer); void kiblnd_destroy_dev (kib_dev_t *dev); void kiblnd_unlink_peer_locked (kib_peer_t *peer); kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index ce7c601..0dd9cdf 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1284,7 +1284,7 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, return rc; } -static void +void kiblnd_connect_peer (kib_peer_t *peer) { struct rdma_cm_id *cmid; @@ -2473,10 +2473,10 @@ static void kiblnd_reconnect (kib_conn_t *conn, int version, __u64 incarnation, int why, kib_connparams_t *cp) { - kib_peer_t *peer = conn->ibc_peer; - char *reason; - int retry = 0; - unsigned long flags; + kib_peer_t *peer = conn->ibc_peer; + char *reason; + int retry_now = 0; + unsigned long flags; LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */ @@ -2492,7 +2492,15 @@ kiblnd_reconnect (kib_conn_t *conn, int version, peer->ibp_version != version) && peer->ibp_connecting == 1 && peer->ibp_accepting == 0) { - retry = 1; + if (why == IBLND_REJECT_CONN_RACE) { + /* don't reconnect immediately, intensive reconnecting + * may consume a lot of 
memory. kiblnd_destroy_conn + * will reconnect after releasing all resources of + * this connection */ + conn->ibc_conn_race = 1; + } else { + retry_now = 1; + } peer->ibp_connecting++; peer->ibp_version = version; @@ -2501,7 +2509,7 @@ kiblnd_reconnect (kib_conn_t *conn, int version, write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (!retry) + if (!retry_now) return; switch (why) { @@ -2513,10 +2521,6 @@ kiblnd_reconnect (kib_conn_t *conn, int version, reason = "stale"; break; - case IBLND_REJECT_CONN_RACE: - reason = "conn race"; - break; - case IBLND_REJECT_CONN_UNCOMPAT: reason = "version negotiation"; break;