From 3efb7683679ab2d18b4d2b256acd462596324d9c Mon Sep 17 00:00:00 2001 From: Doug Oucharek Date: Mon, 21 Dec 2015 13:37:57 -0800 Subject: [PATCH] LU-5718 o2iblnd: Revert original fix The original fix for this ticket introduced a regression where bit flags could interfere with each other triggering asserts. Also, the focus was on addressing connection races, but the fix should be expanded to include all reconnects. The updated fix is being done under ticket: LU-7569. Signed-off-by: Doug Oucharek Change-Id: I455e43f8a5134f7896ad14c3cd0888b8c08d38d2 Reviewed-on: http://review.whamcloud.com/17699 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Liang Zhen Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.c | 15 +-------------- lnet/klnds/o2iblnd/o2iblnd.h | 9 +++------ lnet/klnds/o2iblnd/o2iblnd_cb.c | 20 ++++++++------------ 3 files changed, 12 insertions(+), 32 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 5576ee8..4454b1c 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -980,22 +980,9 @@ kiblnd_destroy_conn (kib_conn_t *conn) if (conn->ibc_state != IBLND_CONN_INIT) { kib_net_t *net = peer->ibp_ni->ni_data; + kiblnd_peer_decref(peer); rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); - if (conn->ibc_conn_race) { - if (peer->ibp_accepting == 0 && - !list_empty(&peer->ibp_tx_queue)) { - kiblnd_connect_peer(peer); - } else { - rwlock_t *glock = &kiblnd_data.kib_global_lock; - unsigned long flags; - - write_lock_irqsave(glock, flags); - peer->ibp_connecting--; - write_unlock_irqrestore(glock, flags); - } - } - kiblnd_peer_decref(peer); } LIBCFS_FREE(conn, sizeof(*conn)); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 593108e..5f5d10d 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -664,13 +664,11 @@ typedef struct kib_conn /* connections max frags */ __u16 ibc_max_frags; /* receive buffers owned */ - unsigned short ibc_nrx; - /** rejected by connection race */ - unsigned short ibc_conn_race:1; + unsigned int ibc_nrx:16; /* scheduled for attention */ - unsigned short ibc_scheduled:1; + unsigned int ibc_scheduled:1; /* CQ callback fired */ - unsigned short ibc_ready:1; + unsigned int ibc_ready:1; /* time of last send */ unsigned long ibc_last_send; /** link chain for kiblnd_check_conns only */ @@ -1092,7 +1090,6 @@ int kiblnd_translate_mtu(int value); int kiblnd_dev_failover(kib_dev_t *dev); int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid); void kiblnd_destroy_peer (kib_peer_t *peer); -void kiblnd_connect_peer(kib_peer_t *peer); void kiblnd_destroy_dev (kib_dev_t *dev); void kiblnd_unlink_peer_locked (kib_peer_t *peer); kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 606396d..8ef29f3 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1234,7 +1234,7 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid, return rc; } -void +static void kiblnd_connect_peer (kib_peer_t *peer) { struct rdma_cm_id *cmid; @@ -2459,7 +2459,7 @@ kiblnd_reconnect (kib_conn_t *conn, int version, { kib_peer_t *peer = conn->ibc_peer; char *reason; - int retry_now = 0; + int retry = 0; unsigned long flags; LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); @@ -2476,15 +2476,7 @@ kiblnd_reconnect (kib_conn_t *conn, int version, peer->ibp_version != version) && peer->ibp_connecting == 1 && peer->ibp_accepting == 0) { - if (why == IBLND_REJECT_CONN_RACE) { - /* don't reconnect immediately, intensive reconnecting - * may consume a lot of memory. kiblnd_destroy_conn - * will reconnect after releasing all resources of - * this connection */ - conn->ibc_conn_race = 1; - } else { - retry_now = 1; - } + retry = 1; peer->ibp_connecting++; peer->ibp_version = version; peer->ibp_incarnation = incarnation; @@ -2492,7 +2484,7 @@ kiblnd_reconnect (kib_conn_t *conn, int version, write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - if (!retry_now) + if (!retry) return; switch (why) { @@ -2536,6 +2528,10 @@ kiblnd_reconnect (kib_conn_t *conn, int version, reason = "stale"; break; + case IBLND_REJECT_CONN_RACE: + reason = "conn race"; + break; + case IBLND_REJECT_CONN_UNCOMPAT: reason = "version negotiation"; break; -- 1.8.3.1