From 51b7c05b796290b08439e18c7e3c302ea10b1416 Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Fri, 24 Dec 2010 01:49:21 +0300 Subject: [PATCH] Revert "b=20288 kiblnd_check_conns can deadlock" This reverts commit 9887737999bb2db2bc2f10b77854dee7f471ba62. --- lnet/klnds/o2iblnd/o2iblnd.h | 68 ++++++++++++----------- lnet/klnds/o2iblnd/o2iblnd_cb.c | 117 +++++++++++++++++----------------------- 2 files changed, 81 insertions(+), 104 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index bac486d..db982898 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -553,43 +553,41 @@ typedef struct kib_connvars typedef struct kib_conn { - struct kib_peer *ibc_peer; /* owning peer */ - kib_hca_dev_t *ibc_hdev; /* HCA bound on */ - cfs_list_t ibc_list; /* stash on peer's conn list */ - cfs_list_t ibc_sched_list; /* schedule for attention */ - cfs_list_t ibc_connd_list; /* kiblnd_check_conns only */ - __u16 ibc_version; /* version of connection */ - __u64 ibc_incarnation; /* which instance of the peer */ - cfs_atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_noops_posted; /* # uncompleted NOOPs */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits;/* # ACK/DONE msg credits */ - int ibc_retry_noop; /* need to retry returning credits */ - int ibc_comms_error; /* set on comms error */ - int ibc_nrx:16; /* receive buffers owned */ - int ibc_scheduled:1; /* scheduled for attention */ - int ibc_ready:1; /* CQ callback fired */ - unsigned long ibc_last_send; /* time of last send */ - cfs_list_t ibc_early_rxs; /* rxs completed before ESTABLISHED */ - cfs_list_t ibc_tx_noops; /* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */ - cfs_list_t ibc_tx_queue; /* sends that need a credit */ - cfs_list_t ibc_tx_queue_nocred;/* sends that don't need a credit */ - cfs_list_t ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */ - cfs_list_t ibc_active_txs; /* active tx awaiting completion */ - cfs_spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - - struct rdma_cm_id *ibc_cmid; /* CM id */ - struct ib_cq *ibc_cq; /* completion queue */ - - kib_connvars_t *ibc_connvars; /* in-progress connection state */ + struct kib_peer *ibc_peer; /* owning peer */ + kib_hca_dev_t *ibc_hdev; /* HCA bound on */ + cfs_list_t ibc_list; /* stash on peer's conn list */ + cfs_list_t ibc_sched_list; /* schedule for attention */ + __u16 ibc_version; /* version of connection */ + __u64 ibc_incarnation; /* which instance of the peer */ + cfs_atomic_t ibc_refcount; /* # users */ + int ibc_state; /* what's happening */ + int ibc_nsends_posted; /* # uncompleted sends */ + int ibc_noops_posted; /* # uncompleted NOOPs */ + int ibc_credits; /* # credits I have */ + int ibc_outstanding_credits; /* # credits to return */ + int ibc_reserved_credits;/* # ACK/DONE msg credits */ + int ibc_comms_error; /* set on comms error */ + int ibc_nrx:16; /* receive buffers owned */ + int ibc_scheduled:1; /* scheduled for attention */ + int ibc_ready:1; /* CQ callback fired */ + unsigned long ibc_last_send; /* time of last send */ + cfs_list_t ibc_early_rxs; /* rxs completed before ESTABLISHED */ + cfs_list_t ibc_tx_noops; /* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */ + cfs_list_t ibc_tx_queue; /* sends that need a credit */ + cfs_list_t ibc_tx_queue_nocred;/* sends that don't need a credit */ + cfs_list_t ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */ + cfs_list_t ibc_active_txs; /* active tx awaiting completion */ + cfs_spinlock_t ibc_lock; /* serialise */ + kib_rx_t *ibc_rxs; /* the rx descs */ + kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ + + struct rdma_cm_id *ibc_cmid; /* CM id */ + struct ib_cq *ibc_cq; /* completion queue */ + + kib_connvars_t *ibc_connvars; /* in-progress connection state */ } kib_conn_t; -#define IBLND_CONN_INIT 0 /* being initialised */ +#define IBLND_CONN_INIT 0 /* being intialised */ #define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */ #define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */ #define IBLND_CONN_ESTABLISHED 3 /* connection established */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 35f5b88..431aa90 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -928,7 +928,6 @@ kiblnd_check_sends (kib_conn_t *conn) conn->ibc_reserved_credits--; } - conn->ibc_retry_noop = 0; if (kiblnd_send_noop(conn)) { cfs_spin_unlock(&conn->ibc_lock); @@ -939,8 +938,6 @@ kiblnd_check_sends (kib_conn_t *conn) cfs_spin_lock(&conn->ibc_lock); if (tx != NULL) kiblnd_queue_tx_locked(tx, conn); - else if (kiblnd_send_noop(conn)) /* still... */ - conn->ibc_retry_noop = 1; } kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */ @@ -2911,11 +2908,14 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) } } -static int -kiblnd_check_txs_locked (kib_conn_t *conn, cfs_list_t *txs) +int +kiblnd_check_txs (kib_conn_t *conn, cfs_list_t *txs) { kib_tx_t *tx; cfs_list_t *ttmp; + int timed_out = 0; + + cfs_spin_lock(&conn->ibc_lock); cfs_list_for_each (ttmp, txs) { tx = cfs_list_entry (ttmp, kib_tx_t, tx_list); @@ -2928,40 +2928,41 @@ kiblnd_check_txs_locked (kib_conn_t *conn, cfs_list_t *txs) } if (cfs_time_aftereq (jiffies, tx->tx_deadline)) { + timed_out = 1; CERROR("Timed out tx: %s, %lu seconds\n", kiblnd_queue2str(conn, txs), cfs_duration_sec(jiffies - tx->tx_deadline)); - return 1; + break; } } - return 0; + cfs_spin_unlock(&conn->ibc_lock); + return timed_out; } -static int -kiblnd_conn_timed_out_locked (kib_conn_t *conn) +int +kiblnd_conn_timed_out (kib_conn_t *conn) { - return kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_noops) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_rsrvd) || - kiblnd_check_txs_locked(conn, &conn->ibc_tx_queue_nocred) || - kiblnd_check_txs_locked(conn, &conn->ibc_active_txs); + return kiblnd_check_txs(conn, &conn->ibc_tx_queue) || + kiblnd_check_txs(conn, &conn->ibc_tx_noops) || + kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || + kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) || + kiblnd_check_txs(conn, &conn->ibc_active_txs); } void kiblnd_check_conns (int idx) { - CFS_LIST_HEAD (closes); - CFS_LIST_HEAD (checksends); - cfs_list_t *peers = &kiblnd_data.kib_peers[idx]; - cfs_list_t *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - cfs_list_t *ctmp; - unsigned long flags; + cfs_list_t *peers = &kiblnd_data.kib_peers[idx]; + cfs_list_t *ptmp; + kib_peer_t *peer; + kib_conn_t *conn; + cfs_list_t *ctmp; + unsigned long flags; + again: /* NB. We expect to have a look at all the peers and not find any - * RDMAs to time out, so we just use a shared lock while we + * rdmas to time out, so we just use a shared lock while we * take a look... */ cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -2969,63 +2970,41 @@ kiblnd_check_conns (int idx) peer = cfs_list_entry (ptmp, kib_peer_t, ibp_list); cfs_list_for_each (ctmp, &peer->ibp_conns) { - int timedout; - int retry_noop; - - conn = cfs_list_entry(ctmp, kib_conn_t, ibc_list); + conn = cfs_list_entry (ctmp, kib_conn_t, ibc_list); LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED); - cfs_spin_lock(&conn->ibc_lock); + /* In case we have enough credits to return via a + * NOOP, but there were no non-blocking tx descs + * free to do it last time... */ + kiblnd_check_sends(conn); - retry_noop = conn->ibc_retry_noop; - timedout = kiblnd_conn_timed_out_locked(conn); - if (!retry_noop && !timedout) { - cfs_spin_unlock(&conn->ibc_lock); + if (!kiblnd_conn_timed_out(conn)) continue; - } - if (timedout) { - CERROR("Timed out RDMA with %s (%lu): " - "c: %u, oc: %u, rc: %u\n", - libcfs_nid2str(peer->ibp_nid), - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive), - conn->ibc_credits, - conn->ibc_outstanding_credits, - conn->ibc_reserved_credits); - cfs_list_add(&conn->ibc_connd_list, &closes); - } else { - cfs_list_add(&conn->ibc_connd_list, &checksends); - } - /* +ref for 'closes' or 'checksends' */ - kiblnd_conn_addref(conn); + /* Handle timeout by closing the whole connection. We + * can only be sure RDMA activity has ceased once the + * QP has been modified. */ - cfs_spin_unlock(&conn->ibc_lock); - } - } + kiblnd_conn_addref(conn); /* 1 ref for me... */ - cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); + cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, + flags); - /* Handle timeout by closing the whole - * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. */ - while (!cfs_list_empty(&closes)) { - conn = cfs_list_entry(closes.next, kib_conn_t, ibc_connd_list); - cfs_list_del(&conn->ibc_connd_list); - kiblnd_close_conn(conn, -ETIMEDOUT); - kiblnd_conn_decref(conn); - } + CERROR("Timed out RDMA with %s (%lu)\n", + libcfs_nid2str(peer->ibp_nid), + cfs_duration_sec(cfs_time_current() - + peer->ibp_last_alive)); - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - while (!cfs_list_empty(&checksends)) { - conn = cfs_list_entry(checksends.next, kib_conn_t, ibc_connd_list); - cfs_list_del(&conn->ibc_connd_list); - kiblnd_check_sends(conn); - kiblnd_conn_decref(conn); + kiblnd_close_conn(conn, -ETIMEDOUT); + kiblnd_conn_decref(conn); /* ...until here */ + + /* start again now I've dropped the lock */ + goto again; + } } + + cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); } void -- 1.8.3.1