From: John L. Hammond Date: Fri, 16 Mar 2018 15:20:42 +0000 (-0500) Subject: LU-10800 lnet: Revert "LU-10270 lnet: remove an early rx code" X-Git-Tag: 2.11.0-RC1~2 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=6224bb44d8d10894f1c21921a0224dd8baf0ded0;p=fs%2Flustre-release.git LU-10800 lnet: Revert "LU-10270 lnet: remove an early rx code" This reverts commit c3894ff80fe4b48f2d62ea33ddc54fb5891e6484. Dropping early receives caused pings to be ignored and interacted badly with dynamic discovery. Signed-off-by: John L. Hammond Change-Id: I99a87a8f58ea67c59d5e85b964295472c2e15de4 Reviewed-on: https://review.whamcloud.com/31675 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Amir Shehata Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index d1ea6a2..dc7981d 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -604,6 +604,10 @@ kiblnd_debug_conn (kib_conn_t *conn) conn->ibc_outstanding_credits, conn->ibc_reserved_credits); CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error); + CDEBUG(D_CONSOLE, " early_rxs:\n"); + list_for_each(tmp, &conn->ibc_early_rxs) + kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); + CDEBUG(D_CONSOLE, " tx_noops:\n"); list_for_each(tmp, &conn->ibc_tx_noops) kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); @@ -810,6 +814,7 @@ kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, conn->ibc_max_frags = peer_ni->ibp_max_frags; conn->ibc_queue_depth = peer_ni->ibp_queue_depth; + INIT_LIST_HEAD(&conn->ibc_early_rxs); INIT_LIST_HEAD(&conn->ibc_tx_noops); INIT_LIST_HEAD(&conn->ibc_tx_queue); INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); @@ -999,6 +1004,7 @@ kiblnd_destroy_conn(kib_conn_t *conn) LASSERT (!in_interrupt()); LASSERT (atomic_read(&conn->ibc_refcount) == 0); + LASSERT(list_empty(&conn->ibc_early_rxs)); LASSERT(list_empty(&conn->ibc_tx_noops)); LASSERT(list_empty(&conn->ibc_tx_queue)); LASSERT(list_empty(&conn->ibc_tx_queue_rsrvd)); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 7a465aa..8ac9c45 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -703,6 +703,8 @@ typedef struct kib_conn ktime_t ibc_last_send; /** link chain for kiblnd_check_conns only */ struct list_head ibc_connd_list; + /** rxs completed before ESTABLISHED */ + struct list_head ibc_early_rxs; /** IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */ struct list_head ibc_tx_noops; /* sends that need a credit */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 3323741..cf753dd 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -516,10 +516,21 @@ kiblnd_rx_complete (kib_rx_t *rx, int status, int nob) /* set time last known alive */ kiblnd_peer_alive(conn->ibc_peer); - /* racing with connection establishment/teardown! */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) - goto ignore; + /* racing with connection establishment/teardown! */ + if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { + rwlock_t *g_lock = &kiblnd_data.kib_global_lock; + unsigned long flags; + + write_lock_irqsave(g_lock, flags); + /* must check holding global lock to eliminate race */ + if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { + list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); + write_unlock_irqrestore(g_lock, flags); + return; + } + write_unlock_irqrestore(g_lock, flags); + } kiblnd_handle_rx(rx); return; @@ -2045,6 +2056,29 @@ kiblnd_close_conn(kib_conn_t *conn, int error) } static void +kiblnd_handle_early_rxs(kib_conn_t *conn) +{ + unsigned long flags; + kib_rx_t *rx; + + LASSERT(!in_interrupt()); + LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); + + write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); + while (!list_empty(&conn->ibc_early_rxs)) { + rx = list_entry(conn->ibc_early_rxs.next, + kib_rx_t, rx_list); + list_del(&rx->rx_list); + write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); + + kiblnd_handle_rx(rx); + + write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); + } + write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); +} + +static void kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs) { struct list_head zombies = LIST_HEAD_INIT(zombies); @@ -2101,6 +2135,8 @@ kiblnd_finalise_conn (kib_conn_t *conn) kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); kiblnd_abort_txs(conn, &conn->ibc_active_txs); + + kiblnd_handle_early_rxs(conn); } static void @@ -2257,6 +2293,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) kiblnd_check_sends_locked(conn); spin_unlock(&conn->ibc_lock); + /* schedule blocked rxs */ + kiblnd_handle_early_rxs(conn); kiblnd_conn_decref(conn); }