From 1f7e6dd796f7e8e6ca9f55b9c6dc54efb69a5e34 Mon Sep 17 00:00:00 2001 From: Isaac Huang Date: Mon, 29 Nov 2010 00:08:24 -0700 Subject: [PATCH] b=23575 O2iblnd credit deadlock regression This fixed a regression of bug 14425. i=liang --- lnet/klnds/o2iblnd/o2iblnd.c | 6 ++++++ lnet/klnds/o2iblnd/o2iblnd.h | 23 +++++++++++++++++------ lnet/klnds/o2iblnd/o2iblnd_cb.c | 31 +++++++++++++++++++++++++------ 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 216dbff..4959aba 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -623,6 +623,10 @@ kiblnd_debug_conn (kib_conn_t *conn) cfs_list_for_each(tmp, &conn->ibc_early_rxs) kiblnd_debug_rx(cfs_list_entry(tmp, kib_rx_t, rx_list)); + CDEBUG(D_CONSOLE, " tx_noops:\n"); + cfs_list_for_each(tmp, &conn->ibc_tx_noops) + kiblnd_debug_tx(cfs_list_entry(tmp, kib_tx_t, tx_list)); + CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); cfs_list_for_each(tmp, &conn->ibc_tx_queue_nocred) kiblnd_debug_tx(cfs_list_entry(tmp, kib_tx_t, tx_list)); @@ -729,6 +733,7 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, conn->ibc_cmid = cmid; CFS_INIT_LIST_HEAD(&conn->ibc_early_rxs); + CFS_INIT_LIST_HEAD(&conn->ibc_tx_noops); CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue); CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); @@ -892,6 +897,7 @@ kiblnd_destroy_conn (kib_conn_t *conn) LASSERT (!cfs_in_interrupt()); LASSERT (cfs_atomic_read(&conn->ibc_refcount) == 0); LASSERT (cfs_list_empty(&conn->ibc_early_rxs)); + LASSERT (cfs_list_empty(&conn->ibc_tx_noops)); LASSERT (cfs_list_empty(&conn->ibc_tx_queue)); LASSERT (cfs_list_empty(&conn->ibc_tx_queue_rsrvd)); LASSERT (cfs_list_empty(&conn->ibc_tx_queue_nocred)); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 491cd70..db982898 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -572,6 +572,7 @@ typedef struct kib_conn int ibc_ready:1; /* CQ callback fired */ unsigned long ibc_last_send; /* time of last send */ cfs_list_t ibc_early_rxs; /* rxs completed before ESTABLISHED */ + cfs_list_t ibc_tx_noops; /* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */ cfs_list_t ibc_tx_queue; /* sends that need a credit */ cfs_list_t ibc_tx_queue_nocred;/* sends that don't need a credit */ cfs_list_t ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */ @@ -727,15 +728,25 @@ kiblnd_send_noop(kib_conn_t *conn) !kiblnd_send_keepalive(conn)) return 0; /* No need to send NOOP */ - if (!cfs_list_empty(&conn->ibc_tx_queue_nocred)) - return 0; /* NOOP can be piggybacked */ + if (IBLND_OOB_CAPABLE(conn->ibc_version)) { + if (!cfs_list_empty(&conn->ibc_tx_queue_nocred)) + return 0; /* NOOP can be piggybacked */ - if (!IBLND_OOB_CAPABLE(conn->ibc_version)) - /* can't piggyback? */ - return cfs_list_empty(&conn->ibc_tx_queue); + /* No tx to piggyback NOOP onto or no credit to send a tx */ + return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 0); + } + + if (!cfs_list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */ + !cfs_list_empty(&conn->ibc_tx_queue_nocred) || /* can be piggybacked */ + conn->ibc_credits == 0) /* no credit */ + return 0; + + if (conn->ibc_credits == 1 && /* last credit reserved for */ + conn->ibc_outstanding_credits == 0) /* giving back credits */ + return 0; /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 0); + return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1); } static inline void diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 577db3e..431aa90 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -328,6 +328,11 @@ kiblnd_handle_rx (kib_rx_t *rx) conn->ibc_credits += credits; + /* This ensures the credit taken by NOOP can be returned */ + if (msg->ibm_type == IBLND_MSG_NOOP && + !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */ + conn->ibc_outstanding_credits++; + cfs_spin_unlock(&conn->ibc_lock); kiblnd_check_sends(conn); } @@ -341,9 +346,14 @@ kiblnd_handle_rx (kib_rx_t *rx) break; case IBLND_MSG_NOOP: - if (IBLND_OOB_CAPABLE(conn->ibc_version)) + if (IBLND_OOB_CAPABLE(conn->ibc_version)) { post_credit = IBLND_POSTRX_NO_CREDIT; - else + break; + } + + if (credits != 0) /* credit already posted */ + post_credit = IBLND_POSTRX_NO_CREDIT; + else /* a keepalive NOOP */ post_credit = IBLND_POSTRX_PEER_CREDIT; break; @@ -791,8 +801,8 @@ kiblnd_post_tx_locked (kib_conn_t *conn, kib_tx_t *tx, int credit) } if (credit != 0 && !IBLND_OOB_CAPABLE(ver) && - conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ + conn->ibc_credits == 1 && /* last credit reserved */ + msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */ CDEBUG(D_NET, "%s: not using last credit\n", libcfs_nid2str(peer->ibp_nid)); return -EAGAIN; @@ -939,6 +949,11 @@ kiblnd_check_sends (kib_conn_t *conn) credit = 0; tx = cfs_list_entry(conn->ibc_tx_queue_nocred.next, kib_tx_t, tx_list); + } else if (!cfs_list_empty(&conn->ibc_tx_noops)) { + LASSERT (!IBLND_OOB_CAPABLE(ver)); + credit = 1; + tx = cfs_list_entry(conn->ibc_tx_noops.next, + kib_tx_t, tx_list); } else if (!cfs_list_empty(&conn->ibc_tx_queue)) { credit = 1; tx = cfs_list_entry(conn->ibc_tx_queue.next, @@ -1171,7 +1186,7 @@ kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) if (IBLND_OOB_CAPABLE(conn->ibc_version)) q = &conn->ibc_tx_queue_nocred; else - q = &conn->ibc_tx_queue; + q = &conn->ibc_tx_noops; break; case IBLND_MSG_IMMEDIATE: @@ -1788,6 +1803,7 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error) return; /* already being handled */ if (error == 0 && + cfs_list_empty(&conn->ibc_tx_noops) && cfs_list_empty(&conn->ibc_tx_queue) && cfs_list_empty(&conn->ibc_tx_queue_rsrvd) && cfs_list_empty(&conn->ibc_tx_queue_nocred) && @@ -1795,9 +1811,10 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error) CDEBUG(D_NET, "closing conn to %s\n", libcfs_nid2str(peer->ibp_nid)); } else { - CNETERR("Closing conn to %s: error %d%s%s%s%s\n", + CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n", libcfs_nid2str(peer->ibp_nid), error, cfs_list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", + cfs_list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)", cfs_list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", cfs_list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", cfs_list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); @@ -1921,6 +1938,7 @@ kiblnd_finalise_conn (kib_conn_t *conn) /* Complete all tx descs not waiting for sends to complete. * NB we should be safe from RDMA now that the QP has changed state */ + kiblnd_abort_txs(conn, &conn->ibc_tx_noops); kiblnd_abort_txs(conn, &conn->ibc_tx_queue); kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); @@ -2926,6 +2944,7 @@ int kiblnd_conn_timed_out (kib_conn_t *conn) { return kiblnd_check_txs(conn, &conn->ibc_tx_queue) || + kiblnd_check_txs(conn, &conn->ibc_tx_noops) || kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) || kiblnd_check_txs(conn, &conn->ibc_active_txs); -- 1.8.3.1