From ce37c38691196075863eff6bb3ac9c6277e83f74 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Mon, 27 Nov 2017 17:52:20 -0800 Subject: [PATCH] LU-10213 lnd: calculate qp max_send_wrs properly The maximum in-flight transfers cannot exceed the negotiated queue depth. Instead of calculating the max_send_wrs to be the negotiated number of frags * concurrent sends, it should be the negotiated number of frags * queue depth. If that value is too large for successful qp creation then we reduce the queue depth in a loop until we successfully create the qp or the queue depth dips below 2. Due to the queue depth negotiation protocol it is guaranteed that the queue depth on both the active and the passive sides will match. This change resolves the discrepancy created by the previous code which reduces max_send_wr by a quarter. That could lead to: mlx5_ib_post_send:4184:(pid 26272): Failed to prepare WQE when the o2iblnd transfers a message that requires more WRs than the maximum that has been allocated. Test-Parameters: trivial Lustre-change: https://review.whamcloud.com/30310 Lustre-commit: 017d328fa832697533e4e032fe9a9213ea105320 Signed-off-by: Alexey Lyashkov Signed-off-by: Amir Shehata Change-Id: I88f96f950bf4c0a8efd4df812d44e5e20d5907dc Reviewed-by: Alexey Lyashkov Reviewed-by: Dmitry Eremin Reviewed-by: Doug Oucharek Reviewed-by: James Simmons Signed-off-by: Minh Diep Reviewed-on: https://review.whamcloud.com/33975 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.c | 31 ++++++++++++++++++++++++------- lnet/klnds/o2iblnd/o2iblnd.h | 3 --- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 110b6e6..dfa7f9f 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -728,6 +728,19 @@ kiblnd_get_scheduler(int cpt) return NULL; } +static unsigned int kiblnd_send_wrs(struct kib_conn *conn) +{ + /* + * One WR for the 
LNet message + * And ibc_max_frags for the transfer WRs + */ + unsigned int ret = 1 + conn->ibc_max_frags; + + /* account for a maximum of ibc_queue_depth in-flight transfers */ + ret *= conn->ibc_queue_depth; + return ret; +} + kib_conn_t * kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, int state, int version) @@ -881,8 +894,6 @@ kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, init_qp_attr->event_handler = kiblnd_qp_event; init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn); - init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn); init_qp_attr->cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge; init_qp_attr->cap.max_recv_sge = 1; init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; @@ -893,11 +904,14 @@ kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, conn->ibc_sched = sched; do { + init_qp_attr->cap.max_send_wr = kiblnd_send_wrs(conn); + init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn); + rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr); - if (!rc || init_qp_attr->cap.max_send_wr < 16) + if (!rc || conn->ibc_queue_depth < 2) break; - init_qp_attr->cap.max_send_wr -= init_qp_attr->cap.max_send_wr / 4; + conn->ibc_queue_depth--; } while (rc); if (rc) { @@ -910,9 +924,12 @@ kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, goto failed_2; } - if (init_qp_attr->cap.max_send_wr != IBLND_SEND_WRS(conn)) - CDEBUG(D_NET, "original send wr %d, created with %d\n", - IBLND_SEND_WRS(conn), init_qp_attr->cap.max_send_wr); + if (conn->ibc_queue_depth != peer_ni->ibp_queue_depth) + CWARN("peer %s - queue depth reduced from %u to %u" + " to allow for qp creation\n", + libcfs_nid2str(peer_ni->ibp_nid), + peer_ni->ibp_queue_depth, + conn->ibc_queue_depth); LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index cc3683a..e60a92d 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ 
b/lnet/klnds/o2iblnd/o2iblnd.h @@ -161,9 +161,6 @@ extern kib_tunables_t kiblnd_tunables; /* WRs and CQEs (per connection) */ #define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) -#define IBLND_SEND_WRS(c) \ - ((c->ibc_max_frags + 1) * kiblnd_concurrent_sends(c->ibc_version, \ - c->ibc_peer->ibp_ni)) /* 2 = LNet msg + Transfer chain */ #define IBLND_CQ_ENTRIES(c) \ -- 1.8.3.1