From ac25785328d31f63bd76a03f9cbb76f7f31f2ab0 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin Date: Tue, 17 May 2016 09:22:15 -0400 Subject: [PATCH] LU-6215 lnet: split struct ib_send_wr In v4.4 Linux kernel split up struct ib_send_wr so that all non-trivial verbs use their own structure which embeds struct ib_send_wr. Linux-commit: e622f2f4ad2142d2a613a57fb85f8cf737935ef5 Change-Id: Iea2599ea57a576a4c061841be47a989aba097cc6 Signed-off-by: Dmitry Eremin Reviewed-on: http://review.whamcloud.com/19168 Reviewed-by: Doug Oucharek Reviewed-by: James Simmons Reviewed-by: Li Dongyang Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/autoconf/lustre-lnet.m4 | 68 +++++++++++++++++++++++--------------- lnet/klnds/o2iblnd/o2iblnd.c | 36 +++++++++++--------- lnet/klnds/o2iblnd/o2iblnd.h | 14 +++++--- lnet/klnds/o2iblnd/o2iblnd_cb.c | 73 ++++++++++++++++++++--------------------- 4 files changed, 108 insertions(+), 83 deletions(-) diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 221feda..05b47db 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -329,8 +329,8 @@ AC_SUBST(O2IBLND) AC_SUBST(O2IBPATH) AC_SUBST(ENABLEO2IB) -# In RHEL 6.2, rdma_create_id() takes the queue-pair type as a fourth argument AS_IF([test $ENABLEO2IB != "no"], [ + # In RHEL 6.2, rdma_create_id() takes the queue-pair type as a fourth argument LB_CHECK_COMPILE([if 'rdma_create_id' wants four args], rdma_create_id_4args, [ #ifdef HAVE_COMPAT_RDMA @@ -349,15 +349,32 @@ AS_IF([test $ENABLEO2IB != "no"], [ AC_DEFINE(HAVE_RDMA_CREATE_ID_4ARG, 1, [rdma_create_id wants 4 args]) ]) -]) -# -# 4.2 introduced struct ib_cq_init_attr which is used -# by ib_create_cq(). Note some OFED stacks only keep -# their headers in sync with latest kernels but not -# the functionality which means for infiniband testing -# we need to always test functionality testings. 
-# -AS_IF([test $ENABLEO2IB != "no"], [ + # 4.4 added network namespace parameter for rdma_create_id() + LB_CHECK_COMPILE([if 'rdma_create_id' wants five args], + rdma_create_id_5args, [ + #ifdef HAVE_COMPAT_RDMA + #undef PACKAGE_NAME + #undef PACKAGE_TARNAME + #undef PACKAGE_VERSION + #undef PACKAGE_STRING + #undef PACKAGE_BUGREPORT + #undef PACKAGE_URL + #include + #endif + #include + ],[ + rdma_create_id(NULL, NULL, NULL, 0, 0); + ],[ + AC_DEFINE(HAVE_RDMA_CREATE_ID_5ARG, 1, + [rdma_create_id wants 5 args]) + ]) + + # 4.2 introduced struct ib_cq_init_attr which is used + # by ib_create_cq(). Note some OFED stacks only keep + # their headers in sync with latest kernels but not + # the functionality which means for infiniband testing + # we need to always test functionality testings. LB_CHECK_COMPILE([if 'struct ib_cq_init_attr' is used], ib_cq_init_attr, [ #ifdef HAVE_COMPAT_RDMA @@ -378,10 +395,8 @@ AS_IF([test $ENABLEO2IB != "no"], [ AC_DEFINE(HAVE_IB_CQ_INIT_ATTR, 1, [struct ib_cq_init_attr is used by ib_create_cq]) ]) -]) -# 4.3 removed ib_alloc_fast_reg_mr() -AS_IF([test $ENABLEO2IB != "no"], [ + # 4.3 removed ib_alloc_fast_reg_mr() LB_CHECK_COMPILE([if 'ib_alloc_fast_reg_mr' exists], ib_alloc_fast_reg_mr, [ #ifdef HAVE_COMPAT_RDMA @@ -400,12 +415,13 @@ AS_IF([test $ENABLEO2IB != "no"], [ AC_DEFINE(HAVE_IB_ALLOC_FAST_REG_MR, 1, [ib_alloc_fast_reg_mr is defined]) ]) -]) -# 4.4 added network namespace parameter for rdma_create_id() -AS_IF([test $ENABLEO2IB != "no"], [ - LB_CHECK_COMPILE([if 'rdma_create_id' wants five args], - rdma_create_id_5args, [ + # In v4.4 Linux kernel, + # commit e622f2f4ad2142d2a613a57fb85f8cf737935ef5 + # split up struct ib_send_wr so that all non-trivial verbs + # use their own structure which embeds struct ib_send_wr. 
+ LB_CHECK_COMPILE([if 'struct ib_rdma_wr' is defined], + ib_rdma_wr, [ #ifdef HAVE_COMPAT_RDMA #undef PACKAGE_NAME #undef PACKAGE_TARNAME @@ -415,17 +431,17 @@ AS_IF([test $ENABLEO2IB != "no"], [ #undef PACKAGE_URL #include #endif - #include + #include ],[ - rdma_create_id(NULL, NULL, NULL, 0, 0); + struct ib_rdma_wr *wr __attribute__ ((unused)); + + wr = rdma_wr(NULL); ],[ - AC_DEFINE(HAVE_RDMA_CREATE_ID_5ARG, 1, - [rdma_create_id wants 5 args]) + AC_DEFINE(HAVE_IB_RDMA_WR, 1, + [struct ib_rdma_wr is defined]) ]) -]) -# new fast registration API introduced in 4.4 -AS_IF([test $ENABLEO2IB != "no"], [ + # new fast registration API introduced in 4.4 LB_CHECK_COMPILE([if 'ib_map_mr_sg' exists], ib_map_mr_sg, [ #ifdef HAVE_COMPAT_RDMA @@ -444,7 +460,7 @@ AS_IF([test $ENABLEO2IB != "no"], [ AC_DEFINE(HAVE_IB_MAP_MR_SG, 1, [ib_map_mr_sg exists]) ]) -]) +]) # ENABLEO2IB != "no" ]) # LN_CONFIG_O2IB # diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index e4bb8b9..92254b8 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1833,7 +1833,7 @@ again: struct ib_reg_wr *wr; int n; #else - struct ib_send_wr *wr; + struct ib_rdma_wr *wr; struct ib_fast_reg_page_list *frpl; #endif struct ib_mr *mr; @@ -1850,14 +1850,15 @@ again: mr = frd->frd_mr; if (!frd->frd_valid) { - struct ib_send_wr *inv_wr; + struct ib_rdma_wr *inv_wr; __u32 key = is_rx ? 
mr->rkey : mr->lkey; inv_wr = &frd->frd_inv_wr; memset(inv_wr, 0, sizeof(*inv_wr)); - inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = IBLND_WID_MR; - inv_wr->ex.invalidate_rkey = key; + + inv_wr->wr.opcode = IB_WR_LOCAL_INV; + inv_wr->wr.wr_id = IBLND_WID_MR; + inv_wr->wr.ex.invalidate_rkey = key; /* Bump the key */ key = ib_inc_rkey(key); @@ -1877,8 +1878,9 @@ again: wr = &frd->frd_fastreg_wr; memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR; - wr->wr.wr_id = IBLND_WID_MR; + wr->wr.wr_id = IBLND_WID_MR; wr->wr.num_sge = 0; wr->wr.send_flags = 0; wr->mr = mr; @@ -1898,16 +1900,18 @@ again: /* Prepare FastReg WR */ wr = &frd->frd_fastreg_wr; memset(wr, 0, sizeof(*wr)); - wr->opcode = IB_WR_FAST_REG_MR; - wr->wr_id = IBLND_WID_MR; - wr->wr.fast_reg.iova_start = iov; - wr->wr.fast_reg.page_list = frpl; - wr->wr.fast_reg.page_list_len = npages; - wr->wr.fast_reg.page_shift = PAGE_SHIFT; - wr->wr.fast_reg.length = nob; - wr->wr.fast_reg.rkey = is_rx ? mr->rkey - : mr->lkey; - wr->wr.fast_reg.access_flags = + + wr->wr.opcode = IB_WR_FAST_REG_MR; + wr->wr.wr_id = IBLND_WID_MR; + + wr->wr.wr.fast_reg.iova_start = iov; + wr->wr.wr.fast_reg.page_list = frpl; + wr->wr.wr.fast_reg.page_list_len = npages; + wr->wr.wr.fast_reg.page_shift = PAGE_SHIFT; + wr->wr.wr.fast_reg.length = nob; + wr->wr.wr.fast_reg.rkey = + is_rx ? 
mr->rkey : mr->lkey; + wr->wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); #endif diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 3e40cc7..5fd1ffe 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -310,14 +310,20 @@ typedef struct cfs_time_t fps_next_retry; } kib_fmr_poolset_t; +#ifndef HAVE_IB_RDMA_WR +struct ib_rdma_wr { + struct ib_send_wr wr; +}; +#endif + struct kib_fast_reg_descriptor { /* For fast registration */ struct list_head frd_list; - struct ib_send_wr frd_inv_wr; + struct ib_rdma_wr frd_inv_wr; #ifdef HAVE_IB_MAP_MR_SG struct ib_reg_wr frd_fastreg_wr; #else - struct ib_send_wr frd_fastreg_wr; - struct ib_fast_reg_page_list *frd_frpl; + struct ib_rdma_wr frd_fastreg_wr; + struct ib_fast_reg_page_list *frd_frpl; #endif struct ib_mr *frd_mr; bool frd_valid; @@ -610,7 +616,7 @@ typedef struct kib_tx /* transmit message */ /* # send work items */ int tx_nwrq; /* send work items... */ - struct ib_send_wr *tx_wrq; + struct ib_rdma_wr *tx_wrq; /* ...and their memory */ struct ib_sge *tx_sge; /* rdma descriptor */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 1917dc4..1dbd518 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -834,33 +834,26 @@ __must_hold(&conn->ibc_lock) rc = -ENETDOWN; } else { struct kib_fast_reg_descriptor *frd = tx->fmr.fmr_frd; - struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1]; - struct ib_send_wr *wrq = tx->tx_wrq; + struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr; + struct ib_send_wr *wr = &tx->tx_wrq[0].wr; if (frd != NULL) { if (!frd->frd_valid) { - wrq = &frd->frd_inv_wr; -#ifdef HAVE_IB_MAP_MR_SG - wrq->next = &frd->frd_fastreg_wr.wr; + wr = &frd->frd_inv_wr.wr; + wr->next = &frd->frd_fastreg_wr.wr; } else { - wrq = &frd->frd_fastreg_wr.wr; + wr = &frd->frd_fastreg_wr.wr; } - frd->frd_fastreg_wr.wr.next = tx->tx_wrq; -#else - wrq->next = 
&frd->frd_fastreg_wr; - } else { - wrq = &frd->frd_fastreg_wr; - } - frd->frd_fastreg_wr.next = tx->tx_wrq; -#endif + frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr; } LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX), "bad wr_id "LPX64", opc %d, flags %d, peer: %s\n", bad->wr_id, bad->opcode, bad->send_flags, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + bad = NULL; - rc = ib_post_send(conn->ibc_cmid->qp, wrq, &bad); + rc = ib_post_send(conn->ibc_cmid->qp, wr, &bad); } conn->ibc_last_send = jiffies; @@ -1030,11 +1023,11 @@ kiblnd_tx_complete (kib_tx_t *tx, int status) static void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) { - kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; - struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - struct ib_mr *mr = hdev->ibh_mrs; + kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; + struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; + struct ib_rdma_wr *wrq; + int nob = offsetof(kib_msg_t, ibm_u) + body_nob; + struct ib_mr *mr = hdev->ibh_mrs; LASSERT(tx->tx_nwrq >= 0); LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); @@ -1047,16 +1040,17 @@ kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) sge->addr = tx->tx_msgaddr; sge->length = nob; - memset(wrq, 0, sizeof(*wrq)); + wrq = &tx->tx_wrq[tx->tx_nwrq]; + memset(wrq, 0, sizeof(*wrq)); - wrq->next = NULL; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_SEND; - wrq->send_flags = IB_SEND_SIGNALED; + wrq->wr.next = NULL; + wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); + wrq->wr.sg_list = sge; + wrq->wr.num_sge = 1; + wrq->wr.opcode = IB_WR_SEND; + wrq->wr.send_flags = IB_SEND_SIGNALED; - tx->tx_nwrq++; + tx->tx_nwrq++; } static int @@ -1066,7 +1060,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, kib_msg_t *ibmsg = tx->tx_msg; kib_rdma_desc_t *srcrd = 
tx->tx_rd; struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_send_wr *wrq = &tx->tx_wrq[0]; + struct ib_rdma_wr *wrq; int rc = resid; int srcidx; int dstidx; @@ -1113,15 +1107,20 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, wrq = &tx->tx_wrq[tx->tx_nwrq]; - wrq->next = wrq + 1; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; + wrq->wr.next = &(wrq + 1)->wr; + wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); + wrq->wr.sg_list = sge; + wrq->wr.num_sge = 1; + wrq->wr.opcode = IB_WR_RDMA_WRITE; + wrq->wr.send_flags = 0; - wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); - wrq->wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx); +#ifdef HAVE_IB_RDMA_WR + wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); + wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx); +#else + wrq->wr.wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); + wrq->wr.wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx); +#endif srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob); dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob); -- 1.8.3.1