From e71f4475f4b6f462f47a480955fe7c81157912c2 Mon Sep 17 00:00:00 2001
From: Wang Shilong
Date: Wed, 20 Jun 2018 12:31:46 +0800
Subject: [PATCH] RM-849 lbuild: add mlnx fix patches

We might need to apply some mlnx patches and maintain them ourselves
before the official release includes them.

[Updates: now we might not need mlnx 4.3, but let's keep the ability...]

Change-Id: Id357d581a602153db65c6c00d6475d01d4761c04
Signed-off-by: Wang Shilong
Reviewed-on: https://review.whamcloud.com/41878
Tested-by: jenkins
Reviewed-by: Andreas Dilger
Tested-by: Andreas Dilger
---
 contrib/lbuild/lbuild                              |  15 +-
 ...-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch |  57 ++++
 ...BACKPORT-mlx5-use-vmap-on-coherent-memory.patch | 301 +++++++++++++++++++++
 3 files changed, 372 insertions(+), 1 deletion(-)
 create mode 100644 contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch
 create mode 100644 contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch

diff --git a/contrib/lbuild/lbuild b/contrib/lbuild/lbuild
index ae01e42..f6b4fda 100755
--- a/contrib/lbuild/lbuild
+++ b/contrib/lbuild/lbuild
@@ -1266,9 +1266,22 @@ build_kernel_ib() {
 	# but switch to building from the SPEC if we need to apply patches
 	if ls ${TOPDIR}/lustre/contrib/patches/ofed/* >/dev/null; then
-		BUILD_TYPE="-bb"
+		BUILD_TYPE="-ba"
 		rpm --define "_topdir ${TOPDIR}" -ivh $SOURCE
 		SOURCE="${TOPDIR}/SPECS/${kib_prefix}.spec"
+		local mlnx_patch_dir=${TOPDIR}/lustre/contrib/patches/mlnx/${ofed_version%%-*}
+		if [ "$ofed_type" = "mlnx" ] && [ -d $mlnx_patch_dir ]; then
+			pushd ${TOPDIR}/SOURCES/
+			local mlnx_tar_file=$(ls mlnx-ofa_kernel*.tgz)
+			tar -xvf $mlnx_tar_file
+			cp $mlnx_patch_dir/*.patch ${mlnx_tar_file%.*}/backports ||
+				fatal 1 "failed to copy mlnx backport patches"
+			rm -f $mlnx_tar_file
+			tar -czvf $mlnx_tar_file ${mlnx_tar_file%.*} ||
+				fatal 1 "failed to retar mlnx package"
+			rm -f ${filename%%-*}
+			popd
+		fi
 		local file ed_fragment1 ed_fragment2 n=1
 		for file in $(ls ${TOPDIR}/lustre/contrib/patches/ofed/*.patch 2>/dev/null); do
 			ed_fragment1="$ed_fragment1
diff --git a/contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch b/contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch
new file mode 100644
index 0000000..dd171ba
--- /dev/null
+++ b/contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch
@@ -0,0 +1,57 @@
+diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
+index dc91a1c..b592bdb 100644
+--- a/drivers/infiniband/hw/mlx4/qp.c
++++ b/drivers/infiniband/hw/mlx4/qp.c
+@@ -994,7 +994,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ {
+ 	int qpn;
+ 	int err;
+-	struct ib_qp_cap backup_cap;
+ 	struct mlx4_ib_sqp *sqp = NULL;
+ 	struct mlx4_ib_qp *qp;
+ 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+@@ -1201,9 +1200,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ 			goto err;
+ 		}
+ 
+-		memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
+ 		err = set_kernel_sq_size(dev, &init_attr->cap,
+-					 qp_type, qp, true);
++					 qp_type, qp, false);
+ 		if (err)
+ 			goto err;
+ 
+@@ -1230,28 +1228,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ 			qp->bf.uar = &dev->priv_uar;
+ 		}
+ 
+-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
++		if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+ #ifdef HAVE_MEMALLOC_NOIO_SAVE
+-				   &qp->buf)) {
++				   PAGE_SIZE * 2, &qp->buf)) {
+ #else
+-				   &qp->buf, gfp)) {
++				   PAGE_SIZE * 2, &qp->buf, gfp)) {
+ #endif
+-			memcpy(&init_attr->cap, &backup_cap,
+-			       sizeof(backup_cap));
+-			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
+-						 qp, false);
+-			if (err)
+-				goto err_db;
+-
+-			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+-#ifdef HAVE_MEMALLOC_NOIO_SAVE
+-					   PAGE_SIZE * 2, &qp->buf)) {
+-#else
+-					   PAGE_SIZE * 2, &qp->buf, gfp)) {
+-#endif
+-				err = -ENOMEM;
+-				goto err_db;
+-			}
++			err = -ENOMEM;
++			goto err_db;
+ 		}
+ 
+ 		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
diff --git a/contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch b/contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch
new file mode 100644
index 0000000..1b040a7
--- /dev/null
+++ b/contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch
@@ -0,0 +1,301 @@
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index dbef2c4..00a6e23 100644
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -746,7 +746,8 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+ {
+ 	int err;
+ 
+-	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
++	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
++			     PAGE_SIZE * 2, &buf->buf);
+ 	if (err)
+ 		return err;
+ 
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 19ad123..84df339 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1037,7 +1037,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+ 	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ 	base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ 
+-	err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
++	err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, PAGE_SIZE * 2, &qp->buf);
+ 	if (err) {
+ 		mlx5_ib_dbg(dev, "err %d\n", err);
+ 		return err;
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index 498efd8..7641661 100644
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -177,7 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+ 		return err;
+ 	}
+ 
+-	if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
++	if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ 		mlx5_ib_dbg(dev, "buf alloc failed\n");
+ 		err = -ENOMEM;
+ 		goto err_db;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+index 83ec846..34d041a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+@@ -49,7 +49,9 @@ struct mlx5_db_pgdir {
+ };
+ 
+ /* Handling for queue buffers -- we allocate a bunch of memory and
+- * register it in a memory region at HCA virtual address 0.
++ * register it in a memory region at HCA virtual address 0. If the
++ * requested size is > max_direct, we split the allocation into
++ * multiple pages, so we don't require too much contiguous memory.
+  */
+ 
+ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+@@ -78,39 +80,107 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+ 	return cpu_handle;
+ }
+ 
+-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
++int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, int max_direct,
+ 			struct mlx5_buf *buf, int node)
+ {
+ 	dma_addr_t t;
++/* ARM arch does not allow vmap(virt_to_page(x)) operations.
++ * In this case, we must allocate 1 contiguous DMA buffer.
++ */
++#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
++	max_direct = size;
++#endif
+ 
+ 	buf->size = size;
+-	buf->npages = 1;
+-	buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
+-	buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
+-							&t, node);
+-	if (!buf->direct.buf)
+-		return -ENOMEM;
+-
+-	buf->direct.map = t;
++	if (size <= max_direct) {
++		buf->nbufs = 1;
++		buf->npages = 1;
++		buf->page_shift = (u8)get_order(size) + PAGE_SHIFT;
++		buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size,
++								&t, node);
++		if (!buf->direct.buf)
++			return -ENOMEM;
++
++		buf->direct.map = t;
++
++		while (t & ((1 << buf->page_shift) - 1)) {
++			--buf->page_shift;
++			buf->npages *= 2;
++		}
++	} else {
++		int i;
++
++		buf->direct.buf = NULL;
++		buf->direct.map = 0;
++		buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
++		buf->npages = buf->nbufs;
++		buf->page_shift = PAGE_SHIFT;
++		buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
++					 GFP_KERNEL);
++		if (!buf->page_list)
++			return -ENOMEM;
++
++		for (i = 0; i < buf->nbufs; i++) {
++			buf->page_list[i].buf =
++				mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
++							      &t, node);
++			if (!buf->page_list[i].buf)
++				goto err_free;
++
++			buf->page_list[i].map = t;
++		}
+ 
+-	while (t & ((1 << buf->page_shift) - 1)) {
+-		--buf->page_shift;
+-		buf->npages *= 2;
++		if (BITS_PER_LONG == 64) {
++			struct page **pages;
++			pages = kmalloc(sizeof(*pages) * (buf->nbufs + 1),
++					GFP_KERNEL);
++			if (!pages)
++				goto err_free;
++			for (i = 0; i < buf->nbufs; i++)
++				pages[i] = virt_to_page(buf->page_list[i].buf);
++			pages[buf->nbufs] = pages[0];
++			buf->direct.buf = vmap(pages, buf->nbufs + 1, VM_MAP,
++					       PAGE_KERNEL);
++			kfree(pages);
++			if (!buf->direct.buf)
++				goto err_free;
++		}
+ 	}
+ 
+ 	return 0;
++
++err_free:
++	mlx5_buf_free(dev, buf);
++
++	return -ENOMEM;
+ }
+ 
+-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
++int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
++		   struct mlx5_buf *buf)
+ {
+-	return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
++	return mlx5_buf_alloc_node(dev, size, max_direct,
++				   buf, dev->priv.numa_node);
+ }
+ EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+ 
+ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+ {
+-	dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
+-			  buf->direct.map);
++	int i;
++
++	if (buf->direct.map)
++		dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
++				  buf->direct.map);
++	else {
++		if (BITS_PER_LONG == 64)
++			vunmap(buf->direct.buf);
++
++		for (i = 0; i < buf->nbufs; i++)
++			if (buf->page_list[i].buf)
++				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
++						  buf->page_list[i].buf,
++						  buf->page_list[i].map);
++		kfree(buf->page_list);
++	}
+ }
+ EXPORT_SYMBOL_GPL(mlx5_buf_free);
+ 
+@@ -289,7 +359,10 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+ 	int i;
+ 
+ 	for (i = 0; i < buf->npages; i++) {
+-		addr = buf->direct.map + (i << buf->page_shift);
++		if (buf->direct.map)
++			addr = buf->direct.map + (i << buf->page_shift);
++		else
++			addr = buf->page_list[i].map;
+ 
+ 		pas[i] = cpu_to_be64(addr);
+ 	}
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 2ae47a1..6747586 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -683,7 +683,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+ 	eq->type = type;
+ 	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+ 	eq->cons_index = 0;
+-	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
++	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
++			     &eq->buf);
+ 	if (err)
+ 		return err;
+ 
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+index 6e65f98..9bde26d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+@@ -74,6 +74,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 		       void *wqc, struct mlx5_wq_cyc *wq,
+ 		       struct mlx5_wq_ctrl *wq_ctrl)
+ {
++	int max_direct = param->linear ? INT_MAX : 0;
+ 	int err;
+ 
+ 	wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride);
+@@ -87,7 +88,8 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 	}
+ 
+ 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+-				  &wq_ctrl->buf, param->buf_numa_node);
++				  max_direct, &wq_ctrl->buf,
++				  param->buf_numa_node);
+ 	if (err) {
+ 		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+ 		goto err_db_free;
+@@ -110,6 +112,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 		      void *qpc, struct mlx5_wq_qp *wq,
+ 		      struct mlx5_wq_ctrl *wq_ctrl)
+ {
++	int max_direct = param->linear ? INT_MAX : 0;
+ 	int err;
+ 
+ 	wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+@@ -125,7 +128,8 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 	}
+ 
+ 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+-				  &wq_ctrl->buf, param->buf_numa_node);
++				  max_direct, &wq_ctrl->buf,
++				  param->buf_numa_node);
+ 	if (err) {
+ 		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+ 		goto err_db_free;
+@@ -191,6 +195,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 		      struct mlx5_wq_ctrl *wq_ctrl)
+ {
+ 	struct mlx5_wqe_srq_next_seg *next_seg;
++	int max_direct = param->linear ? INT_MAX : 0;
+ 	int err;
+ 	int i;
+ 
+@@ -204,7 +209,8 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+ 	}
+ 
+ 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+-				  &wq_ctrl->buf, param->buf_numa_node);
++				  max_direct, &wq_ctrl->buf,
++				  param->buf_numa_node);
+ 	if (err) {
+ 		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+ 		goto err_db_free;
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index bbbb1d9..d22b784 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -395,6 +395,8 @@ struct mlx5_buf_list {
+ 
+ struct mlx5_buf {
+ 	struct mlx5_buf_list	direct;
++	struct mlx5_buf_list	*page_list;
++	int			nbufs;
+ 	int			npages;
+ 	int			size;
+ 	u8			page_shift;
+@@ -1132,7 +1134,11 @@ struct mlx5_hca_vport_context {
+ 
+ static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+ {
++	if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1))
+ 	return buf->direct.buf + offset;
++	else
++		return buf->page_list[offset >> PAGE_SHIFT].buf +
++			(offset & (PAGE_SIZE - 1));
+ }
+ 
+ #define STRUCT_FIELD(header, field)					\
+@@ -1204,9 +1210,10 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
+ void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+ void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
+-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
++int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, int max_direct,
+ 			struct mlx5_buf *buf, int node);
+-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
++int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
++		   struct mlx5_buf *buf);
+ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ 			     struct mlx5_frag_buf *buf, int node);
-- 
1.8.3.1