RM-849 lbuild: add mlnx fix patches
author Wang Shilong <wshilong@ddn.com>
Wed, 20 Jun 2018 04:31:46 +0000 (12:31 +0800)
committer Andreas Dilger <adilger@whamcloud.com>
Sat, 6 Mar 2021 20:51:08 +0000 (20:51 +0000)
We might need to apply some mlnx patches and maintain
them ourselves until an official release includes them.

[Update: we might not need mlnx 4.3 now, but let's keep the ability...]

Change-Id: Id357d581a602153db65c6c00d6475d01d4761c04
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-on: https://review.whamcloud.com/41878
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
contrib/lbuild/lbuild
contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch [new file with mode: 0644]
contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch [new file with mode: 0644]

index ae01e42..f6b4fda 100755 (executable)
@@ -1266,9 +1266,22 @@ build_kernel_ib() {
 
     # but switch to building from the SPEC if we need to apply patches
     if ls ${TOPDIR}/lustre/contrib/patches/ofed/* >/dev/null; then
-        BUILD_TYPE="-bb"
+       BUILD_TYPE="-ba"
         rpm --define "_topdir ${TOPDIR}" -ivh $SOURCE
         SOURCE="${TOPDIR}/SPECS/${kib_prefix}.spec"
+       local mlnx_patch_dir=${TOPDIR}/lustre/contrib/patches/mlnx/${ofed_version%%-*}
+       if [ "$ofed_type" = "mlnx" ] && [ -d $mlnx_patch_dir ]; then
+               pushd ${TOPDIR}/SOURCES/
+               local mlnx_tar_file=$(ls mlnx-ofa_kernel*.tgz)
+               tar -xvf $mlnx_tar_file
+               cp $mlnx_patch_dir/*.patch ${mlnx_tar_file%.*}/backports ||
+                       fatal 1 "failed to copy mlnx backport patches"
+               rm -f $mlnx_tar_file
+               tar -czvf $mlnx_tar_file ${mlnx_tar_file%.*} ||
+                       fatal 1 "failed to retar mlnx package"
+               rm -f ${filename%%-*}
+               popd
+       fi
         local file ed_fragment1 ed_fragment2 n=1
         for file in $(ls ${TOPDIR}/lustre/contrib/patches/ofed/*.patch 2>/dev/null); do
             ed_fragment1="$ed_fragment1
diff --git a/contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch b/contrib/patches/mlnx/4.3/0193-BACKPORT-mlx4-Use-order-1-for-QP-buffers.patch
new file mode 100644 (file)
index 0000000..dd171ba
--- /dev/null
@@ -0,0 +1,57 @@
+diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
+index dc91a1c..b592bdb 100644
+--- a/drivers/infiniband/hw/mlx4/qp.c
++++ b/drivers/infiniband/hw/mlx4/qp.c
+@@ -994,7 +994,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ {
+       int qpn;
+       int err;
+-      struct ib_qp_cap backup_cap;
+       struct mlx4_ib_sqp *sqp = NULL;
+       struct mlx4_ib_qp *qp;
+       enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+@@ -1201,9 +1200,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+                               goto err;
+               }
+-              memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
+               err = set_kernel_sq_size(dev, &init_attr->cap,
+-                                       qp_type, qp, true);
++                                       qp_type, qp, false);
+               if (err)
+                       goto err;
+@@ -1230,28 +1228,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+                       qp->bf.uar = &dev->priv_uar;
+               }
+-              if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
++              if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+ #ifdef HAVE_MEMALLOC_NOIO_SAVE
+-                                 &qp->buf)) {
++                                 PAGE_SIZE * 2, &qp->buf)) {
+ #else
+-                                 &qp->buf, gfp)) {
++                                 PAGE_SIZE * 2, &qp->buf, gfp)) {
+ #endif
+-                      memcpy(&init_attr->cap, &backup_cap,
+-                             sizeof(backup_cap));
+-                      err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
+-                                               qp, false);
+-                      if (err)
+-                              goto err_db;
+-
+-                      if (mlx4_buf_alloc(dev->dev, qp->buf_size,
+-#ifdef HAVE_MEMALLOC_NOIO_SAVE
+-                                         PAGE_SIZE * 2, &qp->buf)) {
+-#else
+-                                         PAGE_SIZE * 2, &qp->buf, gfp)) {
+-#endif
+-                              err = -ENOMEM;
+-                              goto err_db;
+-                      }
++                      err = -ENOMEM;
++                      goto err_db;
+               }
+               err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
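
The mlx4 patch above removes the shrink-and-retry fallback in create_qp_common() and instead passes max_direct = PAGE_SIZE * 2 to mlx4_buf_alloc(), so the driver never requests a contiguous DMA buffer above order 1 (two pages); splitting larger QP buffers is left to mlx4_buf_alloc() itself, which this hunk does not show. Below is a minimal userspace sketch of that threshold decision, assuming 4 KiB pages and a local stand-in for the kernel's get_order(); the names and sizes are illustrative, not taken from the patch.

#include <stdio.h>

#define PAGE_SHIFT 12                   /* assumes 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Local stand-in for the kernel's get_order(): smallest n such that
 * (PAGE_SIZE << n) >= size. */
static int get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        const unsigned long max_direct = 2 * PAGE_SIZE; /* the patch's cap */
        const unsigned long sizes[] = { 4096, 8192, 12288, 65536 };

        for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("buf_size %6lu -> order %d, %s\n", sizes[i],
                       get_order(sizes[i]),
                       sizes[i] <= max_direct ? "one contiguous buffer"
                                              : "page-sized chunks");
        return 0;
}

Compiled with any C compiler, this reports order 0 or 1 (at most two contiguous pages) for buffers up to max_direct, and flags anything larger as a candidate for page-sized chunks.
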
diff --git a/contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch b/contrib/patches/mlnx/4.3/0194-BACKPORT-mlx5-use-vmap-on-coherent-memory.patch
new file mode 100644 (file)
index 0000000..1b040a7
--- /dev/null
@@ -0,0 +1,301 @@
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index dbef2c4..00a6e23 100644
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -746,7 +746,8 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+ {
+       int err;
+-      err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
++      err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
++                           PAGE_SIZE * 2, &buf->buf);
+       if (err)
+               return err;
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 19ad123..84df339 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -1037,7 +1037,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
+       qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+       base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+-      err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
++      err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, PAGE_SIZE * 2, &qp->buf);
+       if (err) {
+               mlx5_ib_dbg(dev, "err %d\n", err);
+               return err;
+diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
+index 498efd8..7641661 100644
+--- a/drivers/infiniband/hw/mlx5/srq.c
++++ b/drivers/infiniband/hw/mlx5/srq.c
+@@ -177,7 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+               return err;
+       }
+-      if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
++      if (mlx5_buf_alloc(dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+               mlx5_ib_dbg(dev, "buf alloc failed\n");
+               err = -ENOMEM;
+               goto err_db;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+index 83ec846..34d041a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+@@ -49,7 +49,9 @@ struct mlx5_db_pgdir {
+ };
+ /* Handling for queue buffers -- we allocate a bunch of memory and
+- * register it in a memory region at HCA virtual address 0.
++ * register it in a memory region at HCA virtual address 0.  If the
++ * requested size is > max_direct, we split the allocation into
++ * multiple pages, so we don't require too much contiguous memory.
+  */
+ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+@@ -78,39 +80,107 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
+       return cpu_handle;
+ }
+-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
++int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, int max_direct,
+                       struct mlx5_buf *buf, int node)
+ {
+       dma_addr_t t;
++/* ARM arch does not allow vmap(virt_to_page(x)) operations.
++ * In this case, we must allocate 1 contiguous DMA buffer.
++ */
++#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
++      max_direct = size;
++#endif
+       buf->size = size;
+-      buf->npages       = 1;
+-      buf->page_shift   = (u8)get_order(size) + PAGE_SHIFT;
+-      buf->direct.buf   = mlx5_dma_zalloc_coherent_node(dev, size,
+-                                                        &t, node);
+-      if (!buf->direct.buf)
+-              return -ENOMEM;
+-
+-      buf->direct.map = t;
++      if (size <= max_direct) {
++              buf->nbufs        = 1;
++              buf->npages       = 1;
++              buf->page_shift   = (u8)get_order(size) + PAGE_SHIFT;
++              buf->direct.buf   = mlx5_dma_zalloc_coherent_node(dev, size,
++                                                                &t, node);
++              if (!buf->direct.buf)
++                      return -ENOMEM;
++
++              buf->direct.map = t;
++
++              while (t & ((1 << buf->page_shift) - 1)) {
++                      --buf->page_shift;
++                      buf->npages *= 2;
++              }
++      } else {
++              int i;
++
++              buf->direct.buf  = NULL;
++              buf->direct.map  = 0;
++              buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;
++              buf->npages      = buf->nbufs;
++              buf->page_shift  = PAGE_SHIFT;
++              buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
++                                         GFP_KERNEL);
++              if (!buf->page_list)
++                      return -ENOMEM;
++
++              for (i = 0; i < buf->nbufs; i++) {
++                      buf->page_list[i].buf =
++                              mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
++                                                            &t, node);
++                      if (!buf->page_list[i].buf)
++                              goto err_free;
++
++                      buf->page_list[i].map = t;
++              }
+-      while (t & ((1 << buf->page_shift) - 1)) {
+-              --buf->page_shift;
+-              buf->npages *= 2;
++              if (BITS_PER_LONG == 64) {
++                      struct page **pages;
++                      pages = kmalloc(sizeof(*pages) * (buf->nbufs + 1),
++                                      GFP_KERNEL);
++                      if (!pages)
++                              goto err_free;
++                      for (i = 0; i < buf->nbufs; i++)
++                              pages[i] = virt_to_page(buf->page_list[i].buf);
++                      pages[buf->nbufs] = pages[0];
++                      buf->direct.buf = vmap(pages, buf->nbufs + 1, VM_MAP,
++                                             PAGE_KERNEL);
++                      kfree(pages);
++                      if (!buf->direct.buf)
++                              goto err_free;
++              }
+       }
+       return 0;
++
++err_free:
++      mlx5_buf_free(dev, buf);
++
++      return -ENOMEM;
+ }
+-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf)
++int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
++                 struct mlx5_buf *buf)
+ {
+-      return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node);
++      return mlx5_buf_alloc_node(dev, size, max_direct,
++                                 buf, dev->priv.numa_node);
+ }
+ EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
+ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
+ {
+-      dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
+-                        buf->direct.map);
++      int i;
++
++      if (buf->direct.map)
++              dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
++                                buf->direct.map);
++      else {
++              if (BITS_PER_LONG == 64)
++                      vunmap(buf->direct.buf);
++
++              for (i = 0; i < buf->nbufs; i++)
++                      if (buf->page_list[i].buf)
++                              dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
++                                                buf->page_list[i].buf,
++                                                buf->page_list[i].map);
++              kfree(buf->page_list);
++      }
+ }
+ EXPORT_SYMBOL_GPL(mlx5_buf_free);
+@@ -289,7 +359,10 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
+       int i;
+       for (i = 0; i < buf->npages; i++) {
+-              addr = buf->direct.map + (i << buf->page_shift);
++              if (buf->direct.map)
++                      addr = buf->direct.map + (i << buf->page_shift);
++              else
++                      addr = buf->page_list[i].map;
+               pas[i] = cpu_to_be64(addr);
+       }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+index 2ae47a1..6747586 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -683,7 +683,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
+       eq->type = type;
+       eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
+       eq->cons_index = 0;
+-      err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
++      err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
++                           &eq->buf);
+       if (err)
+               return err;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+index 6e65f98..9bde26d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+@@ -74,6 +74,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+                      void *wqc, struct mlx5_wq_cyc *wq,
+                      struct mlx5_wq_ctrl *wq_ctrl)
+ {
++      int max_direct = param->linear ? INT_MAX : 0;
+       int err;
+       wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride);
+@@ -87,7 +88,8 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+       }
+       err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+-                                &wq_ctrl->buf, param->buf_numa_node);
++                                max_direct, &wq_ctrl->buf,
++                                param->buf_numa_node);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+               goto err_db_free;
+@@ -110,6 +112,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+                     void *qpc, struct mlx5_wq_qp *wq,
+                     struct mlx5_wq_ctrl *wq_ctrl)
+ {
++      int max_direct = param->linear ? INT_MAX : 0;
+       int err;
+       wq->rq.log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+@@ -125,7 +128,8 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+       }
+       err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+-                                &wq_ctrl->buf, param->buf_numa_node);
++                                max_direct, &wq_ctrl->buf,
++                                param->buf_numa_node);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+               goto err_db_free;
+@@ -191,6 +195,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+                     struct mlx5_wq_ctrl *wq_ctrl)
+ {
+       struct mlx5_wqe_srq_next_seg *next_seg;
++      int max_direct = param->linear ? INT_MAX : 0;
+       int err;
+       int i;
+@@ -204,7 +209,8 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+       }
+       err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+-                                &wq_ctrl->buf, param->buf_numa_node);
++                                max_direct, &wq_ctrl->buf,
++                                param->buf_numa_node);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+               goto err_db_free;
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index bbbb1d9..d22b784 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -395,6 +395,8 @@ struct mlx5_buf_list {
+ struct mlx5_buf {
+       struct mlx5_buf_list    direct;
++      struct mlx5_buf_list   *page_list;
++      int                     nbufs;
+       int                     npages;
+       int                     size;
+       u8                      page_shift;
+@@ -1132,7 +1134,11 @@ struct mlx5_hca_vport_context {
+ static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
+ {
++      if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1))
+               return buf->direct.buf + offset;
++      else
++              return buf->page_list[offset >> PAGE_SHIFT].buf +
++                      (offset & (PAGE_SIZE - 1));
+ }
+ #define STRUCT_FIELD(header, field) \
+@@ -1204,9 +1210,10 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+ void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
+ void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+ void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
+-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
++int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, int max_direct,
+                       struct mlx5_buf *buf, int node);
+-int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
++int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
++                 struct mlx5_buf *buf);
+ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+                            struct mlx5_frag_buf *buf, int node);
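
The core of the mlx5 patch is the fallback path in mlx5_buf_alloc_node(): when size exceeds max_direct, the buffer is assembled from individual page-sized coherent allocations, and mlx5_buf_offset() resolves a logical offset through the page_list table (on 64-bit the chunks are additionally vmap()ed into one virtual range). Below is a minimal userspace sketch of that index-table lookup, assuming 4 KiB pages and plain calloc() chunks in place of DMA-coherent pages; vmap() is kernel-only and not reproduced, and all names here are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SHIFT 12                   /* assumes 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

struct buf_list { void *buf; };         /* stand-in for struct mlx5_buf_list */

struct split_buf {
        struct buf_list *page_list;     /* one entry per page-sized chunk */
        int nbufs;
};

/* Mirror of the patch's fallback math: one chunk per page, rounded up. */
static int split_buf_alloc(struct split_buf *b, size_t size)
{
        b->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
        b->page_list = calloc(b->nbufs, sizeof(*b->page_list));
        if (!b->page_list)
                return -1;
        for (int i = 0; i < b->nbufs; i++)
                if (!(b->page_list[i].buf = calloc(1, PAGE_SIZE)))
                        return -1;      /* cleanup elided for brevity */
        return 0;
}

/* Userspace analog of mlx5_buf_offset()'s page_list branch. */
static void *split_buf_offset(struct split_buf *b, size_t offset)
{
        return (char *)b->page_list[offset >> PAGE_SHIFT].buf +
               (offset & (PAGE_SIZE - 1));
}

int main(void)
{
        struct split_buf b;

        if (split_buf_alloc(&b, 3 * PAGE_SIZE))
                return 1;

        /* Write through the offset helper, then read the page directly to
         * show both views address the same byte. */
        strcpy(split_buf_offset(&b, PAGE_SIZE + 100), "wqe");
        printf("nbufs=%d, page 1 at +100: %s\n", b.nbufs,
               (char *)b.page_list[1].buf + 100);
        return 0;
}

One detail the sketch omits: the patch maps pages[nbufs] = pages[0] as an extra trailing page in the vmap() range, presumably so ring-buffer accesses that run past the end of the buffer wrap back to the start without an explicit bounds check.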