From: Amir Shehata Date: Tue, 28 Nov 2017 00:13:51 +0000 (-0800) Subject: LU-10129 lnd: set device capabilities X-Git-Tag: 2.10.57~120 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F09%2F30309%2F4;p=fs%2Flustre-release.git LU-10129 lnd: set device capabilities MLX-4, MLX-5 and OPA support different capabilities. Query the device and cache the capabilities of the device for future use. MLX5 can support fast registration and gaps MLX4 and OPA only support FMR Test-Parameters: trivial Signed-off-by: Amir Shehata Change-Id: I70d468f8af52d263139e7b51341bf4b5150b89c1 Reviewed-on: https://review.whamcloud.com/30309 Reviewed-by: Alexey Lyashkov Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Dmitry Eremin Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 745b4c2..e4eb1e8 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1539,16 +1539,19 @@ static int kiblnd_alloc_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) else CERROR("FMRs are not supported\n"); } + fpo->fpo_is_fmr = true; return rc; } static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo, - struct ib_device_attr *dev_attr) + __u32 dev_caps) { struct kib_fast_reg_descriptor *frd, *tmp; int i, rc; + fpo->fpo_is_fmr = false; + INIT_LIST_HEAD(&fpo->fast_reg.fpo_pool_list); fpo->fast_reg.fpo_pool_size = 0; for (i = 0; i < fps->fps_pool_size; i++) { @@ -1580,17 +1583,15 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo, /* * it is expected to get here if this is an MLX-5 card. * MLX-4 cards will always use FMR and MLX-5 cards will - * always use fast_reg. MLX-5 cards should support - * IB_DEVICE_SG_GAPS_REG. If for whatever reason, that's - * not the case, we can't handle communication with - * cards lacking that support. + * always use fast_reg. It turns out that some MLX-5 cards + * (possibly due to older FW versions) do not natively support + * gaps. So we will need to track them here. */ - if (!(dev_attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)) { - rc = -EPROTONOSUPPORT; - goto out_middle; - } frd->frd_mr = ib_alloc_mr(fpo->fpo_hdev->ibh_pd, - IB_MR_TYPE_SG_GAPS, + (dev_caps & + IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT) ? + IB_MR_TYPE_SG_GAPS : + IB_MR_TYPE_MEM_REG, LNET_MAX_PAYLOAD/PAGE_SIZE); #endif if (IS_ERR(frd->frd_mr)) { @@ -1637,62 +1638,25 @@ out: static int kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo) { - struct ib_device_attr *dev_attr; kib_dev_t *dev = fps->fps_net->ibn_dev; kib_fmr_pool_t *fpo; int rc; -#ifndef HAVE_IB_DEVICE_ATTRS - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; -#endif - LIBCFS_CPT_ALLOC(fpo, lnet_cpt_table(), fps->fps_cpt, sizeof(*fpo)); if (!fpo) { - rc = -ENOMEM; - goto out_dev_attr; + return -ENOMEM; } + memset(fpo, 0, sizeof(*fpo)); fpo->fpo_hdev = kiblnd_current_hdev(dev); -#ifdef HAVE_IB_DEVICE_ATTRS - dev_attr = &fpo->fpo_hdev->ibh_ibdev->attrs; -#else - rc = ib_query_device(fpo->fpo_hdev->ibh_ibdev, dev_attr); - if (rc) { - CERROR("Query device failed for %s: %d\n", - fpo->fpo_hdev->ibh_ibdev->name, rc); - goto out_dev_attr; - } -#endif - - /* Check for FMR or FastReg support */ - fpo->fpo_is_fmr = 0; - if (fpo->fpo_hdev->ibh_ibdev->alloc_fmr && - fpo->fpo_hdev->ibh_ibdev->dealloc_fmr && - fpo->fpo_hdev->ibh_ibdev->map_phys_fmr && - fpo->fpo_hdev->ibh_ibdev->unmap_fmr) { - LCONSOLE_INFO("Using FMR for registration\n"); - fpo->fpo_is_fmr = 1; - } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - LCONSOLE_INFO("Using FastReg for registration\n"); - } else { - rc = -ENOSYS; - LCONSOLE_ERROR_MSG(rc, "IB device does not support FMRs nor FastRegs, can't register memory\n"); - goto out_dev_attr; - } - - if (fpo->fpo_is_fmr) + if (dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED) rc = kiblnd_alloc_fmr_pool(fps, fpo); else - rc = kiblnd_alloc_freg_pool(fps, fpo, dev_attr); + rc = kiblnd_alloc_freg_pool(fps, fpo, dev->ibd_dev_caps); if (rc) goto out_fpo; -#ifndef HAVE_IB_DEVICE_ATTRS - kfree(dev_attr); -#endif fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE); fpo->fpo_owner = fps; *pp_fpo = fpo; @@ -1702,12 +1666,6 @@ kiblnd_create_fmr_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t **pp_fpo) out_fpo: kiblnd_hdev_decref(fpo->fpo_hdev); LIBCFS_FREE(fpo, sizeof(*fpo)); - -out_dev_attr: -#ifndef HAVE_IB_DEVICE_ATTRS - kfree(dev_attr); -#endif - return rc; } @@ -2554,45 +2512,67 @@ kiblnd_net_init_pools(kib_net_t *net, struct lnet_ni *ni, __u32 *cpts, static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev) { -#ifndef HAVE_IB_DEVICE_ATTRS - struct ib_device_attr *attr; - int rc; -#endif + struct ib_device_attr *dev_attr; + int rc = 0; - /* It's safe to assume a HCA can handle a page size - * matching that of the native system */ - hdev->ibh_page_shift = PAGE_SHIFT; - hdev->ibh_page_size = 1 << PAGE_SHIFT; - hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); + /* It's safe to assume a HCA can handle a page size + * matching that of the native system */ + hdev->ibh_page_shift = PAGE_SHIFT; + hdev->ibh_page_size = 1 << PAGE_SHIFT; + hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1); -#ifdef HAVE_IB_DEVICE_ATTRS - hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size; -#else - LIBCFS_ALLOC(attr, sizeof(*attr)); - if (attr == NULL) { - CERROR("Out of memory\n"); - return -ENOMEM; - } +#ifndef HAVE_IB_DEVICE_ATTRS + LIBCFS_ALLOC(dev_attr, sizeof(*dev_attr)); + if (dev_attr == NULL) { + CERROR("Out of memory\n"); + return -ENOMEM; + } - rc = ib_query_device(hdev->ibh_ibdev, attr); - if (rc == 0) - hdev->ibh_mr_size = attr->max_mr_size; + rc = ib_query_device(hdev->ibh_ibdev, dev_attr); + if (rc != 0) { + CERROR("Failed to query IB device: %d\n", rc); + goto out_clean_attr; + } +#else + dev_attr = &hdev->ibh_ibdev->attrs; +#endif - LIBCFS_FREE(attr, sizeof(*attr)); + hdev->ibh_mr_size = dev_attr->max_mr_size; - if (rc != 0) { - CERROR("Failed to query IB device: %d\n", rc); - return rc; - } + /* Setup device Memory Registration capabilities */ + if (hdev->ibh_ibdev->alloc_fmr && + hdev->ibh_ibdev->dealloc_fmr && + hdev->ibh_ibdev->map_phys_fmr && + hdev->ibh_ibdev->unmap_fmr) { + LCONSOLE_INFO("Using FMR for registration\n"); + hdev->ibh_dev->ibd_dev_caps |= IBLND_DEV_CAPS_FMR_ENABLED; + } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + LCONSOLE_INFO("Using FastReg for registration\n"); + hdev->ibh_dev->ibd_dev_caps |= IBLND_DEV_CAPS_FASTREG_ENABLED; +#ifndef HAVE_IB_ALLOC_FAST_REG_MR + if (dev_attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG) + hdev->ibh_dev->ibd_dev_caps |= IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT; #endif + } else { + rc = -ENOSYS; + } - if (hdev->ibh_mr_size == ~0ULL) { - hdev->ibh_mr_shift = 64; - return 0; - } + if (rc == 0 && hdev->ibh_mr_size == ~0ULL) + hdev->ibh_mr_shift = 64; + else if (rc != 0) + rc = -EINVAL; - CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); - return -EINVAL; +#ifndef HAVE_IB_DEVICE_ATTRS +out_clean_attr: + LIBCFS_FREE(dev_attr, sizeof(*dev_attr)); +#endif + + if (rc == -ENOSYS) + CERROR("IB device does not support FMRs nor FastRegs, can't " + "register memory: %d\n", rc); + else if (rc == -EINVAL) + CERROR("Invalid mr size: %#llx\n", hdev->ibh_mr_size); + return rc; } #ifdef HAVE_IB_GET_DMA_MR @@ -2629,14 +2609,9 @@ static int kiblnd_hdev_setup_mrs(kib_hca_dev_t *hdev) { struct ib_mr *mr; - int rc; int acflags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; - rc = kiblnd_hdev_get_attr(hdev); - if (rc != 0) - return rc; - mr = ib_get_dma_mr(hdev->ibh_pd, acflags); if (IS_ERR(mr)) { CERROR("Failed ib_get_dma_mr: %ld\n", PTR_ERR(mr)); @@ -2804,16 +2779,16 @@ kiblnd_dev_failover(kib_dev_t *dev) goto out; } -#ifdef HAVE_IB_GET_DMA_MR - rc = kiblnd_hdev_setup_mrs(hdev); + rc = kiblnd_hdev_get_attr(hdev); if (rc != 0) { - CERROR("Can't setup device: %d\n", rc); + CERROR("Can't get device attributes: %d\n", rc); goto out; } -#else - rc = kiblnd_hdev_get_attr(hdev); + +#ifdef HAVE_IB_GET_DMA_MR + rc = kiblnd_hdev_setup_mrs(hdev); if (rc != 0) { - CERROR("Can't get device attributes: %d\n", rc); + CERROR("Can't setup device: %d\n", rc); goto out; } #endif diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 247b62b..9b6e5db 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -89,6 +89,10 @@ #define IBLND_N_SCHED 2 #define IBLND_N_SCHED_HIGH 4 +#define IBLND_DEV_CAPS_FASTREG_ENABLED 0x1 +#define IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT 0x2 +#define IBLND_DEV_CAPS_FMR_ENABLED 0x4 + typedef struct { int *kib_dev_failover; /* HCA failover */ @@ -192,6 +196,7 @@ typedef struct unsigned int ibd_can_failover; struct list_head ibd_nets; struct kib_hca_dev *ibd_hdev; + __u32 ibd_dev_caps; } kib_dev_t; typedef struct kib_hca_dev