From 36054d9942fcf7d980398d2062b5214c4417ac3c Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Wed, 29 Aug 2012 21:24:26 +0800 Subject: [PATCH] LU-1799 o2iblnd: debug patch for o2iblnd IBM reported a kernel panic on their BGQ IO node when loading the ptlrpc module with an o2ib network. The IB interface had both an IPv4 and an IPv6 address. Removing the IPv6 address avoided the crash. I suspect rdma_bind_addr can't associate any RDMA device in this case. This patch checks whether an IB device is attached to the cmid even when the returned value is zero, and it also outputs more information. Test-Parameters: nettypes=o2ib Signed-off-by: Liang Zhen Change-Id: Id44110fcf56b199b1504ab4e6b0157d87bc2d270 Reviewed-on: http://review.whamcloud.com/3815 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Isaac Huang Reviewed-by: Doug Oucharek Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index a3cd78a..35a6132 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -2544,14 +2544,14 @@ kiblnd_dev_need_failover(kib_dev_t *dev) dstaddr.sin_family = AF_INET; rc = rdma_resolve_addr(cmid, (struct sockaddr *)&srcaddr, (struct sockaddr *)&dstaddr, 1); - if (rc != 0) { - CERROR("Failed to bind %s to device: %d\n", - dev->ibd_ifname, rc); + if (rc != 0 || cmid->device == NULL) { + CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n", + dev->ibd_ifname, HIPQUAD(dev->ibd_ifip), + cmid->device, rc); rdma_destroy_id(cmid); return rc; } - LASSERT (cmid->device != NULL); if (dev->ibd_hdev->ibh_ibdev == cmid->device) { /* don't need device failover */ rdma_destroy_id(cmid); @@ -2617,9 +2617,10 @@ kiblnd_dev_failover(kib_dev_t *dev) /* Bind to failover device or port */ rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); - if (rc != 0) { - CERROR("Failed to bind %s to device: %d\n", - dev->ibd_ifname, rc); + if (rc != 0 || cmid->device 
== NULL) { + CERROR("Failed to bind %s:%u.%u.%u.%u to device(%p): %d\n", + dev->ibd_ifname, HIPQUAD(dev->ibd_ifip), + cmid->device, rc); rdma_destroy_id(cmid); goto out; } -- 1.8.3.1