From abd0ce62e96523193bfc2e2a3f574bc59d6c9f7c Mon Sep 17 00:00:00 2001
From: Serguei Smirnov
Date: Fri, 17 Sep 2021 14:06:26 -0700
Subject: [PATCH] LU-15018 o2iblnd: treat cmid->device == NULL as an error

Even if rdma_bind_addr is successful, kiblnd_dev_failover
should treat cmid->device == NULL as an error in order to
later avoid calling kiblnd_set_ni_fatal_on with possibly
dev->ibd_hdev == NULL.

Test-Parameters: trivial
Fixes: 4668283cd1 ("LU-14806 o2iblnd: clear fatal error on successful failover")
Signed-off-by: Serguei Smirnov
Change-Id: Iefbe030b25d2dc543461cf98afeacd734fd64cf8
Reviewed-on: https://review.whamcloud.com/44981
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Chris Horn
Reviewed-by: Cyril Bordage
Reviewed-by: Oleg Drokin
---
 lnet/klnds/o2iblnd/o2iblnd.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index d5c61ab..ae83cee 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -2791,6 +2791,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 	unsigned long flags;
 	int rc = 0;
 	int i;
+	bool set_fatal = true;
 
 	LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
 		dev->ibd_can_failover ||
@@ -2836,6 +2837,8 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 		CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
 		       dev->ibd_ifname, &dev->ibd_ifip,
 		       cmid->device, rc);
+		if (!rc && !cmid->device)
+			set_fatal = false;
 		rdma_destroy_id(cmid);
 		goto out;
 	}
@@ -2924,11 +2927,13 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
 	} else {
 		dev->ibd_failed_failover = 0;
 
-		rcu_read_lock();
-		netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
-		if (netdev && (kiblnd_get_link_status(netdev) == 1))
-			kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
-		rcu_read_unlock();
+		if (set_fatal) {
+			rcu_read_lock();
+			netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
+			if (netdev && (kiblnd_get_link_status(netdev) == 1))
+				kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
+			rcu_read_unlock();
+		}
 	}
 
 	return rc;
-- 
1.8.3.1