Whamcloud - gitweb
LU-15018 o2iblnd: treat cmid->device == NULL as an error 81/44981/4
author Serguei Smirnov <ssmirnov@whamcloud.com>
Fri, 17 Sep 2021 21:06:26 +0000 (14:06 -0700)
committer Oleg Drokin <green@whamcloud.com>
Thu, 6 Jan 2022 22:01:44 +0000 (22:01 +0000)
Even when rdma_bind_addr() succeeds, kiblnd_dev_failover() should
treat cmid->device == NULL as an error, so that it does not later
call kiblnd_set_ni_fatal_on() with a possibly NULL dev->ibd_hdev.

Test-Parameters: trivial
Fixes: 4668283cd1 ("LU-14806 o2iblnd: clear fatal error on successful failover")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: Iefbe030b25d2dc543461cf98afeacd734fd64cf8
Reviewed-on: https://review.whamcloud.com/44981
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/klnds/o2iblnd/o2iblnd.c

index d5c61ab..ae83cee 100644 (file)
@@ -2791,6 +2791,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
        unsigned long       flags;
        int                 rc = 0;
        int                 i;
        unsigned long       flags;
        int                 rc = 0;
        int                 i;
+       bool                set_fatal = true;
 
        LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
                dev->ibd_can_failover ||
 
        LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
                dev->ibd_can_failover ||
@@ -2836,6 +2837,8 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
                CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
                       dev->ibd_ifname, &dev->ibd_ifip,
                       cmid->device, rc);
                CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
                       dev->ibd_ifname, &dev->ibd_ifip,
                       cmid->device, rc);
+               if (!rc && !cmid->device)
+                       set_fatal = false;
                rdma_destroy_id(cmid);
                goto out;
        }
                rdma_destroy_id(cmid);
                goto out;
        }
@@ -2924,11 +2927,13 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
        } else {
                dev->ibd_failed_failover = 0;
 
        } else {
                dev->ibd_failed_failover = 0;
 
-               rcu_read_lock();
-               netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
-               if (netdev && (kiblnd_get_link_status(netdev) == 1))
-                       kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
-               rcu_read_unlock();
+               if (set_fatal) {
+                       rcu_read_lock();
+                       netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
+                       if (netdev && (kiblnd_get_link_status(netdev) == 1))
+                               kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
+                       rcu_read_unlock();
+               }
        }
 
        return rc;
        }
 
        return rc;