Whamcloud - gitweb
LU-10015 o2iblnd: fix race at kiblnd_connect_peer 34/29134/6
author: Alexander Boyko <alexander.boyko@seagate.com>
Thu, 21 Sep 2017 13:13:27 +0000 (16:13 +0300)
committer: Oleg Drokin <oleg.drokin@intel.com>
Wed, 1 Nov 2017 04:57:18 +0000 (04:57 +0000)
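OFED destroys the cmid if kiblnd_cm_callback returns an error.
If that error happens before kiblnd_connect_peer finishes,
kiblnd_connect_peer touches the destroyed cmid and fails with:
(o2iblnd_cb.c:1315:kiblnd_connect_peer())
            ASSERTION( cmid->device != ((void *)0) ) failed:

Remove the assertion and the "connection bound to" debug message from
kiblnd_connect_peer, and log that message from kiblnd_cm_callback
instead, once rdma_resolve_route has returned 0.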

Seagate-bug-id: MRP-4592
Signed-off-by: Alexander Boyko <alexander.boyko@seagate.com>
Change-Id: I83eb5bceeb567acef0316498b936d25d6c6ccd95
Reviewed-on: https://review.whamcloud.com/29134
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 9699fa7..35502b6 100644 (file)
@@ -1296,11 +1296,6 @@ kiblnd_connect_peer (kib_peer_ni_t *peer_ni)
                 goto failed2;
         }
 
                 goto failed2;
         }
 
-        LASSERT (cmid->device != NULL);
-       CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-               libcfs_nid2str(peer_ni->ibp_nid), dev->ibd_ifname,
-              &dev->ibd_ifip, cmid->device->name);
-
        return;
 
  failed2:
        return;
 
  failed2:
@@ -2997,8 +2992,19 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
                 } else {
                         rc = rdma_resolve_route(
                                 cmid, *kiblnd_tunables.kib_timeout * 1000);
                 } else {
                         rc = rdma_resolve_route(
                                 cmid, *kiblnd_tunables.kib_timeout * 1000);
-                        if (rc == 0)
-                                return 0;
+                       if (rc == 0) {
+                               kib_net_t *net = peer_ni->ibp_ni->ni_data;
+                               kib_dev_t *dev = net->ibn_dev;
+
+                               CDEBUG(D_NET, "%s: connection bound to "\
+                                      "%s:%pI4h:%s\n",
+                                      libcfs_nid2str(peer_ni->ibp_nid),
+                                      dev->ibd_ifname,
+                                      &dev->ibd_ifip, cmid->device->name);
+
+                               return 0;
+                       }
+
                         /* Can't initiate route resolution */
                         CERROR("Can't resolve route for %s: %d\n",
                                libcfs_nid2str(peer_ni->ibp_nid), rc);
                         /* Can't initiate route resolution */
                         CERROR("Can't resolve route for %s: %d\n",
                                libcfs_nid2str(peer_ni->ibp_nid), rc);