LU-12678 o2iblnd: Use list_for_each_entry_safe
[fs/lustre-release.git] lnet/klnds/o2iblnd/o2iblnd.c
index dd8b999..e846e9d 100644
@@ -39,7 +39,7 @@
 
 #include "o2iblnd.h"
 
-static struct lnet_lnd the_o2iblnd;
+static const struct lnet_lnd the_o2iblnd;
 
 struct kib_data kiblnd_data;
 
@@ -372,7 +372,8 @@ kiblnd_destroy_peer(struct kib_peer_ni *peer_ni)
         * they are destroyed, so we can be assured that _all_ state to do
         * with this peer_ni has been cleaned up when its refcount drops to
         * zero. */
-       atomic_dec(&net->ibn_npeers);
+       if (atomic_dec_and_test(&net->ibn_npeers))
+               wake_up_var(&net->ibn_npeers);
 }
 
 struct kib_peer_ni *
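
The plain atomic_dec() gave shutdown nothing to sleep on, so kiblnd_shutdown() had to poll ibn_npeers once a second. Pairing the final decrement with wake_up_var() lets the waiter block properly; the matching wait appears in the kiblnd_shutdown() hunk below. A minimal sketch of the pairing with hypothetical names, assuming the linux/wait_bit.h primitives (older kernels presumably get a libcfs compat shim):

    #include <linux/atomic.h>
    #include <linux/wait_bit.h>

    static atomic_t nusers;                 /* stands in for ibn_npeers */

    static void put_ref(void)
    {
            /* wake_up_var() keys the wait queue off the variable's
             * address, so waker and waiter must name the same object. */
            if (atomic_dec_and_test(&nusers))
                    wake_up_var(&nusers);
    }

    static void wait_idle(void)
    {
            /* Sleeps, re-checking the condition after each wakeup. */
            wait_var_event(&nusers, atomic_read(&nusers) == 0);
    }
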
@@ -459,18 +460,15 @@ kiblnd_get_peer_info(struct lnet_ni *ni, int index,
 static void
 kiblnd_del_peer_locked(struct kib_peer_ni *peer_ni)
 {
-       struct list_head *ctmp;
-       struct list_head *cnxt;
+       struct kib_conn *cnxt;
        struct kib_conn *conn;
 
        if (list_empty(&peer_ni->ibp_conns)) {
                kiblnd_unlink_peer_locked(peer_ni);
        } else {
-               list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-                       conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
+               list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                        ibc_list)
                        kiblnd_close_conn_locked(conn, 0);
-               }
                /* NB closing peer_ni's last conn unlinked it. */
        }
        /* NB peer_ni now unlinked; might even be freed if the peer_ni table had the
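
list_for_each_entry_safe() pre-fetches the next entry before the body runs, so the body may unlink the current one; that matters here because closing the peer_ni's last conn unlinks the peer, as the NB comment says. It also drops the separate list_entry() step of the open-coded version. The idiom in isolation:

    #include <linux/list.h>
    #include <linux/slab.h>

    struct item {
            struct list_head link;
    };

    static void drain(struct list_head *head)
    {
            struct item *it, *tmp;  /* tmp caches it->link.next */

            list_for_each_entry_safe(it, tmp, head, link) {
                    list_del(&it->link);    /* safe: already advanced */
                    kfree(it);
            }
    }
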
@@ -635,40 +633,16 @@ kiblnd_debug_conn(struct kib_conn *conn)
        spin_unlock(&conn->ibc_lock);
 }
 
-int
-kiblnd_translate_mtu(int value)
-{
-        switch (value) {
-        default:
-                return -1;
-        case 0:
-                return 0;
-        case 256:
-                return IB_MTU_256;
-        case 512:
-                return IB_MTU_512;
-        case 1024:
-                return IB_MTU_1024;
-        case 2048:
-                return IB_MTU_2048;
-        case 4096:
-                return IB_MTU_4096;
-        }
-}
-
 static void
 kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
 {
-        int           mtu;
-
         /* XXX There is no path record for iWARP, set by netdev->change_mtu? */
         if (cmid->route.path_rec == NULL)
                 return;
 
-        mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
-        LASSERT (mtu >= 0);
-        if (mtu != 0)
-                cmid->route.path_rec->mtu = mtu;
+       if (*kiblnd_tunables.kib_ib_mtu)
+               cmid->route.path_rec->mtu =
+                       ib_mtu_int_to_enum(*kiblnd_tunables.kib_ib_mtu);
 }
 
 static int
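
kiblnd_translate_mtu() duplicated ib_mtu_int_to_enum() from rdma/ib_verbs.h, so the open-coded switch and the LASSERT can go. One nuance: the kernel helper never fails; in current kernels an unrecognized value maps to a default enum rather than the old -1, which is presumably fine because the ib_mtu tunable is validated when module options are parsed. Paraphrase of the helper for reference (not the driver's code):

    static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
    {
            switch (mtu) {
            case 256:  return IB_MTU_256;
            case 512:  return IB_MTU_512;
            case 1024: return IB_MTU_1024;
            case 2048: return IB_MTU_2048;
            case 4096: return IB_MTU_4096;
            default:   return IB_MTU_1024;  /* in current kernels */
            }
    }
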
@@ -772,7 +746,7 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
        rwlock_t               *glock = &kiblnd_data.kib_global_lock;
        struct kib_net              *net = peer_ni->ibp_ni->ni_data;
        struct kib_dev *dev;
-       struct ib_qp_init_attr *init_qp_attr;
+       struct ib_qp_init_attr init_qp_attr = {};
        struct kib_sched_info   *sched;
 #ifdef HAVE_IB_CQ_INIT_ATTR
        struct ib_cq_init_attr  cq_attr = {};
@@ -803,19 +777,11 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
         */
        cpt = sched->ibs_cpt;
 
-       LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt,
-                        sizeof(*init_qp_attr));
-       if (init_qp_attr == NULL) {
-               CERROR("Can't allocate qp_attr for %s\n",
-                      libcfs_nid2str(peer_ni->ibp_nid));
-               goto failed_0;
-       }
-
        LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn));
        if (conn == NULL) {
                CERROR("Can't allocate connection for %s\n",
                       libcfs_nid2str(peer_ni->ibp_nid));
-               goto failed_1;
+               goto failed_0;
        }
 
        conn->ibc_state = IBLND_CONN_INIT;
@@ -904,29 +870,29 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
                goto failed_2;
        }
 
-       init_qp_attr->event_handler = kiblnd_qp_event;
-       init_qp_attr->qp_context = conn;
-       init_qp_attr->cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge;
-       init_qp_attr->cap.max_recv_sge = 1;
-       init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-       init_qp_attr->qp_type = IB_QPT_RC;
-       init_qp_attr->send_cq = cq;
-       init_qp_attr->recv_cq = cq;
+       init_qp_attr.event_handler = kiblnd_qp_event;
+       init_qp_attr.qp_context = conn;
+       init_qp_attr.cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge;
+       init_qp_attr.cap.max_recv_sge = 1;
+       init_qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+       init_qp_attr.qp_type = IB_QPT_RC;
+       init_qp_attr.send_cq = cq;
+       init_qp_attr.recv_cq = cq;
        /*
         * kiblnd_send_wrs() can change the connection's queue depth if
         * the maximum work requests for the device is maxed out
         */
-       init_qp_attr->cap.max_send_wr = kiblnd_send_wrs(conn);
-       init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
+       init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
+       init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
 
-       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
+       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, &init_qp_attr);
        if (rc) {
                CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
                       "send_sge: %d, recv_sge: %d\n",
-                      rc, init_qp_attr->cap.max_send_wr,
-                      init_qp_attr->cap.max_recv_wr,
-                      init_qp_attr->cap.max_send_sge,
-                      init_qp_attr->cap.max_recv_sge);
+                      rc, init_qp_attr.cap.max_send_wr,
+                      init_qp_attr.cap.max_recv_wr,
+                      init_qp_attr.cap.max_send_sge,
+                      init_qp_attr.cap.max_recv_sge);
                goto failed_2;
        }
 
@@ -953,8 +919,6 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
 
        kiblnd_map_rx_descs(conn);
 
-       LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
-
        /* 1 ref for caller and each rxmsg */
        atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
        conn->ibc_nrx = IBLND_RX_MSGS(conn);
@@ -1000,8 +964,6 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
  failed_2:
        kiblnd_destroy_conn(conn);
        LIBCFS_FREE(conn, sizeof(*conn));
- failed_1:
-        LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
  failed_0:
         return NULL;
 }
@@ -1050,10 +1012,8 @@ kiblnd_destroy_conn(struct kib_conn *conn)
        if (conn->ibc_rx_pages != NULL)
                kiblnd_unmap_rx_descs(conn);
 
-       if (conn->ibc_rxs != NULL) {
-               LIBCFS_FREE(conn->ibc_rxs,
-                           IBLND_RX_MSGS(conn) * sizeof(struct kib_rx));
-       }
+       if (conn->ibc_rxs != NULL)
+               CFS_FREE_PTR_ARRAY(conn->ibc_rxs, IBLND_RX_MSGS(conn));
 
        if (conn->ibc_connvars != NULL)
                LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars));
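
First use of CFS_FREE_PTR_ARRAY() in this patch: the byte count is derived from the pointer's own element type, so the size expression cannot drift out of sync with the allocation site (which uses the matching CFS_ALLOC_PTR_ARRAY(), as in the kiblnd_base_startup() hunk below). Presumed definitions, paraphrased from libcfs:

    #define CFS_ALLOC_PTR_ARRAY(ptr, count) \
            LIBCFS_ALLOC(ptr, (count) * sizeof(*(ptr)))
    #define CFS_FREE_PTR_ARRAY(ptr, count) \
            LIBCFS_FREE(ptr, (count) * sizeof(*(ptr)))
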
@@ -1075,13 +1035,11 @@ int
 kiblnd_close_peer_conns_locked(struct kib_peer_ni *peer_ni, int why)
 {
        struct kib_conn *conn;
-       struct list_head        *ctmp;
-       struct list_head        *cnxt;
-       int                     count = 0;
-
-       list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-               conn = list_entry(ctmp, struct kib_conn, ibc_list);
+       struct kib_conn *cnxt;
+       int count = 0;
 
+       list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                ibc_list) {
                CDEBUG(D_NET, "Closing conn -> %s, "
                              "version: %x, reason: %d\n",
                       libcfs_nid2str(peer_ni->ibp_nid),
@@ -1099,13 +1057,11 @@ kiblnd_close_stale_conns_locked(struct kib_peer_ni *peer_ni,
                                int version, __u64 incarnation)
 {
        struct kib_conn *conn;
-       struct list_head        *ctmp;
-       struct list_head        *cnxt;
-       int                     count = 0;
-
-       list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-               conn = list_entry(ctmp, struct kib_conn, ibc_list);
+       struct kib_conn *cnxt;
+       int count = 0;
 
+       list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                ibc_list) {
                if (conn->ibc_version     == version &&
                    conn->ibc_incarnation == incarnation)
                        continue;
@@ -1224,36 +1180,6 @@ kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
 }
 
 static void
-kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when)
-{
-       time64_t last_alive = 0;
-       time64_t now = ktime_get_seconds();
-       rwlock_t *glock = &kiblnd_data.kib_global_lock;
-       struct kib_peer_ni *peer_ni;
-       unsigned long flags;
-
-       read_lock_irqsave(glock, flags);
-
-       peer_ni = kiblnd_find_peer_locked(ni, nid);
-       if (peer_ni != NULL)
-               last_alive = peer_ni->ibp_last_alive;
-
-       read_unlock_irqrestore(glock, flags);
-
-       if (last_alive != 0)
-               *when = last_alive;
-
-       /* peer_ni is not persistent in hash, trigger peer_ni creation
-        * and connection establishment with a NULL tx */
-       if (peer_ni == NULL)
-               kiblnd_launch_tx(ni, NULL, nid);
-
-       CDEBUG(D_NET, "peer_ni %s %p, alive %lld secs ago\n",
-              libcfs_nid2str(nid), peer_ni,
-              last_alive ? now - last_alive : -1);
-}
-
-static void
 kiblnd_free_pages(struct kib_pages *p)
 {
        int     npages = p->ibp_npages;
@@ -1398,8 +1324,7 @@ kiblnd_current_hdev(struct kib_dev *dev)
                if (i++ % 50 == 0)
                        CDEBUG(D_NET, "%s: Wait for failover\n",
                               dev->ibd_ifname);
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(cfs_time_seconds(1) / 100);
+               schedule_timeout_interruptible(cfs_time_seconds(1) / 100);
 
                read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        }
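
schedule_timeout_interruptible() folds the set_current_state()/schedule_timeout() pair into one call; forgetting the state set is a classic bug that turns the sleep into an immediate return. Paraphrased from kernel/time/timer.c (schedule_timeout_uninterruptible() is the TASK_UNINTERRUPTIBLE twin):

    signed long schedule_timeout_interruptible(signed long timeout)
    {
            __set_current_state(TASK_INTERRUPTIBLE);
            return schedule_timeout(timeout);
    }
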
@@ -1702,11 +1627,10 @@ kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps, struct list_head *zombies)
                                                      fpo_list);
 
                fpo->fpo_failed = 1;
-               list_del(&fpo->fpo_list);
                if (fpo->fpo_map_count == 0)
-                       list_add(&fpo->fpo_list, zombies);
+                       list_move(&fpo->fpo_list, zombies);
                else
-                       list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
+                       list_move(&fpo->fpo_list, &fps->fps_failed_pool_list);
        }
 
        spin_unlock(&fps->fps_lock);
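
With list_move() doing the unlink itself, the unconditional list_del() before the branch becomes redundant and is dropped. Paraphrased from linux/list.h:

    static inline void list_move(struct list_head *list,
                                 struct list_head *head)
    {
            __list_del_entry(list);         /* unlink from the old list */
            list_add(list, head);           /* insert at the new head */
    }
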
@@ -1919,14 +1843,14 @@ again:
 #ifdef HAVE_IB_MAP_MR_SG
 #ifdef HAVE_IB_MAP_MR_SG_5ARGS
                                n = ib_map_mr_sg(mr, tx->tx_frags,
-                                                tx->tx_nfrags, NULL, PAGE_SIZE);
+                                                rd->rd_nfrags, NULL, PAGE_SIZE);
 #else
                                n = ib_map_mr_sg(mr, tx->tx_frags,
-                                                tx->tx_nfrags, PAGE_SIZE);
+                                                rd->rd_nfrags, PAGE_SIZE);
 #endif
-                               if (unlikely(n != tx->tx_nfrags)) {
+                               if (unlikely(n != rd->rd_nfrags)) {
                                        CERROR("Failed to map mr %d/%d "
-                                              "elements\n", n, tx->tx_nfrags);
+                                              "elements\n", n, rd->rd_nfrags);
                                        return n < 0 ? n : -EINVAL;
                                }
 
@@ -1998,7 +1922,7 @@ again:
                spin_unlock(&fps->fps_lock);
                CDEBUG(D_NET, "Another thread is allocating new "
                       "FMR pool, waiting for her to complete\n");
-               schedule();
+               wait_var_event(fps, !fps->fps_increasing);
                goto again;
 
        }
@@ -2016,6 +1940,7 @@ again:
        rc = kiblnd_create_fmr_pool(fps, &fpo);
        spin_lock(&fps->fps_lock);
        fps->fps_increasing = 0;
+       wake_up_var(fps);
        if (rc == 0) {
                fps->fps_version++;
                list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
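
The bare schedule() this pair of hunks replaces was effectively a busy wait: the task never set a sleep state or joined a wait queue, so schedule() could return at once and spin through the goto. Waiters now sleep on the fps address and the creator wakes them after clearing fps_increasing under fps_lock. A hypothetical sketch of the handshake:

    #include <linux/wait_bit.h>

    static bool busy;       /* stands in for fps->fps_increasing */

    static void creator(void)
    {
            /* the driver clears the flag under fps_lock first */
            busy = false;
            wake_up_var(&busy);             /* release all waiters */
    }

    static void waiter(void)
    {
            /* same address on both sides, or the wakeup is lost */
            wait_var_event(&busy, !busy);
            /* then retry the fast path ("goto again" in the driver) */
    }
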
@@ -2074,11 +1999,10 @@ kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies)
                                                 struct kib_pool, po_list);
 
                po->po_failed = 1;
-               list_del(&po->po_list);
                if (po->po_allocated == 0)
-                       list_add(&po->po_list, zombies);
+                       list_move(&po->po_list, zombies);
                else
-                       list_add(&po->po_list, &ps->ps_failed_pool_list);
+                       list_move(&po->po_list, &ps->ps_failed_pool_list);
        }
        spin_unlock(&ps->ps_lock);
 }
@@ -2204,13 +2128,11 @@ again:
                /* another thread is allocating a new pool */
                spin_unlock(&ps->ps_lock);
                trips++;
-                CDEBUG(D_NET, "Another thread is allocating new "
-                      "%s pool, waiting %d HZs for her to complete."
-                      "trips = %d\n",
+               CDEBUG(D_NET,
+                      "Another thread is allocating new %s pool, waiting %d jiffies for her to complete. trips = %d\n",
                       ps->ps_name, interval, trips);
 
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(interval);
+               schedule_timeout_interruptible(interval);
                if (interval < cfs_time_seconds(1))
                        interval *= 2;
 
@@ -2263,37 +2185,32 @@ kiblnd_destroy_tx_pool(struct kib_pool *pool)
         if (tpo->tpo_tx_descs == NULL)
                 goto out;
 
-        for (i = 0; i < pool->po_size; i++) {
+       for (i = 0; i < pool->po_size; i++) {
                struct kib_tx *tx = &tpo->tpo_tx_descs[i];
                int       wrq_sge = *kiblnd_tunables.kib_wrq_sge;
 
                list_del(&tx->tx_list);
-                if (tx->tx_pages != NULL)
-                        LIBCFS_FREE(tx->tx_pages,
-                                    LNET_MAX_IOV *
-                                    sizeof(*tx->tx_pages));
-                if (tx->tx_frags != NULL)
-                        LIBCFS_FREE(tx->tx_frags,
-                                   (1 + IBLND_MAX_RDMA_FRAGS) *
-                                   sizeof(*tx->tx_frags));
-                if (tx->tx_wrq != NULL)
-                        LIBCFS_FREE(tx->tx_wrq,
-                                    (1 + IBLND_MAX_RDMA_FRAGS) *
-                                    sizeof(*tx->tx_wrq));
+               if (tx->tx_pages != NULL)
+                       CFS_FREE_PTR_ARRAY(tx->tx_pages, LNET_MAX_IOV);
+               if (tx->tx_frags != NULL)
+                       CFS_FREE_PTR_ARRAY(tx->tx_frags,
+                                          (1 + IBLND_MAX_RDMA_FRAGS));
+               if (tx->tx_wrq != NULL)
+                       CFS_FREE_PTR_ARRAY(tx->tx_wrq,
+                                          (1 + IBLND_MAX_RDMA_FRAGS));
                if (tx->tx_sge != NULL)
-                       LIBCFS_FREE(tx->tx_sge,
-                                   (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
-                                   sizeof(*tx->tx_sge));
-                if (tx->tx_rd != NULL)
-                        LIBCFS_FREE(tx->tx_rd,
+                       CFS_FREE_PTR_ARRAY(tx->tx_sge,
+                                          (1 + IBLND_MAX_RDMA_FRAGS) *
+                                          wrq_sge);
+               if (tx->tx_rd != NULL)
+                       LIBCFS_FREE(tx->tx_rd,
                                    offsetof(struct kib_rdma_desc,
-                                             rd_frags[IBLND_MAX_RDMA_FRAGS]));
-        }
+                                            rd_frags[IBLND_MAX_RDMA_FRAGS]));
+       }
 
-        LIBCFS_FREE(tpo->tpo_tx_descs,
-                   pool->po_size * sizeof(struct kib_tx));
+       CFS_FREE_PTR_ARRAY(tpo->tpo_tx_descs, pool->po_size);
 out:
-        kiblnd_fini_pool(pool);
+       kiblnd_fini_pool(pool);
        CFS_FREE_PTR(tpo);
 }
 
@@ -2537,10 +2454,95 @@ kiblnd_net_init_pools(struct kib_net *net, struct lnet_ni *ni, __u32 *cpts,
 }
 
 static int
+kiblnd_port_get_attr(struct kib_hca_dev *hdev)
+{
+       struct ib_port_attr *port_attr;
+       int rc;
+       unsigned long flags;
+       rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+
+       LIBCFS_ALLOC(port_attr, sizeof(*port_attr));
+       if (port_attr == NULL) {
+               CDEBUG(D_NETERROR, "Out of memory\n");
+               return -ENOMEM;
+       }
+
+       rc = ib_query_port(hdev->ibh_ibdev, hdev->ibh_port, port_attr);
+
+       write_lock_irqsave(g_lock, flags);
+
+       if (rc == 0)
+               hdev->ibh_state = port_attr->state == IB_PORT_ACTIVE
+                                ? IBLND_DEV_PORT_ACTIVE
+                                : IBLND_DEV_PORT_DOWN;
+
+       write_unlock_irqrestore(g_lock, flags);
+       LIBCFS_FREE(port_attr, sizeof(*port_attr));
+
+       if (rc != 0) {
+               CDEBUG(D_NETERROR, "Failed to query IB port: %d\n", rc);
+               return rc;
+       }
+       return 0;
+}
+
+static inline void
+kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
+{
+       struct kib_net  *net;
+
+       /* for health check */
+       list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
+               if (val)
+                       CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
+                                       libcfs_nid2str(net->ibn_ni->ni_nid));
+               atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
+       }
+}
+
+void
+kiblnd_event_handler(struct ib_event_handler *handler, struct ib_event *event)
+{
+       rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+       struct kib_hca_dev  *hdev;
+       unsigned long flags;
+
+       hdev = container_of(handler, struct kib_hca_dev, ibh_event_handler);
+
+       write_lock_irqsave(g_lock, flags);
+
+       switch (event->event) {
+       case IB_EVENT_DEVICE_FATAL:
+               CDEBUG(D_NET, "IB device fatal\n");
+               hdev->ibh_state = IBLND_DEV_FATAL;
+               kiblnd_set_ni_fatal_on(hdev, 1);
+               break;
+       case IB_EVENT_PORT_ACTIVE:
+               CDEBUG(D_NET, "IB port active\n");
+               if (event->element.port_num == hdev->ibh_port) {
+                       hdev->ibh_state = IBLND_DEV_PORT_ACTIVE;
+                       kiblnd_set_ni_fatal_on(hdev, 0);
+               }
+               break;
+       case IB_EVENT_PORT_ERR:
+               CDEBUG(D_NET, "IB port err\n");
+               if (event->element.port_num == hdev->ibh_port) {
+                       hdev->ibh_state = IBLND_DEV_PORT_DOWN;
+                       kiblnd_set_ni_fatal_on(hdev, 1);
+               }
+               break;
+       default:
+               break;
+       }
+       write_unlock_irqrestore(g_lock, flags);
+}
+
+static int
 kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
 {
        struct ib_device_attr *dev_attr;
        int rc = 0;
+       int rc2 = 0;
 
        /* It's safe to assume a HCA can handle a page size
         * matching that of the native system */
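
kiblnd_port_get_attr() seeds the new ibh_state from the link state reported by ib_query_port(); struct ib_port_attr's state field reads IB_PORT_ACTIVE when the link is up. The driver heap-allocates the attr struct, presumably to keep the stack frame small. A minimal sketch of the query with hypothetical locals:

    #include <rdma/ib_verbs.h>

    static bool port_is_active(struct ib_device *dev, u8 port)
    {
            struct ib_port_attr attr;       /* state, MTU, LID, ... */

            /* port_num is u32 on newer kernels */
            if (ib_query_port(dev, port, &attr))
                    return false;           /* treat failure as down */
            return attr.state == IB_PORT_ACTIVE;
    }
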
@@ -2594,6 +2596,10 @@ kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
                rc = -ENOSYS;
        }
 
+       rc2 = kiblnd_port_get_attr(hdev);
+       if (rc2 != 0)
+               return rc2;
+
        if (rc != 0)
                rc = -EINVAL;
 
@@ -2626,6 +2632,9 @@ kiblnd_hdev_cleanup_mrs(struct kib_hca_dev *hdev)
 void
 kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
 {
+       if (hdev->ibh_event_handler.device != NULL)
+               ib_unregister_event_handler(&hdev->ibh_event_handler);
+
 #ifdef HAVE_IB_GET_DMA_MR
         kiblnd_hdev_cleanup_mrs(hdev);
 #endif
@@ -2794,6 +2803,7 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
         hdev->ibh_dev   = dev;
         hdev->ibh_cmid  = cmid;
         hdev->ibh_ibdev = cmid->device;
+       hdev->ibh_port  = cmid->port_num;
 
 #ifdef HAVE_IB_ALLOC_PD_2ARGS
        pd = ib_alloc_pd(cmid->device, 0);
@@ -2828,6 +2838,10 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns)
        }
 #endif
 
+       INIT_IB_EVENT_HANDLER(&hdev->ibh_event_handler,
+                               hdev->ibh_ibdev, kiblnd_event_handler);
+       ib_register_event_handler(&hdev->ibh_event_handler);
+
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
        old = dev->ibd_hdev;
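
The async-event plumbing is the stock RDMA core API: INIT_IB_EVENT_HANDLER() binds the callback to the device, ib_register_event_handler() arms it, and the ib_unregister_event_handler() added to kiblnd_hdev_destroy() (guarded by the .device check, so a partially built hdev is harmless) tears it down. The lifecycle in isolation, with a hypothetical callback:

    #include <rdma/ib_verbs.h>

    static void my_event_cb(struct ib_event_handler *h,
                            struct ib_event *ev)
    {
            /* ev->event is e.g. IB_EVENT_PORT_ERR; port-scoped events
             * carry the port in ev->element.port_num.  May run in
             * atomic context, hence the driver's irqsave locking. */
    }

    struct ib_event_handler eh;

    INIT_IB_EVENT_HANDLER(&eh, ibdev, my_event_cb);
    ib_register_event_handler(&eh);
    /* ... device lifetime ... */
    ib_unregister_event_handler(&eh);
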
@@ -2917,28 +2931,19 @@ kiblnd_base_shutdown(void)
                wake_up_all(&kiblnd_data.kib_connd_waitq);
                wake_up_all(&kiblnd_data.kib_failover_waitq);
 
-               i = 2;
-               while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "Waiting for %d threads to terminate\n",
-                              atomic_read(&kiblnd_data.kib_nthreads));
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(1));
-               }
-
-                /* fall through */
+               wait_var_event_warning(&kiblnd_data.kib_nthreads,
+                                      !atomic_read(&kiblnd_data.kib_nthreads),
+                                      "Waiting for %d threads to terminate\n",
+                                      atomic_read(&kiblnd_data.kib_nthreads));
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                 break;
         }
 
-       if (kiblnd_data.kib_peers != NULL) {
-               LIBCFS_FREE(kiblnd_data.kib_peers,
-                           sizeof(struct list_head) *
-                           kiblnd_data.kib_peer_hash_size);
-       }
+       if (kiblnd_data.kib_peers)
+               CFS_FREE_PTR_ARRAY(kiblnd_data.kib_peers,
+                                  kiblnd_data.kib_peer_hash_size);
 
        if (kiblnd_data.kib_scheds != NULL)
                cfs_percpt_free(kiblnd_data.kib_scheds);
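
wait_var_event_warning() is a libcfs helper that sleeps like wait_var_event() but periodically re-emits the supplied message (re-evaluating its arguments) while the condition stays false, replacing the hand-rolled loop and its power-of-two warning throttle. It still needs a wakeup from the other side; a hypothetical sketch of the thread-exit counterpart:

    /* Hypothetical: the decrement side must wake the address being
     * waited on, or shutdown only notices on a periodic timeout. */
    static void thread_fini(void)
    {
            if (atomic_dec_and_test(&kiblnd_data.kib_nthreads))
                    wake_up_var(&kiblnd_data.kib_nthreads);
    }
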
@@ -2955,8 +2960,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
 {
        struct kib_net *net = ni->ni_data;
        rwlock_t     *g_lock = &kiblnd_data.kib_global_lock;
-        int               i;
-        unsigned long     flags;
+       unsigned long     flags;
 
         LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
 
@@ -2974,22 +2978,16 @@ kiblnd_shutdown(struct lnet_ni *ni)
         default:
                 LBUG();
 
-        case IBLND_INIT_ALL:
-                /* nuke all existing peers within this net */
-                kiblnd_del_peer(ni, LNET_NID_ANY);
+       case IBLND_INIT_ALL:
+               /* nuke all existing peers within this net */
+               kiblnd_del_peer(ni, LNET_NID_ANY);
 
                /* Wait for all peer_ni state to clean up */
-               i = 2;
-               while (atomic_read(&net->ibn_npeers) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "%s: waiting for %d peers to disconnect\n",
-                              libcfs_nid2str(ni->ni_nid),
-                              atomic_read(&net->ibn_npeers));
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       schedule_timeout(cfs_time_seconds(1));
-               }
+               wait_var_event_warning(&net->ibn_npeers,
+                                      atomic_read(&net->ibn_npeers) == 0,
+                                      "%s: waiting for %d peers to disconnect\n",
+                                      libcfs_nid2str(ni->ni_nid),
+                                      atomic_read(&net->ibn_npeers));
 
                kiblnd_net_fini_pools(net);
 
@@ -2999,7 +2997,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
                list_del(&net->ibn_list);
                write_unlock_irqrestore(g_lock, flags);
 
-                /* fall through */
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                LASSERT (atomic_read(&net->ibn_nconns) == 0);
@@ -3033,7 +3031,9 @@ kiblnd_base_startup(struct net *ns)
 
        LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
 
-       try_module_get(THIS_MODULE);
+       if (!try_module_get(THIS_MODULE))
+               goto failed;
+
        memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */
 
        rwlock_init(&kiblnd_data.kib_global_lock);
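
try_module_get() fails once the module has started unloading; the old code ignored the result and could proceed without the self-reference it assumed it held. The idiom, for reference:

    #include <linux/module.h>

    if (!try_module_get(THIS_MODULE))
            return -ENODEV;         /* hypothetical error choice */
    /* ... */
    module_put(THIS_MODULE);        /* teardown path */
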
@@ -3042,9 +3042,8 @@ kiblnd_base_startup(struct net *ns)
        INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs);
 
        kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE;
-       LIBCFS_ALLOC(kiblnd_data.kib_peers,
-                    sizeof(struct list_head) *
-                    kiblnd_data.kib_peer_hash_size);
+       CFS_ALLOC_PTR_ARRAY(kiblnd_data.kib_peers,
+                           kiblnd_data.kib_peer_hash_size);
        if (kiblnd_data.kib_peers == NULL)
                goto failed;
 
@@ -3244,6 +3243,7 @@ kiblnd_startup(struct lnet_ni *ni)
                goto failed;
        }
 
+       net->ibn_ni = ni;
        net->ibn_incarnation = ktime_get_real_ns() / NSEC_PER_USEC;
 
        kiblnd_tunables_setup(ni);
@@ -3335,6 +3335,9 @@ kiblnd_startup(struct lnet_ni *ni)
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        ibdev->ibd_nnets++;
        list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
+       /* for health check */
+       if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN)
+               kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
        net->ibn_init = IBLND_INIT_ALL;
@@ -3354,12 +3357,11 @@ failed:
        return -ENETDOWN;
 }
 
-static struct lnet_lnd the_o2iblnd = {
+static const struct lnet_lnd the_o2iblnd = {
        .lnd_type       = O2IBLND,
        .lnd_startup    = kiblnd_startup,
        .lnd_shutdown   = kiblnd_shutdown,
        .lnd_ctl        = kiblnd_ctl,
-       .lnd_query      = kiblnd_query,
        .lnd_send       = kiblnd_send,
        .lnd_recv       = kiblnd_recv,
 };