LU-12768 o2iblnd: wait properly for fps->increasing.
diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index dae653b..fd6f5d8 100644
@@ -372,7 +372,8 @@ kiblnd_destroy_peer(struct kib_peer_ni *peer_ni)
         * they are destroyed, so we can be assured that _all_ state to do
         * with this peer_ni has been cleaned up when its refcount drops to
         * zero. */
-       atomic_dec(&net->ibn_npeers);
+       if (atomic_dec_and_test(&net->ibn_npeers))
+               wake_up_var(&net->ibn_npeers);
 }
 
 struct kib_peer_ni *
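
This hunk is the waker half of a pattern used throughout the patch: previously kiblnd_destroy_peer() only decremented ibn_npeers and kiblnd_shutdown() polled the counter once a second; now the final decrement also wakes anyone sleeping on the counter's address (see the matching kiblnd_shutdown() hunk below). A minimal sketch of the pairing, with hypothetical names rather than the actual Lustre code:

#include <linux/atomic.h>
#include <linux/wait_bit.h>

static atomic_t example_count = ATOMIC_INIT(1);

static void example_put(void)
{
	/* atomic_dec_and_test() returns true only on the 1 -> 0
	 * transition, so exactly one caller issues the wakeup. */
	if (atomic_dec_and_test(&example_count))
		wake_up_var(&example_count);
}

static void example_wait_for_zero(void)
{
	/* wait_var_event() hashes the variable's address onto a shared
	 * wait-queue table and re-checks the condition after every
	 * wakeup, so a spurious wakeup just re-tests and sleeps again. */
	wait_var_event(&example_count, atomic_read(&example_count) == 0);
}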
@@ -635,40 +636,16 @@ kiblnd_debug_conn(struct kib_conn *conn)
        spin_unlock(&conn->ibc_lock);
 }
 
-int
-kiblnd_translate_mtu(int value)
-{
-        switch (value) {
-        default:
-                return -1;
-        case 0:
-                return 0;
-        case 256:
-                return IB_MTU_256;
-        case 512:
-                return IB_MTU_512;
-        case 1024:
-                return IB_MTU_1024;
-        case 2048:
-                return IB_MTU_2048;
-        case 4096:
-                return IB_MTU_4096;
-        }
-}
-
 static void
 kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
 {
-        int           mtu;
-
         /* XXX There is no path record for iWARP, set by netdev->change_mtu? */
         if (cmid->route.path_rec == NULL)
                 return;
 
-        mtu = kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu);
-        LASSERT (mtu >= 0);
-        if (mtu != 0)
-                cmid->route.path_rec->mtu = mtu;
+       if (*kiblnd_tunables.kib_ib_mtu)
+               cmid->route.path_rec->mtu =
+                       ib_mtu_int_to_enum(*kiblnd_tunables.kib_ib_mtu);
 }
 
 static int
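
kiblnd_translate_mtu() duplicated a helper the RDMA core already exports, so the patch drops it in favor of ib_mtu_int_to_enum() and keeps the existing "0 means leave the path-record MTU alone" behavior with the surrounding if. One behavioral difference worth noting: the old function returned -1 (tripping the LASSERT) for anything other than 0/256/512/1024/2048/4096, while the core helper rounds down to the nearest supported IB MTU. For reference, ib_mtu_int_to_enum() in include/rdma/ib_verbs.h looks like this in recent kernels:

static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
{
	if (mtu >= 4096)
		return IB_MTU_4096;
	else if (mtu >= 2048)
		return IB_MTU_2048;
	else if (mtu >= 1024)
		return IB_MTU_1024;
	else if (mtu >= 512)
		return IB_MTU_512;
	else
		return IB_MTU_256;
}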
@@ -772,7 +749,7 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
        rwlock_t               *glock = &kiblnd_data.kib_global_lock;
        struct kib_net              *net = peer_ni->ibp_ni->ni_data;
        struct kib_dev *dev;
-       struct ib_qp_init_attr *init_qp_attr;
+       struct ib_qp_init_attr init_qp_attr = {};
        struct kib_sched_info   *sched;
 #ifdef HAVE_IB_CQ_INIT_ATTR
        struct ib_cq_init_attr  cq_attr = {};
@@ -803,19 +780,11 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
         */
        cpt = sched->ibs_cpt;
 
-       LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt,
-                        sizeof(*init_qp_attr));
-       if (init_qp_attr == NULL) {
-               CERROR("Can't allocate qp_attr for %s\n",
-                      libcfs_nid2str(peer_ni->ibp_nid));
-               goto failed_0;
-       }
-
        LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn));
        if (conn == NULL) {
                CERROR("Can't allocate connection for %s\n",
                       libcfs_nid2str(peer_ni->ibp_nid));
-               goto failed_1;
+               goto failed_0;
        }
 
        conn->ibc_state = IBLND_CONN_INIT;
@@ -904,29 +873,29 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
                goto failed_2;
        }
 
-       init_qp_attr->event_handler = kiblnd_qp_event;
-       init_qp_attr->qp_context = conn;
-       init_qp_attr->cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge;
-       init_qp_attr->cap.max_recv_sge = 1;
-       init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-       init_qp_attr->qp_type = IB_QPT_RC;
-       init_qp_attr->send_cq = cq;
-       init_qp_attr->recv_cq = cq;
+       init_qp_attr.event_handler = kiblnd_qp_event;
+       init_qp_attr.qp_context = conn;
+       init_qp_attr.cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge;
+       init_qp_attr.cap.max_recv_sge = 1;
+       init_qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+       init_qp_attr.qp_type = IB_QPT_RC;
+       init_qp_attr.send_cq = cq;
+       init_qp_attr.recv_cq = cq;
        /*
         * kiblnd_send_wrs() can change the connection's queue depth if
         * the maximum work requests for the device is maxed out
         */
-       init_qp_attr->cap.max_send_wr = kiblnd_send_wrs(conn);
-       init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
+       init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
+       init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
 
-       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, init_qp_attr);
+       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, &init_qp_attr);
        if (rc) {
                CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
                       "send_sge: %d, recv_sge: %d\n",
-                      rc, init_qp_attr->cap.max_send_wr,
-                      init_qp_attr->cap.max_recv_wr,
-                      init_qp_attr->cap.max_send_sge,
-                      init_qp_attr->cap.max_recv_sge);
+                      rc, init_qp_attr.cap.max_send_wr,
+                      init_qp_attr.cap.max_recv_wr,
+                      init_qp_attr.cap.max_send_sge,
+                      init_qp_attr.cap.max_recv_sge);
                goto failed_2;
        }
 
@@ -953,8 +922,6 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
 
        kiblnd_map_rx_descs(conn);
 
-       LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
-
        /* 1 ref for caller and each rxmsg */
        atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS(conn));
        conn->ibc_nrx = IBLND_RX_MSGS(conn);
@@ -1000,8 +967,6 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
  failed_2:
        kiblnd_destroy_conn(conn);
        LIBCFS_FREE(conn, sizeof(*conn));
- failed_1:
-        LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr));
  failed_0:
         return NULL;
 }
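
All of the ib_qp_init_attr hunks in kiblnd_create_conn() are one simplification: the struct is small enough for the stack, so "= {}" replaces the zeroing the CPT allocator did, the allocation-failure branch disappears, and both LIBCFS_FREE() calls (success path and the failed_1 label) go away. A hedged before/after sketch with hypothetical names:

#include <linux/slab.h>

struct example_attr {
	int depth;
};

/* Heap version: allocation can fail, and every exit path after this
 * point has to remember the kfree(). */
static int example_heap(void)
{
	struct example_attr *attr;

	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
	if (!attr)
		return -ENOMEM;
	attr->depth = 16;
	/* ... use attr ... */
	kfree(attr);
	return 0;
}

/* Stack version: "= {}" zero-initializes and nothing needs freeing,
 * which is what lets the patch delete the failed_1 label. */
static int example_stack(void)
{
	struct example_attr attr = {};

	attr.depth = 16;
	/* ... use &attr ... */
	return 0;
}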
@@ -1885,14 +1850,14 @@ again:
 #ifdef HAVE_IB_MAP_MR_SG
 #ifdef HAVE_IB_MAP_MR_SG_5ARGS
                                n = ib_map_mr_sg(mr, tx->tx_frags,
-                                                tx->tx_nfrags, NULL, PAGE_SIZE);
+                                                rd->rd_nfrags, NULL, PAGE_SIZE);
 #else
                                n = ib_map_mr_sg(mr, tx->tx_frags,
-                                                tx->tx_nfrags, PAGE_SIZE);
+                                                rd->rd_nfrags, PAGE_SIZE);
 #endif
-                               if (unlikely(n != tx->tx_nfrags)) {
+                               if (unlikely(n != rd->rd_nfrags)) {
                                        CERROR("Failed to map mr %d/%d "
-                                              "elements\n", n, tx->tx_nfrags);
+                                              "elements\n", n, rd->rd_nfrags);
                                        return n < 0 ? n : -EINVAL;
                                }
 
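
This hunk is a bug fix rather than a cleanup. ib_map_mr_sg() must be passed the number of DMA-mapped scatterlist entries, which here is rd->rd_nfrags; tx->tx_nfrags is the pre-mapping entry count, and the two can differ when the DMA layer (an IOMMU, for example) coalesces entries during mapping. Using the wrong count either over-runs the mapped list or makes the shortfall check compare against the wrong bound. A sketch of the contract, as a hypothetical wrapper:

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* sg_nents must be the DMA-mapped entry count; ib_map_mr_sg() returns
 * how many entries it actually placed into the MR, so any shortfall
 * (or a negative errno) is a failure. */
static int example_map_mr(struct ib_mr *mr, struct scatterlist *sg,
			  int mapped_nents)
{
	int n;

	n = ib_map_mr_sg(mr, sg, mapped_nents, NULL, PAGE_SIZE);
	if (unlikely(n != mapped_nents))
		return n < 0 ? n : -EINVAL;
	return 0;
}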
@@ -1964,7 +1929,7 @@ again:
                spin_unlock(&fps->fps_lock);
                CDEBUG(D_NET, "Another thread is allocating new "
                       "FMR pool, waiting for her to complete\n");
-               schedule();
+               wait_var_event(fps, !fps->fps_increasing);
                goto again;
 
        }
@@ -1982,6 +1947,7 @@ again:
        rc = kiblnd_create_fmr_pool(fps, &fpo);
        spin_lock(&fps->fps_lock);
        fps->fps_increasing = 0;
+       wake_up_var(fps);
        if (rc == 0) {
                fps->fps_version++;
                list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
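
These two hunks are the fix named in the commit subject. The old waiter called schedule() while still TASK_RUNNING, so it never actually slept; it just yielded and retried, burning CPU until the allocating thread cleared fps_increasing. Now the waiter sleeps on the address of fps, and the allocator wakes it after clearing the flag. Reduced to its essentials, with a hypothetical pool_set type rather than the real FMR poolset:

#include <linux/spinlock.h>
#include <linux/wait_bit.h>

struct pool_set {
	spinlock_t lock;
	int	   increasing;
};

static void grow_pool(struct pool_set *ps)
{
again:
	spin_lock(&ps->lock);
	if (ps->increasing) {
		/* Someone else is already allocating: really sleep,
		 * then retake the lock and re-check.  The unlocked read
		 * in the condition is safe because we always revalidate
		 * under the lock after waking. */
		spin_unlock(&ps->lock);
		wait_var_event(ps, !ps->increasing);
		goto again;
	}
	ps->increasing = 1;
	spin_unlock(&ps->lock);

	/* ... allocate the new pool without holding the lock ... */

	spin_lock(&ps->lock);
	ps->increasing = 0;
	wake_up_var(ps);	/* pairs with wait_var_event(ps, ...) */
	spin_unlock(&ps->lock);
}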
@@ -2972,17 +2938,11 @@ kiblnd_base_shutdown(void)
                wake_up_all(&kiblnd_data.kib_connd_waitq);
                wake_up_all(&kiblnd_data.kib_failover_waitq);
 
-               i = 2;
-               while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "Waiting for %d threads to terminate\n",
-                              atomic_read(&kiblnd_data.kib_nthreads));
-                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
-               }
-
-                /* fall through */
+               wait_var_event_warning(&kiblnd_data.kib_nthreads,
+                                      !atomic_read(&kiblnd_data.kib_nthreads),
+                                      "Waiting for %d threads to terminate\n",
+                                      atomic_read(&kiblnd_data.kib_nthreads));
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                 break;
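
wait_var_event_warning() is a libcfs helper, not an upstream kernel API: it sleeps on the variable's address like wait_var_event() but keeps re-emitting the supplied message while the condition stays false, replacing the open-coded power-of-two warning loop deleted above. A hypothetical expansion of roughly what such a helper amounts to, built on the upstream wait_var_event_timeout(); the real libcfs macro may differ in detail:

#include <linux/wait_bit.h>

#define example_wait_warning(var, condition, format, ...)		\
do {									\
	int _i = 2;							\
	while (!(condition)) {						\
		_i++;							\
		/* warn on power-of-2 iterations, like the old loop */	\
		CDEBUG(((_i & (-_i)) == _i) ? D_WARNING : D_NET,	\
		       format, ## __VA_ARGS__);				\
		wait_var_event_timeout(var, condition,			\
				       cfs_time_seconds(1));		\
	}								\
} while (0)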
@@ -3007,8 +2967,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
 {
        struct kib_net *net = ni->ni_data;
        rwlock_t     *g_lock = &kiblnd_data.kib_global_lock;
-        int               i;
-        unsigned long     flags;
+       unsigned long     flags;
 
         LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
 
@@ -3026,21 +2985,16 @@ kiblnd_shutdown(struct lnet_ni *ni)
         default:
                 LBUG();
 
-        case IBLND_INIT_ALL:
-                /* nuke all existing peers within this net */
-                kiblnd_del_peer(ni, LNET_NID_ANY);
+       case IBLND_INIT_ALL:
+               /* nuke all existing peers within this net */
+               kiblnd_del_peer(ni, LNET_NID_ANY);
 
                /* Wait for all peer_ni state to clean up */
-               i = 2;
-               while (atomic_read(&net->ibn_npeers) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "%s: waiting for %d peers to disconnect\n",
-                              libcfs_nid2str(ni->ni_nid),
-                              atomic_read(&net->ibn_npeers));
-                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
-               }
+               wait_var_event_warning(&net->ibn_npeers,
+                                      atomic_read(&net->ibn_npeers) == 0,
+                                      "%s: waiting for %d peers to disconnect\n",
+                                      libcfs_nid2str(ni->ni_nid),
+                                      atomic_read(&net->ibn_npeers));
 
                kiblnd_net_fini_pools(net);
 
@@ -3050,7 +3004,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
                list_del(&net->ibn_list);
                write_unlock_irqrestore(g_lock, flags);
 
-                /* fall through */
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                LASSERT (atomic_read(&net->ibn_nconns) == 0);