Whamcloud - gitweb
LU-12901 o2iblnd: retry qp creation with reduced queue depth
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd.c
index fd6f5d8..d939510 100644 (file)
@@ -335,6 +335,7 @@ kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
        peer_ni->ibp_last_alive = 0;
        peer_ni->ibp_max_frags = IBLND_MAX_RDMA_FRAGS;
        peer_ni->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
+       peer_ni->ibp_queue_depth_mod = 0;       /* try to use the default */
        atomic_set(&peer_ni->ibp_refcount, 1);  /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer_ni->ibp_list);     /* not in the peer_ni table yet */
@@ -460,18 +461,15 @@ kiblnd_get_peer_info(struct lnet_ni *ni, int index,
 static void
 kiblnd_del_peer_locked(struct kib_peer_ni *peer_ni)
 {
-       struct list_head *ctmp;
-       struct list_head *cnxt;
+       struct kib_conn *cnxt;
        struct kib_conn *conn;
 
        if (list_empty(&peer_ni->ibp_conns)) {
                kiblnd_unlink_peer_locked(peer_ni);
        } else {
-               list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-                       conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
+               list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                        ibc_list)
                        kiblnd_close_conn_locked(conn, 0);
-               }
                /* NB closing peer_ni's last conn unlinked it. */
        }
        /* NB peer_ni now unlinked; might even be freed if the peer_ni table had the
@@ -881,14 +879,28 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
        init_qp_attr.qp_type = IB_QPT_RC;
        init_qp_attr.send_cq = cq;
        init_qp_attr.recv_cq = cq;
-       /*
-        * kiblnd_send_wrs() can change the connection's queue depth if
-        * the maximum work requests for the device is maxed out
-        */
-       init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
-       init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
 
-       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, &init_qp_attr);
+       if (peer_ni->ibp_queue_depth_mod &&
+           peer_ni->ibp_queue_depth_mod < peer_ni->ibp_queue_depth) {
+               conn->ibc_queue_depth = peer_ni->ibp_queue_depth_mod;
+               CDEBUG(D_NET, "Use reduced queue depth %u (from %u)\n",
+                      peer_ni->ibp_queue_depth_mod,
+                      peer_ni->ibp_queue_depth);
+       }
+
+       do {
+               /* kiblnd_send_wrs() can change the connection's queue depth if
+                * the maximum work requests for the device is maxed out
+                */
+               init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
+               init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
+               rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd,
+                                   &init_qp_attr);
+               if (rc != -ENOMEM || conn->ibc_queue_depth < 2)
+                       break;
+               conn->ibc_queue_depth--;
+       } while (rc);
+
        if (rc) {
                CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
                       "send_sge: %d, recv_sge: %d\n",
@@ -901,12 +913,15 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
 
        conn->ibc_sched = sched;
 
-       if (conn->ibc_queue_depth != peer_ni->ibp_queue_depth)
+       if (!peer_ni->ibp_queue_depth_mod &&
+           conn->ibc_queue_depth != peer_ni->ibp_queue_depth) {
                CWARN("peer %s - queue depth reduced from %u to %u"
                      "  to allow for qp creation\n",
                      libcfs_nid2str(peer_ni->ibp_nid),
                      peer_ni->ibp_queue_depth,
                      conn->ibc_queue_depth);
+               peer_ni->ibp_queue_depth_mod = conn->ibc_queue_depth;
+       }
 
        LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
                         IBLND_RX_MSGS(conn) * sizeof(struct kib_rx));
@@ -1038,13 +1053,11 @@ int
 kiblnd_close_peer_conns_locked(struct kib_peer_ni *peer_ni, int why)
 {
        struct kib_conn *conn;
-       struct list_head        *ctmp;
-       struct list_head        *cnxt;
-       int                     count = 0;
-
-       list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-               conn = list_entry(ctmp, struct kib_conn, ibc_list);
+       struct kib_conn *cnxt;
+       int count = 0;
 
+       list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                ibc_list) {
                CDEBUG(D_NET, "Closing conn -> %s, "
                              "version: %x, reason: %d\n",
                       libcfs_nid2str(peer_ni->ibp_nid),
@@ -1062,13 +1075,11 @@ kiblnd_close_stale_conns_locked(struct kib_peer_ni *peer_ni,
                                int version, __u64 incarnation)
 {
        struct kib_conn *conn;
-       struct list_head        *ctmp;
-       struct list_head        *cnxt;
-       int                     count = 0;
-
-       list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
-               conn = list_entry(ctmp, struct kib_conn, ibc_list);
+       struct kib_conn *cnxt;
+       int count = 0;
 
+       list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+                                ibc_list) {
                if (conn->ibc_version     == version &&
                    conn->ibc_incarnation == incarnation)
                        continue;
@@ -2908,8 +2919,8 @@ kiblnd_base_shutdown(void)
 
        LASSERT(list_empty(&kiblnd_data.kib_devs));
 
-        CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before LND base cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
         switch (kiblnd_data.kib_init) {
         default:
@@ -2955,8 +2966,8 @@ kiblnd_base_shutdown(void)
        if (kiblnd_data.kib_scheds != NULL)
                cfs_percpt_free(kiblnd_data.kib_scheds);
 
-        CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after LND base cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        kiblnd_data.kib_init = IBLND_INIT_NOTHING;
        module_put(THIS_MODULE);
@@ -2974,8 +2985,8 @@ kiblnd_shutdown(struct lnet_ni *ni)
         if (net == NULL)
                 goto out;
 
-        CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "before LND net cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
        write_lock_irqsave(g_lock, flags);
        net->ibn_shutdown = 1;
@@ -3016,8 +3027,8 @@ kiblnd_shutdown(struct lnet_ni *ni)
                 break;
         }
 
-        CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n",
-              atomic_read(&libcfs_kmemory));
+       CDEBUG(D_MALLOC, "after LND net cleanup: kmem %lld\n",
+              libcfs_kmem_read());
 
         net->ibn_init = IBLND_INIT_NOTHING;
         ni->ni_data = NULL;
@@ -3059,6 +3070,7 @@ kiblnd_base_startup(struct net *ns)
 
        spin_lock_init(&kiblnd_data.kib_connd_lock);
        INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns);
+       INIT_LIST_HEAD(&kiblnd_data.kib_connd_waits);
        INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies);
        INIT_LIST_HEAD(&kiblnd_data.kib_reconn_list);
        INIT_LIST_HEAD(&kiblnd_data.kib_reconn_wait);