Whamcloud - gitweb
LU-12901 o2iblnd: retry qp creation with reduced queue depth 48/40748/4
Author: Serguei Smirnov <ssmirnov@whamcloud.com>
Tue, 24 Nov 2020 19:59:46 +0000 (14:59 -0500)
Committer: Oleg Drokin <green@whamcloud.com>
Sun, 13 Dec 2020 08:23:11 +0000 (08:23 +0000)
If negotiated number of frags * queue depth is too large for
successful qp creation, reduce the queue depth in a loop
until qp creation succeeds or the queue depth dips below 2.
Remember the reduced queue depth value to use for later
connections to the same peer.

Test-Parameters: trivial
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: Iaa91510d6f80d813218a06a9bc52f5f9251e8b87
Reviewed-on: https://review.whamcloud.com/40748
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h

index 0ad8553..d939510 100644 (file)
@@ -335,6 +335,7 @@ kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
        peer_ni->ibp_last_alive = 0;
        peer_ni->ibp_max_frags = IBLND_MAX_RDMA_FRAGS;
        peer_ni->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
        peer_ni->ibp_last_alive = 0;
        peer_ni->ibp_max_frags = IBLND_MAX_RDMA_FRAGS;
        peer_ni->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
+       peer_ni->ibp_queue_depth_mod = 0;       /* try to use the default */
        atomic_set(&peer_ni->ibp_refcount, 1);  /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer_ni->ibp_list);     /* not in the peer_ni table yet */
        atomic_set(&peer_ni->ibp_refcount, 1);  /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer_ni->ibp_list);     /* not in the peer_ni table yet */
@@ -878,14 +879,28 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
        init_qp_attr.qp_type = IB_QPT_RC;
        init_qp_attr.send_cq = cq;
        init_qp_attr.recv_cq = cq;
        init_qp_attr.qp_type = IB_QPT_RC;
        init_qp_attr.send_cq = cq;
        init_qp_attr.recv_cq = cq;
-       /*
-        * kiblnd_send_wrs() can change the connection's queue depth if
-        * the maximum work requests for the device is maxed out
-        */
-       init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
-       init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
 
 
-       rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd, &init_qp_attr);
+       if (peer_ni->ibp_queue_depth_mod &&
+           peer_ni->ibp_queue_depth_mod < peer_ni->ibp_queue_depth) {
+               conn->ibc_queue_depth = peer_ni->ibp_queue_depth_mod;
+               CDEBUG(D_NET, "Use reduced queue depth %u (from %u)\n",
+                      peer_ni->ibp_queue_depth_mod,
+                      peer_ni->ibp_queue_depth);
+       }
+
+       do {
+               /* kiblnd_send_wrs() can change the connection's queue depth if
+                * the maximum work requests for the device is maxed out
+                */
+               init_qp_attr.cap.max_send_wr = kiblnd_send_wrs(conn);
+               init_qp_attr.cap.max_recv_wr = IBLND_RECV_WRS(conn);
+               rc = rdma_create_qp(cmid, conn->ibc_hdev->ibh_pd,
+                                   &init_qp_attr);
+               if (rc != -ENOMEM || conn->ibc_queue_depth < 2)
+                       break;
+               conn->ibc_queue_depth--;
+       } while (rc);
+
        if (rc) {
                CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
                       "send_sge: %d, recv_sge: %d\n",
        if (rc) {
                CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
                       "send_sge: %d, recv_sge: %d\n",
@@ -898,12 +913,15 @@ kiblnd_create_conn(struct kib_peer_ni *peer_ni, struct rdma_cm_id *cmid,
 
        conn->ibc_sched = sched;
 
 
        conn->ibc_sched = sched;
 
-       if (conn->ibc_queue_depth != peer_ni->ibp_queue_depth)
+       if (!peer_ni->ibp_queue_depth_mod &&
+           conn->ibc_queue_depth != peer_ni->ibp_queue_depth) {
                CWARN("peer %s - queue depth reduced from %u to %u"
                      "  to allow for qp creation\n",
                      libcfs_nid2str(peer_ni->ibp_nid),
                      peer_ni->ibp_queue_depth,
                      conn->ibc_queue_depth);
                CWARN("peer %s - queue depth reduced from %u to %u"
                      "  to allow for qp creation\n",
                      libcfs_nid2str(peer_ni->ibp_nid),
                      peer_ni->ibp_queue_depth,
                      conn->ibc_queue_depth);
+               peer_ni->ibp_queue_depth_mod = conn->ibc_queue_depth;
+       }
 
        LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
                         IBLND_RX_MSGS(conn) * sizeof(struct kib_rx));
 
        LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
                         IBLND_RX_MSGS(conn) * sizeof(struct kib_rx));
index 5334403..2f7573c 100644 (file)
@@ -780,6 +780,8 @@ struct kib_peer_ni {
        __u16                   ibp_max_frags;
        /* max_peer_credits */
        __u16                   ibp_queue_depth;
        __u16                   ibp_max_frags;
        /* max_peer_credits */
        __u16                   ibp_queue_depth;
+       /* reduced value which allows conn to be created if max fails */
+       __u16                   ibp_queue_depth_mod;
 };
 
 #ifndef HAVE_IB_INC_RKEY
 };
 
 #ifndef HAVE_IB_INC_RKEY