LU-14488 o2ib: Use rdma_connect_locked if it is defined
diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c
index a7ededd..d63626d 100644
@@ -588,6 +588,7 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
                return -EPROTONOSUPPORT;
        }
 
+#ifdef HAVE_FMR_POOL_API
        /*
         * FMR does not support gaps but the tx has gaps then
         * we should make sure that the number of fragments we'll be sending
@@ -606,6 +607,7 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
                        return -EFBIG;
                }
        }
+#endif
 
        fps = net->ibn_fmr_ps[cpt];
        rc = kiblnd_fmr_pool_map(fps, tx, rd, nob, 0, &tx->tx_fmr);
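
The new #ifdef keeps the gap check out of builds where the kernel no longer exports the ib_fmr_pool API (HAVE_FMR_POOL_API undefined), since only the FMR registration path cares about gaps; FastReg copes with fragmented buffers on its own. As an illustration of what "gaps" means here, the sketch below shows the property FMR requires: every interior fragment boundary must fall on a page boundary so the whole buffer can be described as a single run of pages. The names and the explicit page-size parameter are assumptions for the example, not code from the patch.

#include <linux/types.h>

struct ex_frag {
        __u64 addr;     /* DMA address of the fragment */
        __u32 nob;      /* number of bytes in the fragment */
};

/* true if the fragment list cannot be described as one run of pages */
static bool ex_frags_have_gaps(const struct ex_frag *frags, int nfrags,
                               __u32 page_size)
{
        int i;

        for (i = 0; i < nfrags; i++) {
                /* every fragment after the first must start page aligned */
                if (i > 0 && (frags[i].addr & (page_size - 1)))
                        return true;
                /* every fragment before the last must end on a page boundary */
                if (i < nfrags - 1 &&
                    ((frags[i].addr + frags[i].nob) & (page_size - 1)))
                        return true;
        }
        return false;
}
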
@@ -624,11 +626,17 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
         * for FastReg or FMR with no gaps we can accumulate all
         * the fragments in one FastReg or FMR fragment.
         */
-       if (((dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED) && !tx->tx_gaps) ||
+       if (
+#ifdef HAVE_FMR_POOL_API
+           ((dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED)
+            && !tx->tx_gaps) ||
+#endif
            (dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED)) {
                /* FMR requires zero based address */
+#ifdef HAVE_FMR_POOL_API
                if (dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED)
                        rd->rd_frags[0].rf_addr &= ~hdev->ibh_page_mask;
+#endif
                rd->rd_frags[0].rf_nob = nob;
                rd->rd_nfrags = 1;
        } else {
@@ -649,7 +657,11 @@ kiblnd_fmr_map_tx(struct kib_net *net, struct kib_tx *tx,
 static void
 kiblnd_unmap_tx(struct kib_tx *tx)
 {
-       if (tx->tx_fmr.fmr_pfmr || tx->tx_fmr.fmr_frd)
+       if (
+#ifdef HAVE_FMR_POOL_API
+               tx->tx_fmr.fmr_pfmr ||
+#endif
+               tx->tx_fmr.fmr_frd)
                kiblnd_fmr_pool_unmap(&tx->tx_fmr, tx->tx_status);
 
        if (tx->tx_nfrags != 0) {
@@ -676,8 +688,11 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, struct kib_rdma_desc *rd)
         * dead in the water and fail the operation.
         */
        if (tunables->lnd_map_on_demand &&
-           (net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED ||
-            net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED))
+           (net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FASTREG_ENABLED
+#ifdef HAVE_FMR_POOL_API
+            || net->ibn_dev->ibd_dev_caps & IBLND_DEV_CAPS_FMR_ENABLED
+#endif
+       ))
                return NULL;
 
        /*
@@ -1302,8 +1317,6 @@ kiblnd_resolve_addr_cap(struct rdma_cm_id *cmid,
         unsigned short port;
         int rc;
 
-       LASSERT(capable(CAP_NET_BIND_SERVICE));
-
         /* allow the port to be reused */
         rc = rdma_set_reuseaddr(cmid, 1);
         if (rc != 0) {
@@ -1329,8 +1342,9 @@ kiblnd_resolve_addr_cap(struct rdma_cm_id *cmid,
                 }
         }
 
-        CERROR("Failed to bind to a free privileged port\n");
-        return rc;
+       CERROR("cannot bind to a free privileged port: rc = %d\n", rc);
+
+       return rc;
 }
 
 static int
@@ -1344,7 +1358,7 @@ kiblnd_resolve_addr(struct rdma_cm_id *cmid,
        int rc;
 
        if (!capable(CAP_NET_BIND_SERVICE)) {
-               new_creds = prepare_creds();
+               new_creds = prepare_kernel_cred(NULL);
                if (!new_creds)
                        return -ENOMEM;
 
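
Replacing prepare_creds() with prepare_kernel_cred(NULL) hands back a copy of the kernel's initial credentials rather than of the calling task's, so the temporary override can genuinely carry CAP_NET_BIND_SERVICE even when the caller lacks it. A minimal sketch of the override pattern, assuming the surrounding kiblnd_resolve_addr() raises the capability and pairs override_creds() with revert_creds() (the hunk itself shows only the prepare_kernel_cred() call):

#include <linux/cred.h>
#include <linux/capability.h>
#include <linux/errno.h>

static int ex_with_net_bind_cap(int (*do_bind)(void *), void *arg)
{
        const struct cred *old_creds = NULL;
        struct cred *new_creds = NULL;
        int rc;

        if (!capable(CAP_NET_BIND_SERVICE)) {
                /* borrow kernel credentials that can take the capability */
                new_creds = prepare_kernel_cred(NULL);
                if (!new_creds)
                        return -ENOMEM;
                cap_raise(new_creds->cap_effective, CAP_NET_BIND_SERVICE);
                old_creds = override_creds(new_creds);
        }

        rc = do_bind(arg);      /* e.g. the privileged-port bind/resolve */

        if (old_creds) {
                revert_creds(old_creds);
                put_cred(new_creds);
        }
        return rc;
}
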
@@ -1480,47 +1494,49 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
        struct kib_peer_ni *peer2;
        struct kib_conn *conn;
        rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
-        unsigned long      flags;
-        int                rc;
-       int                i;
+       unsigned long flags;
+       int rc;
+       int i;
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 
-        /* If I get here, I've committed to send, so I complete the tx with
-         * failure on any problems */
+       /* If I get here, I've committed to send, so I complete the tx with
+        * failure on any problems
+        */
 
-        LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */
-        LASSERT (tx == NULL || tx->tx_nwrq > 0);     /* work items have been set up */
+       LASSERT(!tx || !tx->tx_conn);     /* only set when assigned a conn */
+       LASSERT(!tx || tx->tx_nwrq > 0);  /* work items have been set up */
 
-        /* First time, just use a read lock since I expect to find my peer_ni
-         * connected */
+       /* First time, just use a read lock since I expect to find my peer_ni
+        * connected
+        */
        read_lock_irqsave(g_lock, flags);
 
-        peer_ni = kiblnd_find_peer_locked(ni, nid);
+       peer_ni = kiblnd_find_peer_locked(ni, nid);
        if (peer_ni != NULL && !list_empty(&peer_ni->ibp_conns)) {
-                /* Found a peer_ni with an established connection */
-                conn = kiblnd_get_conn_locked(peer_ni);
-                kiblnd_conn_addref(conn); /* 1 ref for me... */
+               /* Found a peer_ni with an established connection */
+               conn = kiblnd_get_conn_locked(peer_ni);
+               kiblnd_conn_addref(conn); /* 1 ref for me... */
 
                read_unlock_irqrestore(g_lock, flags);
 
-                if (tx != NULL)
-                        kiblnd_queue_tx(tx, conn);
-                kiblnd_conn_decref(conn); /* ...to here */
-                return;
-        }
+               if (tx != NULL)
+                       kiblnd_queue_tx(tx, conn);
+               kiblnd_conn_decref(conn); /* ...to here */
+               return;
+       }
 
        read_unlock(g_lock);
        /* Re-try with a write lock */
        write_lock(g_lock);
 
-        peer_ni = kiblnd_find_peer_locked(ni, nid);
-        if (peer_ni != NULL) {
+       peer_ni = kiblnd_find_peer_locked(ni, nid);
+       if (peer_ni != NULL) {
                if (list_empty(&peer_ni->ibp_conns)) {
-                        /* found a peer_ni, but it's still connecting... */
+                       /* found a peer_ni, but it's still connecting... */
                        LASSERT(kiblnd_peer_connecting(peer_ni));
-                        if (tx != NULL)
+                       if (tx != NULL)
                                list_add_tail(&tx->tx_list,
-                                                  &peer_ni->ibp_tx_queue);
+                                             &peer_ni->ibp_tx_queue);
                        write_unlock_irqrestore(g_lock, flags);
                } else {
                        conn = kiblnd_get_conn_locked(peer_ni);
@@ -1528,12 +1544,12 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
 
                        write_unlock_irqrestore(g_lock, flags);
 
-                        if (tx != NULL)
-                                kiblnd_queue_tx(tx, conn);
-                        kiblnd_conn_decref(conn); /* ...to here */
-                }
-                return;
-        }
+                       if (tx != NULL)
+                               kiblnd_queue_tx(tx, conn);
+                       kiblnd_conn_decref(conn); /* ...to here */
+               }
+               return;
+       }
 
        write_unlock_irqrestore(g_lock, flags);
 
@@ -1552,14 +1568,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
 
        write_lock_irqsave(g_lock, flags);
 
-        peer2 = kiblnd_find_peer_locked(ni, nid);
-        if (peer2 != NULL) {
+       peer2 = kiblnd_find_peer_locked(ni, nid);
+       if (peer2 != NULL) {
                if (list_empty(&peer2->ibp_conns)) {
-                        /* found a peer_ni, but it's still connecting... */
+                       /* found a peer_ni, but it's still connecting... */
                        LASSERT(kiblnd_peer_connecting(peer2));
-                        if (tx != NULL)
+                       if (tx != NULL)
                                list_add_tail(&tx->tx_list,
-                                                  &peer2->ibp_tx_queue);
+                                             &peer2->ibp_tx_queue);
                        write_unlock_irqrestore(g_lock, flags);
                } else {
                        conn = kiblnd_get_conn_locked(peer2);
@@ -1567,14 +1583,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
 
                        write_unlock_irqrestore(g_lock, flags);
 
-                        if (tx != NULL)
-                                kiblnd_queue_tx(tx, conn);
-                        kiblnd_conn_decref(conn); /* ...to here */
-                }
+                       if (tx != NULL)
+                               kiblnd_queue_tx(tx, conn);
+                       kiblnd_conn_decref(conn); /* ...to here */
+               }
 
-                kiblnd_peer_decref(peer_ni);
-                return;
-        }
+               kiblnd_peer_decref(peer_ni);
+               return;
+       }
 
        /* Brand new peer_ni */
        LASSERT(peer_ni->ibp_connecting == 0);
@@ -1587,14 +1603,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid)
        if (tx != NULL)
                list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue);
 
-        kiblnd_peer_addref(peer_ni);
-       list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid));
+       kiblnd_peer_addref(peer_ni);
+       hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid);
 
        write_unlock_irqrestore(g_lock, flags);
 
        for (i = 0; i < tunables->lnd_conns_per_peer; i++)
                kiblnd_connect_peer(peer_ni);
-        kiblnd_peer_decref(peer_ni);
+       kiblnd_peer_decref(peer_ni);
 }
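
The open-coded kiblnd_nid2peerlist() bucket list gives way to the generic linux/hashtable.h API, with the peer_ni's NID used directly as the hash key. The add/lookup pattern being adopted looks roughly like the sketch below; the structure and constant names are illustrative, not the ones o2iblnd uses. HASH_SIZE() on such a table yields the bucket count, which is why kiblnd_connd() later switches from kib_peer_hash_size to HASH_SIZE(kiblnd_data.kib_peers).

#include <linux/hashtable.h>
#include <linux/types.h>

#define EX_PEER_HASH_BITS 7                     /* 2^7 buckets; HASH_SIZE() == 128 */

struct ex_peer {
        struct hlist_node px_list;              /* plays the role of ibp_list */
        u64 px_nid;
};

static DEFINE_HASHTABLE(ex_peers, EX_PEER_HASH_BITS);

static void ex_peer_add(struct ex_peer *peer, u64 nid)
{
        peer->px_nid = nid;
        /* hash_add() hashes the key itself; no manual bucket arithmetic */
        hash_add(ex_peers, &peer->px_list, nid);
}

static struct ex_peer *ex_peer_find(u64 nid)
{
        struct ex_peer *peer;

        /* walk only the bucket this key hashes to */
        hash_for_each_possible(ex_peers, peer, px_list, nid)
                if (peer->px_nid == nid)
                        return peer;
        return NULL;
}
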
 
 int
@@ -1919,7 +1935,7 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 int
 kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
 {
-       struct task_struct *task = kthread_run(fn, arg, name);
+       struct task_struct *task = kthread_run(fn, arg, "%s", name);
 
        if (IS_ERR(task))
                return PTR_ERR(task);
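
kthread_run() treats its third argument as a printf-style name format, so passing a caller-supplied name straight through would let any '%' in it be parsed as a conversion specifier. Routing it through "%s" keeps the name literal. A minimal sketch, with an illustrative thread function:

#include <linux/kthread.h>
#include <linux/err.h>

static int ex_thread_fn(void *arg)
{
        /* thread body elided for the example */
        return 0;
}

static int ex_thread_start(void *arg, const char *name)
{
        /* "%s" keeps an arbitrary name literal instead of a format string */
        struct task_struct *task = kthread_run(ex_thread_fn, arg, "%s", name);

        return IS_ERR(task) ? PTR_ERR(task) : 0;
}
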
@@ -2119,6 +2135,10 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
                if (tx->tx_sending == 0) {
                        tx->tx_queued = 0;
                        list_move(&tx->tx_list, &zombies);
+               } else {
+                       /* keep tx until cq destroy */
+                       list_move(&tx->tx_list, &conn->ibc_zombie_txs);
+                       conn->ibc_waits++;
                }
        }
 
@@ -2133,6 +2153,31 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
        kiblnd_txlist_done(&zombies, -ECONNABORTED, LNET_MSG_STATUS_OK);
 }
 
+static int
+kiblnd_tx_may_discard(struct kib_conn *conn)
+{
+       int rc = 0;
+       struct kib_tx *nxt;
+       struct kib_tx *tx;
+
+       spin_lock(&conn->ibc_lock);
+
+       list_for_each_entry_safe(tx, nxt, &conn->ibc_zombie_txs, tx_list) {
+               if (tx->tx_sending > 0 && tx->tx_lntmsg[0] &&
+                   lnet_md_discarded(tx->tx_lntmsg[0]->msg_md)) {
+                       tx->tx_sending--;
+                       if (tx->tx_sending == 0) {
+                               kiblnd_conn_decref(tx->tx_conn);
+                               tx->tx_conn = NULL;
+                               rc = 1;
+                       }
+               }
+       }
+
+       spin_unlock(&conn->ibc_lock);
+       return rc;
+}
+
 static void
 kiblnd_finalise_conn(struct kib_conn *conn)
 {
@@ -2343,7 +2388,7 @@ kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej)
 static int
 kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 {
-       rwlock_t                *g_lock = &kiblnd_data.kib_global_lock;
+       rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
        struct kib_msg *reqmsg = priv;
        struct kib_msg *ackmsg;
        struct kib_dev *ibdev;
@@ -2352,27 +2397,27 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
        struct kib_conn *conn;
        struct lnet_ni *ni = NULL;
        struct kib_net *net = NULL;
-        lnet_nid_t             nid;
-        struct rdma_conn_param cp;
+       lnet_nid_t nid;
+       struct rdma_conn_param cp;
        struct kib_rej rej;
-       int                    version = IBLND_MSG_VERSION;
-       unsigned long          flags;
-       int                    rc;
-       struct sockaddr_in    *peer_addr;
-       LASSERT (!in_interrupt());
+       int version = IBLND_MSG_VERSION;
+       unsigned long flags;
+       int rc;
+       struct sockaddr_in *peer_addr;
 
+       LASSERT(!in_interrupt());
        /* cmid inherits 'context' from the corresponding listener id */
        ibdev = cmid->context;
        LASSERT(ibdev);
 
-        memset(&rej, 0, sizeof(rej));
-        rej.ibr_magic                = IBLND_MSG_MAGIC;
-        rej.ibr_why                  = IBLND_REJECT_FATAL;
-        rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
+       memset(&rej, 0, sizeof(rej));
+       rej.ibr_magic                = IBLND_MSG_MAGIC;
+       rej.ibr_why                  = IBLND_REJECT_FATAL;
+       rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
-        peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
-        if (*kiblnd_tunables.kib_require_priv_port &&
-            ntohs(peer_addr->sin_port) >= PROT_SOCK) {
+       peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
+       if (*kiblnd_tunables.kib_require_priv_port &&
+           ntohs(peer_addr->sin_port) >= PROT_SOCK) {
                __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
                CERROR("peer_ni's port (%pI4h:%hu) is not privileged\n",
                       &ip, ntohs(peer_addr->sin_port));
@@ -2419,17 +2464,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
        if (ni == NULL ||                         /* no matching net */
            ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
            net->ibn_dev != ibdev) {              /* wrong device */
-               CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): "
-                      "bad dst nid %s\n", libcfs_nid2str(nid),
-                      ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+               CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", libcfs_nid2str(nid),
+                      ni ? libcfs_nid2str(ni->ni_nid) : "NA",
                       ibdev->ibd_ifname, ibdev->ibd_nnets,
-                       &ibdev->ibd_ifip,
+                      &ibdev->ibd_ifip,
                       libcfs_nid2str(reqmsg->ibm_dstnid));
 
                goto failed;
        }
 
-       /* check time stamp as soon as possible */
+       /* check time stamp as soon as possible */
        if (reqmsg->ibm_dststamp != 0 &&
            reqmsg->ibm_dststamp != net->ibn_incarnation) {
                CWARN("Stale connection request\n");
@@ -2448,8 +2492,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 
        if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
            kiblnd_msg_queue_size(version, ni)) {
-               CERROR("Can't accept conn from %s, queue depth too large: "
-                      " %d (<=%d wanted)\n",
+               CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n",
                       libcfs_nid2str(nid),
                       reqmsg->ibm_u.connparams.ibcp_queue_depth,
                       kiblnd_msg_queue_size(version, ni));
@@ -2462,8 +2505,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 
        if (reqmsg->ibm_u.connparams.ibcp_max_frags >
            IBLND_MAX_RDMA_FRAGS) {
-               CWARN("Can't accept conn from %s (version %x): "
-                     "max_frags %d too large (%d wanted)\n",
+               CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n",
                      libcfs_nid2str(nid), version,
                      reqmsg->ibm_u.connparams.ibcp_max_frags,
                      IBLND_MAX_RDMA_FRAGS);
@@ -2475,9 +2517,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
        } else if (reqmsg->ibm_u.connparams.ibcp_max_frags <
                   IBLND_MAX_RDMA_FRAGS &&
                   net->ibn_fmr_ps == NULL) {
-               CWARN("Can't accept conn from %s (version %x): "
-                     "max_frags %d incompatible without FMR pool "
-                     "(%d wanted)\n",
+               CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n",
                      libcfs_nid2str(nid), version,
                      reqmsg->ibm_u.connparams.ibcp_max_frags,
                      IBLND_MAX_RDMA_FRAGS);
@@ -2488,13 +2528,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                goto failed;
        }
 
-        if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
-                CERROR("Can't accept %s: message size %d too big (%d max)\n",
-                       libcfs_nid2str(nid),
-                       reqmsg->ibm_u.connparams.ibcp_max_msg_size,
-                       IBLND_MSG_SIZE);
-                goto failed;
-        }
+       if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) {
+               CERROR("Can't accept %s: message size %d too big (%d max)\n",
+                      libcfs_nid2str(nid),
+                      reqmsg->ibm_u.connparams.ibcp_max_msg_size,
+                      IBLND_MSG_SIZE);
+               goto failed;
+       }
 
        /* assume 'nid' is a new peer_ni; create  */
        rc = kiblnd_create_peer(ni, &peer_ni, nid);
@@ -2510,16 +2550,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 
        write_lock_irqsave(g_lock, flags);
 
-        peer2 = kiblnd_find_peer_locked(ni, nid);
-        if (peer2 != NULL) {
-                if (peer2->ibp_version == 0) {
-                        peer2->ibp_version     = version;
-                        peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
-                }
+       peer2 = kiblnd_find_peer_locked(ni, nid);
+       if (peer2 != NULL) {
+               if (peer2->ibp_version == 0) {
+                       peer2->ibp_version     = version;
+                       peer2->ibp_incarnation = reqmsg->ibm_srcstamp;
+               }
 
-                /* not the guy I've talked with */
-                if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
-                    peer2->ibp_version     != version) {
+               /* not the guy I've talked with */
+               if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp ||
+                   peer2->ibp_version     != version) {
                        kiblnd_close_peer_conns_locked(peer2, -ESTALE);
 
                        if (kiblnd_peer_active(peer2)) {
@@ -2532,10 +2572,10 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                              libcfs_nid2str(nid), peer2->ibp_version, version,
                              peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
 
-                        kiblnd_peer_decref(peer_ni);
-                        rej.ibr_why = IBLND_REJECT_CONN_STALE;
-                        goto failed;
-                }
+                       kiblnd_peer_decref(peer_ni);
+                       rej.ibr_why = IBLND_REJECT_CONN_STALE;
+                       goto failed;
+               }
 
                /* Tie-break connection race in favour of the higher NID.
                 * If we keep running into a race condition multiple times,
@@ -2577,78 +2617,80 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                peer2->ibp_queue_depth = peer_ni->ibp_queue_depth;
 
                write_unlock_irqrestore(g_lock, flags);
-                kiblnd_peer_decref(peer_ni);
-                peer_ni = peer2;
-        } else {
-                /* Brand new peer_ni */
-                LASSERT (peer_ni->ibp_accepting == 0);
-                LASSERT (peer_ni->ibp_version == 0 &&
-                         peer_ni->ibp_incarnation == 0);
+               kiblnd_peer_decref(peer_ni);
+               peer_ni = peer2;
+       } else {
+               /* Brand new peer_ni */
+               LASSERT(peer_ni->ibp_accepting == 0);
+               LASSERT(peer_ni->ibp_version == 0 &&
+                       peer_ni->ibp_incarnation == 0);
 
-                peer_ni->ibp_accepting   = 1;
-                peer_ni->ibp_version     = version;
-                peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp;
+               peer_ni->ibp_accepting   = 1;
+               peer_ni->ibp_version     = version;
+               peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp;
 
-                /* I have a ref on ni that prevents it being shutdown */
-                LASSERT (net->ibn_shutdown == 0);
+               /* I have a ref on ni that prevents it being shutdown */
+               LASSERT(net->ibn_shutdown == 0);
 
-                kiblnd_peer_addref(peer_ni);
-               list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid));
+               kiblnd_peer_addref(peer_ni);
+               hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid);
 
                write_unlock_irqrestore(g_lock, flags);
-        }
+       }
 
-       conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, version);
-        if (conn == NULL) {
-                kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM);
-                kiblnd_peer_decref(peer_ni);
-                rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
-                goto failed;
-        }
+       conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT,
+                                 version);
+       if (!conn) {
+               kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM);
+               kiblnd_peer_decref(peer_ni);
+               rej.ibr_why = IBLND_REJECT_NO_RESOURCES;
+               goto failed;
+       }
 
-        /* conn now "owns" cmid, so I return success from here on to ensure the
-         * CM callback doesn't destroy cmid. */
+       /* conn now "owns" cmid, so I return success from here on to ensure the
+        * CM callback doesn't destroy cmid.
+        */
        conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
        conn->ibc_credits          = conn->ibc_queue_depth;
        conn->ibc_reserved_credits = conn->ibc_queue_depth;
        LASSERT(conn->ibc_credits + conn->ibc_reserved_credits +
                IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn));
 
-        ackmsg = &conn->ibc_connvars->cv_msg;
-        memset(ackmsg, 0, sizeof(*ackmsg));
+       ackmsg = &conn->ibc_connvars->cv_msg;
+       memset(ackmsg, 0, sizeof(*ackmsg));
 
-        kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
-                        sizeof(ackmsg->ibm_u.connparams));
+       kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK,
+                       sizeof(ackmsg->ibm_u.connparams));
        ackmsg->ibm_u.connparams.ibcp_queue_depth  = conn->ibc_queue_depth;
        ackmsg->ibm_u.connparams.ibcp_max_frags    = conn->ibc_max_frags;
        ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
 
-        kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
+       kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp);
 
-        memset(&cp, 0, sizeof(cp));
-        cp.private_data        = ackmsg;
-        cp.private_data_len    = ackmsg->ibm_nob;
-        cp.responder_resources = 0;             /* No atomic ops or RDMA reads */
-        cp.initiator_depth     = 0;
-        cp.flow_control        = 1;
-        cp.retry_count         = *kiblnd_tunables.kib_retry_count;
-        cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
+       memset(&cp, 0, sizeof(cp));
+       cp.private_data        = ackmsg;
+       cp.private_data_len    = ackmsg->ibm_nob;
+       cp.responder_resources = 0;            /* No atomic ops or RDMA reads */
+       cp.initiator_depth     = 0;
+       cp.flow_control        = 1;
+       cp.retry_count         = *kiblnd_tunables.kib_retry_count;
+       cp.rnr_retry_count     = *kiblnd_tunables.kib_rnr_retry_count;
 
-        CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
+       CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid));
 
-        rc = rdma_accept(cmid, &cp);
-        if (rc != 0) {
-                CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
-                rej.ibr_version = version;
-                rej.ibr_why     = IBLND_REJECT_FATAL;
+       rc = rdma_accept(cmid, &cp);
+       if (rc != 0) {
+               CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc);
+               rej.ibr_version = version;
+               rej.ibr_why     = IBLND_REJECT_FATAL;
 
-                kiblnd_reject(cmid, &rej);
-                kiblnd_connreq_done(conn, rc);
-                kiblnd_conn_decref(conn);
-        }
+               kiblnd_reject(cmid, &rej);
+               kiblnd_connreq_done(conn, rc);
+               kiblnd_conn_decref(conn);
+       }
 
-        lnet_ni_decref(ni);
-        return 0;
+       lnet_ni_decref(ni);
+       return 0;
 
  failed:
        if (ni != NULL) {
@@ -3073,8 +3115,7 @@ kiblnd_active_connect(struct rdma_cm_id *cmid)
 
         LASSERT(cmid->context == (void *)conn);
         LASSERT(conn->ibc_cmid == cmid);
-
-        rc = rdma_connect(cmid, &cp);
+       rc = rdma_connect_locked(cmid, &cp);
         if (rc != 0) {
                 CERROR("Can't connect to %s: %d\n",
                        libcfs_nid2str(peer_ni->ibp_nid), rc);
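
This hunk is the change named in the commit subject. kiblnd_active_connect() runs from the RDMA CM event callback, and since kernel 5.10 rdma_connect() must not be called there because the CM handler lock is already held; rdma_connect_locked() exists for exactly that context. Because the function is only used "if it is defined", a compat fallback along these lines is implied for older kernels (the HAVE_RDMA_CONNECT_LOCKED macro name is an assumption; the real probe would live in the Lustre configure checks, not in this file):

#ifndef HAVE_RDMA_CONNECT_LOCKED
/* older kernels: rdma_connect() is safe to call from the CM handler */
#define rdma_connect_locked(cmid, param) rdma_connect(cmid, param)
#endif
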
@@ -3283,8 +3324,9 @@ kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
                }
 
                if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) {
-                       CERROR("Timed out tx: %s, %lld seconds\n",
+                       CERROR("Timed out tx: %s(WSQ:%d%d%d), %lld seconds\n",
                               kiblnd_queue2str(conn, txs),
+                              tx->tx_waiting, tx->tx_sending, tx->tx_queued,
                               kiblnd_timeout() +
                               ktime_ms_delta(ktime_get(),
                                              tx->tx_deadline) / MSEC_PER_SEC);
@@ -3311,22 +3353,20 @@ kiblnd_check_conns (int idx)
        LIST_HEAD(closes);
        LIST_HEAD(checksends);
        LIST_HEAD(timedout_txs);
-       struct list_head *peers = &kiblnd_data.kib_peers[idx];
-       struct list_head *ptmp;
+       struct hlist_head *peers = &kiblnd_data.kib_peers[idx];
        struct kib_peer_ni *peer_ni;
-       struct kib_conn *conn;
+       struct kib_conn *conn;
        struct kib_tx *tx, *tx_tmp;
        struct list_head *ctmp;
-       unsigned long     flags;
+       unsigned long flags;
 
        /* NB. We expect to have a look at all the peers and not find any
         * RDMAs to time out, so we just use a shared lock while we
-        * take a look... */
+        * take a look...
+        */
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
-       list_for_each(ptmp, peers) {
-               peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list);
-
+       hlist_for_each_entry(peer_ni, peers, ibp_list) {
                /* Check tx_deadline */
                list_for_each_entry_safe(tx, tx_tmp, &peer_ni->ibp_tx_queue, tx_list) {
                        if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) {
@@ -3356,10 +3396,10 @@ kiblnd_check_conns (int idx)
                        }
 
                        if (timedout) {
-                               CERROR("Timed out RDMA with %s (%lld): "
-                                      "c: %u, oc: %u, rc: %u\n",
+                               CERROR("Timed out RDMA with %s (%lld): c: %u, oc: %u, rc: %u\n",
                                       libcfs_nid2str(peer_ni->ibp_nid),
-                                      ktime_get_seconds() - peer_ni->ibp_last_alive,
+                                      ktime_get_seconds()
+                                      - peer_ni->ibp_last_alive,
                                       conn->ibc_credits,
                                       conn->ibc_outstanding_credits,
                                       conn->ibc_reserved_credits);
@@ -3382,7 +3422,8 @@ kiblnd_check_conns (int idx)
 
        /* Handle timeout by closing the whole
         * connection. We can only be sure RDMA activity
-        * has ceased once the QP has been modified. */
+        * has ceased once the QP has been modified.
+        */
        while (!list_empty(&closes)) {
                conn = list_entry(closes.next,
                                  struct kib_conn, ibc_connd_list);
@@ -3393,7 +3434,8 @@ kiblnd_check_conns (int idx)
 
        /* In case we have enough credits to return via a
         * NOOP, but there were no non-blocking tx descs
-        * free to do it last time... */
+        * free to do it last time...
+        */
        while (!list_empty(&checksends)) {
                conn = list_entry(checksends.next,
                                  struct kib_conn, ibc_connd_list);
@@ -3434,17 +3476,17 @@ kiblnd_disconnect_conn(struct kib_conn *conn)
 int
 kiblnd_connd (void *arg)
 {
-       spinlock_t        *lock= &kiblnd_data.kib_connd_lock;
+       spinlock_t *lock = &kiblnd_data.kib_connd_lock;
        wait_queue_entry_t wait;
-       unsigned long      flags;
+       unsigned long flags;
        struct kib_conn *conn;
-       int                timeout;
-       int                i;
-       int                dropped_lock;
-       int                peer_index = 0;
-       unsigned long      deadline = jiffies;
+       int timeout;
+       int i;
+       int dropped_lock;
+       int peer_index = 0;
+       unsigned long deadline = jiffies;
 
-       init_waitqueue_entry(&wait, current);
+       init_wait(&wait);
        kiblnd_data.kib_connd = current;
 
        spin_lock_irqsave(lock, flags);
@@ -3452,7 +3494,7 @@ kiblnd_connd (void *arg)
        while (!kiblnd_data.kib_shutdown) {
                int reconn = 0;
 
-                dropped_lock = 0;
+               dropped_lock = 0;
 
                if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
                        struct kib_peer_ni *peer_ni = NULL;
@@ -3486,6 +3528,7 @@ kiblnd_connd (void *arg)
                }
 
                if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+                       int wait;
                        conn = list_entry(kiblnd_data.kib_connd_conns.next,
                                          struct kib_conn, ibc_list);
                        list_del(&conn->ibc_list);
@@ -3494,10 +3537,16 @@ kiblnd_connd (void *arg)
                        dropped_lock = 1;
 
                        kiblnd_disconnect_conn(conn);
-                       kiblnd_conn_decref(conn);
+                       wait = conn->ibc_waits;
+                       if (wait == 0) /* keep ref for connd_wait, see below */
+                               kiblnd_conn_decref(conn);
 
                        spin_lock_irqsave(lock, flags);
-                }
+
+                       if (wait)
+                               list_add_tail(&conn->ibc_list,
+                                             &kiblnd_data.kib_connd_waits);
+               }
 
                while (reconn < KIB_RECONN_BREAK) {
                        if (kiblnd_data.kib_reconn_sec !=
@@ -3524,24 +3573,41 @@ kiblnd_connd (void *arg)
                        spin_lock_irqsave(lock, flags);
                }
 
-                /* careful with the jiffy wrap... */
-                timeout = (int)(deadline - jiffies);
-                if (timeout <= 0) {
-                        const int n = 4;
-                        const int p = 1;
-                        int       chunk = kiblnd_data.kib_peer_hash_size;
+               if (!list_empty(&kiblnd_data.kib_connd_waits)) {
+                       conn = list_entry(kiblnd_data.kib_connd_waits.next,
+                                         struct kib_conn, ibc_list);
+                       list_del(&conn->ibc_list);
+                       spin_unlock_irqrestore(lock, flags);
+
+                       dropped_lock = kiblnd_tx_may_discard(conn);
+                       if (dropped_lock)
+                               kiblnd_conn_decref(conn);
+
+                       spin_lock_irqsave(lock, flags);
+                       if (dropped_lock == 0)
+                               list_add_tail(&conn->ibc_list,
+                                             &kiblnd_data.kib_connd_waits);
+               }
+
+               /* careful with the jiffy wrap... */
+               timeout = (int)(deadline - jiffies);
+               if (timeout <= 0) {
+                       const int n = 4;
+                       const int p = 1;
+                       int chunk = HASH_SIZE(kiblnd_data.kib_peers);
                        unsigned int lnd_timeout;
 
                        spin_unlock_irqrestore(lock, flags);
-                        dropped_lock = 1;
+                       dropped_lock = 1;
 
-                        /* Time to check for RDMA timeouts on a few more
-                         * peers: I do checks every 'p' seconds on a
-                         * proportion of the peer_ni table and I need to check
-                         * every connection 'n' times within a timeout
-                         * interval, to ensure I detect a timeout on any
-                         * connection within (n+1)/n times the timeout
-                         * interval. */
+                       /* Time to check for RDMA timeouts on a few more
+                        * peers: I do checks every 'p' seconds on a
+                        * proportion of the peer_ni table and I need to check
+                        * every connection 'n' times within a timeout
+                        * interval, to ensure I detect a timeout on any
+                        * connection within (n+1)/n times the timeout
+                        * interval.
+                        */
 
                        lnd_timeout = kiblnd_timeout();
                        if (lnd_timeout > n * p)
@@ -3552,7 +3618,7 @@ kiblnd_connd (void *arg)
                        for (i = 0; i < chunk; i++) {
                                kiblnd_check_conns(peer_index);
                                peer_index = (peer_index + 1) %
-                                            kiblnd_data.kib_peer_hash_size;
+                                       HASH_SIZE(kiblnd_data.kib_peers);
                        }
 
                        deadline += cfs_time_seconds(p);
@@ -3702,7 +3768,7 @@ kiblnd_scheduler(void *arg)
        int                     did_something;
        int                     rc;
 
-       init_waitqueue_entry(&wait, current);
+       init_wait(&wait);
 
        sched = kiblnd_data.kib_scheds[KIB_THREAD_CPT(id)];
 
@@ -3840,7 +3906,7 @@ kiblnd_failover_thread(void *arg)
 
        LASSERT(*kiblnd_tunables.kib_dev_failover != 0);
 
-       init_waitqueue_entry(&wait, current);
+       init_wait(&wait);
        write_lock_irqsave(glock, flags);
 
         while (!kiblnd_data.kib_shutdown) {
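
The init_waitqueue_entry(&wait, current) calls in connd, the scheduler, and the failover thread become init_wait(&wait), which initialises the entry for the running task with autoremove_wake_function and needs no explicit 'current' argument. A minimal sketch of the resulting sleep loop, assuming the usual prepare_to_wait()/finish_wait() pairing rather than o2iblnd's exact wait-queue calls:

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/types.h>

static void ex_wait_for_work(wait_queue_head_t *wq, bool (*have_work)(void))
{
        wait_queue_entry_t wait;

        init_wait(&wait);
        while (!have_work()) {
                prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
                if (!have_work())
                        schedule();
                finish_wait(wq, &wait);
        }
}
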