Whamcloud - gitweb
LU-8303 lnet: make connection more stable with packet loss
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index 1dbd518..b211f95 100644 (file)
@@ -261,7 +261,7 @@ kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
        if (tx == NULL) {
                spin_unlock(&conn->ibc_lock);
 
-                CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
+               CWARN("Unmatched completion type %x cookie %#llx from %s\n",
                       txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
                 kiblnd_close_conn(conn, -EPROTO);
                 return;
@@ -688,7 +688,11 @@ kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
                 fragnob = min(fragnob, (int)PAGE_SIZE - page_offset);
 
                 sg_set_page(sg, page, fragnob, page_offset);
-                sg++;
+               sg = sg_next(sg);
+               if (!sg) {
+                       CERROR("lacking enough sg entries to map tx\n");
+                       return -EFAULT;
+               }
 
                 if (offset + fragnob < iov->iov_len) {
                         offset += fragnob;
@@ -730,9 +734,13 @@ kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd,
 
                 fragnob = min((int)(kiov->kiov_len - offset), nob);
 
-                sg_set_page(sg, kiov->kiov_page, fragnob,
-                            kiov->kiov_offset + offset);
-                sg++;
+               sg_set_page(sg, kiov->kiov_page, fragnob,
+                           kiov->kiov_offset + offset);
+               sg = sg_next(sg);
+               if (!sg) {
+                       CERROR("lacking enough sg entries to map tx\n");
+                       return -EFAULT;
+               }
 
                 offset = 0;
                 kiov++;
@@ -848,7 +856,7 @@ __must_hold(&conn->ibc_lock)
                }
 
                LASSERTF(bad->wr_id == kiblnd_ptr2wreqid(tx, IBLND_WID_TX),
-                        "bad wr_id "LPX64", opc %d, flags %d, peer: %s\n",
+                        "bad wr_id %#llx, opc %d, flags %d, peer: %s\n",
                         bad->wr_id, bad->opcode, bad->send_flags,
                         libcfs_nid2str(conn->ibc_peer->ibp_nid));
 
@@ -976,7 +984,7 @@ kiblnd_tx_complete (kib_tx_t *tx, int status)
 
         if (failed) {
                 if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
-                        CNETERR("Tx -> %s cookie "LPX64
+                       CNETERR("Tx -> %s cookie %#llx"
                                 " sending %d waiting %d: failed %d\n",
                                 libcfs_nid2str(conn->ibc_peer->ibp_nid),
                                 tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
@@ -2404,7 +2412,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                        }
                        write_unlock_irqrestore(g_lock, flags);
 
-                       CWARN("Conn stale %s version %x/%x incarnation "LPU64"/"LPU64"\n",
+                       CWARN("Conn stale %s version %x/%x incarnation %llu/%llu\n",
                              libcfs_nid2str(nid), peer2->ibp_version, version,
                              peer2->ibp_incarnation, reqmsg->ibm_srcstamp);
 
@@ -2513,12 +2521,14 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
         return 0;
 
  failed:
-        if (ni != NULL)
-                lnet_ni_decref(ni);
+       if (ni != NULL) {
+               rej.ibr_cp.ibcp_queue_depth =
+                       kiblnd_msg_queue_size(version, ni);
+               rej.ibr_cp.ibcp_max_frags   = kiblnd_rdma_frags(version, ni);
+               lnet_ni_decref(ni);
+       }
 
        rej.ibr_version = version;
-       rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-       rej.ibr_cp.ibcp_max_frags   = kiblnd_rdma_frags(version, ni);
        kiblnd_reject(cmid, &rej);
 
        return -ECONNREFUSED;
@@ -3396,6 +3406,10 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
         case IB_EVENT_COMM_EST:
                 CDEBUG(D_NET, "%s established\n",
                        libcfs_nid2str(conn->ibc_peer->ibp_nid));
+               /* We received a packet but the connection isn't established;
+                * the handshake packet was probably lost, so we are free to
+                * force the connection into the established state */
+               rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
                 return;
 
         default: