Whamcloud - gitweb
LU-6261 gnilnd: kgnilnd_check_rdma_cq race in error path.
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_cb.c
index 92aaf57..b8f56f5 100644 (file)
@@ -505,8 +505,14 @@ kgnilnd_setup_immediate_buffer(kgn_tx_t *tx, unsigned int niov, struct iovec *io
        if (nob == 0) {
                tx->tx_buffer = NULL;
        } else if (kiov != NULL) {
+
+               if ((niov > 0) && unlikely(niov > (nob/PAGE_SIZE))) {
+                       niov = ((nob + offset + PAGE_SIZE - 1) / PAGE_SIZE);
+               }
+
                LASSERTF(niov > 0 && niov < GNILND_MAX_IMMEDIATE/PAGE_SIZE,
-                        "bad niov %d\n", niov);
+                       "bad niov %d msg %p kiov %p iov %p offset %d nob%d\n",
+                       niov, msg, kiov, iov, offset, nob);
 
                while (offset >= kiov->kiov_len) {
                        offset -= kiov->kiov_len;
@@ -1158,6 +1164,7 @@ kgnilnd_unmap_buffer(kgn_tx_t *tx, int error)
                 * verified peer notification  - the theory is that
                 * a TX error can be communicated in all other cases */
                if (tx->tx_conn->gnc_state != GNILND_CONN_ESTABLISHED &&
+                   error != -GNILND_NOPURG &&
                    kgnilnd_check_purgatory_conn(tx->tx_conn)) {
                        kgnilnd_add_purgatory_tx(tx);
 
@@ -1465,7 +1472,6 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob,
         */
        msg->gnm_connstamp = conn->gnc_my_connstamp;
        msg->gnm_payload_len = immediatenob;
-       kgnilnd_conn_mutex_lock(&conn->gnc_smsg_mutex);
        msg->gnm_seq = atomic_read(&conn->gnc_tx_seq);
 
        /* always init here - kgn_checksum is a /sys module tunable
@@ -1580,6 +1586,7 @@ kgnilnd_sendmsg(kgn_tx_t *tx, void *immediate, unsigned int immediatenob,
 
        timestamp = jiffies;
        kgnilnd_gl_mutex_lock(&dev->gnd_cq_mutex);
+       kgnilnd_conn_mutex_lock(&tx->tx_conn->gnc_smsg_mutex);
        /* delay in jiffies - we are really concerned only with things that
         * result in a schedule() or really holding this off for long times .
         * NB - mutex_lock could spin for 2 jiffies before going to sleep to wait */
@@ -1624,7 +1631,8 @@ kgnilnd_sendmsg_trylock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob
                rc = 0;
        } else {
                atomic_inc(&conn->gnc_device->gnd_fast_try);
-               rc = kgnilnd_gl_mutex_trylock(&conn->gnc_device->gnd_cq_mutex);
+               rc = kgnilnd_trylock(&conn->gnc_device->gnd_cq_mutex,
+                                    &conn->gnc_smsg_mutex);
        }
        if (!rc) {
                rc = -EAGAIN;
@@ -3239,6 +3247,11 @@ kgnilnd_check_rdma_cq(kgn_device_t *dev)
                spin_unlock(&conn->gnc_list_lock);
                kgnilnd_conn_mutex_unlock(&conn->gnc_rdma_mutex);
 
+               if (CFS_FAIL_CHECK(CFS_FAIL_GNI_RDMA_CQ_ERROR)) {
+                       event_data = 1LL << 48;
+                       rc = 1;
+               }
+
                if (likely(desc->status == GNI_RC_SUCCESS) && rc == 0) {
                        atomic_inc(&dev->gnd_rdma_ntx);
                        atomic64_add(tx->tx_nob, &dev->gnd_rdma_txbytes);
@@ -3293,7 +3306,7 @@ kgnilnd_check_rdma_cq(kgn_device_t *dev)
                                         -EFAULT,
                                         rcookie,
                                         tx->tx_msg.gnm_srcnid);
-                       kgnilnd_tx_done(tx, -EFAULT);
+                       kgnilnd_tx_done(tx, -GNILND_NOPURG);
                        kgnilnd_close_conn(conn, -ECOMM);
                }