LU-6261 gnilnd: kgnilnd_check_rdma_cq race in error path.

[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_cb.c
diff --git a/lnet/klnds/gnilnd/gnilnd_cb.c b/lnet/klnds/gnilnd/gnilnd_cb.c

index 92aaf57..b8f56f5 100644 (file)
--- a/lnet/klnds/gnilnd/gnilnd_cb.c
+++ b/lnet/klnds/gnilnd/gnilnd_cb.c
@@ -505,8 +505,14 @@ kgnilnd_setup_immediate_buffer(kgn_tx_t *tx, unsigned int niov, struct iovec *io
         if (nob == 0) {
                 tx->tx_buffer = NULL;
         } else if (kiov != NULL) {
+
+               if ((niov > 0) && unlikely(niov > (nob/PAGE_SIZE))) {
+                       niov = ((nob + offset + PAGE_SIZE - 1) / PAGE_SIZE);
+               }
+
                 LASSERTF(niov > 0 && niov < GNILND_MAX_IMMEDIATE/PAGE_SIZE,
-                        "bad niov %d\n", niov);
+                       "bad niov %d msg %p kiov %p iov %p offset %d nob%d\n",
+                       niov, msg, kiov, iov, offset, nob);
  
                 while (offset >= kiov->kiov_len) {
                         offset -= kiov->kiov_len;
@@ -1158,6 +1164,7 @@ kgnilnd_unmap_buffer(kgn_tx_t *tx, int error)
                  * verified peer notification  - the theory is that
                  * a TX error can be communicated in all other cases */
                 if (tx->tx_conn->gnc_state != GNILND_CONN_ESTABLISHED &&
+                   error != -GNILND_NOPURG &&
                     kgnilnd_check_purgatory_conn(tx->tx_conn)) {
                         kgnilnd_add_purgatory_tx(tx);
  
@@ -1465,7 +1472,6 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob,
          */
         msg->gnm_connstamp = conn->gnc_my_connstamp;
         msg->gnm_payload_len = immediatenob;
-       kgnilnd_conn_mutex_lock(&conn->gnc_smsg_mutex);
         msg->gnm_seq = atomic_read(&conn->gnc_tx_seq);
  
         /* always init here - kgn_checksum is a /sys module tunable
@@ -1580,6 +1586,7 @@ kgnilnd_sendmsg(kgn_tx_t *tx, void *immediate, unsigned int immediatenob,
  
         timestamp = jiffies;
         kgnilnd_gl_mutex_lock(&dev->gnd_cq_mutex);
+       kgnilnd_conn_mutex_lock(&tx->tx_conn->gnc_smsg_mutex);
         /* delay in jiffies - we are really concerned only with things that
          * result in a schedule() or really holding this off for long times .
          * NB - mutex_lock could spin for 2 jiffies before going to sleep to wait */
@@ -1624,7 +1631,8 @@ kgnilnd_sendmsg_trylock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob
                 rc = 0;
         } else {
                 atomic_inc(&conn->gnc_device->gnd_fast_try);
-               rc = kgnilnd_gl_mutex_trylock(&conn->gnc_device->gnd_cq_mutex);
+               rc = kgnilnd_trylock(&conn->gnc_device->gnd_cq_mutex,
+                                    &conn->gnc_smsg_mutex);
         }
         if (!rc) {
                 rc = -EAGAIN;
@@ -3239,6 +3247,11 @@ kgnilnd_check_rdma_cq(kgn_device_t *dev)
                 spin_unlock(&conn->gnc_list_lock);
                 kgnilnd_conn_mutex_unlock(&conn->gnc_rdma_mutex);
  
+               if (CFS_FAIL_CHECK(CFS_FAIL_GNI_RDMA_CQ_ERROR)) {
+                       event_data = 1LL << 48;
+                       rc = 1;
+               }
+
                 if (likely(desc->status == GNI_RC_SUCCESS) && rc == 0) {
                         atomic_inc(&dev->gnd_rdma_ntx);
                         atomic64_add(tx->tx_nob, &dev->gnd_rdma_txbytes);
@@ -3293,7 +3306,7 @@ kgnilnd_check_rdma_cq(kgn_device_t *dev)
                                          -EFAULT,
                                          rcookie,
                                          tx->tx_msg.gnm_srcnid);
-                       kgnilnd_tx_done(tx, -EFAULT);
+                       kgnilnd_tx_done(tx, -GNILND_NOPURG);
                         kgnilnd_close_conn(conn, -ECOMM);
                 }