X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fgnilnd%2Fgnilnd_cb.c;h=89f8bd224e71b9347825335374753bfe6ce48f20;hb=4bfe21d09c39a8a2601090b8ca3e7cc7d2698140;hp=73764436c898489e78c912cdacd6c8afb58f666b;hpb=2b8c4566a6a63cdf09e555a744ce6a1453651b98;p=fs%2Flustre-release.git

diff --git a/lnet/klnds/gnilnd/gnilnd_cb.c b/lnet/klnds/gnilnd/gnilnd_cb.c
index 7376443..89f8bd2 100644
--- a/lnet/klnds/gnilnd/gnilnd_cb.c
+++ b/lnet/klnds/gnilnd/gnilnd_cb.c
@@ -26,6 +26,7 @@
 #include
 #include
+#include
 #include "gnilnd.h"

 /* this is useful when needed to debug wire corruption. */
@@ -151,7 +152,7 @@ kgnilnd_schedule_process_conn(kgn_conn_t *conn, int sched_intent)
  * as scheduled */
 int
-_kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld)
+_kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held)
 {
 	kgn_device_t *dev = conn->gnc_device;
 	int sched;
@@ -184,10 +185,11 @@ _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refhe
 			conn, sched);

 		CDEBUG(D_INFO, "scheduling conn 0x%p caller %s:%d\n", conn, caller, line);
-
-		spin_lock(&dev->gnd_lock);
+		if (!lock_held)
+			spin_lock(&dev->gnd_lock);
 		list_add_tail(&conn->gnc_schedlist, &dev->gnd_ready_conns);
-		spin_unlock(&dev->gnd_lock);
+		if (!lock_held)
+			spin_unlock(&dev->gnd_lock);
 		set_mb(conn->gnc_last_sched_ask, jiffies);
 		rc = 1;
 	} else {
@@ -197,6 +199,23 @@ _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refhe
 	/* make sure thread(s) going to process conns - but let it make
 	 * separate decision from conn schedule */
+	if (!lock_held)
+		kgnilnd_schedule_device(dev);
+	return rc;
+}
+
+int
+_kgnilnd_schedule_delay_conn(kgn_conn_t *conn)
+{
+	kgn_device_t *dev = conn->gnc_device;
+	int rc = 0;
+
+	spin_lock(&dev->gnd_lock);
+	if (list_empty(&conn->gnc_delaylist)) {
+		list_add_tail(&conn->gnc_delaylist, &dev->gnd_delay_conns);
+		rc = 1;
+	}
+	spin_unlock(&dev->gnd_lock);
+
 	kgnilnd_schedule_device(dev);
 	return rc;
 }
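
The two changes above are a pair: the new lock_held flag lets a caller that already holds gnd_lock queue a conn without retaking the lock, and _kgnilnd_schedule_delay_conn() parks a conn on the new gnd_delay_conns list until SMSG credits come back. A minimal sketch of the caller-holds-lock pattern, using hypothetical sched_dev/sched_conn types rather than the real kgnilnd ones:

	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct sched_dev {
		spinlock_t		lock;		/* protects both lists */
		struct list_head	ready;		/* conns ready to be processed */
		struct list_head	delayed;	/* conns waiting on credits */
	};

	struct sched_conn {
		struct list_head	node;
	};

	/* queue a conn; caller says whether it already holds dev->lock */
	static void sched_conn_queue(struct sched_dev *dev,
				     struct sched_conn *conn, int lock_held)
	{
		if (!lock_held)
			spin_lock(&dev->lock);
		list_add_tail(&conn->node, &dev->ready);
		if (!lock_held)
			spin_unlock(&dev->lock);
	}

Note that _kgnilnd_schedule_delay_conn() uses list_empty(&conn->gnc_delaylist) as its "already queued" test, which is why the rest of the patch is careful to unlink with list_del_init() rather than list_del().
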
@@ -237,7 +256,7 @@ kgnilnd_free_tx(kgn_tx_t *tx)

 	/* Only free the buffer if we used it */
 	if (tx->tx_buffer_copy != NULL) {
-		vfree(tx->tx_buffer_copy);
+		kgnilnd_vfree(tx->tx_buffer_copy, tx->tx_rdma_desc.length);
 		tx->tx_buffer_copy = NULL;
 		CDEBUG(D_MALLOC, "vfreed buffer2\n");
 	}
@@ -302,7 +321,7 @@ kgnilnd_cksum(void *ptr, size_t nob)
 	return sum;
 }

-inline __u16
+__u16
 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov,
 		   unsigned int offset, unsigned int nob, int dump_blob)
 {
@@ -774,7 +793,7 @@ kgnilnd_setup_rdma_buffer(kgn_tx_t *tx, unsigned int niov,
  * transfer.
  */
 static void
-kgnilnd_parse_lnet_rdma(lnet_msg_t *lntmsg, unsigned int *niov,
+kgnilnd_parse_lnet_rdma(struct lnet_msg *lntmsg, unsigned int *niov,
 			unsigned int *offset, unsigned int *nob,
 			lnet_kiov_t **kiov, int put_len)
 {
@@ -801,7 +820,7 @@ kgnilnd_compute_rdma_cksum(kgn_tx_t *tx, int put_len)
 {
 	unsigned int niov, offset, nob;
 	lnet_kiov_t *kiov;
-	lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
+	struct lnet_msg *lntmsg = tx->tx_lntmsg[0];
 	int dump_cksum = (*kgnilnd_tunables.kgn_checksum_dump > 1);

 	GNITX_ASSERTF(tx, ((tx->tx_msg.gnm_type == GNILND_MSG_PUT_DONE) ||
@@ -853,7 +872,7 @@ kgnilnd_verify_rdma_cksum(kgn_tx_t *tx, __u16 rx_cksum, int put_len)
 	__u16 cksum;
 	unsigned int niov, offset, nob;
 	lnet_kiov_t *kiov;
-	lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
+	struct lnet_msg *lntmsg = tx->tx_lntmsg[0];
 	int dump_on_err = *kgnilnd_tunables.kgn_checksum_dump;

 	/* we can only match certain requests */
@@ -1198,9 +1217,9 @@ kgnilnd_unmap_buffer(kgn_tx_t *tx, int error)
 void
 kgnilnd_tx_done(kgn_tx_t *tx, int completion)
 {
-	lnet_msg_t *lntmsg0, *lntmsg1;
+	struct lnet_msg *lntmsg0, *lntmsg1;
 	int status0, status1;
-	lnet_ni_t *ni = NULL;
+	struct lnet_ni *ni = NULL;
 	kgn_conn_t *conn = tx->tx_conn;

 	LASSERT(!in_interrupt());
@@ -1265,10 +1284,10 @@ kgnilnd_tx_done(kgn_tx_t *tx, int completion)
 	 * could free up lnet credits, resulting in a call chain back into
 	 * the LND via kgnilnd_send and friends */
-	lnet_finalize(ni, lntmsg0, status0);
+	lnet_finalize(lntmsg0, status0);

 	if (lntmsg1 != NULL) {
-		lnet_finalize(ni, lntmsg1, status1);
+		lnet_finalize(lntmsg1, status1);
 	}
 }
@@ -1343,70 +1362,35 @@ search_again:
 	return 0;
 }

-static inline int
-kgnilnd_tx_should_retry(kgn_conn_t *conn, kgn_tx_t *tx)
+static inline void
+kgnilnd_tx_log_retrans(kgn_conn_t *conn, kgn_tx_t *tx)
 {
-	int max_retrans = *kgnilnd_tunables.kgn_max_retransmits;
 	int log_retrans;
-	int log_retrans_level;
-
-	/* I need kgni credits to send this.  Replace tx at the head of the
-	 * fmaq and I'll get rescheduled when credits appear */
-	tx->tx_state = 0;
-	tx->tx_retrans++;
-	conn->gnc_tx_retrans++;

-	log_retrans = ((tx->tx_retrans < 25) || ((tx->tx_retrans % 25) == 0) ||
-			(tx->tx_retrans > (max_retrans / 2)));
-	log_retrans_level = tx->tx_retrans < (max_retrans / 2) ? D_NET : D_NETERROR;
-
-	/* Decision time - either error, warn or just retransmit */
+	log_retrans = ((tx->tx_retrans < 25) || ((tx->tx_retrans % 25) == 0));

 	/* we don't care about TX timeout - it could be that the network is slower
 	 * or throttled. We'll keep retransmitting - so if the network is so slow
 	 * that we fill up our mailbox, we'll keep trying to resend that msg
 	 * until we exceed the max_retrans _or_ gnc_last_rx expires, indicating
 	 * that it hasn't sent us any traffic in return */
-
-	if (tx->tx_retrans > max_retrans) {
-		/* this means we are not backing off the retransmits
-		 * in a healthy manner and are likely chewing up the
-		 * CPU cycles quite badly */
-		GNIDBG_TOMSG(D_ERROR, &tx->tx_msg,
-			"SOFTWARE BUG: too many retransmits (%d) for tx id %x "
-			"conn 0x%p->%s\n",
-			tx->tx_retrans, tx->tx_id, conn,
-			libcfs_nid2str(conn->gnc_peer->gnp_nid));
-
-		/* yes - double errors to help debug this condition */
-		GNIDBG_TOMSG(D_NETERROR, &tx->tx_msg, "connection dead. "
" - "unable to send to %s for %lu secs (%d tries)", - libcfs_nid2str(tx->tx_conn->gnc_peer->gnp_nid), - cfs_duration_sec(jiffies - tx->tx_cred_wait), - tx->tx_retrans); - - kgnilnd_close_conn(conn, -ETIMEDOUT); - - /* caller should terminate */ - RETURN(0); - } else { - /* some reasonable throttling of the debug message */ - if (log_retrans) { - unsigned long now = jiffies; - /* XXX Nic: Mystical TX debug here... */ - GNIDBG_SMSG_CREDS(log_retrans_level, conn); - GNIDBG_TOMSG(log_retrans_level, &tx->tx_msg, - "NOT_DONE on conn 0x%p->%s id %x retrans %d wait %dus" - " last_msg %uus/%uus last_cq %uus/%uus", - conn, libcfs_nid2str(conn->gnc_peer->gnp_nid), - tx->tx_id, tx->tx_retrans, - jiffies_to_usecs(now - tx->tx_cred_wait), - jiffies_to_usecs(now - conn->gnc_last_tx), - jiffies_to_usecs(now - conn->gnc_last_rx), - jiffies_to_usecs(now - conn->gnc_last_tx_cq), - jiffies_to_usecs(now - conn->gnc_last_rx_cq)); - } - /* caller should retry */ - RETURN(1); + + /* some reasonable throttling of the debug message */ + if (log_retrans) { + unsigned long now = jiffies; + /* XXX Nic: Mystical TX debug here... */ + /* We expect retransmissions so only log when D_NET is enabled */ + GNIDBG_SMSG_CREDS(D_NET, conn); + GNIDBG_TOMSG(D_NET, &tx->tx_msg, + "NOT_DONE on conn 0x%p->%s id %x retrans %d wait %dus" + " last_msg %uus/%uus last_cq %uus/%uus", + conn, libcfs_nid2str(conn->gnc_peer->gnp_nid), + tx->tx_id, tx->tx_retrans, + jiffies_to_usecs(now - tx->tx_cred_wait), + jiffies_to_usecs(now - conn->gnc_last_tx), + jiffies_to_usecs(now - conn->gnc_last_rx), + jiffies_to_usecs(now - conn->gnc_last_tx_cq), + jiffies_to_usecs(now - conn->gnc_last_rx_cq)); } } @@ -1419,7 +1403,6 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob, { kgn_conn_t *conn = tx->tx_conn; kgn_msg_t *msg = &tx->tx_msg; - int retry_send; gni_return_t rrc; unsigned long newest_last_rx, timeout; unsigned long now; @@ -1529,9 +1512,11 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob, return 0; case GNI_RC_NOT_DONE: - /* XXX Nic: We need to figure out how to track this - * - there are bound to be good reasons for it, - * but we want to know when it happens */ + /* Jshimek: We can get GNI_RC_NOT_DONE for 3 reasons currently + * 1: out of mbox credits + * 2: out of mbox payload credits + * 3: On Aries out of dla credits + */ kgnilnd_conn_mutex_unlock(&conn->gnc_smsg_mutex); kgnilnd_gl_mutex_unlock(&conn->gnc_device->gnd_cq_mutex); /* We'll handle this error inline - makes the calling logic much more @@ -1542,31 +1527,36 @@ kgnilnd_sendmsg_nolock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob, return -EAGAIN; } - retry_send = kgnilnd_tx_should_retry(conn, tx); - if (retry_send) { - /* add to head of list for the state and retries */ - spin_lock(state_lock); - kgnilnd_tx_add_state_locked(tx, conn->gnc_peer, conn, state, 0); - spin_unlock(state_lock); - - /* We only reschedule for a certain number of retries, then - * we will wait for the CQ events indicating a release of SMSG - * credits */ - if (tx->tx_retrans < (*kgnilnd_tunables.kgn_max_retransmits/4)) { - kgnilnd_schedule_conn(conn); - return 0; - } else { - /* CQ event coming in signifies either TX completed or - * RX receive. 
@@ -1656,7 +1646,7 @@ kgnilnd_sendmsg_trylock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob
 }

 /* lets us know if we can push this RDMA through now */
-inline int
+static int
 kgnilnd_auth_rdma_bytes(kgn_device_t *dev, kgn_tx_t *tx)
 {
 	long bytes_left;
@@ -1770,7 +1760,7 @@ kgnilnd_queue_tx(kgn_conn_t *conn, kgn_tx_t *tx)
 }

 void
-kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target)
+kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, struct lnet_process_id *target)
 {
 	kgn_peer_t *peer;
 	kgn_peer_t *new_peer = NULL;
@@ -1805,7 +1795,7 @@ kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target)
 	}

 	/* don't create a connection if the peer is marked down */
-	if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+	if (peer->gnp_state != GNILND_PEER_UP) {
 		read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
 		rc = -ENETRESET;
 		GOTO(no_peer, rc);
@@ -1844,7 +1834,7 @@ kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target)
 	kgnilnd_add_peer_locked(target->nid, new_peer, &peer);

 	/* don't create a connection if the peer is not up */
-	if (peer->gnp_down != GNILND_RCA_NODE_UP) {
+	if (peer->gnp_state != GNILND_PEER_UP) {
 		write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
 		rc = -ENETRESET;
 		GOTO(no_peer, rc);
@@ -1924,11 +1914,11 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
 		tx->tx_offset = ((__u64)((unsigned long)sink->gnrd_addr)) & 3;

 		if (tx->tx_offset)
-			kgnilnd_admin_addref(kgnilnd_data.kgn_rev_offset);
+			atomic_inc(&kgnilnd_data.kgn_rev_offset);

 		if ((nob + tx->tx_offset) & 3) {
 			desc_nob = ((nob + tx->tx_offset) + (4 - ((nob + tx->tx_offset) & 3)));
-			kgnilnd_admin_addref(kgnilnd_data.kgn_rev_length);
+			atomic_inc(&kgnilnd_data.kgn_rev_length);
 		} else {
 			desc_nob = (nob + tx->tx_offset);
 		}
@@ -1936,7 +1926,7 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
 		if (tx->tx_buffer_copy == NULL) {
 			/* Allocate the largest copy buffer we will need; this prevents us
			 * from overwriting data and requires at most a few extra bytes. */
-			tx->tx_buffer_copy = vmalloc(desc_nob);
+			tx->tx_buffer_copy = kgnilnd_vzalloc(desc_nob);

 			if (!tx->tx_buffer_copy) {
 				/* allocation of buffer failed - nak the rdma */
 				kgnilnd_nak_rdma(tx->tx_conn, tx->tx_msg.gnm_type, -EFAULT, cookie, tx->tx_msg.gnm_srcnid);
 				kgnilnd_tx_done(tx, -EFAULT);
 				return 0;
 			}
-			kgnilnd_admin_addref(kgnilnd_data.kgn_rev_copy_buff);
+			atomic_inc(&kgnilnd_data.kgn_rev_copy_buff);
 			rc = kgnilnd_mem_register(conn->gnc_device->gnd_handle, (__u64)tx->tx_buffer_copy,
 						  desc_nob, NULL, GNI_MEM_READWRITE, &tx->tx_buffer_copy_map_key);
 			if (rc != GNI_RC_SUCCESS) {
 				/* Registration failed - nak the rdma and kill the tx. */
-				vfree(tx->tx_buffer_copy);
+				kgnilnd_vfree(tx->tx_buffer_copy,
+					      desc_nob);
 				tx->tx_buffer_copy = NULL;
 				kgnilnd_nak_rdma(tx->tx_conn, tx->tx_msg.gnm_type, -EFAULT, cookie, tx->tx_msg.gnm_srcnid);
 				kgnilnd_tx_done(tx, -EFAULT);
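
kgnilnd_vzalloc()/kgnilnd_vfree() replace bare vmalloc()/vfree() so the LND can account for the vmalloc()ed memory it holds; that is why kgnilnd_vfree() takes the buffer length (desc_nob here, tx_rdma_desc.length in kgnilnd_free_tx()) as a second argument. The real wrappers are defined elsewhere in gnilnd, so treat the bodies below as an illustrative assumption:

	#include <linux/atomic.h>
	#include <linux/vmalloc.h>

	static atomic64_t vmem_in_use;	/* bytes of vmalloc()ed memory held */

	static inline void *sized_vzalloc(size_t nob)
	{
		void *ptr = vzalloc(nob);

		if (ptr)
			atomic64_add(nob, &vmem_in_use);
		return ptr;
	}

	/* caller passes nob back, so no per-buffer size header is needed */
	static inline void sized_vfree(void *ptr, size_t nob)
	{
		atomic64_sub(nob, &vmem_in_use);
		vfree(ptr);
	}

Threading the size through to the free side keeps the buffer layout untouched, which matters here because the copy buffer is registered with the NIC at exactly desc_nob bytes.
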
@@ -1970,8 +1961,10 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
 	tx->tx_rdma_desc.remote_mem_hndl = sink->gnrd_key;
 	tx->tx_rdma_desc.length = desc_nob;
 	tx->tx_nob_rdma = nob;
-	if (*kgnilnd_tunables.kgn_bte_dlvr_mode)
-		tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_dlvr_mode;
+	if (post_type == GNI_POST_RDMA_PUT && *kgnilnd_tunables.kgn_bte_put_dlvr_mode)
+		tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_put_dlvr_mode;
+	if (post_type == GNI_POST_RDMA_GET && *kgnilnd_tunables.kgn_bte_get_dlvr_mode)
+		tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_get_dlvr_mode;

 	/* prep final completion message */
 	kgnilnd_init_msg(&tx->tx_msg, type, tx->tx_msg.gnm_srcnid);
 	tx->tx_msg.gnm_u.completion.gncm_cookie = cookie;
@@ -2012,7 +2005,7 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
 		kgnilnd_unmap_buffer(tx, 0);

 		if (tx->tx_buffer_copy != NULL) {
-			vfree(tx->tx_buffer_copy);
+			kgnilnd_vfree(tx->tx_buffer_copy, desc_nob);
 			tx->tx_buffer_copy = NULL;
 		}
@@ -2077,6 +2070,8 @@ kgnilnd_release_msg(kgn_conn_t *conn)
 	LASSERTF(rrc == GNI_RC_SUCCESS, "bad rrc %d\n", rrc);
 	GNIDBG_SMSG_CREDS(D_NET, conn);

+	kgnilnd_schedule_conn(conn);
+
 	return;
 }

@@ -2106,11 +2101,11 @@ kgnilnd_consume_rx(kgn_rx_t *rx)
 }

 int
-kgnilnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 {
-	lnet_hdr_t *hdr = &lntmsg->msg_hdr;
+	struct lnet_hdr *hdr = &lntmsg->msg_hdr;
 	int type = lntmsg->msg_type;
-	lnet_process_id_t target = lntmsg->msg_target;
+	struct lnet_process_id target = lntmsg->msg_target;
 	int target_is_router = lntmsg->msg_target_is_router;
 	int routing = lntmsg->msg_routing;
 	unsigned int niov = lntmsg->msg_niov;
@@ -2278,7 +2273,7 @@ out:
 }

 void
-kgnilnd_setup_rdma(lnet_ni_t *ni, kgn_rx_t *rx, lnet_msg_t *lntmsg, int mlen)
+kgnilnd_setup_rdma(struct lnet_ni *ni, kgn_rx_t *rx, struct lnet_msg *lntmsg, int mlen)
 {
 	kgn_conn_t *conn = rx->grx_conn;
 	kgn_msg_t *rxmsg = rx->grx_msg;
@@ -2335,11 +2330,11 @@ kgnilnd_setup_rdma(lnet_ni_t *ni, kgn_rx_t *rx, lnet_msg_t *lntmsg, int mlen)
 	kgnilnd_tx_done(tx, rc);
 	kgnilnd_nak_rdma(conn, done_type, rc, rxmsg->gnm_u.get.gngm_cookie, ni->ni_nid);
 failed_0:
-	lnet_finalize(ni, lntmsg, rc);
+	lnet_finalize(lntmsg, rc);
 }

 int
-kgnilnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+kgnilnd_eager_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 		   void **new_private)
 {
 	kgn_rx_t *rx = private;
@@ -2430,7 +2425,7 @@ kgnilnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
 }

 int
-kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+kgnilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 	     int delayed, unsigned int niov,
 	     struct kvec *iov, lnet_kiov_t *kiov,
 	     unsigned int offset, unsigned int mlen, unsigned int rlen)
@@ -2461,7 +2456,7 @@ kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
 		/* someone closed the conn after we copied this out, nuke it */
 		kgnilnd_consume_rx(rx);
-		lnet_finalize(ni, lntmsg, conn->gnc_error);
+		lnet_finalize(lntmsg, conn->gnc_error);
 		RETURN(0);
 	}
 	read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
@@ -2535,14 +2530,14 @@ kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
 				&rxmsg[1], 0, mlen);

 		kgnilnd_consume_rx(rx);
-		lnet_finalize(ni, lntmsg, 0);
+		lnet_finalize(lntmsg, 0);
 		RETURN(0);

 	case GNILND_MSG_PUT_REQ:
 		/* LNET wants to truncate or drop transaction, sending NAK */
 		if (mlen == 0) {
 			kgnilnd_consume_rx(rx);
-			lnet_finalize(ni, lntmsg, 0);
+			lnet_finalize(lntmsg, 0);
 			/* only error if lntmsg == NULL, otherwise we are just
 			 * short circuiting the rdma process of 0 bytes */
@@ -2601,7 +2596,7 @@ nak_put_req:
 		/* LNET wants to truncate or drop transaction, sending NAK */
 		if (mlen == 0) {
 			kgnilnd_consume_rx(rx);
-			lnet_finalize(ni, lntmsg, 0);
+			lnet_finalize(lntmsg, 0);
 			/* only error if lntmsg == NULL, otherwise we are just
 			 * short circuiting the rdma process of 0 bytes */
@@ -2671,7 +2666,7 @@ nak_get_req_rev:
 		/* LNET wants to truncate or drop transaction, sending NAK */
 		if (mlen == 0) {
 			kgnilnd_consume_rx(rx);
-			lnet_finalize(ni, lntmsg, 0);
+			lnet_finalize(lntmsg, 0);
 			/* only error if lntmsg == NULL, otherwise we are just
 			 * short circuiting the rdma process of 0 bytes */
@@ -2746,7 +2741,7 @@ kgnilnd_check_conn_timeouts_locked(kgn_conn_t *conn)
 	if (time_after_eq(now, newest_last_rx + timeout)) {
 		uint32_t level = D_CONSOLE|D_NETERROR;

-		if (conn->gnc_peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+		if (conn->gnc_peer->gnp_state == GNILND_PEER_DOWN) {
 			level = D_NET;
 		}
 		GNIDBG_CONN(level, conn,
@@ -2822,6 +2817,14 @@ kgnilnd_check_peer_timeouts_locked(kgn_peer_t *peer, struct list_head *todie,
 				conn->gnc_close_recvd = GNILND_CLOSE_INJECT1;
 				conn->gnc_peer_error = -ETIMEDOUT;
 			}
+
+			if (*kgnilnd_tunables.kgn_to_reconn_disable &&
+			    rc == -ETIMEDOUT) {
+				peer->gnp_state = GNILND_PEER_TIMED_OUT;
+				CDEBUG(D_WARNING, "%s conn timed out, will "
+				       "reconnect upon request from peer\n",
+				       libcfs_nid2str(conn->gnc_peer->gnp_nid));
+			}
 			/* Once we mark closed, any of the scheduler threads could
 			 * get it and move through before we hit the fail loc code */
 			kgnilnd_close_conn_locked(conn, rc);
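
With the tristate gnp_state replacing the old gnp_down boolean, a peer whose conn times out while the kgn_to_reconn_disable tunable is set moves to GNILND_PEER_TIMED_OUT and stays there until the peer contacts us first. The reconnect gate this produces can be restated as one predicate (a condensed restatement of the checks in this patch, not new logic):

	/* sketch: when may we initiate a reconnect to this peer? */
	static bool peer_may_reconnect(kgn_peer_t *peer)
	{
		/* TIMED_OUT and DOWN peers wait to be contacted; we also
		 * never reconnect while old endpoints are still dirty */
		return peer->gnp_state == GNILND_PEER_UP &&
		       atomic_read(&peer->gnp_dirty_eps) == 0;
	}
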
@@ -2865,7 +2868,7 @@ kgnilnd_check_peer_timeouts_locked(kgn_peer_t *peer, struct list_head *todie,
 	/* Don't reconnect if we are still trying to clear out old conns.
 	 * This prevents us sending traffic on the new mbox before ensuring we are done
 	 * with the old one */
-	reconnect = (peer->gnp_down == GNILND_RCA_NODE_UP) &&
+	reconnect = (peer->gnp_state == GNILND_PEER_UP) &&
 		    (atomic_read(&peer->gnp_dirty_eps) == 0);

 	/* fast reconnect after a timeout */
@@ -3129,7 +3132,7 @@ int kgnilnd_recv_bte_get(kgn_tx_t *tx) {
 	unsigned niov, offset, nob;
 	lnet_kiov_t *kiov;
-	lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
+	struct lnet_msg *lntmsg = tx->tx_lntmsg[0];
 	kgnilnd_parse_lnet_rdma(lntmsg, &niov, &offset, &nob, &kiov, tx->tx_nob_rdma);

 	if (kiov != NULL) {
@@ -3327,6 +3330,7 @@ kgnilnd_check_fma_send_cq(kgn_device_t *dev)
 	kgn_conn_t *conn = NULL;
 	int queued_fma, saw_reply, rc;
 	long num_processed = 0;
+	struct list_head *ctmp, *ctmpN;

 	for (;;) {
 		/* make sure we don't keep looping if we need to reset */
@@ -3349,6 +3353,22 @@ kgnilnd_check_fma_send_cq(kgn_device_t *dev)
 			       "SMSG send CQ %d not ready (data %#llx) "
 			       "processed %ld\n", dev->gnd_id, event_data,
 			       num_processed);
+
+			if (num_processed > 0) {
+				spin_lock(&dev->gnd_lock);
+				if (!list_empty(&dev->gnd_delay_conns)) {
+					list_for_each_safe(ctmp, ctmpN, &dev->gnd_delay_conns) {
+						conn = list_entry(ctmp, kgn_conn_t, gnc_delaylist);
+						list_del_init(&conn->gnc_delaylist);
+						CDEBUG(D_NET, "Moving Conn %p from delay queue to ready_queue\n", conn);
+						kgnilnd_schedule_conn_nolock(conn);
+					}
+					spin_unlock(&dev->gnd_lock);
+					kgnilnd_schedule_device(dev);
+				} else {
+					spin_unlock(&dev->gnd_lock);
+				}
+			}
 			return num_processed;
 		}
@@ -4889,6 +4909,12 @@ kgnilnd_process_conns(kgn_device_t *dev, unsigned long deadline)
 		conn = list_first_entry(&dev->gnd_ready_conns, kgn_conn_t, gnc_schedlist);
 		list_del_init(&conn->gnc_schedlist);
+		/*
+		 * Since we are processing the conn now, it no longer needs to
+		 * be on the delay list.
+		 */
+		if (!list_empty(&conn->gnc_delaylist))
+			list_del_init(&conn->gnc_delaylist);
 		spin_unlock(&dev->gnd_lock);

 		conn_sched = xchg(&conn->gnc_scheduled, GNILND_CONN_PROCESS);
@@ -4915,7 +4941,7 @@ kgnilnd_process_conns(kgn_device_t *dev, unsigned long deadline)
 			kgnilnd_conn_decref(conn);
 			up_write(&dev->gnd_conn_sem);
 		} else if (rc != 1) {
-		kgnilnd_conn_decref(conn);
+			kgnilnd_conn_decref(conn);
 		}
 		/* clear this so that scheduler thread doesn't spin */
 		found_work = 0;
@@ -4966,7 +4992,7 @@ kgnilnd_process_conns(kgn_device_t *dev, unsigned long deadline)
 			kgnilnd_conn_decref(conn);
 			up_write(&dev->gnd_conn_sem);
 		} else if (rc != 1) {
-		kgnilnd_conn_decref(conn);
+			kgnilnd_conn_decref(conn);
 		}

 		/* check list again with lock held */
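
Taken together, the delay-queue changes form a single credit-recovery loop: a send that fails with GNI_RC_NOT_DONE parks its conn via kgnilnd_schedule_delay_conn(), the send-CQ handler above drains gnd_delay_conns back onto the ready queue once completions imply credits were returned, and kgnilnd_process_conns() drops any leftover delay-list linkage before processing. A condensed sketch of the drain step, assuming the patch's locking rule that gnd_lock protects both lists (this uses list_for_each_entry_safe() where the patch open-codes list_entry()):

	/* move every credit-starved conn back to the ready queue */
	static void drain_delay_conns(kgn_device_t *dev)
	{
		kgn_conn_t *conn, *cnxt;

		spin_lock(&dev->gnd_lock);
		list_for_each_entry_safe(conn, cnxt, &dev->gnd_delay_conns,
					 gnc_delaylist) {
			list_del_init(&conn->gnc_delaylist);
			/* gnd_lock is held, so use the nolock variant */
			kgnilnd_schedule_conn_nolock(conn);
		}
		spin_unlock(&dev->gnd_lock);
		kgnilnd_schedule_device(dev);
	}

The _safe iterator is required because list_del_init() unlinks the current node mid-walk.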