void lnet_lib_exit(void);
extern unsigned lnet_transaction_timeout; /* seconds to wait for a REPLY or an ACK */
+extern unsigned lnet_retry_count; /* maximum number of times to retry transmitting a message */
extern unsigned int lnet_numa_range;
extern unsigned int lnet_health_sensitivity;
extern unsigned int lnet_peer_discovery_disabled;
enum lnet_msg_hstatus msg_health_status;
/* This is a recovery message */
bool msg_recovery;
+ /* the number of times a transmission has been retried */
+ int msg_retry_count;
/* flag to indicate that we do not want to resend this message */
bool msg_no_resend;
MODULE_PARM_DESC(lnet_transaction_timeout,
"Time in seconds to wait for a REPLY or an ACK");
+unsigned lnet_retry_count = 0;	/* per-message retransmission limit; with the
+				 * default of 0 the health check's
+				 * "msg_retry_count >= lnet_retry_count" test
+				 * is always true, so no message is requeued */
+module_param(lnet_retry_count, uint, 0444);	/* 0444: readable, not writable, via sysfs */
+MODULE_PARM_DESC(lnet_retry_count,
+ "Maximum number of times to retry transmitting a message");
+
/*
* This sequence number keeps track of how many times DLC was used to
* update the local NIs. It is incremented when a NI is added or
/*
* Do a health check on the message:
- * return -1 if we're not going to handle the error
+ * return -1 if we're not going to handle the error or
+ * if we've reached the maximum number of retries.
* success case will return -1 as well
* return 0 if the message is requeued for send
*/
if (msg->msg_no_resend)
return -1;
+ /* check if the message has exceeded the number of retries */
+ if (msg->msg_retry_count >= lnet_retry_count)
+ return -1;
+ msg->msg_retry_count++;
+
lnet_net_lock(msg->msg_tx_cpt);
/*