MODULE_PARM_DESC(lnet_transaction_timeout,
"Time in seconds to wait for a REPLY or an ACK");
+unsigned lnet_retry_count = 0;
+module_param(lnet_retry_count, uint, 0444);
+MODULE_PARM_DESC(lnet_retry_count,
+ "Maximum number of times to retry transmitting a message");
+
+unsigned lnet_lnd_timeout = LNET_LND_DEFAULT_TIMEOUT;
+
/*
* This sequence number keeps track of how many times DLC was used to
* update the local NIs. It is incremented when a NI is added or
spin_lock_init(&the_lnet.ln_eq_wait_lock);
spin_lock_init(&the_lnet.ln_msg_resend_lock);
init_waitqueue_head(&the_lnet.ln_eq_waitq);
- init_waitqueue_head(&the_lnet.ln_rc_waitq);
+ init_waitqueue_head(&the_lnet.ln_mt_waitq);
mutex_init(&the_lnet.ln_lnd_mutex);
}
return NULL;
}
+unsigned int
+lnet_get_lnd_timeout(void)
+{
+ return lnet_lnd_timeout;
+}
+EXPORT_SYMBOL(lnet_get_lnd_timeout);
+
void
lnet_register_lnd(struct lnet_lnd *lnd)
{
INIT_LIST_HEAD(&the_lnet.ln_dc_request);
INIT_LIST_HEAD(&the_lnet.ln_dc_working);
INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
+ INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
+ INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
init_waitqueue_head(&the_lnet.ln_dc_waitq);
rc = lnet_descriptor_setup();
bool
lnet_is_ni_healthy_locked(struct lnet_ni *ni)
{
- if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
- ni->ni_state == LNET_NI_STATE_DEGRADED)
+ if (ni->ni_state & LNET_NI_STATE_ACTIVE)
return true;
return false;
list_del_init(&ni->ni_netlist);
/* the ni should be in deleting state. If it's not it's
* a bug */
- LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
+ LASSERT(ni->ni_state & LNET_NI_STATE_DELETING);
cfs_percpt_for_each(ref, j, ni->ni_refs) {
if (*ref == 0)
continue;
struct lnet_net *net = ni->ni_net;
lnet_net_lock(LNET_LOCK_EX);
- ni->ni_state = LNET_NI_STATE_DELETING;
+ lnet_ni_lock(ni);
+ ni->ni_state |= LNET_NI_STATE_DELETING;
+ ni->ni_state &= ~LNET_NI_STATE_ACTIVE;
+ lnet_ni_unlock(ni);
lnet_ni_unlink_locked(ni);
lnet_incr_dlc_seq();
lnet_net_unlock(LNET_LOCK_EX);
list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
list_del_init(&msg->msg_list);
+ msg->msg_no_resend = true;
lnet_finalize(msg, -ECANCELED);
}
goto failed0;
}
- ni->ni_state = LNET_NI_STATE_ACTIVE;
+ lnet_ni_lock(ni);
+ ni->ni_state |= LNET_NI_STATE_ACTIVE;
+ ni->ni_state &= ~LNET_NI_STATE_INIT;
+ lnet_ni_unlock(ni);
/* We keep a reference on the loopback net through the loopback NI */
if (net->net_lnd->lnd_type == LOLND) {
lnet_ping_target_update(pbuf, ping_mdh);
- rc = lnet_router_checker_start();
+ rc = lnet_monitor_thr_start();
if (rc != 0)
goto err_stop_ping;
rc = lnet_push_target_init();
if (rc != 0)
- goto err_stop_router_checker;
+ goto err_stop_monitor_thr;
rc = lnet_peer_discovery_start();
if (rc != 0)
err_destroy_push_target:
lnet_push_target_fini();
-err_stop_router_checker:
- lnet_router_checker_stop();
+err_stop_monitor_thr:
+ lnet_monitor_thr_stop();
err_stop_ping:
lnet_ping_target_fini();
err_acceptor_stop:
lnet_router_debugfs_init();
lnet_peer_discovery_stop();
lnet_push_target_fini();
- lnet_router_checker_stop();
+ lnet_monitor_thr_stop();
lnet_ping_target_fini();
/* Teardown fns that use my own API functions BEFORE here */
struct lnet_ni *ni;
struct lnet_net *net = mynet;
+ /*
+ * It is possible that the net has been cleaned out while there is
+ * a message being sent. This function accessed the net without
+ * checking if the list is empty
+ */
if (prev == NULL) {
if (net == NULL)
net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
net_list);
+ if (list_empty(&net->net_ni_list))
+ return NULL;
ni = list_entry(net->net_ni_list.next, struct lnet_ni,
ni_netlist);
/* get the next net */
net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
net_list);
+ if (list_empty(&net->net_ni_list))
+ return NULL;
/* get the ni on it */
ni = list_entry(net->net_ni_list.next, struct lnet_ni,
ni_netlist);
return ni;
}
+ if (list_empty(&prev->ni_netlist))
+ return NULL;
+
/* there are more nis left */
ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
rc = LNetGet(LNET_NID_ANY, mdh, id,
LNET_RESERVED_PORTAL,
- LNET_PROTO_PING_MATCHBITS, 0);
+ LNET_PROTO_PING_MATCHBITS, 0, false);
if (rc != 0) {
/* Don't CERROR; this could be deliberate! */