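During shutdown the resend queues are cleared and freed, and the
monitor thread state is set to LNET_MT_STATE_SHUTDOWN. It is still
possible for lnet_finalize() to be called after the queues have been
freed. The code currently checks ln_state to determine whether we are
shutting down, but in this case it should be checking ln_mt_state
instead. The monitor thread state is the one that matters here,
because the monitor thread is what allocates and frees the resend
queues.

Check ln_mt_state rather than ln_state before attempting a resend,
and check it again once the net lock is held. Update ln_mt_state
under LNET_LOCK_EX so that the check made under the net lock is
reliable.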
Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ia077cec7a52ef5cd2e1b231437c6265ba9416b1b
Reviewed-on: https://review.whamcloud.com/34778
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Tested-by: Jenkins
lnet_prune_rc_data(1);
/* Shutting down */
lnet_prune_rc_data(1);
/* Shutting down */
+ lnet_net_lock(LNET_LOCK_EX);
the_lnet.ln_mt_state = LNET_MT_STATE_SHUTDOWN;
the_lnet.ln_mt_state = LNET_MT_STATE_SHUTDOWN;
+ lnet_net_unlock(LNET_LOCK_EX);
/* signal that the monitor thread is exiting */
up(&the_lnet.ln_mt_signal);
/* signal that the monitor thread is exiting */
up(&the_lnet.ln_mt_signal);
sema_init(&the_lnet.ln_mt_signal, 0);
sema_init(&the_lnet.ln_mt_signal, 0);
+ lnet_net_lock(LNET_LOCK_EX);
the_lnet.ln_mt_state = LNET_MT_STATE_RUNNING;
the_lnet.ln_mt_state = LNET_MT_STATE_RUNNING;
+ lnet_net_unlock(LNET_LOCK_EX);
task = kthread_run(lnet_monitor_thread, NULL, "monitor_thread");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
task = kthread_run(lnet_monitor_thread, NULL, "monitor_thread");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
+ lnet_net_lock(LNET_LOCK_EX);
the_lnet.ln_mt_state = LNET_MT_STATE_STOPPING;
the_lnet.ln_mt_state = LNET_MT_STATE_STOPPING;
+ lnet_net_unlock(LNET_LOCK_EX);
/* block until event callback signals exit */
down(&the_lnet.ln_mt_signal);
/* clean up */
lnet_router_cleanup();
free_mem:
/* block until event callback signals exit */
down(&the_lnet.ln_mt_signal);
/* clean up */
lnet_router_cleanup();
free_mem:
+ lnet_net_lock(LNET_LOCK_EX);
the_lnet.ln_mt_state = LNET_MT_STATE_SHUTDOWN;
the_lnet.ln_mt_state = LNET_MT_STATE_SHUTDOWN;
+ lnet_net_unlock(LNET_LOCK_EX);
lnet_rsp_tracker_clean();
lnet_clean_local_ni_recoveryq();
lnet_clean_peer_ni_recoveryq();
lnet_rsp_tracker_clean();
lnet_clean_local_ni_recoveryq();
lnet_clean_peer_ni_recoveryq();
return;
LASSERT(the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING);
return;
LASSERT(the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING);
+ lnet_net_lock(LNET_LOCK_EX);
the_lnet.ln_mt_state = LNET_MT_STATE_STOPPING;
the_lnet.ln_mt_state = LNET_MT_STATE_STOPPING;
+ lnet_net_unlock(LNET_LOCK_EX);
/* tell the monitor thread that we're shutting down */
wake_up(&the_lnet.ln_mt_waitq);
/* tell the monitor thread that we're shutting down */
wake_up(&the_lnet.ln_mt_waitq);
bool lo = false;
/* if we're shutting down no point in handling health. */
bool lo = false;
/* if we're shutting down no point in handling health. */
- if (the_lnet.ln_state != LNET_STATE_RUNNING)
+ if (the_lnet.ln_mt_state != LNET_MT_STATE_RUNNING)
return -1;
LASSERT(msg->msg_txni);
return -1;
LASSERT(msg->msg_txni);
lnet_net_lock(msg->msg_tx_cpt);
lnet_net_lock(msg->msg_tx_cpt);
+ /* check again under lock */
+ if (the_lnet.ln_mt_state != LNET_MT_STATE_RUNNING) {
+ lnet_net_unlock(msg->msg_tx_cpt);
+ return -1;
+ }
+
/*
* remove message from the active list and reset it in preparation
* for a resend. Two exception to this
/*
* remove message from the active list and reset it in preparation
* for a resend. Two exception to this