From d00babe126acc146eeeaa55b99e50bf8408ef208 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Tue, 17 Oct 2023 11:43:14 -0700 Subject: [PATCH] LU-17207 lnet: race b/w monitor thr stop and discovery push As a result of a race, the discovery thread may attempt to dereference a message on ln_mt_resendqs that was just freed by the monitor thread as it stops. Make sure the discovery thread is stopped first. Lustre-change: https://review.whamcloud.com/52734/ Lustre-commit: TBD (from 5c6ca4991382a805da6e824c1dbfab931987dda6) Signed-off-by: Serguei Smirnov Change-Id: I0dfcf3bc5bb3c8df195388599f571bdd3caaa3d7 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52935 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lnet/lnet/api-ni.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index b805f4ef..dc84c67 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2753,13 +2753,13 @@ LNetNIInit(lnet_pid_t requested_pid) if (rc != 0) goto err_stop_ping; - rc = lnet_peer_discovery_start(); + rc = lnet_monitor_thr_start(); if (rc != 0) goto err_destroy_push_target; - rc = lnet_monitor_thr_start(); + rc = lnet_peer_discovery_start(); if (rc != 0) - goto err_stop_discovery_thr; + goto err_stop_monitor_thr; lnet_fault_init(); lnet_router_debugfs_init(); @@ -2773,8 +2773,8 @@ LNetNIInit(lnet_pid_t requested_pid) return 0; -err_stop_discovery_thr: - lnet_peer_discovery_stop(); +err_stop_monitor_thr: + lnet_monitor_thr_stop(); err_destroy_push_target: lnet_push_target_fini(); err_stop_ping: @@ -2830,8 +2830,8 @@ LNetNIFini(void) lnet_fault_fini(); lnet_router_debugfs_fini(); - lnet_monitor_thr_stop(); lnet_peer_discovery_stop(); + lnet_monitor_thr_stop(); lnet_push_target_fini(); lnet_ping_target_fini(); -- 1.8.3.1