From: Serguei Smirnov Date: Tue, 17 Oct 2023 18:43:14 +0000 (-0700) Subject: LU-17207 lnet: race b/w monitor thr stop and discovery push X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=d00babe126acc146eeeaa55b99e50bf8408ef208;p=fs%2Flustre-release.git LU-17207 lnet: race b/w monitor thr stop and discovery push As a result of a race, the discovery thread may attempt to dereference a message on ln_mt_resendqs that was just freed by the monitor thread as it stops. Make sure the discovery thread is stopped first. Lustre-change: https://review.whamcloud.com/52734/ Lustre-commit: TBD (from 5c6ca4991382a805da6e824c1dbfab931987dda6) Signed-off-by: Serguei Smirnov Change-Id: I0dfcf3bc5bb3c8df195388599f571bdd3caaa3d7 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52935 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index b805f4ef..dc84c67 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2753,13 +2753,13 @@ LNetNIInit(lnet_pid_t requested_pid) if (rc != 0) goto err_stop_ping; - rc = lnet_peer_discovery_start(); + rc = lnet_monitor_thr_start(); if (rc != 0) goto err_destroy_push_target; - rc = lnet_monitor_thr_start(); + rc = lnet_peer_discovery_start(); if (rc != 0) - goto err_stop_discovery_thr; + goto err_stop_monitor_thr; lnet_fault_init(); lnet_router_debugfs_init(); @@ -2773,8 +2773,8 @@ LNetNIInit(lnet_pid_t requested_pid) return 0; -err_stop_discovery_thr: - lnet_peer_discovery_stop(); +err_stop_monitor_thr: + lnet_monitor_thr_stop(); err_destroy_push_target: lnet_push_target_fini(); err_stop_ping: @@ -2830,8 +2830,8 @@ LNetNIFini(void) lnet_fault_fini(); lnet_router_debugfs_fini(); - lnet_monitor_thr_stop(); lnet_peer_discovery_stop(); + lnet_monitor_thr_stop(); lnet_push_target_fini(); lnet_ping_target_fini();