From 36b14a23a6e8240045074b097adfe01cb529d4a3 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Tue, 17 Oct 2023 11:43:14 -0700 Subject: [PATCH] LU-17207 lnet: race b/w monitor thr stop and discovery push As a result of a race, the discovery thread may attempt to dereference a message on ln_mt_resendqs that was just freed by the monitor thread while stopping. Make sure the discovery thread is stopped before the monitor thread. Signed-off-by: Serguei Smirnov Change-Id: I0dfcf3bc5bb3c8df195388599f571bdd3caaa3d7 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52734 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Chris Horn Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage --- lnet/lnet/api-ni.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 3867c8a..1ab0ad1 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3105,13 +3105,13 @@ LNetNIInit(lnet_pid_t requested_pid) if (rc != 0) goto err_stop_ping; - rc = lnet_peer_discovery_start(); + rc = lnet_monitor_thr_start(); if (rc != 0) goto err_destroy_push_target; - rc = lnet_monitor_thr_start(); + rc = lnet_peer_discovery_start(); if (rc != 0) - goto err_stop_discovery_thr; + goto err_stop_monitor_thr; lnet_fault_init(); lnet_router_debugfs_init(); @@ -3125,8 +3125,8 @@ LNetNIInit(lnet_pid_t requested_pid) return 0; -err_stop_discovery_thr: - lnet_peer_discovery_stop(); +err_stop_monitor_thr: + lnet_monitor_thr_stop(); err_destroy_push_target: lnet_push_target_fini(); err_stop_ping: @@ -3181,8 +3181,8 @@ LNetNIFini(void) lnet_fault_fini(); lnet_router_debugfs_fini(); - lnet_monitor_thr_stop(); lnet_peer_discovery_stop(); + lnet_monitor_thr_stop(); lnet_push_target_fini(); lnet_ping_target_fini(); -- 1.8.3.1