Whamcloud - gitweb
LU-17207 lnet: race b/w monitor thr stop and discovery push
author Serguei Smirnov <ssmirnov@whamcloud.com>
Tue, 17 Oct 2023 18:43:14 +0000 (11:43 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Thu, 9 Nov 2023 08:39:23 +0000 (08:39 +0000)
As a result of a race, the discovery thread may attempt to dereference
a message on ln_mt_resendqs that was just freed by the monitor thread
as it stops. Make sure the discovery thread is stopped first.

Lustre-change: https://review.whamcloud.com/52734/
Lustre-commit: TBD (from 5c6ca4991382a805da6e824c1dbfab931987dda6)

Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I0dfcf3bc5bb3c8df195388599f571bdd3caaa3d7
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52935
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/lnet/api-ni.c

index b805f4e..dc84c67 100644 (file)
@@ -2753,13 +2753,13 @@ LNetNIInit(lnet_pid_t requested_pid)
        if (rc != 0)
                goto err_stop_ping;
 
-       rc = lnet_peer_discovery_start();
+       rc = lnet_monitor_thr_start();
        if (rc != 0)
                goto err_destroy_push_target;
 
-       rc = lnet_monitor_thr_start();
+       rc = lnet_peer_discovery_start();
        if (rc != 0)
-               goto err_stop_discovery_thr;
+               goto err_stop_monitor_thr;
 
        lnet_fault_init();
        lnet_router_debugfs_init();
@@ -2773,8 +2773,8 @@ LNetNIInit(lnet_pid_t requested_pid)
 
        return 0;
 
-err_stop_discovery_thr:
-       lnet_peer_discovery_stop();
+err_stop_monitor_thr:
+       lnet_monitor_thr_stop();
 err_destroy_push_target:
        lnet_push_target_fini();
 err_stop_ping:
@@ -2830,8 +2830,8 @@ LNetNIFini(void)
                lnet_fault_fini();
 
                lnet_router_debugfs_fini();
-               lnet_monitor_thr_stop();
                lnet_peer_discovery_stop();
+               lnet_monitor_thr_stop();
                lnet_push_target_fini();
                lnet_ping_target_fini();