From 305ef66a66a4d45106d2f3c90d8b8398f3ec806f Mon Sep 17 00:00:00 2001 From: Cyril Bordage Date: Wed, 24 Apr 2024 04:21:53 +0200 Subject: [PATCH] LU-14810 lnet: ongoing push when discovery is stopped If a push is not completed when the discovery thread is stopped, then we still have ln_dc_handler used as an MD handler (from lnet_peer_send_push). That leads to an assertion failure in lnet_assert_handler_unused. To fix that, we call lnet_assert_handler_unused only after the monitor thread has been stopped. Thus, the patch for LU-17496 is not needed anymore. Lustre-change: https://review.whamcloud.com/54884 Lustre-commit: 3ba393a5cb21ff0f8bd8a09c341ee01e936321c7 Fixes: 36b14a23a6 ("LU-17207 lnet: race b/w monitor thr stop and discovery push") Test-Parameters: testlist=sanity-lnet env=ONLY="212 220",ONLY_REPEAT=100 Signed-off-by: Cyril Bordage Change-Id: I426c37b12a3d29327a7295f528a5b875a9ac88a0 Reviewed-by: Shaun Tancheff Reviewed-by: Frank Sehr Reviewed-by: Serguei Smirnov Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55167 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lnet/lnet/api-ni.c | 2 ++ lnet/lnet/lib-md.c | 3 ++- lnet/lnet/peer.c | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 1346daf..645a7e1 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -2800,6 +2800,7 @@ LNetNIFini(void) if (the_lnet.ln_refcount != 1) { the_lnet.ln_refcount--; } else { + lnet_handler_t dc_handler = the_lnet.ln_dc_handler; LASSERT(!the_lnet.ln_niinit_self); lnet_net_lock(LNET_LOCK_EX); @@ -2811,6 +2812,7 @@ LNetNIFini(void) lnet_router_debugfs_fini(); lnet_peer_discovery_stop(); lnet_monitor_thr_stop(); + lnet_assert_handler_unused(dc_handler); lnet_push_target_fini(); lnet_ping_target_fini(); diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index cc2252c..1f6c6e4 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -306,8 +306,9 @@ void 
lnet_assert_handler_unused(lnet_handler_t handler) struct lnet_libmd *md; lnet_res_lock(cpt); - list_for_each_entry(md, &container->rec_active, md_list) + list_for_each_entry(md, &container->rec_active, md_list) { LASSERT(md->md_handler != handler); + } lnet_res_unlock(cpt); } } diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 55a7249..d545b4c 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3918,7 +3918,6 @@ static int lnet_peer_discovery(void *arg) } lnet_net_unlock(LNET_LOCK_EX); - lnet_assert_handler_unused(the_lnet.ln_dc_handler); the_lnet.ln_dc_handler = NULL; the_lnet.ln_dc_state = LNET_DC_STATE_SHUTDOWN; -- 1.8.3.1