From 877d95b582db3d182d13dac4947c1f43b0e851dc Mon Sep 17 00:00:00 2001
From: Serguei Smirnov
Date: Wed, 7 Oct 2020 18:51:06 -0400
Subject: [PATCH] LU-13892 lnet: lock-up during router check

This is a fix for the issue with LNet lock-up while waiting for
routers to become active with check_routers_before_use option.
Release ln_api_mutex while waiting to allow incoming connections
to be handled.

Signed-off-by: Serguei Smirnov
Change-Id: I63b1d1ce5ee2b27a3bd2cea78713fc6fc7502cf7
Reviewed-on: https://review.whamcloud.com/40172
Tested-by: jenkins
Reviewed-by: Olaf Faaland-LLNL
Tested-by: Maloo
Reviewed-by: Amir Shehata
Reviewed-by: Oleg Drokin
---
 lnet/lnet/router.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c
index 806cf8c..e2966cf 100644
--- a/lnet/lnet/router.c
+++ b/lnet/lnet/router.c
@@ -842,6 +842,7 @@ lnet_wait_known_routerstate(void)
 
 	LASSERT(the_lnet.ln_mt_state == LNET_MT_STATE_RUNNING);
 
+	/* the_lnet.ln_api_mutex must be locked */
 	for (;;) {
 		int cpt = lnet_net_lock_current();
 
@@ -865,8 +866,10 @@ lnet_wait_known_routerstate(void)
 		if (all_known)
 			return;
 
+		mutex_unlock(&the_lnet.ln_api_mutex);
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(cfs_time_seconds(1));
+		mutex_lock(&the_lnet.ln_api_mutex);
 	}
 }
 
-- 
1.8.3.1