From 137e5a197a592c360a6b49c9829e26045664bf69 Mon Sep 17 00:00:00 2001
From: Bruno Faccini
Date: Fri, 23 Feb 2024 13:16:36 +0100
Subject: [PATCH] LU-17578 lnet: fix &the_lnet.ln_mt_peerNIRecovq race

To avoid race &the_lnet.ln_mt_peerNIRecovq must always be
accessed with lnet_net_lock(0) protection.

Lustre-change: https://review.whamcloud.com/54163
Lustre-commit: 0a0e881d8884a220c485c0384351da12dc8aed9f

Test-Parameters: trivial

Fixes: da23037 ("LU-16563 lnet: use discovered ni status to set initial health")

Change-Id: Ic5e0194020200afdecba4cbf5afed274b14da388
Signed-off-by: Bruno Faccini
Reviewed-by: Chris Horn
Reviewed-by: Frank Sehr
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54382
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
---
 lnet/lnet/peer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c
index b1283e9..b4069c7 100644
--- a/lnet/lnet/peer.c
+++ b/lnet/lnet/peer.c
@@ -2926,9 +2926,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
 
 static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni)
 {
-	if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN)
+	if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN) {
+		lnet_net_lock(0);
 		lnet_handle_remote_failure_locked(lpni);
-	else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
+		lnet_net_unlock(0);
+	} else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
 		 !lpni->lpni_last_alive)
 		atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
 }
-- 
1.8.3.1