To avoid a race, &the_lnet.ln_mt_peerNIRecovq must always be
accessed under lnet_net_lock(0) protection.
Lustre-change: https://review.whamcloud.com/54163
Lustre-commit: 0a0e881d8884a220c485c0384351da12dc8aed9f
Test-Parameters: trivial
Fixes: da23037 ("LU-16563 lnet: use discovered ni status to set initial health")
Change-Id: Ic5e0194020200afdecba4cbf5afed274b14da388
Signed-off-by: Bruno Faccini <bfaccini@nvidia.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54382
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni)
{
- if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN)
+ if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN) {
+ lnet_net_lock(0);
lnet_handle_remote_failure_locked(lpni);
- else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
+ lnet_net_unlock(0);
+ } else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
!lpni->lpni_last_alive)
atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
}