From 58a6e113eb6fc06b6b0f9b46800828a690e43d26 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 13 Oct 2021 18:30:01 -0500 Subject: [PATCH] LU-15102 lnet: Reset ni_ping_count only on receive The lnet_ni:ni_ping_count is currently reset on every (healthy) tx. We should only reset it when receiving a message over the NI. Taking net_lock 0 on every tx results in a performance loss for certain workloads. Lustre-change: https://review.whamcloud.com/45235 Lustre-commit: 9cc0a5ff5fc8f45aa60cd0407ae9893d5c116ccd Test-Parameters: trivial testlist=sanity-lnet Fixes: 8fdf2bc62a ("LU-13569 lnet: Recover local NI w/exponential backoff interval") HPE-bug-id: LUS-10427 Signed-off-by: Chris Horn Change-Id: I67ea3aa977cb5d67b04f7957120c29e9985c83e6 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54407 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Andreas Dilger --- lnet/lnet/lib-msg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index f040e37..c008456 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -896,8 +896,6 @@ lnet_health_check(struct lnet_msg *msg) * faster recovery. */ lnet_inc_healthv(&ni->ni_healthv, lnet_health_sensitivity); - lnet_net_lock(0); - ni->ni_ping_count = 0; /* * It's possible msg_txpeer is NULL in the LOLND * case. Only increment the peer's health if we're @@ -907,7 +905,9 @@ lnet_health_check(struct lnet_msg *msg) * as indication that the router is fully healthy. */ if (lpni && msg->msg_rx_committed) { + lnet_net_lock(0); lpni->lpni_ping_count = 0; + ni->ni_ping_count = 0; /* * If we're receiving a message from the router or * I'm a router, then set that lpni's health to @@ -933,8 +933,8 @@ lnet_health_check(struct lnet_msg *msg) &the_lnet.ln_mt_peerNIRecovq, ktime_get_seconds()); } + lnet_net_unlock(0); } - lnet_net_unlock(0); /* we can finalize this message */ return -1; -- 1.8.3.1