From 01a70a56540f095b3dc30656b7135636b4b3abef Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Mon, 2 Nov 2020 06:02:47 -0500 Subject: [PATCH] LU-12956 ldlm: fix hrtimer using A race could happen between hrtimer_start() and hrtimer_expires_remaining(), because the second one doesn't hold a lock on timer->base, and the first one could change it between different CPUs. The following failure happened: BUG: unable to handle kernel NULL pointer dereference at 000000000028 IP: [] target_handle_connect+0x12ff/0x2b50 [ptlrpc] at remaining = hrtimer_expires_remaining(timer), timer->base was NULL. The fix changes hrtimer_expires_remaining() to hrtimer_get_remaining(), which holds a lock and prevents the race. Fixes: 9334f1d51249 ("LU-11771 ldlm: use hrtimer for recovery to fix timeout messages") HPE-bug-id: LUS-9514 Signed-off-by: Alexander Boyko Change-Id: I2cea1e5e2d523f131f1acb3346cf0324adae624e Reviewed-on: https://review.whamcloud.com/40513 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 927a424..baa75f5 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -840,7 +840,7 @@ static inline int target_check_recovery_timer(struct obd_device *target) if (!target->obd_recovering || target->obd_recovery_start == 0) return 0; - remaining = hrtimer_expires_remaining(&target->obd_recovery_timer); + remaining = hrtimer_get_remaining(&target->obd_recovery_timer); timeout = ktime_divns(remaining, NSEC_PER_SEC); if (timeout > -30) return 0; @@ -908,7 +908,7 @@ static int target_handle_reconnect(struct lustre_handle *conn, GOTO(out_already, rc); } - remaining = hrtimer_expires_remaining(&target->obd_recovery_timer); + remaining = hrtimer_get_remaining(&target->obd_recovery_timer); timeout = ktime_divns(remaining, NSEC_PER_SEC); 
if (timeout > 0) { LCONSOLE_WARN("%s: Client %s (at %s) reconnected, waiting for %d clients in recovery for %lld:%.02lld\n", @@ -1402,7 +1402,7 @@ no_export: known = atomic_read(&target->obd_max_recoverable_clients); stale = target->obd_stale_clients; - remaining = hrtimer_expires_remaining(timer); + remaining = hrtimer_get_remaining(timer); left = ktime_divns(remaining, NSEC_PER_SEC); if (ktime_to_ns(remaining) > 0) { @@ -1920,7 +1920,7 @@ static void extend_recovery_timer(struct obd_device *obd, timeout_t dr_timeout, } LASSERT(obd->obd_recovery_start != 0); - left_ns = hrtimer_expires_remaining(&obd->obd_recovery_timer); + left_ns = hrtimer_get_remaining(&obd->obd_recovery_timer); left = ktime_divns(left_ns, NSEC_PER_SEC); if (extend) { -- 1.8.3.1