From ca6c35cab141597809c6f3a58102fac8ac86104a Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Sat, 18 Jan 2020 09:39:47 -0500 Subject: [PATCH] LU-10467 lustre: convert users of back_to_sleep() When back_to_sleep() is passed to l_wait_event as the on_timeout handler, the effect is to potentially wait twice. The first wait ignores all signals and has a timeout. If the timeout fires without the event occurring, the l_wait_event() goes "back to sleep" indefinitely, but this time with fatal signals unblocked. This pattern can be made more clear with two separate wait calls: wait_event_idle_timeout() followed by l_wait_event_abortable(). Change-Id: I3536e33b4d982f37c960f31df1ea0d9808f9ced7 Signed-off-by: Mr NeilBrown Reviewed-on: https://review.whamcloud.com/35980 Tested-by: jenkins Reviewed-by: James Simmons Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Petros Koutoupis Reviewed-by: Oleg Drokin --- lustre/osp/osp_precreate.c | 10 ++++++---- lustre/ptlrpc/import.c | 16 ++++++++-------- lustre/ptlrpc/ptlrpcd.c | 9 +++++---- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 6e55a23..23b7713 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -1200,8 +1200,6 @@ static int osp_precreate_thread(void *_arg) { struct osp_device *d = _arg; struct ptlrpc_thread *thread = &d->opd_pre_thread; - struct l_wait_info lwi2 = LWI_TIMEOUT(cfs_time_seconds(5), - back_to_sleep, NULL); struct lu_env env; int rc; @@ -1267,8 +1265,12 @@ static int osp_precreate_thread(void *_arg) } if (osp_statfs_update(&env, d)) { - l_wait_event(d->opd_pre_waitq, - !osp_precreate_running(d), &lwi2); + if (wait_event_idle_timeout(d->opd_pre_waitq, + !osp_precreate_running(d), + cfs_time_seconds(5)) == 0) + l_wait_event_abortable( + d->opd_pre_waitq, + !osp_precreate_running(d)); continue; } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index e6063e4..a21549e 100644 --- 
a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1693,7 +1693,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) spin_unlock(&imp->imp_lock); if (ptlrpc_import_in_recovery(imp)) { - struct l_wait_info lwi; long timeout_jiffies; time64_t timeout; @@ -1709,15 +1708,16 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose) req_portal = imp->imp_client->cli_request_portal; idx = import_at_get_index(imp, req_portal); timeout = at_get(&imp->imp_at.iat_service_estimate[idx]); - } + } timeout_jiffies = cfs_time_seconds(timeout); - lwi = LWI_TIMEOUT_INTR(max_t(long, timeout_jiffies, 1), - back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL); - rc = l_wait_event(imp->imp_recovery_waitq, - !ptlrpc_import_in_recovery(imp), &lwi); - - } + if (wait_event_idle_timeout(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp), + timeout_jiffies) == 0 && + l_wait_event_abortable(imp->imp_recovery_waitq, + !ptlrpc_import_in_recovery(imp)) < 0) + rc = -EINTR; + } spin_lock(&imp->imp_lock); if (imp->imp_state != LUSTRE_IMP_FULL) diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 5b5ef37..9810009 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -274,12 +274,13 @@ void ptlrpcd_add_req(struct ptlrpc_request *req) spin_lock(&req->rq_lock); if (req->rq_invalid_rqset) { - struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5), - back_to_sleep, NULL); - req->rq_invalid_rqset = 0; spin_unlock(&req->rq_lock); - l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi); + if (wait_event_idle_timeout(req->rq_set_waitq, + req->rq_set == NULL, + cfs_time_seconds(5)) == 0) + l_wait_event_abortable(req->rq_set_waitq, + req->rq_set == NULL); } else if (req->rq_set) { /* * If we have a vaid "rq_set", just reuse it to avoid double -- 1.8.3.1