Whamcloud - gitweb
LU-10467 lustre: convert users of back_to_sleep() 80/35980/11
authorMr NeilBrown <neilb@suse.com>
Sat, 18 Jan 2020 14:39:47 +0000 (09:39 -0500)
committerOleg Drokin <green@whamcloud.com>
Sat, 8 Feb 2020 03:59:57 +0000 (03:59 +0000)
When back_to_sleep() is passed to l_wait_event as
the on_timeout handler, the effect is to potentially wait twice.
The first wait ignores all signals and has a timeout.
If the timeout fires without the event occurring, the l_wait_event()
goes "back to sleep" indefinitely, but this time with fatal
signals unblocked.

This pattern can be made more clear with two separate wait calls:
  wait_event_idle_timeout() followed by l_wait_event_abortable().

Change-Id: I3536e33b4d982f37c960f31df1ea0d9808f9ced7
Signed-off-by: Mr NeilBrown <neilb@suse.com>
Reviewed-on: https://review.whamcloud.com/35980
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Petros Koutoupis <pkoutoupis@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osp/osp_precreate.c
lustre/ptlrpc/import.c
lustre/ptlrpc/ptlrpcd.c

index 6e55a23..23b7713 100644 (file)
@@ -1200,8 +1200,6 @@ static int osp_precreate_thread(void *_arg)
 {
        struct osp_device       *d = _arg;
        struct ptlrpc_thread    *thread = &d->opd_pre_thread;
-       struct l_wait_info       lwi2 = LWI_TIMEOUT(cfs_time_seconds(5),
-                                                   back_to_sleep, NULL);
        struct lu_env            env;
        int                      rc;
 
@@ -1267,8 +1265,12 @@ static int osp_precreate_thread(void *_arg)
                }
 
                if (osp_statfs_update(&env, d)) {
-                       l_wait_event(d->opd_pre_waitq,
-                                    !osp_precreate_running(d), &lwi2);
+                       if (wait_event_idle_timeout(d->opd_pre_waitq,
+                                                   !osp_precreate_running(d),
+                                                   cfs_time_seconds(5)) == 0)
+                               l_wait_event_abortable(
+                                       d->opd_pre_waitq,
+                                       !osp_precreate_running(d));
                        continue;
                }
 
index e6063e4..a21549e 100644 (file)
@@ -1693,7 +1693,6 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
        spin_unlock(&imp->imp_lock);
 
        if (ptlrpc_import_in_recovery(imp)) {
-               struct l_wait_info lwi;
                long timeout_jiffies;
                time64_t timeout;
 
@@ -1709,15 +1708,16 @@ int ptlrpc_disconnect_import(struct obd_import *imp, int noclose)
                        req_portal = imp->imp_client->cli_request_portal;
                        idx = import_at_get_index(imp, req_portal);
                        timeout = at_get(&imp->imp_at.iat_service_estimate[idx]);
-                }
+               }
 
                timeout_jiffies = cfs_time_seconds(timeout);
-               lwi = LWI_TIMEOUT_INTR(max_t(long, timeout_jiffies, 1),
-                                       back_to_sleep, LWI_ON_SIGNAL_NOOP, NULL);
-                rc = l_wait_event(imp->imp_recovery_waitq,
-                                  !ptlrpc_import_in_recovery(imp), &lwi);
-
-        }
+               if (wait_event_idle_timeout(imp->imp_recovery_waitq,
+                                           !ptlrpc_import_in_recovery(imp),
+                                           timeout_jiffies) == 0 &&
+                   l_wait_event_abortable(imp->imp_recovery_waitq,
+                                          !ptlrpc_import_in_recovery(imp)) < 0)
+                       rc = -EINTR;
+       }
 
        spin_lock(&imp->imp_lock);
        if (imp->imp_state != LUSTRE_IMP_FULL)
index 5b5ef37..9810009 100644 (file)
@@ -274,12 +274,13 @@ void ptlrpcd_add_req(struct ptlrpc_request *req)
 
        spin_lock(&req->rq_lock);
        if (req->rq_invalid_rqset) {
-               struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
-                                                    back_to_sleep, NULL);
-
                req->rq_invalid_rqset = 0;
                spin_unlock(&req->rq_lock);
-               l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+               if (wait_event_idle_timeout(req->rq_set_waitq,
+                                           req->rq_set == NULL,
+                                           cfs_time_seconds(5)) == 0)
+                       l_wait_event_abortable(req->rq_set_waitq,
+                                              req->rq_set == NULL);
        } else if (req->rq_set) {
                /*
                 * If we have a vaid "rq_set", just reuse it to avoid double