From: Mr NeilBrown Date: Fri, 24 Jan 2020 14:45:44 +0000 (-0500) Subject: LU-10467 ptlrpc: convert waiters on set->set_waitq X-Git-Tag: 2.13.52~25 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4e5855ae4dea7954ce1891cd23abce033fe23f03;hp=ca6c35cab141597809c6f3a58102fac8ac86104a LU-10467 ptlrpc: convert waiters on set->set_waitq There are a couple of interesting aspects of waiters on ->set_waitq. One is the only usage of LWI_TIMEOUT_INTR_ALL(). This causes l_wait_event() to enable "fatal" signals during the timeout part of the wait. (Normally signals are completely blocked when there is a timeout.) This can be converted to l_wait_event_abortable_timeout(). Another is that ptlrpc_expired_set() is passed as the on_timeout handler. As this always returns true, it causes l_wait_event() to quit after the timeout, and not go "back to sleep". We can instead call this explicitly after the wait_event_timeout returns 0 - which means that it timed out. Due to this change in call pattern, we can change the function to take a ptlrpc_request_set* instead of a void*, and to not return anything. Also, ptlrpc_interrupted_set() is sometimes passed as the on_signal function. Instead we can explicitly call this when we get a negative return from wait_event_abortable. Again, we can declare it as taking the real type and not a void*. The wait on set_waitq in ptlrpcd() might be a wait with a timeout or, if timeout == 0, it is an indefinite wait. We make that explicit with 2 separate cases. 
So this patch: - changes to wait_event_idle_timeout and l_wait_event_abortable_timeout, - calls ptlrpc_*_set explicitly based on return code - changes signatures for ptlrpc_*_set() Change-Id: Ieb97aa3ba9b1f988a30bb7a424588f87f75e8023 Signed-off-by: Mr NeilBrown Reviewed-on: https://review.whamcloud.com/35982 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Alex Zhuravlev Reviewed-by: Shaun Tancheff Reviewed-by: Oleg Drokin --- diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index bee8d3f..aede53e 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -2323,9 +2323,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) * Callback used when waiting on sets with l_wait_event. * Always returns 1. */ -int ptlrpc_expired_set(void *data) +void ptlrpc_expired_set(struct ptlrpc_request_set *set) { - struct ptlrpc_request_set *set = data; struct list_head *tmp; time64_t now = ktime_get_real_seconds(); @@ -2360,13 +2359,6 @@ int ptlrpc_expired_set(void *data) */ ptlrpc_expire_one_request(req, 1); } - - /* - * When waiting for a whole set, we always break out of the - * sleep so we can recalculate the timeout, or enable interrupts - * if everyone's timed out. - */ - RETURN(1); } /** @@ -2384,9 +2376,8 @@ EXPORT_SYMBOL(ptlrpc_mark_interrupted); * Interrupts (sets interrupted flag) all uncompleted requests in * a set \a data. Callback for l_wait_event for interruptible waits. 
*/ -static void ptlrpc_interrupted_set(void *data) +static void ptlrpc_interrupted_set(struct ptlrpc_request_set *set) { - struct ptlrpc_request_set *set = data; struct list_head *tmp; LASSERT(set != NULL); @@ -2462,7 +2453,6 @@ int ptlrpc_set_wait(const struct lu_env *env, struct ptlrpc_request_set *set) { struct list_head *tmp; struct ptlrpc_request *req; - struct l_wait_info lwi; time64_t timeout; int rc; @@ -2498,24 +2488,35 @@ int ptlrpc_set_wait(const struct lu_env *env, struct ptlrpc_request_set *set) * We still want to block for a limited time, * so we allow interrupts during the timeout. */ - lwi = LWI_TIMEOUT_INTR_ALL( - cfs_time_seconds(timeout ? timeout : 1), - ptlrpc_expired_set, - ptlrpc_interrupted_set, set); - - rc = l_wait_event(set->set_waitq, - ptlrpc_check_set(NULL, set), &lwi); + rc = l_wait_event_abortable_timeout( + set->set_waitq, + ptlrpc_check_set(NULL, set), + cfs_time_seconds(timeout ? timeout : 1)); + if (rc == 0) { + rc = -ETIMEDOUT; + ptlrpc_expired_set(set); + } else if (rc < 0) { + rc = -EINTR; + ptlrpc_interrupted_set(set); + } else { + rc = 0; + } } else { /* * At least one request is in flight, so no * interrupts are allowed. Wait until all * complete, or an in-flight req times out. */ - lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1), - ptlrpc_expired_set, set); - - rc = l_wait_event(set->set_waitq, - ptlrpc_check_set(NULL, set), &lwi); + rc = wait_event_idle_timeout( + set->set_waitq, + ptlrpc_check_set(NULL, set), + cfs_time_seconds(timeout ? 
timeout : 1)); + if (rc == 0) { + ptlrpc_expired_set(set); + rc = -ETIMEDOUT; + } else { + rc = 0; + } /* * LU-769 - if we ignored the signal because diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 19d0487..ffa31c6 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -82,7 +82,7 @@ void ptlrpc_request_cache_free(struct ptlrpc_request *req); void ptlrpc_init_xid(void); void ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc, struct ptlrpc_request *req); -int ptlrpc_expired_set(void *data); +void ptlrpc_expired_set(struct ptlrpc_request_set *set); time64_t ptlrpc_set_next_timeout(struct ptlrpc_request_set *); void ptlrpc_resend_req(struct ptlrpc_request *request); void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req); diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 9810009..914f326 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -486,16 +486,20 @@ static int ptlrpcd(void *arg) * new_req_list and ptlrpcd_check() moves them into the set. */ do { - struct l_wait_info lwi; time64_t timeout; timeout = ptlrpc_set_next_timeout(set); - lwi = LWI_TIMEOUT(cfs_time_seconds(timeout), - ptlrpc_expired_set, set); lu_context_enter(&env.le_ctx); lu_context_enter(env.le_ses); - l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi); + if (timeout == 0) + wait_event_idle(set->set_waitq, + ptlrpcd_check(&env, pc)); + else if (wait_event_idle_timeout(set->set_waitq, + ptlrpcd_check(&env, pc), + cfs_time_seconds(timeout)) + == 0) + ptlrpc_expired_set(set); lu_context_exit(&env.le_ctx); lu_context_exit(env.le_ses);