From 0b09d826149f4baadce305df63396bf86eb20cf7 Mon Sep 17 00:00:00 2001 From: Sebastien Buisson Date: Sat, 8 Feb 2020 02:16:02 +0900 Subject: [PATCH] LU-13216 ptlrpc: sptlrpc_req_refresh_ctx's timeout semantic sptlrpc_req_refresh_ctx() is only called with two possible timeouts in the current code: either we do not wait at all, or we wait indefinitely. So change semantic of sptlrpc_req_refresh_ctx()'s timeout parameter, to be more consistent with wait* primitives. timeout == 0 means 'do not wait at all', and timeout == MAX_SCHEDULE_TIMEOUT means 'wait indefinitely'. Other timeout values are unsupported. Consequently, remove code that handles the case of a non zero and non infinite timeout. And remove the unsed ctx_refresh_timeout() callback. It solves a problem in the current code, because in case of indefinite wait, sptlrpc_req_refresh_ctx() is not supposed to try to refresh the request, so it must not call the ctx_refresh_timeout() callback. Fixes: c1fad6a9a5 ("LU-10467 ptlrpc: convert waiting in sptlrpc_req_refresh_ctx()") Test-Parameters: trivial envdefinitions=SHARED_KEY=true testlist=sanity-sec Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec Signed-off-by: Sebastien Buisson Change-Id: I1b20457c86863d57f97f4d998f7c5fa8ec928b31 Reviewed-on: https://review.whamcloud.com/37473 Reviewed-by: Neil Brown Reviewed-by: James Simmons Tested-by: Maloo Tested-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Oleg Drokin --- lustre/ptlrpc/client.c | 4 ++-- lustre/ptlrpc/sec.c | 54 ++++++++++++++++---------------------------------- 2 files changed, 19 insertions(+), 39 deletions(-) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2ce24c9..758d86e 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1673,7 +1673,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) lustre_msg_set_status(req->rq_reqmsg, current->pid); - rc = sptlrpc_req_refresh_ctx(req, -1); + rc = sptlrpc_req_refresh_ctx(req, 0); if (rc) { if (req->rq_err) { req->rq_status = rc; @@ -1990,7 +1990,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) * rq_wait_ctx is only touched by ptlrpcd, * so no lock is needed here. */ - status = sptlrpc_req_refresh_ctx(req, -1); + status = sptlrpc_req_refresh_ctx(req, 0); if (status) { if (req->rq_err) { req->rq_status = status; diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index 0e79684..dc68e0e 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -628,27 +628,6 @@ int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx) } static -int ctx_refresh_timeout(struct ptlrpc_request *req) -{ - int rc; - - /* conn_cnt is needed in expire_one_request */ - lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt); - - rc = ptlrpc_expire_one_request(req, 1); - /* - * if we started recovery, we should mark this ctx dead; otherwise - * in case of lgssd died nobody would retire this ctx, following - * connecting will still find the same ctx thus cause deadlock. - * there's an assumption that expire time of the request should be - * later than the context refresh expire time. - */ - if (rc == 0) - req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0); - return rc; -} - -static void ctx_refresh_interrupt(struct ptlrpc_request *req) { @@ -669,9 +648,9 @@ void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx) /** * To refresh the context of \req, if it's not up-to-date. * \param timeout - * - < 0: don't wait - * - = 0: wait until success or fatal error occur - * - > 0: timeout value (in seconds) + * - == 0: do not wait + * - == MAX_SCHEDULE_TIMEOUT: wait indefinitely + * - > 0: not supported * * The status of the context could be subject to be changed by other threads * at any time. We allow this race, but once we return with 0, the caller will @@ -693,6 +672,11 @@ int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout) if (req->rq_ctx_init || req->rq_ctx_fini) RETURN(0); + if (timeout != 0 && timeout != MAX_SCHEDULE_TIMEOUT) { + CERROR("req %p: invalid timeout %lu\n", req, timeout); + RETURN(-EINVAL); + } + /* * during the process a request's context might change type even * (e.g. from gss ctx to null ctx), so each loop we need to re-check @@ -805,7 +789,7 @@ again: list_add(&req->rq_ctx_chain, &ctx->cc_req_list); spin_unlock(&ctx->cc_lock); - if (timeout < 0) + if (timeout == 0) RETURN(-EWOULDBLOCK); /* Clear any flags that may be present from previous sends */ @@ -817,17 +801,13 @@ again: req->rq_restart = 0; spin_unlock(&req->rq_lock); - if (wait_event_idle_timeout(req->rq_reply_waitq, - ctx_check_refresh(ctx), - cfs_time_seconds(timeout)) == 0) { - rc = -ETIMEDOUT; - if (!ctx_refresh_timeout(req) && - l_wait_event_abortable(req->rq_reply_waitq, - ctx_check_refresh(ctx)) - == -ERESTARTSYS) { - rc = -EINTR; - ctx_refresh_interrupt(req); - } + /* by now we know that timeout value is MAX_SCHEDULE_TIMEOUT, + * so wait indefinitely with non-fatal signals blocked + */ + if (l_wait_event_abortable(req->rq_reply_waitq, + ctx_check_refresh(ctx)) == -ERESTARTSYS) { + rc = -EINTR; + ctx_refresh_interrupt(req); } /* @@ -981,7 +961,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp) req->rq_flvr = sec->ps_flvr; req->rq_cli_ctx = ctx; - rc = sptlrpc_req_refresh_ctx(req, 0); + rc = sptlrpc_req_refresh_ctx(req, MAX_SCHEDULE_TIMEOUT); LASSERT(list_empty(&req->rq_ctx_chain)); sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1); ptlrpc_request_cache_free(req); -- 1.8.3.1