Whamcloud - gitweb
LU-13216 ptlrpc: sptlrpc_req_refresh_ctx's timeout semantic 73/37473/11
authorSebastien Buisson <sbuisson@ddn.com>
Fri, 7 Feb 2020 17:16:02 +0000 (02:16 +0900)
committerOleg Drokin <green@whamcloud.com>
Tue, 17 Mar 2020 03:41:10 +0000 (03:41 +0000)
sptlrpc_req_refresh_ctx() is only called with two possible timeouts
in the current code: either we do not wait at all, or we wait
indefinitely.
So change semantic of sptlrpc_req_refresh_ctx()'s timeout parameter,
to be more consistent with wait* primitives. timeout == 0 means
'do not wait at all', and timeout == MAX_SCHEDULE_TIMEOUT means
'wait indefinitely'. Other timeout values are unsupported.

Consequently, remove code that handles the case of a non zero and
non infinite timeout. And remove the unsed ctx_refresh_timeout()
callback.
It solves a problem in the current code, because in case of indefinite
wait, sptlrpc_req_refresh_ctx() is not supposed to try to refresh the
request, so it must not call the ctx_refresh_timeout() callback.

Fixes: c1fad6a9a5 ("LU-10467 ptlrpc: convert waiting in sptlrpc_req_refresh_ctx()")
Test-Parameters: trivial envdefinitions=SHARED_KEY=true testlist=sanity-sec
Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec
Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec
Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec
Test-Parameters: envdefinitions=SHARED_KEY=true testlist=sanity-sec
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I1b20457c86863d57f97f4d998f7c5fa8ec928b31
Reviewed-on: https://review.whamcloud.com/37473
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/client.c
lustre/ptlrpc/sec.c

index 2ce24c9..758d86e 100644 (file)
@@ -1673,7 +1673,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
 
        lustre_msg_set_status(req->rq_reqmsg, current->pid);
 
-       rc = sptlrpc_req_refresh_ctx(req, -1);
+       rc = sptlrpc_req_refresh_ctx(req, 0);
        if (rc) {
                if (req->rq_err) {
                        req->rq_status = rc;
@@ -1990,7 +1990,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                 * rq_wait_ctx is only touched by ptlrpcd,
                                 * so no lock is needed here.
                                 */
-                               status = sptlrpc_req_refresh_ctx(req, -1);
+                               status = sptlrpc_req_refresh_ctx(req, 0);
                                if (status) {
                                        if (req->rq_err) {
                                                req->rq_status = status;
index 0e79684..dc68e0e 100644 (file)
@@ -628,27 +628,6 @@ int ctx_check_refresh(struct ptlrpc_cli_ctx *ctx)
 }
 
 static
-int ctx_refresh_timeout(struct ptlrpc_request *req)
-{
-       int rc;
-
-       /* conn_cnt is needed in expire_one_request */
-       lustre_msg_set_conn_cnt(req->rq_reqmsg, req->rq_import->imp_conn_cnt);
-
-       rc = ptlrpc_expire_one_request(req, 1);
-       /*
-        * if we started recovery, we should mark this ctx dead; otherwise
-        * in case of lgssd died nobody would retire this ctx, following
-        * connecting will still find the same ctx thus cause deadlock.
-        * there's an assumption that expire time of the request should be
-        * later than the context refresh expire time.
-        */
-       if (rc == 0)
-               req->rq_cli_ctx->cc_ops->die(req->rq_cli_ctx, 0);
-       return rc;
-}
-
-static
 void ctx_refresh_interrupt(struct ptlrpc_request *req)
 {
 
@@ -669,9 +648,9 @@ void req_off_ctx_list(struct ptlrpc_request *req, struct ptlrpc_cli_ctx *ctx)
 /**
  * To refresh the context of \req, if it's not up-to-date.
  * \param timeout
- * - < 0: don't wait
- * - = 0: wait until success or fatal error occur
- * - > 0: timeout value (in seconds)
+ * - == 0: do not wait
+ * - == MAX_SCHEDULE_TIMEOUT: wait indefinitely
+ * - > 0: not supported
  *
  * The status of the context could be subject to be changed by other threads
  * at any time. We allow this race, but once we return with 0, the caller will
@@ -693,6 +672,11 @@ int sptlrpc_req_refresh_ctx(struct ptlrpc_request *req, long timeout)
        if (req->rq_ctx_init || req->rq_ctx_fini)
                RETURN(0);
 
+       if (timeout != 0 && timeout != MAX_SCHEDULE_TIMEOUT) {
+               CERROR("req %p: invalid timeout %lu\n", req, timeout);
+               RETURN(-EINVAL);
+       }
+
        /*
         * during the process a request's context might change type even
         * (e.g. from gss ctx to null ctx), so each loop we need to re-check
@@ -805,7 +789,7 @@ again:
                list_add(&req->rq_ctx_chain, &ctx->cc_req_list);
        spin_unlock(&ctx->cc_lock);
 
-       if (timeout < 0)
+       if (timeout == 0)
                RETURN(-EWOULDBLOCK);
 
        /* Clear any flags that may be present from previous sends */
@@ -817,17 +801,13 @@ again:
        req->rq_restart = 0;
        spin_unlock(&req->rq_lock);
 
-       if (wait_event_idle_timeout(req->rq_reply_waitq,
-                                   ctx_check_refresh(ctx),
-                                   cfs_time_seconds(timeout)) == 0) {
-               rc = -ETIMEDOUT;
-               if (!ctx_refresh_timeout(req) &&
-                   l_wait_event_abortable(req->rq_reply_waitq,
-                                          ctx_check_refresh(ctx))
-                   == -ERESTARTSYS) {
-                       rc = -EINTR;
-                       ctx_refresh_interrupt(req);
-               }
+       /* by now we know that timeout value is MAX_SCHEDULE_TIMEOUT,
+        * so wait indefinitely with non-fatal signals blocked
+        */
+       if (l_wait_event_abortable(req->rq_reply_waitq,
+                                  ctx_check_refresh(ctx)) == -ERESTARTSYS) {
+               rc = -EINTR;
+               ctx_refresh_interrupt(req);
        }
 
        /*
@@ -981,7 +961,7 @@ int sptlrpc_import_check_ctx(struct obd_import *imp)
        req->rq_flvr = sec->ps_flvr;
        req->rq_cli_ctx = ctx;
 
-       rc = sptlrpc_req_refresh_ctx(req, 0);
+       rc = sptlrpc_req_refresh_ctx(req, MAX_SCHEDULE_TIMEOUT);
        LASSERT(list_empty(&req->rq_ctx_chain));
        sptlrpc_cli_ctx_put(req->rq_cli_ctx, 1);
        ptlrpc_request_cache_free(req);