LU-4578 ptlrpc: Early replies need to honor at_max

author Chris Horn <hornc@cray.com>

Mon, 3 Feb 2014 21:24:01 +0000 (15:24 -0600)

committer Oleg Drokin <oleg.drokin@intel.com>

Tue, 6 May 2014 04:58:41 +0000 (04:58 +0000)
author Chris Horn <hornc@cray.com>
Mon, 3 Feb 2014 21:24:01 +0000 (15:24 -0600)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 6 May 2014 04:58:41 +0000 (04:58 +0000)
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index a543788..3f3e220 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -374,9 +374,11 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req)
  
         spin_lock(&req->rq_lock);
         olddl = req->rq_deadline;
-       /* server assumes it now has rq_timeout from when it sent the
-        * early reply, so client should give it at least that long. */
-       req->rq_deadline = cfs_time_current_sec() + req->rq_timeout +
+       /* server assumes it now has rq_timeout from when the request
+        * arrived, so the client should give it at least that long.
+        * since we don't know the arrival time we'll use the original
+        * sent time */
+       req->rq_deadline = req->rq_sent + req->rq_timeout +
                            ptlrpc_at_get_net_latency(req);
  
         DEBUG_REQ(D_ADAPTTO, req,
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index 40753de..81cf710 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -1544,10 +1544,13 @@ EXPORT_SYMBOL(ptlrpc_cleanup_imp);
  /* Adaptive Timeout utils */
  extern unsigned int at_min, at_max, at_history;
  
-/* Bin into timeslices using AT_BINS bins.
-   This gives us a max of the last binlimit*AT_BINS secs without the storage,
-   but still smoothing out a return to normalcy from a slow response.
-   (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
+/* Update at_current with the specified value (bounded by at_min and at_max),
+ * as well as the AT history "bins".
+ *  - Bin into timeslices using AT_BINS bins.
+ *  - This gives us a max of the last at_history seconds without the storage,
+ *    but still smoothing out a return to normalcy from a slow response.
+ *  - (E.g. remember the maximum latency in each minute of the last 4 minutes.)
+ */
  int at_measured(struct adaptive_timeout *at, unsigned int val)
  {
          unsigned int old = at->at_current;
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index 98696f4..7cb957c 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -1278,13 +1278,16 @@ ptlrpc_at_remove_timed(struct ptlrpc_request *req)
         array->paa_count--;
  }
  
+/*
+ * Attempt to extend the request deadline by sending an early reply to the
+ * client.
+ */
  static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
  {
         struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
          struct ptlrpc_request *reqcopy;
          struct lustre_msg *reqmsg;
          cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
-        time_t newdl;
          int rc;
          ENTRY;
  
@@ -1326,8 +1329,13 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                 at_measured(&svcpt->scp_at_estimate, min(at_extra,
                             req->rq_export->exp_obd->obd_recovery_timeout / 4));
         } else {
-               /* Fake our processing time into the future to ask the clients
-                * for some extra amount of time */
+               /* We want to extend the request deadline by at_extra seconds,
+                * so we set our service estimate to reflect how much time has
+                * passed since this request arrived plus an additional
+                * at_extra seconds. The client will calculate the new deadline
+                * based on this service estimate (plus some additional time to
+                * account for network latency). See ptlrpc_at_recv_early_reply
+                */
                 at_measured(&svcpt->scp_at_estimate, at_extra +
                             cfs_time_current_sec() -
                             req->rq_arrival_time.tv_sec);
@@ -1344,7 +1352,6 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                         RETURN(-ETIMEDOUT);
                 }
         }
-       newdl = cfs_time_current_sec() + at_get(&svcpt->scp_at_estimate);
  
         reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
         if (reqcopy == NULL)
@@ -1390,13 +1397,14 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
  
          rc = ptlrpc_send_reply(reqcopy, PTLRPC_REPLY_EARLY);
  
-        if (!rc) {
-                /* Adjust our own deadline to what we told the client */
-                req->rq_deadline = newdl;
-                req->rq_early_count++; /* number sent, server side */
-        } else {
-                DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
-        }
+       if (!rc) {
+               /* Adjust our own deadline to what we told the client */
+               req->rq_deadline = req->rq_arrival_time.tv_sec +
+                                  at_get(&svcpt->scp_at_estimate);
+               req->rq_early_count++; /* number sent, server side */
+       } else {
+               DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
+       }
  
          /* Free the (early) reply state from lustre_pack_reply.
             (ptlrpc_send_reply takes it's own rs ref, so this is safe here) */
author	Chris Horn <hornc@cray.com>
	Mon, 3 Feb 2014 21:24:01 +0000 (15:24 -0600)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Tue, 6 May 2014 04:58:41 +0000 (04:58 +0000)
lustre/ptlrpc/client.c		patch | blob | history
lustre/ptlrpc/import.c		patch | blob | history
lustre/ptlrpc/service.c		patch | blob | history