LU-6084 ptlrpc: prevent request timeout growth due to recovery 20/13520/9
author    Mikhail Pershin <mike.pershin@intel.com>
          Tue, 3 Feb 2015 18:30:14 +0000 (10:30 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
          Mon, 9 Feb 2015 02:40:41 +0000 (02:40 +0000)
Patch fixes the issue of a growing request timeout which occurred
after commit 1d889090 for LU-5079. While the commit itself is correct,
it revealed another issue: if a request is being processed for a long
time on the server, then client adaptive timeouts adapt to that after
the reply is received, and new requests get a bigger timeout. Another
problem is that the server AT history is corrupted by recovery request
processing times, which are not pure service time but also include the
time spent waiting for clients to recover.
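
For context, the client derives each request timeout from a history of
observed service times, so a single long observation raises the timeout
of every subsequent request. A toy model of that feedback loop
(illustration only; at_toy/at_toy_measured are invented names, the real
code is at_measured()/at_get()):

#include <stdio.h>

/* Toy adaptive-timeout estimator; the real history decays over time,
 * this simplification only tracks the worst case observed. */
struct at_toy {
	unsigned int estimate;	/* current service time estimate, seconds */
};

static void at_toy_measured(struct at_toy *at, unsigned int service_time)
{
	if (service_time > at->estimate)
		at->estimate = service_time;
}

int main(void)
{
	struct at_toy at = { .estimate = 15 };

	at_toy_measured(&at, 10);	/* normal traffic: estimate stays low */
	at_toy_measured(&at, 120);	/* one request stuck behind recovery */
	/* Before this patch the 120s observation entered the history, so
	 * every later request started with a ~120s timeout. */
	printf("next timeout estimate: %us\n", at.estimate);
	return 0;
}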

Patch prevents AT stats updates from early replies on the client and
from recovery request processing times on the server.
ptlrpc_at_recv_early_reply() still updates the current request timeout
as requested by the server, but does not include this value in the AT
stats; the real reply will bring that data from the server anyway.
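
On the server side, the recovery branch now extends the request deadline
directly instead of feeding the wait into at_measured(); capping the
extension at a quarter of the recovery window is what guarantees the
"at least 4 early replies" mentioned in the code. A standalone sketch of
that arithmetic (values are illustrative, not from the patch):

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int at_extra = 30;		/* seconds, illustrative */
	unsigned int recovery_window = 300;	/* obd_recovery_timeout */
	/* Each early reply extends the deadline by at most a quarter of
	 * the window, so at least four early replies fit into it. */
	unsigned int ext = min_u(at_extra, recovery_window / 4);

	printf("extension per early reply: %us, replies per window >= %u\n",
	       ext, recovery_window / ext);
	return 0;
}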

Test-Parameters: alwaysuploadlogs testlist=replay-vbr,replay-dual

Signed-off-by: Mikhail Pershin <mike.pershin@intel.com>
Change-Id: Ifcadfd669162013b6ccb386eb2b508bd9f0b22d9
Reviewed-on: http://review.whamcloud.com/13520
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ptlrpc/client.c
lustre/ptlrpc/service.c
lustre/tests/replay-dual.sh
lustre/tests/replay-vbr.sh

diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 3ef3292..f677ea0 100644
@@ -359,27 +359,29 @@ __must_hold(&req->rq_lock)
        rc = sptlrpc_cli_unwrap_early_reply(req, &early_req);
        if (rc) {
                spin_lock(&req->rq_lock);
-                RETURN(rc);
-        }
-
-        rc = unpack_reply(early_req);
-        if (rc == 0) {
-                /* Expecting to increase the service time estimate here */
-                ptlrpc_at_adj_service(req,
-                        lustre_msg_get_timeout(early_req->rq_repmsg));
-                ptlrpc_at_adj_net_latency(req,
-                        lustre_msg_get_service_time(early_req->rq_repmsg));
-        }
-
-        sptlrpc_cli_finish_early_reply(early_req);
+               RETURN(rc);
+       }
 
+       rc = unpack_reply(early_req);
        if (rc != 0) {
+               sptlrpc_cli_finish_early_reply(early_req);
                spin_lock(&req->rq_lock);
                RETURN(rc);
        }
 
-       /* Adjust the local timeout for this req */
-       ptlrpc_at_set_req_timeout(req);
+       /* Use the new timeout value just to adjust the local value for this
+        * request; don't include it in at_history. It is unclear yet why the
+        * service time increased and whether it should be counted or skipped,
+        * e.g. it can be a recovery case or some error on the server; the
+        * real reply will add all new data if it is worth adding. */
+       req->rq_timeout = lustre_msg_get_timeout(early_req->rq_repmsg);
+       lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
+
+       /* Network latency can be adjusted; it reflects pure network delay */
+       ptlrpc_at_adj_net_latency(req,
+                       lustre_msg_get_service_time(early_req->rq_repmsg));
+
+       sptlrpc_cli_finish_early_reply(early_req);
 
        spin_lock(&req->rq_lock);
        olddl = req->rq_deadline;
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index b5b70ab..267685c 100644
@@ -1259,11 +1259,13 @@ ptlrpc_at_remove_timed(struct ptlrpc_request *req)
 static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 {
        struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
-        struct ptlrpc_request *reqcopy;
-        struct lustre_msg *reqmsg;
-        cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
-        int rc;
-        ENTRY;
+       struct ptlrpc_request *reqcopy;
+       struct lustre_msg *reqmsg;
+       cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
+       time_t  newdl;
+       int rc;
+
+       ENTRY;
 
        if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT)) {
                /* don't send early reply */
@@ -1305,10 +1307,11 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                 * during the recovery period send at least 4 early replies,
                 * spacing them every at_extra if we can. at_estimate should
                 * always equal this fixed value during recovery. */
-               at_measured(&svcpt->scp_at_estimate,
-                           cfs_time_current_sec() -
-                           req->rq_arrival_time.tv_sec + min(at_extra,
-                           req->rq_export->exp_obd->obd_recovery_timeout / 4));
+               /* Don't account request processing time into the AT history
+                * during recovery; it is not pure service time but also
+                * includes waiting time for recovering clients */
+               newdl = cfs_time_current_sec() + min(at_extra,
+                       req->rq_export->exp_obd->obd_recovery_timeout / 4);
        } else {
                /* We want to extend the request deadline by at_extra seconds,
                 * so we set our service estimate to reflect how much time has
@@ -1320,17 +1323,16 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
                at_measured(&svcpt->scp_at_estimate, at_extra +
                            cfs_time_current_sec() -
                            req->rq_arrival_time.tv_sec);
-
+               newdl = req->rq_arrival_time.tv_sec +
+                       at_get(&svcpt->scp_at_estimate);
        }
+
        /* Check to see if we've actually increased the deadline -
         * we may be past adaptive_max */
-       if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
-           at_get(&svcpt->scp_at_estimate)) {
+       if (req->rq_deadline >= newdl) {
                DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
                          "(%ld/%ld), not sending early reply\n",
-                         olddl, req->rq_arrival_time.tv_sec +
-                         at_get(&svcpt->scp_at_estimate) -
-                         cfs_time_current_sec());
+                         olddl, newdl - cfs_time_current_sec());
                RETURN(-ETIMEDOUT);
        }
 
@@ -1388,8 +1390,7 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 
        if (!rc) {
                /* Adjust our own deadline to what we told the client */
-               req->rq_deadline = req->rq_arrival_time.tv_sec +
-                                  at_get(&svcpt->scp_at_estimate);
+               req->rq_deadline = newdl;
                req->rq_early_count++; /* number sent, server side */
        } else {
                DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
@@ -1951,6 +1952,7 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service_part *svcpt,
                     MSGHDR_AT_SUPPORT) ?
                    /* The max time the client expects us to take */
                    lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+
         req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
         if (unlikely(deadline == 0)) {
                 DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index b8e5094..5cab1a4 100755
@@ -491,9 +491,9 @@ test_20() { #16389
        zconf_mount $HOSTNAME $DIR2 || error "mount $DIR2 fail"
        local tier2=$((SECONDS - before))
 
-       # timeout is more than 2.25x original timeout
-       ((tier2 < tier1 * 9 / 4)) ||
-               error "recovery time $tier2 >= 2.25x original time $tier1"
+       # timeout is more than 1.5x original timeout
+       ((tier2 < tier1 * 6 / 4)) ||
+               error "recovery time $tier2 >= 1.5x original time $tier1"
 }
 run_test 20 "recovery time is not increasing"
 
diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh
index f633a0d..ca00df5 100644
@@ -711,12 +711,6 @@ test_7_cycle() {
     do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0"
     do_facet $SINGLEMDS "$LCTL set_param mdt.${!var}.commit_on_sharing=0"
 
-       local at_max_saved=0
-       if at_is_enabled; then
-               at_max_saved=$(at_max_get $SINGLEMDS)
-               at_max_set $TIMEOUT mds client
-       fi
-
     do_node $CLIENT1 mkdir -p $DIR/$tdir
     replay_barrier $SINGLEMDS
     # first operation
@@ -736,7 +730,6 @@ test_7_cycle() {
        wait_recovery_complete $SINGLEMDS
        wait_mds_ost_sync || error "wait_mds_ost_sync failed"
 
-       [[ $at_max_saved -eq 0 ]] || at_max_set $at_max_saved mds client
        rm -rf $DIR/$tdir
        return $rc
 }