From 8a635039c4dd838f6239ce79c37a307577a33d07 Mon Sep 17 00:00:00 2001
From: tappro <tappro>
Date: Tue, 13 Oct 2009 07:07:29 +0000
Subject: [PATCH] Branch HEAD b=20887 i=nathan i=rread

Use AT naturally for recovery requests. Their deadline is set to the at_extra
value, so an early reply will be sent back if the request is in the recovery
queue.
---
 lustre/ldlm/ldlm_lib.c  | 11 +++++------
 lustre/ptlrpc/client.c  | 21 +++++++--------------
 lustre/ptlrpc/import.c  |  3 ++-
 lustre/ptlrpc/service.c | 36 ++++++++++++++++++------------------
 4 files changed, 32 insertions(+), 39 deletions(-)
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index 0fdcb05..c9d52fd 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1638,13 +1638,12 @@ static int handle_recovery_req(struct ptlrpc_thread *thread,
         /* don't reset timer for final stage */
         if (!exp_finished(req->rq_export)) {
                 /**
-                 * XXX: until bug 18948 is fixed (enable AT for request copy)
-                 * the client may reconnect during recovery so we may need to
-                 * wait RECONNECT_DELAY_MAX after each replay instead of
-                 * at_get(&req->rq_rqbd->rqbd_service->srv_at_estimate);
+                 * Add request timeout to the recovery time so next request from
+                 * this client may come in recovery time
                  */
-                 reset_recovery_timer(class_exp2obd(req->rq_export), AT_OFF ?
-                                      obd_timeout : RECONNECT_DELAY_MAX, 1);
+                 reset_recovery_timer(class_exp2obd(req->rq_export),
+                                      AT_OFF ? obd_timeout :
+                                      lustre_msg_get_timeout(req->rq_reqmsg), 1);
         }
         /**
          * bz18031: increase next_recovery_transno before target_request_copy_put()
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 2dbdcf0..5b28d32 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -198,16 +198,14 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
         if (AT_OFF) {
                 /* non-AT settings */
                 req->rq_timeout = req->rq_import->imp_server_timeout ?
-                        obd_timeout / 2 : obd_timeout;
-                lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
-                return;
+                                  obd_timeout / 2 : obd_timeout;
+        } else {
+                at = &req->rq_import->imp_at;
+                idx = import_at_get_index(req->rq_import,
+                                          req->rq_request_portal);
+                serv_est = at_get(&at->iat_service_estimate[idx]);
+                req->rq_timeout = at_est2timeout(serv_est);
         }
-
-        at = &req->rq_import->imp_at;
-        idx = import_at_get_index(req->rq_import,
-                                  req->rq_request_portal);
-        serv_est = at_get(&at->iat_service_estimate[idx]);
-        req->rq_timeout = at_est2timeout(serv_est);
         /* We could get even fancier here, using history to predict increased
            loading... */
 
@@ -224,11 +222,6 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
         unsigned int oldse;
         struct imp_at *at;
 
-        /* do estimate only if is not in recovery */
-        if ((req->rq_send_state != LUSTRE_IMP_FULL) &&
-             (req->rq_send_state != LUSTRE_IMP_CONNECTING))
-                return;
-
         LASSERT(req->rq_import);
         at = &req->rq_import->imp_at;
 
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index fad264b..af8ce40 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -1227,7 +1227,8 @@ static int signal_completed_replay(struct obd_import *imp)
         req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
         lustre_msg_add_flags(req->rq_reqmsg,
                              MSG_LOCK_REPLAY_DONE | MSG_REQ_REPLAY_DONE);
-        req->rq_timeout *= 3;
+        if (AT_OFF)
+                req->rq_timeout *= 3;
         req->rq_interpret_reply = completed_replay_interpret;
 
         ptlrpcd_add_req(req, PSCOPE_OTHER);
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c
index 86ec4f7..2276196 100644
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -890,8 +890,7 @@ static int ptlrpc_at_add_timed(struct ptlrpc_request *req)
         return 0;
 }
 
-static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
-                                      int extra_time)
+static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
 {
         struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
         struct ptlrpc_request *reqcopy;
@@ -907,7 +906,7 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
                   "%ssending early reply (deadline %+lds, margin %+lds) for "
                   "%d+%d", AT_OFF ? "AT off - not " : "",
                   olddl, olddl - at_get(&svc->srv_at_estimate),
-                  at_get(&svc->srv_at_estimate), extra_time);
+                  at_get(&svc->srv_at_estimate), at_extra);
 
         if (AT_OFF)
                 RETURN(0);
@@ -927,22 +926,23 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
                 RETURN(-ENOSYS);
         }
 
-        if (req->rq_export && req->rq_export->exp_in_recovery) {
-                /* don't increase server estimates during recovery, and give
-                   clients the full recovery time. */
-                newdl = cfs_time_current_sec() +
-                        req->rq_export->exp_obd->obd_recovery_timeout;
+        if (req->rq_export &&
+            lustre_msg_get_flags(req->rq_reqmsg) &
+            (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+                /**
+                 * Use at_extra as early reply period for recovery requests but
+                 * make sure it is not bigger than recovery time / 4
+                 */
+                at_add(&svc->srv_at_estimate,
+                       min(at_extra,
+                           req->rq_export->exp_obd->obd_recovery_timeout / 4));
         } else {
-                if (extra_time) {
-                        /* Fake our processing time into the future to ask the
-                           clients for some extra amount of time */
-                        extra_time += cfs_time_current_sec() -
-                                req->rq_arrival_time.tv_sec;
-                        at_add(&svc->srv_at_estimate, extra_time);
-                }
-                newdl = req->rq_arrival_time.tv_sec +
-                        at_get(&svc->srv_at_estimate);
+                /* Fake our processing time into the future to ask the clients
+                 * for some extra amount of time */
+                at_add(&svc->srv_at_estimate, at_extra);
         }
+
+        newdl = cfs_time_current_sec() + at_get(&svc->srv_at_estimate);
         if (req->rq_deadline >= newdl) {
                 /* We're not adding any time, no need to send an early reply
                    (e.g. maybe at adaptive_max) */
@@ -1119,7 +1119,7 @@ static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
                                 rq_timed_list);
                 list_del_init(&rq->rq_timed_list);
 
-                if (ptlrpc_at_send_early_reply(rq, at_extra) == 0)
+                if (ptlrpc_at_send_early_reply(rq) == 0)
                         ptlrpc_at_add_timed(rq);
 
                 ptlrpc_server_drop_request(rq);
-- 
1.8.3.1