- fix recovery time growing

author tappro <tappro>

Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)

committer tappro <tappro>

Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)
author tappro <tappro>
Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)
committer tappro <tappro>
Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h

index 8eb8850..3fde7b2 100644 (file)
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -191,6 +191,19 @@ void class_notify_import_observers(struct obd_import *imp, int event,
                                     void *event_arg);
  
  /* import.c */
+static inline unsigned int at_est2timeout(unsigned int val)
+{
+        /* timeout = estimate + 25% (val >> 2) + an arbitrary 5 sec minimum */
+        return (val + (val >> 2) + 5);
+}
+
+static inline unsigned int at_timeout2est(unsigned int val)
+{
+        /* restore estimate from timeout: roughly 4/5 of val; val must be != 0 */
+        LASSERT(val);
+        return ((val - 1) / 5 * 4);
+}
+
  static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
          memset(at, 0, sizeof(*at));
          at->at_current = val;
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index 44b3159..e739721 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1329,11 +1329,16 @@ target_start_and_reset_recovery_timer(struct obd_device *obd,
                                        struct ptlrpc_request *req,
                                        int new_client)
  {
-        int req_timeout = OBD_RECOVERY_FACTOR *
-                          lustre_msg_get_timeout(req->rq_reqmsg);
+        int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg);
+
+        /* teach server about old server's estimates */
+        if (!new_client)
+                at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate,
+                       at_timeout2est(req_timeout));
  
          check_and_start_recovery_timer(obd);
  
+        req_timeout *= OBD_RECOVERY_FACTOR;
          if (req_timeout > obd->obd_recovery_timeout && !new_client)
                  reset_recovery_timer(obd, req_timeout, 0);
  }
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 39d7049..de5f94b 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -207,8 +207,7 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
          idx = import_at_get_index(req->rq_import,
                                    req->rq_request_portal);
          serv_est = at_get(&at->iat_service_estimate[idx]);
-        /* add an arbitrary minimum: 125% +5 sec */
-        req->rq_timeout = serv_est + (serv_est >> 2) + 5;
+        req->rq_timeout = at_est2timeout(serv_est);
          /* We could get even fancier here, using history to predict increased
             loading... */
  
@@ -225,6 +224,10 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
          unsigned int oldse;
          struct imp_at *at;
  
+        /* do the estimate only if not in recovery */
+        if (!(req->rq_send_state & (LUSTRE_IMP_FULL | LUSTRE_IMP_CONNECTING)))
+                return;
+
          LASSERT(req->rq_import);
          at = &req->rq_import->imp_at;
  
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index c83c877..d79ad5b 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -321,9 +321,11 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                                   req->rq_arrival_time.tv_sec, 1);
  
          if (!(flags & PTLRPC_REPLY_EARLY) &&
-            (req->rq_type != PTL_RPC_MSG_ERR)) {
-                /* early replies and errors don't count toward our service
-                   time estimate */
+            (req->rq_type != PTL_RPC_MSG_ERR) &&
+            !(lustre_msg_get_flags(req->rq_reqmsg) &
+              (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) {
+                /* early replies, errors and recovery requests don't count
+                 * toward our service time estimate */
                  int oldse = at_add(&svc->srv_at_estimate, service_time);
                  if (oldse != 0)
                          DEBUG_REQ(D_ADAPTTO, req,
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index 974f526..05ff23d 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -737,15 +737,22 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req,
                  RETURN(-ENOSYS);
          }
  
-        if (extra_time) {
-                /* Fake our processing time into the future to ask the
-                   clients for some extra amount of time */
-                extra_time += cfs_time_current_sec() -
-                        req->rq_arrival_time.tv_sec;
-                at_add(&svc->srv_at_estimate, extra_time);
+        if (req->rq_export && req->rq_export->exp_in_recovery) {
+                /* don't increase server estimates during recovery, and give
+                   clients the full recovery time. */
+                newdl = cfs_time_current_sec() +
+                        req->rq_export->exp_obd->obd_recovery_timeout;
+        } else {
+                if (extra_time) {
+                        /* Fake our processing time into the future to ask the
+                           clients for some extra amount of time */
+                        extra_time += cfs_time_current_sec() -
+                                req->rq_arrival_time.tv_sec;
+                        at_add(&svc->srv_at_estimate, extra_time);
+                }
+                newdl = req->rq_arrival_time.tv_sec +
+                        at_get(&svc->srv_at_estimate);
          }
-
-        newdl = req->rq_arrival_time.tv_sec + at_get(&svc->srv_at_estimate);
          if (req->rq_deadline >= newdl) {
                  /* We're not adding any time, no need to send an early reply
                     (e.g. maybe at adaptive_max) */
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh

index c0a9fe4..dc31f90 100755 (executable)
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -376,6 +376,33 @@ test_19() { # Bug 10991 - resend of open request does not fail assertion.
  }
  run_test 19 "resend of open request"
  
+test_20() { # 16389 - fail if the 2nd failover cycle takes >= 2x the 1st
+    BEFORE=`date +%s`
+    replay_barrier $SINGLEMDS
+    touch $MOUNT1/a
+    touch $MOUNT2/b
+    umount $MOUNT2
+    facet_failover $SINGLEMDS
+    df $MOUNT1 || return 1
+    rm $MOUNT1/a
+    zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+    TIER1=$((`date +%s` - BEFORE))
+    BEFORE=`date +%s`
+    replay_barrier $SINGLEMDS
+    touch $MOUNT1/a
+    touch $MOUNT2/b
+    umount $MOUNT2
+    facet_failover $SINGLEMDS
+    df $MOUNT1 || return 1
+    rm $MOUNT1/a
+    zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
+    TIER2=$((`date +%s` - BEFORE))
+    [ $TIER2 -ge $((TIER1 * 2)) ] && \
+        error "recovery time is growing $TIER2 > $TIER1"
+    return 0
+}
+run_test 20 "recovery time is not increasing"
+
  equals_msg `basename $0`: test complete, cleaning up
  SLEEP=$((`date +%s` - $NOW))
  [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
author	tappro <tappro>
	Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)
committer	tappro <tappro>
	Mon, 3 Nov 2008 13:34:52 +0000 (13:34 +0000)
lustre/include/lustre_import.h		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/niobuf.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history
lustre/tests/replay-dual.sh		patch \| blob \| history