b=21411 Improve AT (adaptive timeouts) to handle timeouts more accurately.

author yangsheng <Sheng.Yang@Sun.COM>

Tue, 9 Feb 2010 21:06:02 +0000 (05:06 +0800)

committer Robert Read <rread@sun.com>

Thu, 11 Feb 2010 00:48:59 +0000 (16:48 -0800)
author yangsheng <Sheng.Yang@Sun.COM>
Tue, 9 Feb 2010 21:06:02 +0000 (05:06 +0800)
committer Robert Read <rread@sun.com>
Thu, 11 Feb 2010 00:48:59 +0000 (16:48 -0800)
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h

index 98124db..ebcc46f 100644 (file)
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -238,7 +238,7 @@ static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
  static inline int at_get(struct adaptive_timeout *at) {
          return at->at_current;
  }
  static inline int at_get(struct adaptive_timeout *at) {
          return at->at_current;
  }
-int at_add(struct adaptive_timeout *at, unsigned int val);
+int at_measured(struct adaptive_timeout *at, unsigned int val);
  int import_at_get_index(struct obd_import *imp, int portal);
  extern unsigned int at_max;
  #define AT_OFF (at_max == 0)
  int import_at_get_index(struct obd_import *imp, int portal);
  extern unsigned int at_max;
  #define AT_OFF (at_max == 0)
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index d683bae..b0ac944 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1387,8 +1387,8 @@ target_start_and_reset_recovery_timer(struct obd_device *obd,
          if (!new_client && service_time)
                  /* Teach server about old server's estimates, as first guess
                   * at how long new requests will take. */
          if (!new_client && service_time)
                  /* Teach server about old server's estimates, as first guess
                   * at how long new requests will take. */
-                at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate,
-                       service_time);
+                at_measured(&req->rq_rqbd->rqbd_service->srv_at_estimate,
+                            service_time);
  
          check_and_start_recovery_timer(obd);
  
  
          check_and_start_recovery_timer(obd);
  
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index 78375dd..b89b699 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -855,8 +855,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          /* Server-side enqueue wait time estimate, used in
              __ldlm_add_waiting_lock to set future enqueue timers */
          if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
          /* Server-side enqueue wait time estimate, used in
              __ldlm_add_waiting_lock to set future enqueue timers */
          if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
-                at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-                       total_enqueue_wait);
+                at_measured(&lock->l_resource->lr_namespace->ns_at_estimate,
+                            total_enqueue_wait);
          else
                  /* bz18618. Don't add lock enqueue time we spend waiting for a
                     previous callback to fail. Locks waiting legitimately will
          else
                  /* bz18618. Don't add lock enqueue time we spend waiting for a
                     previous callback to fail. Locks waiting legitimately will
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index 50377c9..7b55b01 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -140,7 +140,8 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
                             CFS_DURATION_T"s", delay);
  
                  /* Update our time estimate */
                             CFS_DURATION_T"s", delay);
  
                  /* Update our time estimate */
-                at_add(&lock->l_resource->lr_namespace->ns_at_estimate, delay);
+                at_measured(&lock->l_resource->lr_namespace->ns_at_estimate,
+                            delay);
                  result = 0;
          }
          return result;
                  result = 0;
          }
          return result;
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index 4d81fe6..e02de9c 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -228,7 +228,7 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req,
          idx = import_at_get_index(req->rq_import, req->rq_request_portal);
          /* max service estimates are tracked on the server side,
             so just keep minimal history here */
          idx = import_at_get_index(req->rq_import, req->rq_request_portal);
          /* max service estimates are tracked on the server side,
             so just keep minimal history here */
-        oldse = at_add(&at->iat_service_estimate[idx], serv_est);
+        oldse = at_measured(&at->iat_service_estimate[idx], serv_est);
          if (oldse != 0)
                  CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d "
                         "has changed from %d to %d\n",
          if (oldse != 0)
                  CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d "
                         "has changed from %d to %d\n",
@@ -260,7 +260,7 @@ static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req,
                        CFS_DURATION_T"\n", service_time,
                        cfs_time_sub(now, req->rq_sent));
  
                        CFS_DURATION_T"\n", service_time,
                        cfs_time_sub(now, req->rq_sent));
  
-        oldnl = at_add(&at->iat_net_latency, nl);
+        oldnl = at_measured(&at->iat_net_latency, nl);
          if (oldnl != 0)
                  CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) "
                         "has changed from %d to %d\n",
          if (oldnl != 0)
                  CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) "
                         "has changed from %d to %d\n",
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index 12a19db..d5a1e74 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -518,9 +518,9 @@ static int import_select_connection(struct obd_import *imp)
              !imp->imp_recon_bk /* not retrying */) {
                  if (at_get(&imp->imp_at.iat_net_latency) <
                      CONNECTION_SWITCH_MAX) {
              !imp->imp_recon_bk /* not retrying */) {
                  if (at_get(&imp->imp_at.iat_net_latency) <
                      CONNECTION_SWITCH_MAX) {
-                        at_add(&imp->imp_at.iat_net_latency,
-                               at_get(&imp->imp_at.iat_net_latency) +
-                               CONNECTION_SWITCH_INC);
+                        at_measured(&imp->imp_at.iat_net_latency,
+                                    at_get(&imp->imp_at.iat_net_latency) +
+                                    CONNECTION_SWITCH_INC);
                  }
                  LASSERT(imp_conn->oic_last_attempt);
                  CWARN("%s: tried all connections, increasing latency to %ds\n",
                  }
                  LASSERT(imp_conn->oic_last_attempt);
                  CWARN("%s: tried all connections, increasing latency to %ds\n",
@@ -1487,7 +1487,7 @@ extern unsigned int at_min, at_max, at_history;
     This gives us a max of the last binlimit*AT_BINS secs without the storage,
     but still smoothing out a return to normalcy from a slow response.
     (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
     This gives us a max of the last binlimit*AT_BINS secs without the storage,
     but still smoothing out a return to normalcy from a slow response.
     (E.g. remember the maximum latency in each minute of the last 4 minutes.) */
-int at_add(struct adaptive_timeout *at, unsigned int val)
+int at_measured(struct adaptive_timeout *at, unsigned int val)
  {
          unsigned int old = at->at_current;
          time_t now = cfs_time_current_sec();
  {
          unsigned int old = at->at_current;
          time_t now = cfs_time_current_sec();
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index 8be8f00..571cf17 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -350,7 +350,7 @@ static void ptlrpc_at_set_reply(struct ptlrpc_request *req, int flags)
                 MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                  /* early replies, errors and recovery requests don't count
                   * toward our service time estimate */
                 MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE))) {
                  /* early replies, errors and recovery requests don't count
                   * toward our service time estimate */
-                int oldse = at_add(&svc->srv_at_estimate, service_time);
+                int oldse = at_measured(&svc->srv_at_estimate, service_time);
                  if (oldse != 0)
                          DEBUG_REQ(D_ADAPTTO, req,
                                    "svc %s changed estimate from %d to %d",
                  if (oldse != 0)
                          DEBUG_REQ(D_ADAPTTO, req,
                                    "svc %s changed estimate from %d to %d",
diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c

index b72848a..f6e6a65 100644 (file)
--- a/lustre/ptlrpc/service.c
+++ b/lustre/ptlrpc/service.c
@@ -974,29 +974,34 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
          if (req->rq_export &&
              lustre_msg_get_flags(req->rq_reqmsg) &
              (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
          if (req->rq_export &&
              lustre_msg_get_flags(req->rq_reqmsg) &
              (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
-                /**
-                 * Use at_extra as early reply period for recovery requests but
-                 * make sure it is not bigger than recovery time / 4
-                 */
-                at_add(&svc->srv_at_estimate,
-                       min(at_extra,
-                           req->rq_export->exp_obd->obd_recovery_timeout / 4));
+                /* During recovery, we don't want to send too many early
+                 * replies, but on the other hand we want to make sure the
+                 * client has enough time to resend if the rpc is lost. So
+                 * during the recovery period send at least 4 early replies,
+                 * spacing them every at_extra if we can. at_estimate should
+                 * always equal this fixed value during recovery. */
+                at_measured(&svc->srv_at_estimate, min(at_extra,
+                            req->rq_export->exp_obd->obd_recovery_timeout / 4));
          } else {
                  /* Fake our processing time into the future to ask the clients
                   * for some extra amount of time */
          } else {
                  /* Fake our processing time into the future to ask the clients
                   * for some extra amount of time */
-                at_add(&svc->srv_at_estimate, at_extra);
+                at_measured(&svc->srv_at_estimate, at_extra +
+                            cfs_time_current_sec() -
+                            req->rq_arrival_time.tv_sec);
+
+                /* Check to see if we've actually increased the deadline -
+                 * we may be past adaptive_max */
+                if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+                    at_get(&svc->srv_at_estimate)) {
+                        DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+                                  "(%ld/%ld), not sending early reply\n",
+                                  olddl, req->rq_arrival_time.tv_sec +
+                                  at_get(&svc->srv_at_estimate) -
+                                  cfs_time_current_sec());
+                        RETURN(-ETIMEDOUT);
+                }
          }
          }
-
          newdl = cfs_time_current_sec() + at_get(&svc->srv_at_estimate);
          newdl = cfs_time_current_sec() + at_get(&svc->srv_at_estimate);
-        if (req->rq_deadline >= newdl) {
-                /* We're not adding any time, no need to send an early reply
-                   (e.g. maybe at adaptive_max) */
-                DEBUG_REQ(D_WARNING, req, "Couldn't add any time ("
-                          CFS_DURATION_T"/"CFS_DURATION_T"), "
-                          "not sending early reply\n", olddl,
-                          cfs_time_sub(newdl, cfs_time_current_sec()));
-                RETURN(-ETIMEDOUT);
-        }
  
          OBD_ALLOC(reqcopy, sizeof *reqcopy);
          if (reqcopy == NULL)
  
          OBD_ALLOC(reqcopy, sizeof *reqcopy);
          if (reqcopy == NULL)
author	yangsheng <Sheng.Yang@Sun.COM>
	Tue, 9 Feb 2010 21:06:02 +0000 (05:06 +0800)
committer	Robert Read <rread@sun.com>
	Thu, 11 Feb 2010 00:48:59 +0000 (16:48 -0800)
lustre/include/lustre_import.h		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ldlm/ldlm_lockd.c		patch \| blob \| history
lustre/ldlm/ldlm_request.c		patch \| blob \| history
lustre/ptlrpc/client.c		patch \| blob \| history
lustre/ptlrpc/import.c		patch \| blob \| history
lustre/ptlrpc/niobuf.c		patch \| blob \| history
lustre/ptlrpc/service.c		patch \| blob \| history