Branch HEAD

author johann <johann>

Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)

committer johann <johann>

Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)
author johann <johann>
Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)
committer johann <johann>
Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)
diff --git a/lustre/ChangeLog b/lustre/ChangeLog

index a4dd44c..3fb9d7d 100644 (file)
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -1842,6 +1842,17 @@ Details    : A security feature, which is to prevent users from being able
               configuration management server (MGS). The functionality also
               allows to specify sets of clients for which the remapping does
               not apply.
+
+Severity   : normal
+Bugzilla   : 16860
+Description: Excessive recovery window
+Details    : With AT enabled, the recovery window can be excessively long (6000+
+            seconds). To address this problem, we no longer use
+            OBD_RECOVERY_FACTOR when extending the recovery window (the connect
+            timeout no longer depends on the service time; it is now set to
+            INITIAL_CONNECT_TIMEOUT) and clients report the old service
+            time via pb_service_time.
+
  --------------------------------------------------------------------------------
  
  2007-08-10         Cluster File Systems, Inc. <info@clusterfs.com>
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h

index b594b85..4f6e83f 100644 (file)
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -200,9 +200,9 @@ static inline unsigned int at_est2timeout(unsigned int val)
  
  static inline unsigned int at_timeout2est(unsigned int val)
  {
-        /* restore estimate value from timeout */
+        /* restore estimate value from timeout: e=4/5(t-5) */
          LASSERT(val);
-        return ((val - 1) / 5 * 4);
+        return (max((val << 2) / 5, 5U) - 4);
  }
  
  static inline void at_init(struct adaptive_timeout *at, int val, int flags) {
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c

index bc49e4b..7ac4c93 100644 (file)
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -1361,18 +1361,27 @@ target_start_and_reset_recovery_timer(struct obd_device *obd,
                                        struct ptlrpc_request *req,
                                        int new_client)
  {
-        int req_timeout = lustre_msg_get_timeout(req->rq_reqmsg);
+        int service_time = lustre_msg_get_service_time(req->rq_reqmsg);
  
-        /* teach server about old server's estimates */
-        if (!new_client)
+        if (!new_client && service_time)
+                /* Teach server about old server's estimates, as first guess
+                 * at how long new requests will take. */
                  at_add(&req->rq_rqbd->rqbd_service->srv_at_estimate,
-                       at_timeout2est(req_timeout));
+                       service_time);
  
          check_and_start_recovery_timer(obd);
  
-        req_timeout *= OBD_RECOVERY_FACTOR;
-        if (req_timeout > obd->obd_recovery_timeout && !new_client)
-                reset_recovery_timer(obd, req_timeout, 0);
+        /* convert the service time to rpc timeout,
+         * reuse service_time to limit stack usage */
+        service_time = at_est2timeout(service_time);
+
+        /* We expect other clients to time out within service_time, then try
+         * to reconnect, then try the failover server.  The max delay between
+         * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL */
+        service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC +
+                             INITIAL_CONNECT_TIMEOUT);
+        if (service_time > obd->obd_recovery_timeout && !new_client)
+                reset_recovery_timer(obd, service_time, 0);
  }
  
  #ifdef __KERNEL__
@@ -1595,7 +1604,7 @@ static int handle_recovery_req(struct ptlrpc_thread *thread,
          if (!req_replay_done(req->rq_export) ||
              !lock_replay_done(req->rq_export))
                  reset_recovery_timer(class_exp2obd(req->rq_export),
-                       OBD_RECOVERY_FACTOR * AT_OFF ? obd_timeout :
+                       AT_OFF ? obd_timeout :
                         at_get(&req->rq_rqbd->rqbd_service->srv_at_estimate), 1);
          ptlrpc_free_clone(req);
          RETURN(0);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c

index 38cd423..5021179 100644 (file)
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -655,6 +655,19 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                  GOTO(out, rc);
          }
  
+        /* Report the rpc service time to the server so that it knows how long
+         * to wait for clients to join recovery */
+        lustre_msg_set_service_time(request->rq_reqmsg,
+                                    at_timeout2est(request->rq_timeout));
+
+        /* The amount of time we give the server to process the connect req.
+         * import_select_connection will increase the net latency on
+         * repeated reconnect attempts to cover slow networks.
+         * We override/ignore the server rpc completion estimate here,
+         * which may be large if this is a reconnect attempt */
+        request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+        lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
  #ifndef __KERNEL__
          lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_LIBCLIENT);
  #endif
@@ -681,10 +694,6 @@ int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
                  spin_unlock(&imp->imp_lock);
                  lustre_msg_add_op_flags(request->rq_reqmsg,
                                          MSG_CONNECT_INITIAL);
-                if (AT_OFF)
-                        /* AT will use INITIAL_CONNECT_TIMEOUT the first
-                           time, adaptive after that. */
-                        request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
          }
  
          if (set_transno)
author	johann <johann>
	Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)
committer	johann <johann>
	Thu, 27 Nov 2008 10:13:46 +0000 (10:13 +0000)
lustre/ChangeLog		patch \| blob \| history
lustre/include/lustre_import.h		patch \| blob \| history
lustre/ldlm/ldlm_lib.c		patch \| blob \| history
lustre/ptlrpc/import.c		patch \| blob \| history