b=18618

author nathan <nathan>

Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)

committer nathan <nathan>

Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
author nathan <nathan>
Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
committer nathan <nathan>
Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c

index d6e9757..82c37cf 100644 (file)
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -809,11 +809,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
                                            lock->l_last_activity);
  
-        if (total_enqueue_wait > obd_timeout)
-                /* non-fatal with AT - change to LDLM_DEBUG? */
-                LDLM_WARN(lock, "enqueue wait took %lus from "CFS_TIME_T,
-                          total_enqueue_wait, lock->l_last_activity);
-
          req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                      &RQF_LDLM_CP_CALLBACK);
          if (req == NULL)
@@ -854,8 +849,18 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
  
          /* Server-side enqueue wait time estimate, used in
              __ldlm_add_waiting_lock to set future enqueue timers */
-        at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-               total_enqueue_wait);
+        if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
+                at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
+                       total_enqueue_wait);
+        else
+                /* bz18618. Don't add lock enqueue time we spend waiting for a
+                   previous callback to fail. Locks waiting legitimately will
+                   get extended by ldlm_refresh_waiting_lock regardless of the
+                   estimate, so it's okay to underestimate here. */
+                LDLM_DEBUG(lock, "lock completed after %lus; estimate was %ds. "
+                       "It is likely that a previous callback timed out.",
+                       total_enqueue_wait,
+                       at_get(&lock->l_resource->lr_namespace->ns_at_estimate));
  
          ptlrpc_request_set_replen(req);
  
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index 94b53a4..1eaa0e1 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -116,7 +116,7 @@ int ldlm_get_enq_timeout(struct ldlm_lock *lock)
          /* Since these are non-updating timeouts, we should be conservative.
             It would be nice to have some kind of "early reply" mechanism for
             lock callbacks too... */
-        timeout = timeout + (timeout >> 1); /* 150% */
+        timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
          return max(timeout, ldlm_enqueue_min);
  }
  EXPORT_SYMBOL(ldlm_get_enq_timeout);
author	nathan <nathan>
	Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
committer	nathan <nathan>
	Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
lustre/ldlm/ldlm_lockd.c		patch | blob | history
lustre/ldlm/ldlm_request.c		patch | blob | history