Whamcloud - gitweb
b=18618
author: nathan <nathan>
Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
committer: nathan <nathan>
Tue, 9 Jun 2009 22:56:57 +0000 (22:56 +0000)
i=adilger
i=johann
don't increase ldlm timeout if previous client was evicted

lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c

index d6e9757..82c37cf 100644 (file)
@@ -809,11 +809,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
                                           lock->l_last_activity);
 
-        if (total_enqueue_wait > obd_timeout)
-                /* non-fatal with AT - change to LDLM_DEBUG? */
-                LDLM_WARN(lock, "enqueue wait took %lus from "CFS_TIME_T,
-                          total_enqueue_wait, lock->l_last_activity);
-
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                     &RQF_LDLM_CP_CALLBACK);
         if (req == NULL)
@@ -854,8 +849,18 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
-        at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-               total_enqueue_wait);
+        if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
+                at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
+                       total_enqueue_wait);
+        else
+                /* bz18618. Don't add lock enqueue time we spend waiting for a
+                   previous callback to fail. Locks waiting legitimately will
+                   get extended by ldlm_refresh_waiting_lock regardless of the
+                   estimate, so it's okay to underestimate here. */
+                LDLM_DEBUG(lock, "lock completed after %lus; estimate was %ds. "
+                       "It is likely that a previous callback timed out.",
+                       total_enqueue_wait,
+                       at_get(&lock->l_resource->lr_namespace->ns_at_estimate));
 
         ptlrpc_request_set_replen(req);
 
index 94b53a4..1eaa0e1 100644 (file)
@@ -116,7 +116,7 @@ int ldlm_get_enq_timeout(struct ldlm_lock *lock)
         /* Since these are non-updating timeouts, we should be conservative.
            It would be nice to have some kind of "early reply" mechanism for
            lock callbacks too... */
-        timeout = timeout + (timeout >> 1); /* 150% */
+        timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
         return max(timeout, ldlm_enqueue_min);
 }
 EXPORT_SYMBOL(ldlm_get_enq_timeout);