more information, please refer to bugzilla 17630.
Severity : normal
+Frequency : rare, if clients are evicted due to failure to return a lock
+Bugzilla : 18618
+Description: don't increase ldlm timeout if previous client was evicted
+Details : if a client doesn't respond to a blocking AST (BAST) within the
+ adaptive ldlm enqueue timeout, don't add that wait time to the
+ adaptive estimate when the lock is next granted.
+
+Severity : normal
Bugzilla : 18674
Description: abort bulk too early if client is reconnected
total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
lock->l_last_activity);
- if (total_enqueue_wait > obd_timeout)
- /* non-fatal with AT - change to LDLM_DEBUG? */
- LDLM_WARN(lock, "enqueue wait took %lus from %lu",
- total_enqueue_wait, lock->l_last_activity);
-
lock_res_and_lock(lock);
if (lock->l_resource->lr_lvb_len) {
size[DLM_REQ_REC_OFF] = lock->l_resource->lr_lvb_len;
/* Server-side enqueue wait time estimate, used in
__ldlm_add_waiting_lock to set future enqueue timers */
- at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
- total_enqueue_wait);
+ if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
+ at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
+ total_enqueue_wait);
+ else
+ /* bz18618. Don't add lock enqueue time we spend waiting for a
+ previous callback to fail. Locks waiting legitimately will
+ get extended by ldlm_refresh_waiting_lock regardless of the
+ estimate, so it's okay to underestimate here. */
+ LDLM_DEBUG(lock, "lock completed after %lus; estimate was %ds. "
+ "It is likely that a previous callback timed out.",
+ total_enqueue_wait,
+ at_get(&lock->l_resource->lr_namespace->ns_at_estimate));
ptlrpc_req_set_repsize(req, 1, NULL);
/* Since these are non-updating timeouts, we should be conservative.
It would be nice to have some kind of "early reply" mechanism for
lock callbacks too... */
- timeout = timeout + (timeout >> 1); /* 150% */
+ timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
return max(timeout, ldlm_enqueue_min);
}
EXPORT_SYMBOL(ldlm_get_enq_timeout);