b=18618

[fs/lustre-release.git] / lustre / ldlm / ldlm_request.c
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index 6e11110..1eaa0e1 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -80,9 +80,9 @@ int ldlm_expired_completion_wait(void *data)
                  LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                             CFS_DURATION_T"s ago); not entering recovery in "
                             "server code, just going back to sleep",
-                           lock->l_enqueued_time.tv_sec,
+                           lock->l_last_activity,
                             cfs_time_sub(cfs_time_current_sec(),
-                           lock->l_enqueued_time.tv_sec));
+                           lock->l_last_activity));
                  if (cfs_time_after(cfs_time_current(), next_dump)) {
                          last_dump = next_dump;
                          next_dump = cfs_time_shift(300);
@@ -99,9 +99,8 @@ int ldlm_expired_completion_wait(void *data)
          ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
          LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                    CFS_DURATION_T"s ago), entering recovery for %s@%s",
-                  lock->l_enqueued_time.tv_sec,
-                  cfs_time_sub(cfs_time_current_sec(),
-                  lock->l_enqueued_time.tv_sec),
+                  lock->l_last_activity,
+                  cfs_time_sub(cfs_time_current_sec(), lock->l_last_activity),
                    obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
  
          RETURN(0);
@@ -117,7 +116,7 @@ int ldlm_get_enq_timeout(struct ldlm_lock *lock)
          /* Since these are non-updating timeouts, we should be conservative.
             It would be nice to have some kind of "early reply" mechanism for
             lock callbacks too... */
-        timeout = timeout + (timeout >> 1); /* 150% */
+        timeout = min_t(int, at_max, timeout + (timeout >> 1)); /* 150% */
          return max(timeout, ldlm_enqueue_min);
  }
  EXPORT_SYMBOL(ldlm_get_enq_timeout);
@@ -136,7 +135,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
                  result = -EIO;
          } else {
                  delay = cfs_time_sub(cfs_time_current_sec(),
-                                     lock->l_enqueued_time.tv_sec);
+                                     lock->l_last_activity);
                  LDLM_DEBUG(lock, "client-side enqueue: granted after "
                             CFS_DURATION_T"s", delay);
  
@@ -1951,9 +1950,15 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
          /* we use l_pending_chain here, because it's unused on clients. */
          LASSERTF(list_empty(&lock->l_pending_chain),"lock %p next %p prev %p\n",
                   lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev);
-        /* bug 9573: don't replay locks left after eviction */
-        if (!(lock->l_flags & LDLM_FL_FAILED))
+        /* bug 9573: don't replay locks left after eviction, or
+         * bug 17614: locks being actively cancelled. Get a reference
+         * on a lock so that it does not disappear under us (e.g. due to cancel)
+         */
+        if (!(lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_CANCELING))) {
                  list_add(&lock->l_pending_chain, list);
+                LDLM_LOCK_GET(lock);
+        }
+
          return LDLM_ITER_CONTINUE;
  }
  
@@ -2107,9 +2112,12 @@ int ldlm_replay_locks(struct obd_import *imp)
  
          list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
                  list_del_init(&lock->l_pending_chain);
-                if (rc)
+                if (rc) {
+                        LDLM_LOCK_PUT(lock);
                          continue; /* or try to do the rest? */
+                }
                  rc = replay_one_lock(imp, lock);
+                LDLM_LOCK_PUT(lock);
          }
  
          atomic_dec(&imp->imp_replay_inflight);