Whamcloud - gitweb
LU-5266 ldlm: granting the same lock twice on recovery
[fs/lustre-release.git] / lustre / ldlm / ldlm_lockd.c
index 4215d1a..4d9e72f 100644 (file)
@@ -339,7 +339,7 @@ static void waiting_locks_callback(unsigned long unused)
                 ldlm_lock_to_ns(lock)->ns_timeouts++;
                 LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                            "evicting client at %s ",
-                           cfs_time_current_sec()- lock->l_last_activity,
+                           cfs_time_current_sec() - lock->l_last_activity,
                            libcfs_nid2str(
                                    lock->l_export->exp_connection->c_peer.nid));
 
@@ -862,6 +862,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
+       lock->l_last_activity = cfs_time_current_sec();
+
         if (lock->l_export && lock->l_export->exp_nid_stats &&
             lock->l_export->exp_nid_stats->nid_ldlm_stats)
                 lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
@@ -898,6 +900,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
                                           lock->l_last_activity);
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) {
+               LDLM_DEBUG(lock, "dropping CP AST");
+               RETURN(0);
+       }
+
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                     &RQF_LDLM_CP_CALLBACK);
         if (req == NULL)
@@ -954,6 +961,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
                    total_enqueue_wait);
 
+       lock->l_last_activity = cfs_time_current_sec();
+
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
         if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
@@ -1070,6 +1079,8 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
+       lock->l_last_activity = cfs_time_current_sec();
+
        req->rq_interpret_reply = ldlm_cb_interpret;
 
         if (lock->l_export && lock->l_export->exp_nid_stats &&
@@ -1226,7 +1237,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
         }
 #endif
 
-        if (unlikely(flags & LDLM_FL_REPLAY)) {
+       if (unlikely((flags & LDLM_FL_REPLAY) ||
+                    (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) {
                 /* Find an existing lock in the per-export lock hash */
                /* In the function below, .hs_keycmp resolves to
                 * ldlm_export_lock_keycmp() */
@@ -1236,8 +1248,9 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                 if (lock != NULL) {
                         DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie "
                                   LPX64, lock->l_handle.h_cookie);
+                       flags |= LDLM_FL_RESENT;
                         GOTO(existing_lock, rc = 0);
-                }
+               }
         }
 
        /* The lock's callback data might be set in the policy function */
@@ -2590,6 +2603,8 @@ static int ldlm_bl_thread_main(void *arg)
                 if (blwi->blwi_mem_pressure)
                        memory_pressure_set();
 
+               OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4);
+
                 if (blwi->blwi_count) {
                         int count;
                        /* The special case when we cancel locks in LRU