Whamcloud - gitweb
LU-5398 ldlm: handle NULL lock in ldlm_handle_enqueue0()
[fs/lustre-release.git] / lustre / ldlm / ldlm_lockd.c
index 4215d1a..f56c776 100644 (file)
@@ -339,7 +339,7 @@ static void waiting_locks_callback(unsigned long unused)
                 ldlm_lock_to_ns(lock)->ns_timeouts++;
                 LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                            "evicting client at %s ",
-                           cfs_time_current_sec()- lock->l_last_activity,
+                           cfs_time_current_sec() - lock->l_last_activity,
                            libcfs_nid2str(
                                    lock->l_export->exp_connection->c_peer.nid));
 
@@ -862,6 +862,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
+       lock->l_last_activity = cfs_time_current_sec();
+
         if (lock->l_export && lock->l_export->exp_nid_stats &&
             lock->l_export->exp_nid_stats->nid_ldlm_stats)
                 lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
@@ -898,6 +900,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
                                           lock->l_last_activity);
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) {
+               LDLM_DEBUG(lock, "dropping CP AST");
+               RETURN(0);
+       }
+
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                     &RQF_LDLM_CP_CALLBACK);
         if (req == NULL)
@@ -954,6 +961,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
                    total_enqueue_wait);
 
+       lock->l_last_activity = cfs_time_current_sec();
+
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
         if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
@@ -1070,6 +1079,8 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         if (AT_OFF)
                 req->rq_timeout = ldlm_get_rq_timeout();
 
+       lock->l_last_activity = cfs_time_current_sec();
+
        req->rq_interpret_reply = ldlm_cb_interpret;
 
         if (lock->l_export && lock->l_export->exp_nid_stats &&
@@ -1226,7 +1237,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
         }
 #endif
 
-        if (unlikely(flags & LDLM_FL_REPLAY)) {
+       if (unlikely((flags & LDLM_FL_REPLAY) ||
+                    (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) {
                 /* Find an existing lock in the per-export lock hash */
                /* In the function below, .hs_keycmp resolves to
                 * ldlm_export_lock_keycmp() */
@@ -1236,8 +1248,9 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                 if (lock != NULL) {
                         DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie "
                                   LPX64, lock->l_handle.h_cookie);
+                       flags |= LDLM_FL_RESENT;
                         GOTO(existing_lock, rc = 0);
-                }
+               }
         }
 
        /* The lock's callback data might be set in the policy function */
@@ -1245,8 +1258,11 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                                dlm_req->lock_desc.l_resource.lr_type,
                                dlm_req->lock_desc.l_req_mode,
                                cbs, NULL, 0, LVB_T_NONE);
-       if (IS_ERR(lock))
-               GOTO(out, rc = PTR_ERR(lock));
+       if (IS_ERR(lock)) {
+               rc = PTR_ERR(lock);
+               lock = NULL;
+               GOTO(out, rc);
+       }
 
         lock->l_last_activity = cfs_time_current_sec();
         lock->l_remote_handle = dlm_req->lock_handle[0];
@@ -1383,7 +1399,7 @@ existing_lock:
 
         /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
          * ldlm_reprocess_all.  If this moves, revisit that code. -phil */
-       if (!IS_ERR(lock)) {
+       if (lock != NULL) {
                LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"
                           "(err=%d, rc=%d)", err, rc);
 
@@ -2590,6 +2606,8 @@ static int ldlm_bl_thread_main(void *arg)
                 if (blwi->blwi_mem_pressure)
                        memory_pressure_set();
 
+               OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4);
+
                 if (blwi->blwi_count) {
                         int count;
                        /* The special case when we cancel locks in LRU