From c2851054f0e5b1c2976a865ee66a8f941d7a31a5 Mon Sep 17 00:00:00 2001
From: zhanghc <zhanghc>
Date: Tue, 13 Jan 2009 15:00:50 +0000
Subject: [PATCH] b=17887

1, print the actual time to wait for
   lock cancellation in "waiting_locks_callback"

2, compare resource in "ost_rw_hpreq_lock_match"

3, replace "LDLM_ERROR" with "LDLM_WARN" if a lock
   is completed after waiting more than obd_timeout

i=vitaly.fertman
i=johann
---
 lustre/include/lustre_dlm.h |  9 ++++++---
 lustre/ldlm/ldlm_lock.c     |  2 +-
 lustre/ldlm/ldlm_lockd.c    | 24 ++++++++++++------------
 lustre/ldlm/ldlm_request.c  | 13 ++++++-------
 lustre/ost/ost_handler.c    |  6 +++++-
 5 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 94f7e3b..b8df94d 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -532,8 +532,8 @@ struct ldlm_lock {
          * it's no longer in use.  If the lock is not granted, a process sleeps
          * on this waitq to learn when it becomes granted. */
         cfs_waitq_t           l_waitq;
-        struct timeval        l_enqueued_time;
 
+        cfs_time_t            l_last_activity;  /* seconds */ 
         cfs_time_t            l_last_used;      /* jiffies */
         struct ldlm_extent    l_req_extent;
 
@@ -642,13 +642,16 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 mask,
                       ...)
         __attribute__ ((format (printf, 4, 5)));
 
-#define LDLM_ERROR(lock, fmt, a...) do {                                 \
+#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do {                     \
         static cfs_debug_limit_state_t _ldlm_cdls;                       \
-        ldlm_lock_debug(&_ldlm_cdls, D_ERROR, lock,                      \
+        ldlm_lock_debug(&_ldlm_cdls, mask, lock,                         \
                         __FILE__, __FUNCTION__, __LINE__,                \
                         "### " fmt , ##a);                               \
 } while (0)
 
+#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
+#define LDLM_WARN(lock, fmt, a...)  LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+
 #define LDLM_DEBUG(lock, fmt, a...)   do {                              \
         ldlm_lock_debug(NULL, D_DLMTRACE, lock,                         \
                         __FILE__, __FUNCTION__, __LINE__,               \
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index 972d457..f37f834 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -1177,7 +1177,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         struct ldlm_interval *node = NULL;
         ENTRY;
 
-        do_gettimeofday(&lock->l_enqueued_time);
+        lock->l_last_activity = cfs_time_current_sec();
         /* policies are not executed on the client or during replay */
         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
             && !local && ns->ns_policy) {
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index 75fac78..7fd5fa2 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -300,7 +300,7 @@ static void waiting_locks_callback(unsigned long unused)
                 lock->l_resource->lr_namespace->ns_timeouts++;
                 LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                            "evicting client at %s ",
-                           cfs_time_current_sec()- lock->l_enqueued_time.tv_sec,
+                           cfs_time_current_sec()- lock->l_last_activity,
                            libcfs_nid2str(
                                    lock->l_export->exp_connection->c_peer.nid));
                 if (lock == last) {
@@ -755,6 +755,8 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
 
         LDLM_DEBUG(lock, "server preparing blocking AST");
 
+        lock->l_last_activity = cfs_time_current_sec();
+
         ptlrpc_req_set_repsize(req, 1, NULL);
         if (instant_cancel) {
                 unlock_res(lock->l_resource);
@@ -786,7 +788,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         struct ldlm_cb_set_arg *arg = data;
         struct ldlm_request *body;
         struct ptlrpc_request *req;
-        struct timeval granted_time;
         long total_enqueue_wait;
         __u32 size[3] = { [MSG_PTLRPC_BODY_OFF] = sizeof(struct ptlrpc_body),
                         [DLM_LOCKREQ_OFF]     = sizeof(*body) };
@@ -796,14 +797,13 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         LASSERT(lock != NULL);
         LASSERT(data != NULL);
 
-        do_gettimeofday(&granted_time);
-        total_enqueue_wait = cfs_timeval_sub(&granted_time,
-                                             &lock->l_enqueued_time, NULL);
+        total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
+                                          lock->l_last_activity);
 
-        if (total_enqueue_wait / ONE_MILLION > obd_timeout)
+        if (total_enqueue_wait > obd_timeout)
                 /* non-fatal with AT - change to LDLM_DEBUG? */
-                LDLM_ERROR(lock, "enqueue wait took %luus from %lu",
-                           total_enqueue_wait, lock->l_enqueued_time.tv_sec);
+                LDLM_WARN(lock, "enqueue wait took %lus from %lu",
+                           total_enqueue_wait, lock->l_last_activity);
 
         lock_res_and_lock(lock);
         if (lock->l_resource->lr_lvb_len) {
@@ -839,13 +839,13 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 unlock_res_and_lock(lock);
         }
 
-        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+        LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
                    total_enqueue_wait);
 
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
         at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-               total_enqueue_wait / ONE_MILLION);
+               total_enqueue_wait);
 
         ptlrpc_req_set_repsize(req, 1, NULL);
 
@@ -1086,7 +1086,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req,
         if (!lock)
                 GOTO(out, rc = -ENOMEM);
 
-        do_gettimeofday(&lock->l_enqueued_time);
+        lock->l_last_activity = cfs_time_current_sec();
         lock->l_remote_handle = dlm_req->lock_handle[0];
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
 
@@ -1291,7 +1291,7 @@ int ldlm_handle_convert(struct ptlrpc_request *req)
 
                 LDLM_DEBUG(lock, "server-side convert handler START");
 
-                do_gettimeofday(&lock->l_enqueued_time);
+                lock->l_last_activity = cfs_time_current_sec();
                 res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
                                         &dlm_rep->lock_flags);
                 if (res) {
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index e145f03..29286cf 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -76,9 +76,8 @@ int ldlm_expired_completion_wait(void *data)
 
                 LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago); "
                            "not entering recovery in server code, just going "
-                           "back to sleep", lock->l_enqueued_time.tv_sec,
-                           cfs_time_current_sec() -
-                           lock->l_enqueued_time.tv_sec);
+                           "back to sleep", lock->l_last_activity,
+                           cfs_time_current_sec() - lock->l_last_activity);
                 if (cfs_time_after(cfs_time_current(), next_dump)) {
                         last_dump = next_dump;
                         next_dump = cfs_time_shift(300);
@@ -94,8 +93,8 @@ int ldlm_expired_completion_wait(void *data)
         imp = obd->u.cli.cl_import;
         ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
         LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago), entering "
-                   "recovery for %s@%s", lock->l_enqueued_time.tv_sec,
-                   CURRENT_SECONDS - lock->l_enqueued_time.tv_sec,
+                   "recovery for %s@%s", lock->l_last_activity,
+                   cfs_time_current_sec() - lock->l_last_activity,
                    obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
@@ -211,11 +210,11 @@ noreproc:
         }
 
         LDLM_DEBUG(lock, "client-side enqueue waking up: granted after %lds",
-                   cfs_time_current_sec() - lock->l_enqueued_time.tv_sec);
+                   cfs_time_current_sec() - lock->l_last_activity);
 
         /* Update our time estimate */
         at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-               cfs_time_current_sec() - lock->l_enqueued_time.tv_sec);
+               cfs_time_current_sec() - lock->l_last_activity);
 
         RETURN(0);
 }
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c
index f84d696..17ad02e 100644
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -1510,7 +1510,7 @@ int ost_msg_check_version(struct lustre_msg *msg)
 }
 
 static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
-                                       struct ldlm_lock *lock)
+                                   struct ldlm_lock *lock)
 {
         struct niobuf_remote *nb;
         struct obd_ioobj *ioo;
@@ -1547,6 +1547,10 @@ static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
         end = (nb[ioo->ioo_bufcnt - 1].offset +
                nb[ioo->ioo_bufcnt - 1].len - 1) | ~CFS_PAGE_MASK;
 
+        LASSERT(lock->l_resource != NULL);
+        if (lock->l_resource->lr_name.name[0] != ioo->ioo_id)
+                RETURN(0);
+
         if (!(lock->l_granted_mode & mode))
                 RETURN(0);
 
-- 
1.8.3.1