Whamcloud - gitweb
b=17887
authorzhanghc <zhanghc>
Tue, 3 Feb 2009 14:16:52 +0000 (14:16 +0000)
committerzhanghc <zhanghc>
Tue, 3 Feb 2009 14:16:52 +0000 (14:16 +0000)
1, print the actual time to wait for lock
   cancellation in "waiting_locks_callback"

2, compare resource in "ost_rw_hpreq_lock_match"

3, replace "LDLM_ERROR" with "LDLM_WARN" if a lock
   is completed after waiting more than odb_timeout

i=vitaly.fertman
i=johann

lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/ost/ost_handler.c

index 133b8fa..9edf487 100644 (file)
@@ -639,7 +639,11 @@ struct ldlm_lock {
          */
         cfs_waitq_t           l_waitq;
 
          */
         cfs_waitq_t           l_waitq;
 
-        struct timeval        l_enqueued_time;
+        /** 
+         * Seconds. it will be updated if there is any activity related to 
+         * the lock, e.g. enqueue the lock or send block AST.
+         */
+        cfs_time_t            l_last_activity;
 
         /**
          * Jiffies. Should be converted to time if needed.
 
         /**
          * Jiffies. Should be converted to time if needed.
@@ -790,13 +794,16 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 mask,
                       ...)
         __attribute__ ((format (printf, 4, 5)));
 
                       ...)
         __attribute__ ((format (printf, 4, 5)));
 
-#define LDLM_ERROR(lock, fmt, a...) do {                                \
+#define LDLM_DEBUG_LIMIT(mask, lock, fmt, a...) do {                    \
         static cfs_debug_limit_state_t _ldlm_cdls;                      \
         static cfs_debug_limit_state_t _ldlm_cdls;                      \
-        ldlm_lock_debug(&_ldlm_cdls, D_ERROR, lock,                     \
+        ldlm_lock_debug(&_ldlm_cdls, mask, lock,                        \
                         __FILE__, __FUNCTION__, __LINE__,               \
                         "### " fmt , ##a);                              \
 } while (0)
 
                         __FILE__, __FUNCTION__, __LINE__,               \
                         "### " fmt , ##a);                              \
 } while (0)
 
+#define LDLM_ERROR(lock, fmt, a...) LDLM_DEBUG_LIMIT(D_ERROR, lock, fmt, ## a)
+#define LDLM_WARN(lock, fmt, a...)  LDLM_DEBUG_LIMIT(D_WARNING, lock, fmt, ## a)
+
 #define LDLM_DEBUG(lock, fmt, a...)   do {                              \
         ldlm_lock_debug(NULL, D_DLMTRACE, lock,                         \
                         __FILE__, __FUNCTION__, __LINE__,               \
 #define LDLM_DEBUG(lock, fmt, a...)   do {                              \
         ldlm_lock_debug(NULL, D_DLMTRACE, lock,                         \
                         __FILE__, __FUNCTION__, __LINE__,               \
index 0a26b56..b3b36a8 100644 (file)
@@ -1216,7 +1216,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         struct ldlm_interval *node = NULL;
         ENTRY;
 
         struct ldlm_interval *node = NULL;
         ENTRY;
 
-        do_gettimeofday(&lock->l_enqueued_time);
+        lock->l_last_activity = cfs_time_current_sec();
         /* policies are not executed on the client or during replay */
         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
             && !local && ns->ns_policy) {
         /* policies are not executed on the client or during replay */
         if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
             && !local && ns->ns_policy) {
index ce66f5c..b56a42c 100644 (file)
@@ -336,7 +336,7 @@ repeat:
                 lock->l_resource->lr_namespace->ns_timeouts++;
                 LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                            "evicting client at %s ",
                 lock->l_resource->lr_namespace->ns_timeouts++;
                 LDLM_ERROR(lock, "lock callback timer expired after %lds: "
                            "evicting client at %s ",
-                           cfs_time_current_sec()- lock->l_enqueued_time.tv_sec,
+                           cfs_time_current_sec()- lock->l_last_activity,
                            libcfs_nid2str(
                                    lock->l_export->exp_connection->c_peer.nid));
 
                            libcfs_nid2str(
                                    lock->l_export->exp_connection->c_peer.nid));
 
@@ -795,7 +795,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         struct ldlm_cb_set_arg *arg = data;
         struct ldlm_request    *body;
         struct ptlrpc_request  *req;
         struct ldlm_cb_set_arg *arg = data;
         struct ldlm_request    *body;
         struct ptlrpc_request  *req;
-        struct timeval          granted_time;
         long                    total_enqueue_wait;
         int                     instant_cancel = 0;
         int                     rc = 0;
         long                    total_enqueue_wait;
         int                     instant_cancel = 0;
         int                     rc = 0;
@@ -804,14 +803,13 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         LASSERT(lock != NULL);
         LASSERT(data != NULL);
 
         LASSERT(lock != NULL);
         LASSERT(data != NULL);
 
-        do_gettimeofday(&granted_time);
-        total_enqueue_wait = cfs_timeval_sub(&granted_time,
-                                             &lock->l_enqueued_time, NULL);
+        total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
+                                          lock->l_last_activity);
 
 
-        if (total_enqueue_wait / ONE_MILLION > obd_timeout)
+        if (total_enqueue_wait > obd_timeout)
                 /* non-fatal with AT - change to LDLM_DEBUG? */
                 /* non-fatal with AT - change to LDLM_DEBUG? */
-                LDLM_ERROR(lock, "enqueue wait took %luus from "CFS_TIME_T,
-                           total_enqueue_wait, lock->l_enqueued_time.tv_sec);
+                LDLM_WARN(lock, "enqueue wait took %lus from "CFS_TIME_T,
+                          total_enqueue_wait, lock->l_last_activity);
 
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                     &RQF_LDLM_CP_CALLBACK);
 
         req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
                                     &RQF_LDLM_CP_CALLBACK);
@@ -848,13 +846,13 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 unlock_res_and_lock(lock);
         }
 
                 unlock_res_and_lock(lock);
         }
 
-        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+        LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
                    total_enqueue_wait);
 
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
         at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
                    total_enqueue_wait);
 
         /* Server-side enqueue wait time estimate, used in
             __ldlm_add_waiting_lock to set future enqueue timers */
         at_add(&lock->l_resource->lr_namespace->ns_at_estimate,
-               total_enqueue_wait / ONE_MILLION);
+               total_enqueue_wait);
 
         ptlrpc_request_set_replen(req);
 
 
         ptlrpc_request_set_replen(req);
 
@@ -1092,7 +1090,7 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
         if (!lock)
                 GOTO(out, rc = -ENOMEM);
 
         if (!lock)
                 GOTO(out, rc = -ENOMEM);
 
-        do_gettimeofday(&lock->l_enqueued_time);
+        lock->l_last_activity = cfs_time_current_sec();
         lock->l_remote_handle = dlm_req->lock_handle[0];
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
 
         lock->l_remote_handle = dlm_req->lock_handle[0];
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
 
@@ -1305,7 +1303,7 @@ int ldlm_handle_convert0(struct ptlrpc_request *req,
 
                 LDLM_DEBUG(lock, "server-side convert handler START");
 
 
                 LDLM_DEBUG(lock, "server-side convert handler START");
 
-                do_gettimeofday(&lock->l_enqueued_time);
+                lock->l_last_activity = cfs_time_current_sec();
                 res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
                                         &dlm_rep->lock_flags);
                 if (res) {
                 res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
                                         &dlm_rep->lock_flags);
                 if (res) {
index 6e11110..b6fb1d1 100644 (file)
@@ -80,9 +80,9 @@ int ldlm_expired_completion_wait(void *data)
                 LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                            CFS_DURATION_T"s ago); not entering recovery in "
                            "server code, just going back to sleep",
                 LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                            CFS_DURATION_T"s ago); not entering recovery in "
                            "server code, just going back to sleep",
-                           lock->l_enqueued_time.tv_sec,
+                           lock->l_last_activity,
                            cfs_time_sub(cfs_time_current_sec(),
                            cfs_time_sub(cfs_time_current_sec(),
-                           lock->l_enqueued_time.tv_sec));
+                           lock->l_last_activity));
                 if (cfs_time_after(cfs_time_current(), next_dump)) {
                         last_dump = next_dump;
                         next_dump = cfs_time_shift(300);
                 if (cfs_time_after(cfs_time_current(), next_dump)) {
                         last_dump = next_dump;
                         next_dump = cfs_time_shift(300);
@@ -99,9 +99,8 @@ int ldlm_expired_completion_wait(void *data)
         ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
         LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                   CFS_DURATION_T"s ago), entering recovery for %s@%s",
         ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
         LDLM_ERROR(lock, "lock timed out (enqueued at "CFS_TIME_T", "
                   CFS_DURATION_T"s ago), entering recovery for %s@%s",
-                  lock->l_enqueued_time.tv_sec,
-                  cfs_time_sub(cfs_time_current_sec(),
-                  lock->l_enqueued_time.tv_sec),
+                  lock->l_last_activity,
+                  cfs_time_sub(cfs_time_current_sec(), lock->l_last_activity),
                   obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
                   obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
@@ -136,7 +135,7 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
                 result = -EIO;
         } else {
                 delay = cfs_time_sub(cfs_time_current_sec(),
                 result = -EIO;
         } else {
                 delay = cfs_time_sub(cfs_time_current_sec(),
-                                     lock->l_enqueued_time.tv_sec);
+                                     lock->l_last_activity);
                 LDLM_DEBUG(lock, "client-side enqueue: granted after "
                            CFS_DURATION_T"s", delay);
 
                 LDLM_DEBUG(lock, "client-side enqueue: granted after "
                            CFS_DURATION_T"s", delay);
 
index 1ae7308..cb520bb 100644 (file)
@@ -1605,6 +1605,11 @@ static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
         end = (nb[ioo->ioo_bufcnt - 1].offset +
                nb[ioo->ioo_bufcnt - 1].len - 1) | ~CFS_PAGE_MASK;
 
         end = (nb[ioo->ioo_bufcnt - 1].offset +
                nb[ioo->ioo_bufcnt - 1].len - 1) | ~CFS_PAGE_MASK;
 
+        LASSERT(lock->l_resource != NULL);
+        if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_gr, 
+                             &lock->l_resource->lr_name))
+                RETURN(0);
+
         if (!(lock->l_granted_mode & mode))
                 RETURN(0);
 
         if (!(lock->l_granted_mode & mode))
                 RETURN(0);