Whamcloud - gitweb
LU-4269 ldlm: Hold lock when clearing flag
[fs/lustre-release.git] / lustre / ldlm / ldlm_request.c
index aa30599..a90d452 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -102,9 +102,6 @@ int ldlm_expired_completion_wait(void *data)
         if (lock->l_conn_export == NULL) {
                 static cfs_time_t next_dump = 0, last_dump = 0;
 
-                if (ptlrpc_check_suspend())
-                        RETURN(0);
-
                LCONSOLE_WARN("lock timed out (enqueued at "CFS_TIME_T", "
                              CFS_DURATION_T"s ago)\n",
                              lock->l_last_activity,
@@ -161,24 +158,24 @@ EXPORT_SYMBOL(ldlm_get_enq_timeout);
  */
 static int ldlm_completion_tail(struct ldlm_lock *lock)
 {
-        long delay;
-        int  result;
-
-        if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
-                LDLM_DEBUG(lock, "client-side enqueue: destroyed");
-                result = -EIO;
-        } else {
-                delay = cfs_time_sub(cfs_time_current_sec(),
-                                     lock->l_last_activity);
-                LDLM_DEBUG(lock, "client-side enqueue: granted after "
-                           CFS_DURATION_T"s", delay);
-
-                /* Update our time estimate */
-                at_measured(ldlm_lock_to_ns_at(lock),
-                            delay);
-                result = 0;
-        }
-        return result;
+       long delay;
+       int  result;
+
+       if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
+               LDLM_DEBUG(lock, "client-side enqueue: destroyed");
+               result = -EIO;
+       } else {
+               delay = cfs_time_sub(cfs_time_current_sec(),
+                                    lock->l_last_activity);
+               LDLM_DEBUG(lock, "client-side enqueue: granted after "
+                          CFS_DURATION_T"s", delay);
+
+               /* Update our time estimate */
+               at_measured(ldlm_lock_to_ns_at(lock),
+                           delay);
+               result = 0;
+       }
+       return result;
 }
 
 /**
@@ -188,23 +185,23 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
  */
 int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data)
 {
-        ENTRY;
+       ENTRY;
 
-        if (flags == LDLM_FL_WAIT_NOREPROC) {
-                LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
-                RETURN(0);
-        }
+       if (flags == LDLM_FL_WAIT_NOREPROC) {
+               LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
+               RETURN(0);
+       }
 
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV))) {
-                cfs_waitq_signal(&lock->l_waitq);
-                RETURN(ldlm_completion_tail(lock));
-        }
+       if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                      LDLM_FL_BLOCK_CONV))) {
+               wake_up(&lock->l_waitq);
+               RETURN(ldlm_completion_tail(lock));
+       }
 
-        LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
-                   "going forward");
-        ldlm_reprocess_all(lock->l_resource);
-        RETURN(0);
+       LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+                  "going forward");
+       ldlm_reprocess_all(lock->l_resource);
+       RETURN(0);
 }
 EXPORT_SYMBOL(ldlm_completion_ast_async);
 
@@ -245,11 +242,11 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                 goto noreproc;
         }
 
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV))) {
-                cfs_waitq_signal(&lock->l_waitq);
-                RETURN(0);
-        }
+       if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                      LDLM_FL_BLOCK_CONV))) {
+               wake_up(&lock->l_waitq);
+               RETURN(0);
+       }
 
         LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
                    "sleeping");
@@ -270,7 +267,7 @@ noreproc:
 
         lwd.lwd_lock = lock;
 
-        if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+       if (ldlm_is_no_timeout(lock)) {
                 LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
                 lwi = LWI_INTR(interrupted_completion_wait, &lwd);
         } else {
@@ -288,7 +285,7 @@ noreproc:
         if (ns_is_client(ldlm_lock_to_ns(lock)) &&
             OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
                                  OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
-                lock->l_flags |= LDLM_FL_FAIL_LOC;
+               ldlm_set_fail_loc(lock);
                 rc = -EINTR;
         } else {
                 /* Go to sleep until the lock is granted or cancelled. */
@@ -322,7 +319,7 @@ int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock)
         int do_ast;
         ENTRY;
 
-        lock->l_flags |= LDLM_FL_CBPENDING;
+       ldlm_set_cbpending(lock);
         do_ast = (!lock->l_readers && !lock->l_writers);
         unlock_res_and_lock(lock);
 
@@ -446,16 +443,21 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
         /* NB: we don't have any lock now (lock_res_and_lock)
          * because it's a new lock */
         ldlm_lock_addref_internal_nolock(lock, mode);
-        lock->l_flags |= LDLM_FL_LOCAL;
+       ldlm_set_local(lock);
         if (*flags & LDLM_FL_ATOMIC_CB)
-                lock->l_flags |= LDLM_FL_ATOMIC_CB;
+               ldlm_set_atomic_cb(lock);
 
         if (policy != NULL)
                 lock->l_policy_data = *policy;
         if (client_cookie != NULL)
                 lock->l_client_cookie = *client_cookie;
-        if (type == LDLM_EXTENT)
-                lock->l_req_extent = policy->l_extent;
+       if (type == LDLM_EXTENT) {
+               /* extent lock without policy is a bug */
+               if (policy == NULL)
+                       LBUG();
+
+               lock->l_req_extent = policy->l_extent;
+       }
 
         err = ldlm_lock_enqueue(ns, &lock, policy, flags);
         if (unlikely(err != ELDLM_OK))
@@ -485,13 +487,13 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
         lock_res_and_lock(lock);
         /* Check that lock is not granted or failed, we might race. */
         if ((lock->l_req_mode != lock->l_granted_mode) &&
-            !(lock->l_flags & LDLM_FL_FAILED)) {
-                /* Make sure that this lock will not be found by raced
-                 * bl_ast and -EINVAL reply is sent to server anyways.
-                 * bug 17645 */
-                lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
-                need_cancel = 1;
+           !ldlm_is_failed(lock)) {
+               /* Make sure that this lock will not be found by a racing
+                * bl_ast and that an -EINVAL reply is sent to the server
+                * anyway. b=17645 */
+               lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
+                                LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
+               need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
@@ -602,7 +604,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
        *flags = ldlm_flags_from_wire(reply->lock_flags);
        lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
-                                             LDLM_INHERIT_FLAGS);
+                                             LDLM_FL_INHERIT_MASK);
         /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
          * to wait with no timeout as well */
        lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
@@ -624,25 +626,19 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         lock->l_req_mode = newmode;
                 }
 
-                if (memcmp(reply->lock_desc.l_resource.lr_name.name,
-                          lock->l_resource->lr_name.name,
-                          sizeof(struct ldlm_res_id))) {
-                        CDEBUG(D_INFO, "remote intent success, locking "
-                                        "(%ld,%ld,%ld) instead of "
-                                        "(%ld,%ld,%ld)\n",
-                              (long)reply->lock_desc.l_resource.lr_name.name[0],
-                              (long)reply->lock_desc.l_resource.lr_name.name[1],
-                              (long)reply->lock_desc.l_resource.lr_name.name[2],
-                              (long)lock->l_resource->lr_name.name[0],
-                              (long)lock->l_resource->lr_name.name[1],
-                              (long)lock->l_resource->lr_name.name[2]);
-
-                        rc = ldlm_lock_change_resource(ns, lock,
-                                        &reply->lock_desc.l_resource.lr_name);
-                        if (rc || lock->l_resource == NULL)
-                                GOTO(cleanup, rc = -ENOMEM);
-                        LDLM_DEBUG(lock, "client-side enqueue, new resource");
-                }
+               if (!ldlm_res_eq(&reply->lock_desc.l_resource.lr_name,
+                                &lock->l_resource->lr_name)) {
+                       CDEBUG(D_INFO, "remote intent success, locking "DLDLMRES
+                                      " instead of "DLDLMRES"\n",
+                              PLDLMRES(&reply->lock_desc.l_resource),
+                              PLDLMRES(lock->l_resource));
+
+                       rc = ldlm_lock_change_resource(ns, lock,
+                                       &reply->lock_desc.l_resource.lr_name);
+                       if (rc || lock->l_resource == NULL)
+                               GOTO(cleanup, rc = -ENOMEM);
+                       LDLM_DEBUG(lock, "client-side enqueue, new resource");
+               }
                if (with_policy)
                        if (!(type == LDLM_IBITS &&
                              !(exp_connect_flags(exp) & OBD_CONNECT_IBITS)))
@@ -661,7 +657,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
              * bug 7311). */
             (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
                 lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING |  LDLM_FL_BL_AST;
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
                 unlock_res_and_lock(lock);
                 LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
         }
@@ -723,7 +719,7 @@ static inline int ldlm_req_handles_avail(int req_size, int off)
 {
         int avail;
 
-        avail = min_t(int, LDLM_MAXREQSIZE, CFS_PAGE_SIZE - 512) - req_size;
+       avail = min_t(int, LDLM_MAXREQSIZE, PAGE_CACHE_SIZE - 512) - req_size;
         if (likely(avail >= 0))
                 avail /= (int)sizeof(struct lustre_handle);
         else
@@ -890,17 +886,16 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                 LDLM_DEBUG(lock, "client-side enqueue START");
                 LASSERT(exp == lock->l_conn_export);
         } else {
-                const struct ldlm_callback_suite cbs = {
-                        .lcs_completion = einfo->ei_cb_cp,
-                        .lcs_blocking   = einfo->ei_cb_bl,
-                        .lcs_glimpse    = einfo->ei_cb_gl,
-                        .lcs_weigh      = einfo->ei_cb_wg
-                };
-                lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
-                                        einfo->ei_mode, &cbs, einfo->ei_cbdata,
+               const struct ldlm_callback_suite cbs = {
+                       .lcs_completion = einfo->ei_cb_cp,
+                       .lcs_blocking   = einfo->ei_cb_bl,
+                       .lcs_glimpse    = einfo->ei_cb_gl
+               };
+               lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
+                                       einfo->ei_mode, &cbs, einfo->ei_cbdata,
                                        lvb_len, lvb_type);
-                if (lock == NULL)
-                        RETURN(-ENOMEM);
+               if (lock == NULL)
+                       RETURN(-ENOMEM);
                 /* for the local lock, add the reference */
                 ldlm_lock_addref_internal(lock, einfo->ei_mode);
                 ldlm_lock2handle(lock, lockh);
@@ -920,8 +915,13 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                                 lock->l_policy_data = *policy;
                 }
 
-                if (einfo->ei_type == LDLM_EXTENT)
-                        lock->l_req_extent = policy->l_extent;
+               if (einfo->ei_type == LDLM_EXTENT) {
+                       /* extent lock without policy is a bug */
+                       if (policy == NULL)
+                               LBUG();
+
+                       lock->l_req_extent = policy->l_extent;
+               }
                 LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n",
                           *flags);
         }
@@ -929,7 +929,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
        lock->l_conn_export = exp;
        lock->l_export = NULL;
        lock->l_blocking_ast = einfo->ei_cb_bl;
-       lock->l_flags |= (*flags & LDLM_FL_NO_LRU);
+       lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
 
         /* lock not sent to server yet */
 
@@ -1027,7 +1027,7 @@ static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
                 ldlm_reprocess_all(res);
                 rc = 0;
         } else {
-                rc = EDEADLOCK;
+               rc = LUSTRE_EDEADLK;
         }
         LDLM_DEBUG(lock, "client-side local convert handler END");
         LDLM_LOCK_PUT(lock);
@@ -1099,7 +1099,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, __u32 *flags)
                                 GOTO(out, rc);
                 }
         } else {
-                rc = EDEADLOCK;
+               rc = LUSTRE_EDEADLK;
         }
         EXIT;
  out:
@@ -1127,13 +1127,13 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
                 LDLM_DEBUG(lock, "client-side cancel");
                 /* Set this flag to prevent others from getting new references*/
                 lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING;
+               ldlm_set_cbpending(lock);
                local_only = !!(lock->l_flags &
                                (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
-                ldlm_cancel_callback(lock);
-                rc = (lock->l_flags & LDLM_FL_BL_AST) ?
-                        LDLM_FL_BL_AST : LDLM_FL_CANCELING;
-                unlock_res_and_lock(lock);
+               ldlm_cancel_callback(lock);
+               rc = (ldlm_is_bl_ast(lock)) ?
+                       LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+               unlock_res_and_lock(lock);
 
                 if (local_only) {
                         CDEBUG(D_DLMTRACE, "not sending request (at caller's "
@@ -1252,7 +1252,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp, cfs_list_t *cancels,
                 } else {
                         rc = ptlrpc_queue_wait(req);
                 }
-                if (rc == ESTALE) {
+               if (rc == LUSTRE_ESTALE) {
                         CDEBUG(D_DLMTRACE, "client/server (nid %s) "
                                "out of sync -- not fatal\n",
                                libcfs_nid2str(req->rq_import->
@@ -1360,10 +1360,10 @@ int ldlm_cli_cancel(struct lustre_handle *lockh,
         }
 
         rc = ldlm_cli_cancel_local(lock);
-       if (rc == LDLM_FL_LOCAL_ONLY) {
-                LDLM_LOCK_RELEASE(lock);
+       if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
+               LDLM_LOCK_RELEASE(lock);
                RETURN(0);
-        }
+       }
        /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
         * RPC which goes to canceld portal, so we can cancel other LRU locks
         * here and send them all as one LDLM_CANCEL RPC. */
@@ -1448,25 +1448,27 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
                                                     int unused, int added,
                                                     int count)
 {
-        ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
-        ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
-        lock_res_and_lock(lock);
-
-        /* don't check added & count since we want to process all locks
-         * from unused list */
-        switch (lock->l_resource->lr_type) {
-                case LDLM_EXTENT:
-                case LDLM_IBITS:
-                        if (cb && cb(lock))
-                                break;
-                default:
-                        result = LDLM_POLICY_SKIP_LOCK;
-                        lock->l_flags |= LDLM_FL_SKIPPED;
-                        break;
-        }
+       ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
+       ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
+
+       /* Don't check added & count since we want to process all locks
+        * from the unused list.
+        * It's fine to access lock->l_resource without taking the lock:
+        * the lock is already granted, so l_resource won't change. */
+       switch (lock->l_resource->lr_type) {
+               case LDLM_EXTENT:
+               case LDLM_IBITS:
+                       if (cb && cb(lock))
+                               break;
+               default:
+                       result = LDLM_POLICY_SKIP_LOCK;
+                       lock_res_and_lock(lock);
+                       ldlm_set_skipped(lock);
+                       unlock_res_and_lock(lock);
+                       break;
+       }
 
-        unlock_res_and_lock(lock);
-        RETURN(result);
+       RETURN(result);
 }
 
 /**
@@ -1661,26 +1663,26 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                 cfs_list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
                                             l_lru) {
                         /* No locks which got blocking requests. */
-                        LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+                       LASSERT(!ldlm_is_bl_ast(lock));
 
-                        if (flags & LDLM_CANCEL_NO_WAIT &&
-                            lock->l_flags & LDLM_FL_SKIPPED)
-                                /* already processed */
-                                continue;
+                       if (flags & LDLM_CANCEL_NO_WAIT &&
+                           ldlm_is_skipped(lock))
+                               /* already processed */
+                               continue;
 
                        /* Somebody is already doing CANCEL. No need for this
                         * lock in LRU, do not traverse it again. */
-                        if (!(lock->l_flags & LDLM_FL_CANCELING))
+                       if (!ldlm_is_canceling(lock))
                                 break;
 
-                        ldlm_lock_remove_from_lru_nolock(lock);
-                }
-                if (&lock->l_lru == &ns->ns_unused_list)
-                        break;
+                       ldlm_lock_remove_from_lru_nolock(lock);
+               }
+               if (&lock->l_lru == &ns->ns_unused_list)
+                       break;
 
-                LDLM_LOCK_GET(lock);
+               LDLM_LOCK_GET(lock);
                spin_unlock(&ns->ns_lock);
-                lu_ref_add(&lock->l_reference, __FUNCTION__, cfs_current());
+               lu_ref_add(&lock->l_reference, __FUNCTION__, current);
 
                /* Pass the lock through the policy filter and see if it
                 * should stay in LRU.
@@ -1695,64 +1697,63 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                 * old locks, but additionally choose them by
                 * their weight. Big extent locks will stay in
                 * the cache. */
-                result = pf(ns, lock, unused, added, count);
-                if (result == LDLM_POLICY_KEEP_LOCK) {
-                        lu_ref_del(&lock->l_reference,
-                                   __FUNCTION__, cfs_current());
-                        LDLM_LOCK_RELEASE(lock);
+               result = pf(ns, lock, unused, added, count);
+               if (result == LDLM_POLICY_KEEP_LOCK) {
+                       lu_ref_del(&lock->l_reference,
+                                  __FUNCTION__, current);
+                       LDLM_LOCK_RELEASE(lock);
                        spin_lock(&ns->ns_lock);
                        break;
                }
                if (result == LDLM_POLICY_SKIP_LOCK) {
                        lu_ref_del(&lock->l_reference,
-                                  __func__, cfs_current());
+                                  __func__, current);
                        LDLM_LOCK_RELEASE(lock);
                        spin_lock(&ns->ns_lock);
-                        continue;
-                }
+                       continue;
+               }
 
-                lock_res_and_lock(lock);
-                /* Check flags again under the lock. */
-                if ((lock->l_flags & LDLM_FL_CANCELING) ||
-                    (ldlm_lock_remove_from_lru(lock) == 0)) {
+               lock_res_and_lock(lock);
+               /* Check flags again under the lock. */
+               if (ldlm_is_canceling(lock) ||
+                   (ldlm_lock_remove_from_lru(lock) == 0)) {
                        /* Another thread is removing lock from LRU, or
                         * somebody is already doing CANCEL, or there
                         * is a blocking request which will send cancel
                         * by itself, or the lock is no longer unused. */
-                        unlock_res_and_lock(lock);
-                        lu_ref_del(&lock->l_reference,
-                                   __FUNCTION__, cfs_current());
-                        LDLM_LOCK_RELEASE(lock);
+                       unlock_res_and_lock(lock);
+                       lu_ref_del(&lock->l_reference, __FUNCTION__, current);
+                       LDLM_LOCK_RELEASE(lock);
                        spin_lock(&ns->ns_lock);
-                        continue;
-                }
-                LASSERT(!lock->l_readers && !lock->l_writers);
-
-                /* If we have chosen to cancel this lock voluntarily, we
-                 * better send cancel notification to server, so that it
-                 * frees appropriate state. This might lead to a race
-                 * where while we are doing cancel here, server is also
-                 * silently cancelling this lock. */
-                lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
-
-                /* Setting the CBPENDING flag is a little misleading,
-                 * but prevents an important race; namely, once
-                 * CBPENDING is set, the lock can accumulate no more
-                 * readers/writers. Since readers and writers are
-                 * already zero here, ldlm_lock_decref() won't see
-                 * this flag and call l_blocking_ast */
-                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
-
-                /* We can't re-add to l_lru as it confuses the
-                 * refcounting in ldlm_lock_remove_from_lru() if an AST
-                 * arrives after we drop lr_lock below. We use l_bl_ast
-                 * and can't use l_pending_chain as it is used both on
-                 * server and client nevertheless bug 5666 says it is
-                 * used only on server */
-                LASSERT(cfs_list_empty(&lock->l_bl_ast));
-                cfs_list_add(&lock->l_bl_ast, cancels);
-                unlock_res_and_lock(lock);
-                lu_ref_del(&lock->l_reference, __FUNCTION__, cfs_current());
+                       continue;
+               }
+               LASSERT(!lock->l_readers && !lock->l_writers);
+
+               /* If we have chosen to cancel this lock voluntarily, we
+                * better send cancel notification to server, so that it
+                * frees appropriate state. This might lead to a race
+                * where while we are doing cancel here, server is also
+                * silently cancelling this lock. */
+               ldlm_clear_cancel_on_block(lock);
+
+               /* Setting the CBPENDING flag is a little misleading,
+                * but prevents an important race; namely, once
+                * CBPENDING is set, the lock can accumulate no more
+                * readers/writers. Since readers and writers are
+                * already zero here, ldlm_lock_decref() won't see
+                * this flag and call l_blocking_ast */
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
+
+               /* We can't re-add to l_lru as it confuses the
+                * refcounting in ldlm_lock_remove_from_lru() if an AST
+                * arrives after we drop lr_lock below. We use l_bl_ast
+                * and can't use l_pending_chain as it is used both on
+                * server and client nevertheless bug 5666 says it is
+                * used only on server */
+               LASSERT(cfs_list_empty(&lock->l_bl_ast));
+               cfs_list_add(&lock->l_bl_ast, cancels);
+               unlock_res_and_lock(lock);
+               lu_ref_del(&lock->l_reference, __FUNCTION__, current);
                spin_lock(&ns->ns_lock);
                added++;
                unused--;
@@ -1807,10 +1808,10 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
  * list.
  */
 int ldlm_cancel_resource_local(struct ldlm_resource *res,
-                               cfs_list_t *cancels,
-                               ldlm_policy_data_t *policy,
-                               ldlm_mode_t mode, int lock_flags,
-                               ldlm_cancel_flags_t cancel_flags, void *opaque)
+                              cfs_list_t *cancels,
+                              ldlm_policy_data_t *policy,
+                              ldlm_mode_t mode, __u64 lock_flags,
+                              ldlm_cancel_flags_t cancel_flags, void *opaque)
 {
         struct ldlm_lock *lock;
         int count = 0;
@@ -1830,9 +1831,8 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
 
                /* If somebody is already doing CANCEL, or blocking AST came,
                 * skip this lock. */
-                if (lock->l_flags & LDLM_FL_BL_AST ||
-                    lock->l_flags & LDLM_FL_CANCELING)
-                        continue;
+               if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
+                       continue;
 
                 if (lockmode_compat(lock->l_granted_mode, mode))
                         continue;
@@ -1844,9 +1844,9 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
                       policy->l_inodebits.bits))
                         continue;
 
-                /* See CBPENDING comment in ldlm_cancel_lru */
-                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
-                                 lock_flags;
+               /* See CBPENDING comment in ldlm_cancel_lru */
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
+                                lock_flags;
 
                 LASSERT(cfs_list_empty(&lock->l_bl_ast));
                 cfs_list_add(&lock->l_bl_ast, cancels);
@@ -1947,7 +1947,8 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                                            0, flags | LCF_BL_AST, opaque);
         rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags);
         if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel_unused_resource: %d\n", rc);
+               CERROR("canceling unused lock "DLDLMRES": rc = %d\n",
+                      PLDLMRES(res), rc);
 
         LDLM_RESOURCE_DELREF(res);
         ldlm_resource_putref(res);
@@ -1961,21 +1962,16 @@ struct ldlm_cli_cancel_arg {
 };
 
 static int ldlm_cli_hash_cancel_unused(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                                       cfs_hlist_node_t *hnode, void *arg)
+                                      cfs_hlist_node_t *hnode, void *arg)
 {
-        struct ldlm_resource           *res = cfs_hash_object(hs, hnode);
-        struct ldlm_cli_cancel_arg     *lc = arg;
-        int                             rc;
-
-        rc = ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
-                                             NULL, LCK_MINMODE,
-                                             lc->lc_flags, lc->lc_opaque);
-        if (rc != 0) {
-                CERROR("ldlm_cli_cancel_unused ("LPU64"): %d\n",
-                       res->lr_name.name[0], rc);
-        }
-        /* must return 0 for hash iteration */
-        return 0;
+       struct ldlm_resource           *res = cfs_hash_object(hs, hnode);
+       struct ldlm_cli_cancel_arg     *lc = arg;
+
+       ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
+                                       NULL, LCK_MINMODE, lc->lc_flags,
+                                       lc->lc_opaque);
+       /* must return 0 for hash iteration */
+       return 0;
 }
 
 /**
@@ -2137,18 +2133,17 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
 }
 
 static int replay_lock_interpret(const struct lu_env *env,
-                                 struct ptlrpc_request *req,
-                                 struct ldlm_async_args *aa, int rc)
+                                struct ptlrpc_request *req,
+                                struct ldlm_async_args *aa, int rc)
 {
-        struct ldlm_lock     *lock;
-        struct ldlm_reply    *reply;
-        struct obd_export    *exp;
-
-        ENTRY;
-        cfs_atomic_dec(&req->rq_import->imp_replay_inflight);
-        if (rc != ELDLM_OK)
-                GOTO(out, rc);
+       struct ldlm_lock     *lock;
+       struct ldlm_reply    *reply;
+       struct obd_export    *exp;
 
+       ENTRY;
+       atomic_dec(&req->rq_import->imp_replay_inflight);
+       if (rc != ELDLM_OK)
+               GOTO(out, rc);
 
         reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
         if (reply == NULL)
@@ -2198,7 +2193,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
 
 
         /* Bug 11974: Do not replay a lock which is actively being canceled */
-        if (lock->l_flags & LDLM_FL_CANCELING) {
+       if (ldlm_is_canceling(lock)) {
                 LDLM_DEBUG(lock, "Not replaying canceled lock:");
                 RETURN(0);
         }
@@ -2206,7 +2201,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
         /* If this is reply-less callback lock, we cannot replay it, since
          * server might have long dropped it, but notification of that event was
          * lost by network. (and server granted conflicting lock already) */
-        if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+       if (ldlm_is_cancel_on_block(lock)) {
                 LDLM_DEBUG(lock, "Not replaying reply-less lock:");
                 ldlm_lock_cancel(lock);
                 RETURN(0);
@@ -2257,18 +2252,18 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
          * also, we mark the request to be put on a dedicated
          * queue to be processed after all request replayes.
          * bug 6063 */
-        lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
+       lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
 
-        LDLM_DEBUG(lock, "replaying lock:");
+       LDLM_DEBUG(lock, "replaying lock:");
 
-        cfs_atomic_inc(&req->rq_import->imp_replay_inflight);
-        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
-        aa = ptlrpc_req_async_args(req);
-        aa->lock_handle = body->lock_handle[0];
-        req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
-        ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+       atomic_inc(&req->rq_import->imp_replay_inflight);
+       CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+       aa = ptlrpc_req_async_args(req);
+       aa->lock_handle = body->lock_handle[0];
+       req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
+       ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
 
-        RETURN(0);
+       RETURN(0);
 }
 
 /**
@@ -2302,39 +2297,39 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
 
 int ldlm_replay_locks(struct obd_import *imp)
 {
-        struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
-        CFS_LIST_HEAD(list);
-        struct ldlm_lock *lock, *next;
-        int rc = 0;
+       struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
+       CFS_LIST_HEAD(list);
+       struct ldlm_lock *lock, *next;
+       int rc = 0;
 
-        ENTRY;
+       ENTRY;
 
-        LASSERT(cfs_atomic_read(&imp->imp_replay_inflight) == 0);
+       LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
 
-        /* don't replay locks if import failed recovery */
-        if (imp->imp_vbr_failed)
-                RETURN(0);
+       /* don't replay locks if import failed recovery */
+       if (imp->imp_vbr_failed)
+               RETURN(0);
 
-        /* ensure this doesn't fall to 0 before all have been queued */
-        cfs_atomic_inc(&imp->imp_replay_inflight);
+       /* ensure this doesn't fall to 0 before all have been queued */
+       atomic_inc(&imp->imp_replay_inflight);
 
-        if (ldlm_cancel_unused_locks_before_replay)
-                ldlm_cancel_unused_locks_for_replay(ns);
+       if (ldlm_cancel_unused_locks_before_replay)
+               ldlm_cancel_unused_locks_for_replay(ns);
 
-        ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
+       ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
 
-        cfs_list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
-                cfs_list_del_init(&lock->l_pending_chain);
-                if (rc) {
-                        LDLM_LOCK_RELEASE(lock);
-                        continue; /* or try to do the rest? */
-                }
-                rc = replay_one_lock(imp, lock);
-                LDLM_LOCK_RELEASE(lock);
-        }
+       cfs_list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
+               cfs_list_del_init(&lock->l_pending_chain);
+               if (rc) {
+                       LDLM_LOCK_RELEASE(lock);
+                       continue; /* or try to do the rest? */
+               }
+               rc = replay_one_lock(imp, lock);
+               LDLM_LOCK_RELEASE(lock);
+       }
 
-        cfs_atomic_dec(&imp->imp_replay_inflight);
+       atomic_dec(&imp->imp_replay_inflight);
 
-        RETURN(rc);
+       RETURN(rc);
 }
 EXPORT_SYMBOL(ldlm_replay_locks);