LU-4269 ldlm: Hold lock when clearing flag

[fs/lustre-release.git] / lustre / ldlm / ldlm_request.c
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c

index aa30599..a90d452 100644 (file)
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -27,7 +27,7 @@
   * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
   *
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -102,9 +102,6 @@ int ldlm_expired_completion_wait(void *data)
          if (lock->l_conn_export == NULL) {
                  static cfs_time_t next_dump = 0, last_dump = 0;
  
-                if (ptlrpc_check_suspend())
-                        RETURN(0);
-
                 LCONSOLE_WARN("lock timed out (enqueued at "CFS_TIME_T", "
                               CFS_DURATION_T"s ago)\n",
                               lock->l_last_activity,
@@ -161,24 +158,24 @@ EXPORT_SYMBOL(ldlm_get_enq_timeout);
   */
  static int ldlm_completion_tail(struct ldlm_lock *lock)
  {
-        long delay;
-        int  result;
-
-        if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
-                LDLM_DEBUG(lock, "client-side enqueue: destroyed");
-                result = -EIO;
-        } else {
-                delay = cfs_time_sub(cfs_time_current_sec(),
-                                     lock->l_last_activity);
-                LDLM_DEBUG(lock, "client-side enqueue: granted after "
-                           CFS_DURATION_T"s", delay);
-
-                /* Update our time estimate */
-                at_measured(ldlm_lock_to_ns_at(lock),
-                            delay);
-                result = 0;
-        }
-        return result;
+       long delay;
+       int  result;
+
+       if (ldlm_is_destroyed(lock) || ldlm_is_failed(lock)) {
+               LDLM_DEBUG(lock, "client-side enqueue: destroyed");
+               result = -EIO;
+       } else {
+               delay = cfs_time_sub(cfs_time_current_sec(),
+                                    lock->l_last_activity);
+               LDLM_DEBUG(lock, "client-side enqueue: granted after "
+                          CFS_DURATION_T"s", delay);
+
+               /* Update our time estimate */
+               at_measured(ldlm_lock_to_ns_at(lock),
+                           delay);
+               result = 0;
+       }
+       return result;
  }
  
  /**
@@ -188,23 +185,23 @@ static int ldlm_completion_tail(struct ldlm_lock *lock)
   */
  int ldlm_completion_ast_async(struct ldlm_lock *lock, __u64 flags, void *data)
  {
-        ENTRY;
+       ENTRY;
  
-        if (flags == LDLM_FL_WAIT_NOREPROC) {
-                LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
-                RETURN(0);
-        }
+       if (flags == LDLM_FL_WAIT_NOREPROC) {
+               LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock");
+               RETURN(0);
+       }
  
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV))) {
-                cfs_waitq_signal(&lock->l_waitq);
-                RETURN(ldlm_completion_tail(lock));
-        }
+       if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                      LDLM_FL_BLOCK_CONV))) {
+               wake_up(&lock->l_waitq);
+               RETURN(ldlm_completion_tail(lock));
+       }
  
-        LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
-                   "going forward");
-        ldlm_reprocess_all(lock->l_resource);
-        RETURN(0);
+       LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
+                  "going forward");
+       ldlm_reprocess_all(lock->l_resource);
+       RETURN(0);
  }
  EXPORT_SYMBOL(ldlm_completion_ast_async);
  
@@ -245,11 +242,11 @@ int ldlm_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                  goto noreproc;
          }
  
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV))) {
-                cfs_waitq_signal(&lock->l_waitq);
-                RETURN(0);
-        }
+       if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                      LDLM_FL_BLOCK_CONV))) {
+               wake_up(&lock->l_waitq);
+               RETURN(0);
+       }
  
          LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
                     "sleeping");
@@ -270,7 +267,7 @@ noreproc:
  
          lwd.lwd_lock = lock;
  
-        if (lock->l_flags & LDLM_FL_NO_TIMEOUT) {
+       if (ldlm_is_no_timeout(lock)) {
                  LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
                  lwi = LWI_INTR(interrupted_completion_wait, &lwd);
          } else {
@@ -288,7 +285,7 @@ noreproc:
          if (ns_is_client(ldlm_lock_to_ns(lock)) &&
              OBD_FAIL_CHECK_RESET(OBD_FAIL_LDLM_INTR_CP_AST,
                                   OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE)) {
-                lock->l_flags |= LDLM_FL_FAIL_LOC;
+               ldlm_set_fail_loc(lock);
                  rc = -EINTR;
          } else {
                  /* Go to sleep until the lock is granted or cancelled. */
@@ -322,7 +319,7 @@ int ldlm_blocking_ast_nocheck(struct ldlm_lock *lock)
          int do_ast;
          ENTRY;
  
-        lock->l_flags |= LDLM_FL_CBPENDING;
+       ldlm_set_cbpending(lock);
          do_ast = (!lock->l_readers && !lock->l_writers);
          unlock_res_and_lock(lock);
  
@@ -446,16 +443,21 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
          /* NB: we don't have any lock now (lock_res_and_lock)
           * because it's a new lock */
          ldlm_lock_addref_internal_nolock(lock, mode);
-        lock->l_flags |= LDLM_FL_LOCAL;
+       ldlm_set_local(lock);
          if (*flags & LDLM_FL_ATOMIC_CB)
-                lock->l_flags |= LDLM_FL_ATOMIC_CB;
+               ldlm_set_atomic_cb(lock);
  
          if (policy != NULL)
                  lock->l_policy_data = *policy;
          if (client_cookie != NULL)
                  lock->l_client_cookie = *client_cookie;
-        if (type == LDLM_EXTENT)
-                lock->l_req_extent = policy->l_extent;
+       if (type == LDLM_EXTENT) {
+               /* extent lock without policy is a bug */
+               if (policy == NULL)
+                       LBUG();
+
+               lock->l_req_extent = policy->l_extent;
+       }
  
          err = ldlm_lock_enqueue(ns, &lock, policy, flags);
          if (unlikely(err != ELDLM_OK))
@@ -485,13 +487,13 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
          lock_res_and_lock(lock);
          /* Check that lock is not granted or failed, we might race. */
          if ((lock->l_req_mode != lock->l_granted_mode) &&
-            !(lock->l_flags & LDLM_FL_FAILED)) {
-                /* Make sure that this lock will not be found by raced
-                 * bl_ast and -EINVAL reply is sent to server anyways.
-                 * bug 17645 */
-                lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
-                need_cancel = 1;
+           !ldlm_is_failed(lock)) {
+               /* Make sure that this lock will not be found by a racing
+                * bl_ast and that an -EINVAL reply is sent to the server
+                * anyway. b=17645 */
+               lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
+                                LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
+               need_cancel = 1;
          }
          unlock_res_and_lock(lock);
  
@@ -602,7 +604,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
  
         *flags = ldlm_flags_from_wire(reply->lock_flags);
         lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
-                                             LDLM_INHERIT_FLAGS);
+                                             LDLM_FL_INHERIT_MASK);
          /* move NO_TIMEOUT flag to the lock to force ldlm_lock_match()
           * to wait with no timeout as well */
         lock->l_flags |= ldlm_flags_from_wire(reply->lock_flags &
@@ -624,25 +626,19 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                          lock->l_req_mode = newmode;
                  }
  
-                if (memcmp(reply->lock_desc.l_resource.lr_name.name,
-                          lock->l_resource->lr_name.name,
-                          sizeof(struct ldlm_res_id))) {
-                        CDEBUG(D_INFO, "remote intent success, locking "
-                                        "(%ld,%ld,%ld) instead of "
-                                        "(%ld,%ld,%ld)\n",
-                              (long)reply->lock_desc.l_resource.lr_name.name[0],
-                              (long)reply->lock_desc.l_resource.lr_name.name[1],
-                              (long)reply->lock_desc.l_resource.lr_name.name[2],
-                              (long)lock->l_resource->lr_name.name[0],
-                              (long)lock->l_resource->lr_name.name[1],
-                              (long)lock->l_resource->lr_name.name[2]);
-
-                        rc = ldlm_lock_change_resource(ns, lock,
-                                        &reply->lock_desc.l_resource.lr_name);
-                        if (rc || lock->l_resource == NULL)
-                                GOTO(cleanup, rc = -ENOMEM);
-                        LDLM_DEBUG(lock, "client-side enqueue, new resource");
-                }
+               if (!ldlm_res_eq(&reply->lock_desc.l_resource.lr_name,
+                                &lock->l_resource->lr_name)) {
+                       CDEBUG(D_INFO, "remote intent success, locking "DLDLMRES
+                                      " instead of "DLDLMRES"\n",
+                              PLDLMRES(&reply->lock_desc.l_resource),
+                              PLDLMRES(lock->l_resource));
+
+                       rc = ldlm_lock_change_resource(ns, lock,
+                                       &reply->lock_desc.l_resource.lr_name);
+                       if (rc || lock->l_resource == NULL)
+                               GOTO(cleanup, rc = -ENOMEM);
+                       LDLM_DEBUG(lock, "client-side enqueue, new resource");
+               }
                 if (with_policy)
                         if (!(type == LDLM_IBITS &&
                               !(exp_connect_flags(exp) & OBD_CONNECT_IBITS)))
@@ -661,7 +657,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
               * bug 7311). */
              (LIBLUSTRE_CLIENT && type == LDLM_EXTENT)) {
                  lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING |  LDLM_FL_BL_AST;
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
                  unlock_res_and_lock(lock);
                  LDLM_DEBUG(lock, "enqueue reply includes blocking AST");
          }
@@ -723,7 +719,7 @@ static inline int ldlm_req_handles_avail(int req_size, int off)
  {
          int avail;
  
-        avail = min_t(int, LDLM_MAXREQSIZE, CFS_PAGE_SIZE - 512) - req_size;
+       avail = min_t(int, LDLM_MAXREQSIZE, PAGE_CACHE_SIZE - 512) - req_size;
          if (likely(avail >= 0))
                  avail /= (int)sizeof(struct lustre_handle);
          else
@@ -890,17 +886,16 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                  LDLM_DEBUG(lock, "client-side enqueue START");
                  LASSERT(exp == lock->l_conn_export);
          } else {
-                const struct ldlm_callback_suite cbs = {
-                        .lcs_completion = einfo->ei_cb_cp,
-                        .lcs_blocking   = einfo->ei_cb_bl,
-                        .lcs_glimpse    = einfo->ei_cb_gl,
-                        .lcs_weigh      = einfo->ei_cb_wg
-                };
-                lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
-                                        einfo->ei_mode, &cbs, einfo->ei_cbdata,
+               const struct ldlm_callback_suite cbs = {
+                       .lcs_completion = einfo->ei_cb_cp,
+                       .lcs_blocking   = einfo->ei_cb_bl,
+                       .lcs_glimpse    = einfo->ei_cb_gl
+               };
+               lock = ldlm_lock_create(ns, res_id, einfo->ei_type,
+                                       einfo->ei_mode, &cbs, einfo->ei_cbdata,
                                         lvb_len, lvb_type);
-                if (lock == NULL)
-                        RETURN(-ENOMEM);
+               if (lock == NULL)
+                       RETURN(-ENOMEM);
                  /* for the local lock, add the reference */
                  ldlm_lock_addref_internal(lock, einfo->ei_mode);
                  ldlm_lock2handle(lock, lockh);
@@ -920,8 +915,13 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                                  lock->l_policy_data = *policy;
                  }
  
-                if (einfo->ei_type == LDLM_EXTENT)
-                        lock->l_req_extent = policy->l_extent;
+               if (einfo->ei_type == LDLM_EXTENT) {
+                       /* extent lock without policy is a bug */
+                       if (policy == NULL)
+                               LBUG();
+
+                       lock->l_req_extent = policy->l_extent;
+               }
                  LDLM_DEBUG(lock, "client-side enqueue START, flags %llx\n",
                            *flags);
          }
@@ -929,7 +929,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
         lock->l_conn_export = exp;
         lock->l_export = NULL;
         lock->l_blocking_ast = einfo->ei_cb_bl;
-       lock->l_flags |= (*flags & LDLM_FL_NO_LRU);
+       lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
  
          /* lock not sent to server yet */
  
@@ -1027,7 +1027,7 @@ static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
                  ldlm_reprocess_all(res);
                  rc = 0;
          } else {
-                rc = EDEADLOCK;
+               rc = LUSTRE_EDEADLK;
          }
          LDLM_DEBUG(lock, "client-side local convert handler END");
          LDLM_LOCK_PUT(lock);
@@ -1099,7 +1099,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, __u32 *flags)
                                  GOTO(out, rc);
                  }
          } else {
-                rc = EDEADLOCK;
+               rc = LUSTRE_EDEADLK;
          }
          EXIT;
   out:
@@ -1127,13 +1127,13 @@ static __u64 ldlm_cli_cancel_local(struct ldlm_lock *lock)
                  LDLM_DEBUG(lock, "client-side cancel");
                  /* Set this flag to prevent others from getting new references*/
                  lock_res_and_lock(lock);
-                lock->l_flags |= LDLM_FL_CBPENDING;
+               ldlm_set_cbpending(lock);
                 local_only = !!(lock->l_flags &
                                 (LDLM_FL_LOCAL_ONLY|LDLM_FL_CANCEL_ON_BLOCK));
-                ldlm_cancel_callback(lock);
-                rc = (lock->l_flags & LDLM_FL_BL_AST) ?
-                        LDLM_FL_BL_AST : LDLM_FL_CANCELING;
-                unlock_res_and_lock(lock);
+               ldlm_cancel_callback(lock);
+               rc = (ldlm_is_bl_ast(lock)) ?
+                       LDLM_FL_BL_AST : LDLM_FL_CANCELING;
+               unlock_res_and_lock(lock);
  
                  if (local_only) {
                          CDEBUG(D_DLMTRACE, "not sending request (at caller's "
@@ -1252,7 +1252,7 @@ int ldlm_cli_cancel_req(struct obd_export *exp, cfs_list_t *cancels,
                  } else {
                          rc = ptlrpc_queue_wait(req);
                  }
-                if (rc == ESTALE) {
+               if (rc == LUSTRE_ESTALE) {
                          CDEBUG(D_DLMTRACE, "client/server (nid %s) "
                                 "out of sync -- not fatal\n",
                                 libcfs_nid2str(req->rq_import->
@@ -1360,10 +1360,10 @@ int ldlm_cli_cancel(struct lustre_handle *lockh,
          }
  
          rc = ldlm_cli_cancel_local(lock);
-       if (rc == LDLM_FL_LOCAL_ONLY) {
-                LDLM_LOCK_RELEASE(lock);
+       if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
+               LDLM_LOCK_RELEASE(lock);
                 RETURN(0);
-        }
+       }
         /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
          * RPC which goes to canceld portal, so we can cancel other LRU locks
          * here and send them all as one LDLM_CANCEL RPC. */
@@ -1448,25 +1448,27 @@ static ldlm_policy_res_t ldlm_cancel_no_wait_policy(struct ldlm_namespace *ns,
                                                      int unused, int added,
                                                      int count)
  {
-        ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
-        ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
-        lock_res_and_lock(lock);
-
-        /* don't check added & count since we want to process all locks
-         * from unused list */
-        switch (lock->l_resource->lr_type) {
-                case LDLM_EXTENT:
-                case LDLM_IBITS:
-                        if (cb && cb(lock))
-                                break;
-                default:
-                        result = LDLM_POLICY_SKIP_LOCK;
-                        lock->l_flags |= LDLM_FL_SKIPPED;
-                        break;
-        }
+       ldlm_policy_res_t result = LDLM_POLICY_CANCEL_LOCK;
+       ldlm_cancel_for_recovery cb = ns->ns_cancel_for_recovery;
+
+       /* don't check added & count since we want to process all locks
+        * from unused list.
+        * It's fine to read lock->l_resource without taking the lock:
+        * the lock has already been granted, so l_resource won't change. */
+       switch (lock->l_resource->lr_type) {
+               case LDLM_EXTENT:
+               case LDLM_IBITS:
+                       if (cb && cb(lock))
+                               break;
+               default:
+                       result = LDLM_POLICY_SKIP_LOCK;
+                       lock_res_and_lock(lock);
+                       ldlm_set_skipped(lock);
+                       unlock_res_and_lock(lock);
+                       break;
+       }
  
-        unlock_res_and_lock(lock);
-        RETURN(result);
+       RETURN(result);
  }
  
  /**
@@ -1661,26 +1663,26 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                  cfs_list_for_each_entry_safe(lock, next, &ns->ns_unused_list,
                                              l_lru) {
                          /* No locks which got blocking requests. */
-                        LASSERT(!(lock->l_flags & LDLM_FL_BL_AST));
+                       LASSERT(!ldlm_is_bl_ast(lock));
  
-                        if (flags & LDLM_CANCEL_NO_WAIT &&
-                            lock->l_flags & LDLM_FL_SKIPPED)
-                                /* already processed */
-                                continue;
+                       if (flags & LDLM_CANCEL_NO_WAIT &&
+                           ldlm_is_skipped(lock))
+                               /* already processed */
+                               continue;
  
                         /* Somebody is already doing CANCEL. No need for this
                          * lock in LRU, do not traverse it again. */
-                        if (!(lock->l_flags & LDLM_FL_CANCELING))
+                       if (!ldlm_is_canceling(lock))
                                  break;
  
-                        ldlm_lock_remove_from_lru_nolock(lock);
-                }
-                if (&lock->l_lru == &ns->ns_unused_list)
-                        break;
+                       ldlm_lock_remove_from_lru_nolock(lock);
+               }
+               if (&lock->l_lru == &ns->ns_unused_list)
+                       break;
  
-                LDLM_LOCK_GET(lock);
+               LDLM_LOCK_GET(lock);
                 spin_unlock(&ns->ns_lock);
-                lu_ref_add(&lock->l_reference, __FUNCTION__, cfs_current());
+               lu_ref_add(&lock->l_reference, __FUNCTION__, current);
  
                 /* Pass the lock through the policy filter and see if it
                  * should stay in LRU.
@@ -1695,64 +1697,63 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                  * old locks, but additionally choose them by
                  * their weight. Big extent locks will stay in
                  * the cache. */
-                result = pf(ns, lock, unused, added, count);
-                if (result == LDLM_POLICY_KEEP_LOCK) {
-                        lu_ref_del(&lock->l_reference,
-                                   __FUNCTION__, cfs_current());
-                        LDLM_LOCK_RELEASE(lock);
+               result = pf(ns, lock, unused, added, count);
+               if (result == LDLM_POLICY_KEEP_LOCK) {
+                       lu_ref_del(&lock->l_reference,
+                                  __FUNCTION__, current);
+                       LDLM_LOCK_RELEASE(lock);
                         spin_lock(&ns->ns_lock);
                         break;
                 }
                 if (result == LDLM_POLICY_SKIP_LOCK) {
                         lu_ref_del(&lock->l_reference,
-                                  __func__, cfs_current());
+                                  __func__, current);
                         LDLM_LOCK_RELEASE(lock);
                         spin_lock(&ns->ns_lock);
-                        continue;
-                }
+                       continue;
+               }
  
-                lock_res_and_lock(lock);
-                /* Check flags again under the lock. */
-                if ((lock->l_flags & LDLM_FL_CANCELING) ||
-                    (ldlm_lock_remove_from_lru(lock) == 0)) {
+               lock_res_and_lock(lock);
+               /* Check flags again under the lock. */
+               if (ldlm_is_canceling(lock) ||
+                   (ldlm_lock_remove_from_lru(lock) == 0)) {
                         /* Another thread is removing lock from LRU, or
                          * somebody is already doing CANCEL, or there
                          * is a blocking request which will send cancel
                          * by itself, or the lock is no longer unused. */
-                        unlock_res_and_lock(lock);
-                        lu_ref_del(&lock->l_reference,
-                                   __FUNCTION__, cfs_current());
-                        LDLM_LOCK_RELEASE(lock);
+                       unlock_res_and_lock(lock);
+                       lu_ref_del(&lock->l_reference, __FUNCTION__, current);
+                       LDLM_LOCK_RELEASE(lock);
                         spin_lock(&ns->ns_lock);
-                        continue;
-                }
-                LASSERT(!lock->l_readers && !lock->l_writers);
-
-                /* If we have chosen to cancel this lock voluntarily, we
-                 * better send cancel notification to server, so that it
-                 * frees appropriate state. This might lead to a race
-                 * where while we are doing cancel here, server is also
-                 * silently cancelling this lock. */
-                lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
-
-                /* Setting the CBPENDING flag is a little misleading,
-                 * but prevents an important race; namely, once
-                 * CBPENDING is set, the lock can accumulate no more
-                 * readers/writers. Since readers and writers are
-                 * already zero here, ldlm_lock_decref() won't see
-                 * this flag and call l_blocking_ast */
-                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
-
-                /* We can't re-add to l_lru as it confuses the
-                 * refcounting in ldlm_lock_remove_from_lru() if an AST
-                 * arrives after we drop lr_lock below. We use l_bl_ast
-                 * and can't use l_pending_chain as it is used both on
-                 * server and client nevertheless bug 5666 says it is
-                 * used only on server */
-                LASSERT(cfs_list_empty(&lock->l_bl_ast));
-                cfs_list_add(&lock->l_bl_ast, cancels);
-                unlock_res_and_lock(lock);
-                lu_ref_del(&lock->l_reference, __FUNCTION__, cfs_current());
+                       continue;
+               }
+               LASSERT(!lock->l_readers && !lock->l_writers);
+
+               /* If we have chosen to cancel this lock voluntarily, we
+                * better send cancel notification to server, so that it
+                * frees appropriate state. This might lead to a race
+                * where while we are doing cancel here, server is also
+                * silently cancelling this lock. */
+               ldlm_clear_cancel_on_block(lock);
+
+               /* Setting the CBPENDING flag is a little misleading,
+                * but prevents an important race; namely, once
+                * CBPENDING is set, the lock can accumulate no more
+                * readers/writers. Since readers and writers are
+                * already zero here, ldlm_lock_decref() won't see
+                * this flag and call l_blocking_ast */
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
+
+               /* We can't re-add to l_lru as it confuses the
+                * refcounting in ldlm_lock_remove_from_lru() if an AST
+                * arrives after we drop lr_lock below. We use l_bl_ast
+                * and can't use l_pending_chain as it is used both on
+                * server and client nevertheless bug 5666 says it is
+                * used only on server */
+               LASSERT(cfs_list_empty(&lock->l_bl_ast));
+               cfs_list_add(&lock->l_bl_ast, cancels);
+               unlock_res_and_lock(lock);
+               lu_ref_del(&lock->l_reference, __FUNCTION__, current);
                 spin_lock(&ns->ns_lock);
                 added++;
                 unused--;
@@ -1807,10 +1808,10 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr,
   * list.
   */
  int ldlm_cancel_resource_local(struct ldlm_resource *res,
-                               cfs_list_t *cancels,
-                               ldlm_policy_data_t *policy,
-                               ldlm_mode_t mode, int lock_flags,
-                               ldlm_cancel_flags_t cancel_flags, void *opaque)
+                              cfs_list_t *cancels,
+                              ldlm_policy_data_t *policy,
+                              ldlm_mode_t mode, __u64 lock_flags,
+                              ldlm_cancel_flags_t cancel_flags, void *opaque)
  {
          struct ldlm_lock *lock;
          int count = 0;
@@ -1830,9 +1831,8 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
  
                 /* If somebody is already doing CANCEL, or blocking AST came,
                  * skip this lock. */
-                if (lock->l_flags & LDLM_FL_BL_AST ||
-                    lock->l_flags & LDLM_FL_CANCELING)
-                        continue;
+               if (ldlm_is_bl_ast(lock) || ldlm_is_canceling(lock))
+                       continue;
  
                  if (lockmode_compat(lock->l_granted_mode, mode))
                          continue;
@@ -1844,9 +1844,9 @@ int ldlm_cancel_resource_local(struct ldlm_resource *res,
                        policy->l_inodebits.bits))
                          continue;
  
-                /* See CBPENDING comment in ldlm_cancel_lru */
-                lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
-                                 lock_flags;
+               /* See CBPENDING comment in ldlm_cancel_lru */
+               lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING |
+                                lock_flags;
  
                  LASSERT(cfs_list_empty(&lock->l_bl_ast));
                  cfs_list_add(&lock->l_bl_ast, cancels);
@@ -1947,7 +1947,8 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                                             0, flags | LCF_BL_AST, opaque);
          rc = ldlm_cli_cancel_list(&cancels, count, NULL, flags);
          if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel_unused_resource: %d\n", rc);
+               CERROR("canceling unused lock "DLDLMRES": rc = %d\n",
+                      PLDLMRES(res), rc);
  
          LDLM_RESOURCE_DELREF(res);
          ldlm_resource_putref(res);
@@ -1961,21 +1962,16 @@ struct ldlm_cli_cancel_arg {
  };
  
  static int ldlm_cli_hash_cancel_unused(cfs_hash_t *hs, cfs_hash_bd_t *bd,
-                                       cfs_hlist_node_t *hnode, void *arg)
+                                      cfs_hlist_node_t *hnode, void *arg)
  {
-        struct ldlm_resource           *res = cfs_hash_object(hs, hnode);
-        struct ldlm_cli_cancel_arg     *lc = arg;
-        int                             rc;
-
-        rc = ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
-                                             NULL, LCK_MINMODE,
-                                             lc->lc_flags, lc->lc_opaque);
-        if (rc != 0) {
-                CERROR("ldlm_cli_cancel_unused ("LPU64"): %d\n",
-                       res->lr_name.name[0], rc);
-        }
-        /* must return 0 for hash iteration */
-        return 0;
+       struct ldlm_resource           *res = cfs_hash_object(hs, hnode);
+       struct ldlm_cli_cancel_arg     *lc = arg;
+
+       ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
+                                       NULL, LCK_MINMODE, lc->lc_flags,
+                                       lc->lc_opaque);
+       /* must return 0 for hash iteration */
+       return 0;
  }
  
  /**
@@ -2137,18 +2133,17 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
  }
  
  static int replay_lock_interpret(const struct lu_env *env,
-                                 struct ptlrpc_request *req,
-                                 struct ldlm_async_args *aa, int rc)
+                                struct ptlrpc_request *req,
+                                struct ldlm_async_args *aa, int rc)
  {
-        struct ldlm_lock     *lock;
-        struct ldlm_reply    *reply;
-        struct obd_export    *exp;
-
-        ENTRY;
-        cfs_atomic_dec(&req->rq_import->imp_replay_inflight);
-        if (rc != ELDLM_OK)
-                GOTO(out, rc);
+       struct ldlm_lock     *lock;
+       struct ldlm_reply    *reply;
+       struct obd_export    *exp;
  
+       ENTRY;
+       atomic_dec(&req->rq_import->imp_replay_inflight);
+       if (rc != ELDLM_OK)
+               GOTO(out, rc);
  
          reply = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
          if (reply == NULL)
@@ -2198,7 +2193,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
  
  
          /* Bug 11974: Do not replay a lock which is actively being canceled */
-        if (lock->l_flags & LDLM_FL_CANCELING) {
+       if (ldlm_is_canceling(lock)) {
                  LDLM_DEBUG(lock, "Not replaying canceled lock:");
                  RETURN(0);
          }
@@ -2206,7 +2201,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
          /* If this is reply-less callback lock, we cannot replay it, since
           * server might have long dropped it, but notification of that event was
           * lost by network. (and server granted conflicting lock already) */
-        if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+       if (ldlm_is_cancel_on_block(lock)) {
                  LDLM_DEBUG(lock, "Not replaying reply-less lock:");
                  ldlm_lock_cancel(lock);
                  RETURN(0);
@@ -2257,18 +2252,18 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
           * also, we mark the request to be put on a dedicated
           * queue to be processed after all request replayes.
           * bug 6063 */
-        lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
+       lustre_msg_set_flags(req->rq_reqmsg, MSG_REQ_REPLAY_DONE);
  
-        LDLM_DEBUG(lock, "replaying lock:");
+       LDLM_DEBUG(lock, "replaying lock:");
  
-        cfs_atomic_inc(&req->rq_import->imp_replay_inflight);
-        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
-        aa = ptlrpc_req_async_args(req);
-        aa->lock_handle = body->lock_handle[0];
-        req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
-        ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
+       atomic_inc(&req->rq_import->imp_replay_inflight);
+       CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
+       aa = ptlrpc_req_async_args(req);
+       aa->lock_handle = body->lock_handle[0];
+       req->rq_interpret_reply = (ptlrpc_interpterer_t)replay_lock_interpret;
+       ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
  
-        RETURN(0);
+       RETURN(0);
  }
  
  /**
@@ -2302,39 +2297,39 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
  
  int ldlm_replay_locks(struct obd_import *imp)
  {
-        struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
-        CFS_LIST_HEAD(list);
-        struct ldlm_lock *lock, *next;
-        int rc = 0;
+       struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
+       CFS_LIST_HEAD(list);
+       struct ldlm_lock *lock, *next;
+       int rc = 0;
  
-        ENTRY;
+       ENTRY;
  
-        LASSERT(cfs_atomic_read(&imp->imp_replay_inflight) == 0);
+       LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
  
-        /* don't replay locks if import failed recovery */
-        if (imp->imp_vbr_failed)
-                RETURN(0);
+       /* don't replay locks if import failed recovery */
+       if (imp->imp_vbr_failed)
+               RETURN(0);
  
-        /* ensure this doesn't fall to 0 before all have been queued */
-        cfs_atomic_inc(&imp->imp_replay_inflight);
+       /* ensure this doesn't fall to 0 before all have been queued */
+       atomic_inc(&imp->imp_replay_inflight);
  
-        if (ldlm_cancel_unused_locks_before_replay)
-                ldlm_cancel_unused_locks_for_replay(ns);
+       if (ldlm_cancel_unused_locks_before_replay)
+               ldlm_cancel_unused_locks_for_replay(ns);
  
-        ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
+       ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
  
-        cfs_list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
-                cfs_list_del_init(&lock->l_pending_chain);
-                if (rc) {
-                        LDLM_LOCK_RELEASE(lock);
-                        continue; /* or try to do the rest? */
-                }
-                rc = replay_one_lock(imp, lock);
-                LDLM_LOCK_RELEASE(lock);
-        }
+       cfs_list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
+               cfs_list_del_init(&lock->l_pending_chain);
+               if (rc) {
+                       LDLM_LOCK_RELEASE(lock);
+                       continue; /* or try to do the rest? */
+               }
+               rc = replay_one_lock(imp, lock);
+               LDLM_LOCK_RELEASE(lock);
+       }
  
-        cfs_atomic_dec(&imp->imp_replay_inflight);
+       atomic_dec(&imp->imp_replay_inflight);
  
-        RETURN(rc);
+       RETURN(rc);
  }
  EXPORT_SYMBOL(ldlm_replay_locks);